8167673: [s390] The s390 port.
author goetz
Thu, 13 Oct 2016 14:49:34 +0200
changeset 42065 6032b31e3719
parent 42064 a530dbabe64f
child 42066 46f6db750b17
child 42069 4bf36a6af07b
8167673: [s390] The s390 port. Summary: template interpreter, C1, C2 Reviewed-by: kvn, simonis
hotspot/src/cpu/s390/vm/abstractInterpreter_s390.cpp
hotspot/src/cpu/s390/vm/assembler_s390.cpp
hotspot/src/cpu/s390/vm/assembler_s390.hpp
hotspot/src/cpu/s390/vm/assembler_s390.inline.hpp
hotspot/src/cpu/s390/vm/bytes_s390.hpp
hotspot/src/cpu/s390/vm/c1_CodeStubs_s390.cpp
hotspot/src/cpu/s390/vm/c1_Defs_s390.hpp
hotspot/src/cpu/s390/vm/c1_FpuStackSim_s390.hpp
hotspot/src/cpu/s390/vm/c1_FrameMap_s390.cpp
hotspot/src/cpu/s390/vm/c1_FrameMap_s390.hpp
hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.cpp
hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.hpp
hotspot/src/cpu/s390/vm/c1_LIRGenerator_s390.cpp
hotspot/src/cpu/s390/vm/c1_LIR_s390.cpp
hotspot/src/cpu/s390/vm/c1_LinearScan_s390.cpp
hotspot/src/cpu/s390/vm/c1_LinearScan_s390.hpp
hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.cpp
hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.hpp
hotspot/src/cpu/s390/vm/c1_Runtime1_s390.cpp
hotspot/src/cpu/s390/vm/c1_globals_s390.hpp
hotspot/src/cpu/s390/vm/c2_globals_s390.hpp
hotspot/src/cpu/s390/vm/c2_init_s390.cpp
hotspot/src/cpu/s390/vm/codeBuffer_s390.hpp
hotspot/src/cpu/s390/vm/compiledIC_s390.cpp
hotspot/src/cpu/s390/vm/copy_s390.hpp
hotspot/src/cpu/s390/vm/debug_s390.cpp
hotspot/src/cpu/s390/vm/depChecker_s390.hpp
hotspot/src/cpu/s390/vm/disassembler_s390.hpp
hotspot/src/cpu/s390/vm/frame_s390.cpp
hotspot/src/cpu/s390/vm/frame_s390.hpp
hotspot/src/cpu/s390/vm/frame_s390.inline.hpp
hotspot/src/cpu/s390/vm/globalDefinitions_s390.hpp
hotspot/src/cpu/s390/vm/globals_s390.hpp
hotspot/src/cpu/s390/vm/icBuffer_s390.cpp
hotspot/src/cpu/s390/vm/icache_s390.cpp
hotspot/src/cpu/s390/vm/icache_s390.hpp
hotspot/src/cpu/s390/vm/interp_masm_s390.cpp
hotspot/src/cpu/s390/vm/interp_masm_s390.hpp
hotspot/src/cpu/s390/vm/interpreterRT_s390.cpp
hotspot/src/cpu/s390/vm/interpreterRT_s390.hpp
hotspot/src/cpu/s390/vm/javaFrameAnchor_s390.hpp
hotspot/src/cpu/s390/vm/jniFastGetField_s390.cpp
hotspot/src/cpu/s390/vm/jniTypes_s390.hpp
hotspot/src/cpu/s390/vm/jni_s390.h
hotspot/src/cpu/s390/vm/jvmciCodeInstaller_s390.cpp
hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp
hotspot/src/cpu/s390/vm/macroAssembler_s390.hpp
hotspot/src/cpu/s390/vm/macroAssembler_s390.inline.hpp
hotspot/src/cpu/s390/vm/metaspaceShared_s390.cpp
hotspot/src/cpu/s390/vm/methodHandles_s390.cpp
hotspot/src/cpu/s390/vm/methodHandles_s390.hpp
hotspot/src/cpu/s390/vm/nativeInst_s390.cpp
hotspot/src/cpu/s390/vm/nativeInst_s390.hpp
hotspot/src/cpu/s390/vm/registerMap_s390.hpp
hotspot/src/cpu/s390/vm/registerSaver_s390.hpp
hotspot/src/cpu/s390/vm/register_definitions_s390.cpp
hotspot/src/cpu/s390/vm/register_s390.cpp
hotspot/src/cpu/s390/vm/register_s390.hpp
hotspot/src/cpu/s390/vm/relocInfo_s390.cpp
hotspot/src/cpu/s390/vm/relocInfo_s390.hpp
hotspot/src/cpu/s390/vm/runtime_s390.cpp
hotspot/src/cpu/s390/vm/s390.ad
hotspot/src/cpu/s390/vm/sharedRuntime_s390.cpp
hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp
hotspot/src/cpu/s390/vm/stubRoutines_s390.cpp
hotspot/src/cpu/s390/vm/stubRoutines_s390.hpp
hotspot/src/cpu/s390/vm/templateInterpreterGenerator_s390.cpp
hotspot/src/cpu/s390/vm/templateTable_s390.cpp
hotspot/src/cpu/s390/vm/templateTable_s390.hpp
hotspot/src/cpu/s390/vm/vmStructs_s390.hpp
hotspot/src/cpu/s390/vm/vm_version_s390.cpp
hotspot/src/cpu/s390/vm/vm_version_s390.hpp
hotspot/src/cpu/s390/vm/vmreg_s390.cpp
hotspot/src/cpu/s390/vm/vmreg_s390.hpp
hotspot/src/cpu/s390/vm/vmreg_s390.inline.hpp
hotspot/src/cpu/s390/vm/vtableStubs_s390.cpp
hotspot/src/os_cpu/linux_s390/vm/atomic_linux_s390.hpp
hotspot/src/os_cpu/linux_s390/vm/bytes_linux_s390.inline.hpp
hotspot/src/os_cpu/linux_s390/vm/globals_linux_s390.hpp
hotspot/src/os_cpu/linux_s390/vm/orderAccess_linux_s390.inline.hpp
hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.cpp
hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.hpp
hotspot/src/os_cpu/linux_s390/vm/prefetch_linux_s390.inline.hpp
hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.cpp
hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.hpp
hotspot/src/os_cpu/linux_s390/vm/vmStructs_linux_s390.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/abstractInterpreter_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/constMethod.hpp"
+#include "oops/method.hpp"
+#include "runtime/frame.inline.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/macros.hpp"
+
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
+  int i = 0;
+  switch (type) {
+    case T_BOOLEAN: i = 0; break;
+    case T_CHAR   : i = 1; break;
+    case T_BYTE   : i = 2; break;
+    case T_SHORT  : i = 3; break;
+    case T_INT    : i = 4; break;
+    case T_LONG   : i = 5; break;
+    case T_VOID   : i = 6; break;
+    case T_FLOAT  : i = 7; break;
+    case T_DOUBLE : i = 8; break;
+    case T_OBJECT : i = 9; break;
+    case T_ARRAY  : i = 9; break;
+    default       : ShouldNotReachHere();
+  }
+  assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
+  return i;
+}
+
+bool AbstractInterpreter::can_be_compiled(methodHandle m) {
+  // No special entry points that preclude compilation.
+  return true;
+}
+
+// How much stack a method top interpreter activation needs in words.
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
+
+  // We have to size the following 2 frames:
+  //
+  //   [TOP_IJAVA_FRAME_ABI]
+  //   [ENTRY_FRAME]
+  //
+  // This expands to (see frame_s390.hpp):
+  //
+  //   [TOP_IJAVA_FRAME_ABI]
+  //   [operand stack]                 > stack
+  //   [monitors]      (optional)      > monitors
+  //   [IJAVA_STATE]                   > interpreter_state
+  //   [PARENT_IJAVA_FRAME_ABI]
+  //   [callee's locals w/o arguments] \ locals
+  //   [outgoing arguments]            /
+  //   [ENTRY_FRAME_LOCALS]
+
+  int locals = method->max_locals() * BytesPerWord;
+  int interpreter_state = frame::z_ijava_state_size;
+
+  int stack = method->max_stack() * BytesPerWord;
+  int monitors = method->is_synchronized() ? frame::interpreter_frame_monitor_size_in_bytes() : 0;
+
+  int total_bytes =
+    frame::z_top_ijava_frame_abi_size +
+    stack +
+    monitors +
+    interpreter_state +
+    frame::z_parent_ijava_frame_abi_size +
+    locals +
+    frame::z_entry_frame_locals_size;
+
+  return (total_bytes/BytesPerWord);
+}
+
+// Returns number of stackElementWords needed for the interpreter frame with the
+// given sections.
+// This overestimates the stack by one slot in case of alignments.
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in AbstractInterpreterGenerator::generate_method_entry.
+
+  assert((Interpreter::stackElementSize == frame::alignment_in_bytes), "must align frame size");
+  const int abi_scratch = is_top_frame ? (frame::z_top_ijava_frame_abi_size    / Interpreter::stackElementSize) :
+                                         (frame::z_parent_ijava_frame_abi_size / Interpreter::stackElementSize);
+
+  const int size =
+    max_stack                                                 +
+    (callee_locals - callee_params)                           + // Already counted in max_stack().
+    monitors * frame::interpreter_frame_monitor_size()        +
+    abi_scratch                                               +
+    frame::z_ijava_state_size / Interpreter::stackElementSize;
+
+  // Fixed size of an interpreter frame.
+  return size;
+}
+
+// Fills a skeletal interpreter frame generated during deoptimizations.
+//
+// Parameters:
+//
+// interpreter_frame != NULL:
+//   set up the method, locals, and monitors.
+//   The frame interpreter_frame, if not NULL, is guaranteed to be the
+//   right size, as determined by a previous call to this method.
+//   It is also guaranteed to be walkable even though it is in a skeletal state
+//
+// is_top_frame == true:
+//   We're processing the *oldest* interpreter frame!
+//
+// popframe_extra_args:
+//   If this is != 0 we are returning to a deoptimized frame by popping
+//   off the callee frame. We want to re-execute the call that called the
+//   callee interpreted, but since the return to the interpreter would pop
+//   the arguments off, advance the esp by dummy popframe_extra_args slots.
+//   Popping off those will establish the stack layout as it was before the call.
+//
+
+void AbstractInterpreter::layout_activation(Method* method,
+                                            int tempcount,
+                                            int popframe_extra_args,
+                                            int moncount,
+                                            int caller_actual_parameters,
+                                            int callee_param_count,
+                                            int callee_locals_count,
+                                            frame* caller,
+                                            frame* interpreter_frame,
+                                            bool is_top_frame,
+                                            bool is_bottom_frame) {
+  // TOP_IJAVA_FRAME:
+  //
+  //    0 [TOP_IJAVA_FRAME_ABI]         -+
+  //   16 [operand stack]                | size
+  //      [monitors]      (optional)     |
+  //      [IJAVA_STATE]                 -+
+  //      Note: own locals are located in the caller frame.
+  //
+  // PARENT_IJAVA_FRAME:
+  //
+  //    0 [PARENT_IJAVA_FRAME_ABI]                    -+
+  //      [callee's locals w/o arguments]              |
+  //      [outgoing arguments]                         | size
+  //      [used part of operand stack w/o arguments]   |
+  //      [monitors]      (optional)                   |
+  //      [IJAVA_STATE]                               -+
+  //
+
+  // Now we know our caller, calc the exact frame layout and size
+  // z_ijava_state->locals - i*BytesPerWord points to i-th Java local (i starts at 0).
+  intptr_t* locals_base = (caller->is_interpreted_frame())
+    ? (caller->interpreter_frame_tos_address() + caller_actual_parameters - 1)
+    : (caller->sp()                            + method->max_locals()     - 1 +
+       frame::z_parent_ijava_frame_abi_size / Interpreter::stackElementSize);
+
+  intptr_t* monitor_base = (intptr_t*)((address)interpreter_frame->fp() - frame::z_ijava_state_size);
+  intptr_t* monitor      = monitor_base - (moncount * frame::interpreter_frame_monitor_size());
+  intptr_t* operand_stack_base = monitor;
+  intptr_t* tos          = operand_stack_base - tempcount - popframe_extra_args;
+  intptr_t* top_frame_sp =
+    operand_stack_base - method->max_stack() - frame::z_top_ijava_frame_abi_size / Interpreter::stackElementSize;
+  intptr_t* sender_sp;
+  if (caller->is_interpreted_frame()) {
+    sender_sp = caller->interpreter_frame_top_frame_sp();
+  } else if (caller->is_compiled_frame()) {
+    sender_sp = caller->fp() - caller->cb()->frame_size();
+    // The bottom frame's sender_sp is its caller's unextended_sp.
+    // It was already set when its skeleton was pushed (see push_skeleton_frames()).
+    // Note: the unextended_sp is required by nmethod::orig_pc_addr().
+    assert(is_bottom_frame && (sender_sp == caller->unextended_sp()),
+           "must initialize sender_sp of bottom skeleton frame when pushing it");
+  } else {
+    assert(caller->is_entry_frame(), "is there a new frame type??");
+    sender_sp = caller->sp(); // Call_stub only uses its fp.
+  }
+
+  interpreter_frame->interpreter_frame_set_method(method);
+  interpreter_frame->interpreter_frame_set_mirror(method->method_holder()->java_mirror());
+  interpreter_frame->interpreter_frame_set_locals(locals_base);
+  interpreter_frame->interpreter_frame_set_monitor_end((BasicObjectLock *)monitor);
+  *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache();
+  interpreter_frame->interpreter_frame_set_tos_address(tos);
+  interpreter_frame->interpreter_frame_set_sender_sp(sender_sp);
+  interpreter_frame->interpreter_frame_set_top_frame_sp(top_frame_sp);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/assembler_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif
+
+// Convention: Use Z_R0 and Z_R1 instead of Z_scratch_* in all
+// assembler_s390.* files.
+
+// Convert the raw encoding form into the form expected by the
+// constructor for Address. This is called by adlc generated code.
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
+  assert(scale == 0, "Scale should not be used on z/Architecture. The call to make_raw is "
+         "generated by adlc and this must mirror all features of Operands from machnode.hpp.");
+  assert(disp_reloc == relocInfo::none, "not implemented on z/Architecture.");
+
+  Address madr(as_Register(base), as_Register(index), in_ByteSize(disp));
+  return madr;
+}
+
+int AbstractAssembler::code_fill_byte() {
+  return 0x00; // Illegal instruction 0x00000000.
+}
+
+// Condition code masks. Details see enum branch_condition.
+// Although this method is meant for INT CCs, the Overflow/Ordered
+// bit in the masks has to be considered. The CC might have been set
+// by a float operation, but is evaluated while calculating an integer
+// result. See elementary test TestFloat.isNotEqual(FF)Z for example.
+Assembler::branch_condition Assembler::inverse_condition(Assembler::branch_condition cc) {
+  Assembler::branch_condition unordered_bit = (Assembler::branch_condition)(cc & bcondNotOrdered);
+  Assembler::branch_condition inverse_cc;
+
+  // Some are commented out to avoid duplicate labels.
+  switch (cc) {
+    case bcondNever       : inverse_cc = bcondAlways;      break;  //  0 -> 15
+    case bcondAlways      : inverse_cc = bcondNever;       break;  // 15 ->  0
+
+    case bcondOverflow    : inverse_cc = bcondNotOverflow; break;  //  1 -> 14
+    case bcondNotOverflow : inverse_cc = bcondOverflow;    break;  // 14 ->  1
+
+    default :
+      switch ((Assembler::branch_condition)(cc & bcondOrdered)) {
+        case bcondEqual       : inverse_cc = bcondNotEqual;  break;  //  8 ->  6
+        // case bcondZero        :
+        // case bcondAllZero     :
+
+        case bcondNotEqual    : inverse_cc = bcondEqual;     break;  //  6 ->  8
+        // case bcondNotZero     :
+        // case bcondMixed       :
+
+        case bcondLow         : inverse_cc = bcondNotLow;    break;  //  4 -> 10
+        // case bcondNegative    :
+
+        case bcondNotLow      : inverse_cc = bcondLow;       break;  // 10 ->  4
+        // case bcondNotNegative :
+
+        case bcondHigh        : inverse_cc = bcondNotHigh;   break;  //  2 -> 12
+        // case bcondPositive    :
+
+        case bcondNotHigh     : inverse_cc = bcondHigh;      break;  // 12 ->  2
+        // case bcondNotPositive :
+
+        default :
+          fprintf(stderr, "inverse_condition(%d)\n", (int)cc);
+          fflush(stderr);
+          ShouldNotReachHere();
+          return bcondNever;
+      }
+      // If cc is even, inverse_cc must be odd.
+      if (!unordered_bit) {
+        inverse_cc = (Assembler::branch_condition)(inverse_cc | bcondNotOrdered);
+      }
+      break;
+  }
+  return inverse_cc;
+}
+
+Assembler::branch_condition Assembler::inverse_float_condition(Assembler::branch_condition cc) {
+  Assembler::branch_condition  inverse_cc;
+
+  switch (cc) {
+    case bcondNever       : inverse_cc = bcondAlways;      break;  //  0
+    case bcondAlways      : inverse_cc = bcondNever;       break;  // 15
+
+    case bcondNotOrdered  : inverse_cc = bcondOrdered;     break;  // 14
+    case bcondOrdered     : inverse_cc = bcondNotOrdered;  break;  //  1
+
+    case bcondEqual                      : inverse_cc = (branch_condition)(bcondNotEqual + bcondNotOrdered);  break; //  8
+    case bcondNotEqual + bcondNotOrdered : inverse_cc = bcondEqual;  break;                                          //  7
+
+    case bcondLow      + bcondNotOrdered : inverse_cc = (branch_condition)(bcondHigh + bcondEqual);      break;      //  5
+    case bcondNotLow                     : inverse_cc = (branch_condition)(bcondLow  + bcondNotOrdered); break;      // 10
+
+    case bcondHigh                       : inverse_cc = (branch_condition)(bcondLow  + bcondNotOrdered + bcondEqual); break;  //  2
+    case bcondNotHigh  + bcondNotOrdered : inverse_cc = bcondHigh; break;                                                     // 13
+
+    default :
+      fprintf(stderr, "inverse_float_condition(%d)\n", (int)cc);
+      fflush(stderr);
+      ShouldNotReachHere();
+      return bcondNever;
+  }
+  return inverse_cc;
+}
+
+#ifdef ASSERT
+void Assembler::print_dbg_msg(outputStream* out, unsigned long inst, const char* msg, int ilen) {
+  out->flush();
+  switch (ilen) {
+    case 2:  out->print_cr("inst = %4.4x, %s",    (unsigned short)inst, msg); break;
+    case 4:  out->print_cr("inst = %8.8x, %s\n",    (unsigned int)inst, msg); break;
+    case 6:  out->print_cr("inst = %12.12lx, %s\n",               inst, msg); break;
+    default: out->print_cr("inst = %16.16lx, %s\n",               inst, msg); break;
+  }
+  out->flush();
+}
+
+void Assembler::dump_code_range(outputStream* out, address pc, const unsigned int range, const char* msg) {
+  out->cr();
+  out->print_cr("-------------------------------");
+  out->print_cr("--  %s", msg);
+  out->print_cr("-------------------------------");
+  out->print_cr("Hex dump    of +/-%d bytes around %p, interval [%p,%p)", range, pc, pc-range, pc+range);
+  os::print_hex_dump(out, pc-range, pc+range, 2);
+
+  out->cr();
+  out->print_cr("Disassembly of +/-%d bytes around %p, interval [%p,%p)", range, pc, pc-range, pc+range);
+  Disassembler::decode(pc, pc + range, out);
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/assembler_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,2530 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_ASSEMBLER_S390_HPP
+#define CPU_S390_VM_ASSEMBLER_S390_HPP
+
+#undef  LUCY_DBG
+
+#define NearLabel Label
+
+// Immediate is an abstraction to represent the various immediate
+// operands which exist on z/Architecture. Neither this class nor
+// instances hereof have an own state. It consists of methods only.
+class Immediate VALUE_OBJ_CLASS_SPEC {
+
+ public:
+    static bool is_simm(int64_t x, unsigned int nbits) {
+      // nbits < 2   --> false
+      // nbits >= 64 --> true
+      assert(2 <= nbits && nbits < 64, "Don't call, use statically known result.");
+      const int64_t min      = -(1L << (nbits-1));
+      const int64_t maxplus1 =  (1L << (nbits-1));
+      return min <= x && x < maxplus1;
+    }
+    static bool is_simm32(int64_t x) {
+      return is_simm(x, 32);
+    }
+    static bool is_simm20(int64_t x) {
+      return is_simm(x, 20);
+    }
+    static bool is_simm16(int64_t x) {
+      return is_simm(x, 16);
+    }
+    static bool is_simm8(int64_t x) {
+      return is_simm(x,  8);
+    }
+
+    // Test if x is within unsigned immediate range for nbits.
+    static bool is_uimm(int64_t x, unsigned int nbits) {
+      // nbits == 0  --> false
+      // nbits >= 64 --> true
+      assert(1 <= nbits && nbits < 64, "don't call, use statically known result");
+      const uint64_t xu       = (unsigned long)x;
+      const uint64_t maxplus1 = 1UL << nbits;
+      return xu < maxplus1; // Unsigned comparison. Negative inputs appear to be very large.
+    }
+    static bool is_uimm32(int64_t x) {
+      return is_uimm(x, 32);
+    }
+    static bool is_uimm16(int64_t x) {
+      return is_uimm(x, 16);
+    }
+    static bool is_uimm12(int64_t x) {
+      return is_uimm(x, 12);
+    }
+    static bool is_uimm8(int64_t x) {
+      return is_uimm(x,  8);
+    }
+};
+
+// Displacement is an abstraction to represent the various
+// displacements which exist with addresses on z/Architecture.
+// Neither this class nor instances hereof have an own state. It
+// consists of methods only.
+class Displacement VALUE_OBJ_CLASS_SPEC {
+
+ public: // These tests are used outside the (Macro)Assembler world, e.g. in ad-file.
+
+  static bool is_longDisp(int64_t x) {  // Fits in a 20-bit displacement field.
+    return Immediate::is_simm20(x);
+  }
+  static bool is_shortDisp(int64_t x) { // Fits in a 12-bit displacement field.
+    return Immediate::is_uimm12(x);
+  }
+  static bool is_validDisp(int64_t x) { // Is a valid displacement, regardless of length constraints.
+    return is_longDisp(x);
+  }
+};
+
+// RelAddr is an abstraction to represent relative addresses in the
+// form they are used on z/Architecture for instructions which access
+// their operand with pc-relative addresses. Neither this class nor
+// instances hereof have an own state. It consists of methods only.
+class RelAddr VALUE_OBJ_CLASS_SPEC {
+
+ private: // No public use at all. Solely for (Macro)Assembler.
+
+  static bool is_in_range_of_RelAddr(address target, address pc, bool shortForm) {
+    // Guard against illegal branch targets, e.g. -1. Occurrences in
+    // CompiledStaticCall and ad-file. Do not assert (it's a test
+    // function!). Just return false in case of illegal operands.
+    if ((((uint64_t)target) & 0x0001L) != 0) return false;
+    if ((((uint64_t)pc)     & 0x0001L) != 0) return false;
+
+    if (shortForm) {
+      return Immediate::is_simm((int64_t)(target-pc), 17); // Relative short addresses can reach +/- 2**16 bytes.
+    } else {
+      return Immediate::is_simm((int64_t)(target-pc), 33); // Relative long addresses can reach +/- 2**32 bytes.
+    }
+  }
+
+  static bool is_in_range_of_RelAddr16(address target, address pc) {
+    return is_in_range_of_RelAddr(target, pc, true);
+  }
+  static bool is_in_range_of_RelAddr16(ptrdiff_t distance) {
+    return is_in_range_of_RelAddr((address)distance, 0, true);
+  }
+
+  static bool is_in_range_of_RelAddr32(address target, address pc) {
+    return is_in_range_of_RelAddr(target, pc, false);
+  }
+  static bool is_in_range_of_RelAddr32(ptrdiff_t distance) {
+    return is_in_range_of_RelAddr((address)distance, 0, false);
+  }
+
+  static int pcrel_off(address target, address pc, bool shortForm) {
+    assert(((uint64_t)target & 0x0001L) == 0, "target of a relative address must be aligned");
+    assert(((uint64_t)pc     & 0x0001L) == 0, "origin of a relative address must be aligned");
+
+    if ((target == NULL) || (target == pc)) {
+      return 0;  // Yet unknown branch destination.
+    } else {
+      guarantee(is_in_range_of_RelAddr(target, pc, shortForm), "target not within reach");
+      return (int)((target - pc)>>1);
+    }
+  }
+
+  static int pcrel_off16(address target, address pc) {
+    return pcrel_off(target, pc, true);
+  }
+  static int pcrel_off16(ptrdiff_t distance) {
+    return pcrel_off((address)distance, 0, true);
+  }
+
+  static int pcrel_off32(address target, address pc) {
+    return pcrel_off(target, pc, false);
+  }
+  static int pcrel_off32(ptrdiff_t distance) {
+    return pcrel_off((address)distance, 0, false);
+  }
+
+  static ptrdiff_t inv_pcrel_off16(int offset) {
+    return ((ptrdiff_t)offset)<<1;
+  }
+
+  static ptrdiff_t inv_pcrel_off32(int offset) {
+    return ((ptrdiff_t)offset)<<1;
+  }
+
+  friend class Assembler;
+  friend class MacroAssembler;
+  friend class NativeGeneralJump;
+};
+
+// Address is an abstraction used to represent a memory location
+// as passed to Z assembler instructions.
+//
+// Note: A register location is represented via a Register, not
+// via an address for efficiency & simplicity reasons.
+class Address VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _base;    // Base register.
+  Register _index;   // Index register
+  intptr_t _disp;    // Constant displacement.
+
+ public:
+  Address() :
+    _base(noreg),
+    _index(noreg),
+    _disp(0) {}
+
+  Address(Register base, Register index, intptr_t disp = 0) :
+    _base(base),
+    _index(index),
+    _disp(disp) {}
+
+  Address(Register base, intptr_t disp = 0) :
+    _base(base),
+    _index(noreg),
+    _disp(disp) {}
+
+  Address(Register base, RegisterOrConstant roc, intptr_t disp = 0) :
+    _base(base),
+    _index(noreg),
+    _disp(disp) {
+    if (roc.is_constant()) _disp += roc.as_constant(); else _index = roc.as_register();
+  }
+
+#ifdef ASSERT
+  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+  Address(Register base, ByteSize disp) :
+    _base(base),
+    _index(noreg),
+    _disp(in_bytes(disp)) {}
+
+  Address(Register base, Register index, ByteSize disp) :
+    _base(base),
+    _index(index),
+    _disp(in_bytes(disp)) {}
+#endif
+
+  // Aborts if disp is a register and base and index are set already.
+  Address plus_disp(RegisterOrConstant disp) const {
+    Address a = (*this);
+    a._disp += disp.constant_or_zero();
+    if (disp.is_register()) {
+      if (a._index == noreg) {
+        a._index = disp.as_register();
+      } else {
+        guarantee(_base == noreg, "can not encode"); a._base = disp.as_register();
+      }
+    }
+    return a;
+  }
+
+  // A call to this is generated by adlc for replacement variable $xxx$$Address.
+  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
+
+  bool is_same_address(Address a) const {
+    return _base == a._base && _index == a._index && _disp == a._disp;
+  }
+
+  // testers
+  bool has_base()  const { return _base  != noreg; }
+  bool has_index() const { return _index != noreg; }
+  bool has_disp()  const { return true; } // There is no "invalid" value.
+
+  bool is_disp12() const { return Immediate::is_uimm12(disp()); }
+  bool is_disp20() const { return Immediate::is_simm20(disp()); }
+  bool is_RSform()  { return has_base() && !has_index() && is_disp12(); }
+  bool is_RSYform() { return has_base() && !has_index() && is_disp20(); }
+  bool is_RXform()  { return has_base() &&  has_index() && is_disp12(); }
+  bool is_RXEform() { return has_base() &&  has_index() && is_disp12(); }
+  bool is_RXYform() { return has_base() &&  has_index() && is_disp20(); }
+
+  bool uses(Register r) { return _base == r || _index == r; };
+
+  // accessors
+  Register base()      const { return _base; }
+  Register baseOrR0()  const { assert(_base  != Z_R0, ""); return _base  == noreg ? Z_R0 : _base; }
+  Register index()     const { return _index; }
+  Register indexOrR0() const { assert(_index != Z_R0, ""); return _index == noreg ? Z_R0 : _index; }
+  intptr_t disp() const { return _disp; }
+  // Specific version for short displacement instructions.
+  int      disp12() const {
+    assert(is_disp12(), "displacement out of range for uimm12");
+    return _disp;
+  }
+  // Specific version for long displacement instructions.
+  int      disp20() const {
+    assert(is_disp20(), "displacement out of range for simm20");
+    return _disp;
+  }
+  intptr_t value() const { return _disp; }
+
+  friend class Assembler;
+};
+
+// AddressLiteral: a target address together with the relocation
+// information needed when it is embedded in generated code.
+class AddressLiteral VALUE_OBJ_CLASS_SPEC {
+ private:
+  address          _address;  // The literal target address.
+  RelocationHolder _rspec;    // Relocation describing how to patch it.
+
+  // Build the RelocationHolder matching rtype for target address addr.
+  // Only the relocation types listed below are supported by this port;
+  // any other type is a programming error.
+  RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) {
+    switch (rtype) {
+    case relocInfo::external_word_type:
+      return external_word_Relocation::spec(addr);
+    case relocInfo::internal_word_type:
+      return internal_word_Relocation::spec(addr);
+    case relocInfo::opt_virtual_call_type:
+      return opt_virtual_call_Relocation::spec();
+    case relocInfo::static_call_type:
+      return static_call_Relocation::spec();
+    case relocInfo::runtime_call_w_cp_type:
+      return runtime_call_w_cp_Relocation::spec();
+    case relocInfo::none:
+      return RelocationHolder();
+    default:
+      ShouldNotReachHere();
+      return RelocationHolder();
+    }
+  }
+
+ protected:
+  // creation
+  AddressLiteral() : _address(NULL), _rspec(NULL) {}
+
+ public:
+  AddressLiteral(address addr, RelocationHolder const& rspec)
+    : _address(addr),
+      _rspec(rspec) {}
+
+  // Some constructors to avoid casting at the call site.
+  AddressLiteral(jobject obj, RelocationHolder const& rspec)
+    : _address((address) obj),
+      _rspec(rspec) {}
+
+  AddressLiteral(intptr_t value, RelocationHolder const& rspec)
+    : _address((address) value),
+      _rspec(rspec) {}
+
+  AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  // Some constructors to avoid casting at the call site.
+  AddressLiteral(address* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(bool* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(const bool* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(signed char* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(int* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(intptr_t addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(oop addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(oop* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(float* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(double* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  // Numeric value of the target address, e.g. for emission as immediate.
+  intptr_t value() const { return (intptr_t) _address; }
+
+  // Note: top-level const on a by-value return is meaningless (ignored in
+  // the type system, warns with -Wignored-qualifiers), so it was dropped.
+  relocInfo::relocType rtype() const { return _rspec.type(); }
+  const RelocationHolder&    rspec() const { return _rspec; }
+
+  // Relocation spec shifted by offset (identity when offset == 0).
+  RelocationHolder rspec(int offset) const {
+    return offset == 0 ? _rspec : _rspec.plus(offset);
+  }
+};
+
+// Convenience classes
+class ExternalAddress: public AddressLiteral {
+ private:
+  // Choose the relocation type to attach to an external target.
+  static relocInfo::relocType reloc_for_target(address target) {
+    // ExternalAddress is sometimes used for values which are not exactly
+    // addresses (e.g. the card table base). External_word_type cannot be
+    // used for values in the first page, so no relocation is emitted then.
+    if (external_word_Relocation::can_be_relocated(target)) {
+      return relocInfo::external_word_type;
+    }
+    return relocInfo::none;
+  }
+
+ public:
+  ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(          target)) {}
+  ExternalAddress(oop*    target) : AddressLiteral(target, reloc_for_target((address) target)) {}
+};
+
+// Argument is an abstraction used to represent an outgoing actual
+// argument or an incoming formal parameter, whether it resides in
+// memory or in a register, in a manner consistent with the
+// z/Architecture Application Binary Interface, or ABI. This is often
+// referred to as the native or C calling convention.
+class Argument VALUE_OBJ_CLASS_SPEC {
+ private:
+  int  _number;   // 0-based index of the argument.
+  bool _is_in;    // Incoming formal parameter (true) vs. outgoing actual
+                  // argument (false). Not read by any accessor below.
+
+ public:
+  enum {
+    // Only 5 registers may contain integer parameters.
+    n_register_parameters = 5,
+    // Can have up to 4 floating registers.
+    n_float_register_parameters = 4
+  };
+
+  // creation
+  Argument(int number, bool is_in) : _number(number), _is_in(is_in) {}
+  // The original left _is_in uninitialized here; default it to false so
+  // the member never holds an indeterminate value (e.g. when an Argument
+  // created via successor() is copied).
+  Argument(int number) : _number(number), _is_in(false) {}
+
+  int number() const { return _number; }
+
+  // Next argument slot. Note: the in/out property is not propagated.
+  Argument successor() const { return Argument(number() + 1); }
+
+  // Locating register-based arguments:
+  bool is_register() const { return _number < n_register_parameters; }
+
+  // Locating Floating Point register-based arguments:
+  bool is_float_register() const { return _number < n_float_register_parameters; }
+
+  FloatRegister as_float_register() const {
+    assert(is_float_register(), "must be a register argument");
+    // Maps to every other FP register, offset by one — presumably the
+    // float companion of the double register below; confirm against the
+    // ABI float argument passing code.
+    return as_FloatRegister((number() *2) + 1);
+  }
+
+  FloatRegister as_double_register() const {
+    assert(is_float_register(), "must be a register argument");
+    // Maps to every other FP register (number 0..3 -> F0/F2/F4/F6).
+    return as_FloatRegister((number() *2));
+  }
+
+  Register as_register() const {
+    assert(is_register(), "must be a register argument");
+    // Integer register arguments start at Z_ARG1.
+    return as_Register(number() + Z_ARG1->encoding());
+  }
+
+  // debugging
+  const char* name() const;
+
+  friend class Assembler;
+};
+
+
+// The z/Architecture Assembler: Pure assembler doing NO optimizations
+// on the instruction level; i.e., what you write is what you get. The
+// Assembler is generating code into a CodeBuffer.
+class Assembler : public AbstractAssembler {
+ protected:
+
+  friend class AbstractAssembler;
+  friend class AddressLiteral;
+
+  // Code patchers need various routines like inv_wdisp().
+  friend class NativeInstruction;
+#ifndef COMPILER2
+  friend class NativeGeneralJump;
+#endif
+  friend class Relocation;
+
+ public:
+
+// Addressing
+
+// address calculation
+#define LA_ZOPC     (unsigned  int)(0x41  << 24)
+#define LAY_ZOPC    (unsigned long)(0xe3L << 40 | 0x71L)
+#define LARL_ZOPC   (unsigned long)(0xc0L << 40 | 0x00L << 32)
+
+
+// Data Transfer
+
+// register to register transfer
+#define LR_ZOPC     (unsigned  int)(24 << 8)
+#define LBR_ZOPC    (unsigned  int)(0xb926 << 16)
+#define LHR_ZOPC    (unsigned  int)(0xb927 << 16)
+#define LGBR_ZOPC   (unsigned  int)(0xb906 << 16)
+#define LGHR_ZOPC   (unsigned  int)(0xb907 << 16)
+#define LGFR_ZOPC   (unsigned  int)(0xb914 << 16)
+#define LGR_ZOPC    (unsigned  int)(0xb904 << 16)
+
+#define LLHR_ZOPC   (unsigned  int)(0xb995 << 16)
+#define LLGCR_ZOPC  (unsigned  int)(0xb984 << 16)
+#define LLGHR_ZOPC  (unsigned  int)(0xb985 << 16)
+#define LLGTR_ZOPC  (unsigned  int)(185 << 24 | 23 << 16)
+#define LLGFR_ZOPC  (unsigned  int)(185 << 24 | 22 << 16)
+
+#define LTR_ZOPC    (unsigned  int)(18 << 8)
+#define LTGFR_ZOPC  (unsigned  int)(185 << 24 | 18 << 16)
+#define LTGR_ZOPC   (unsigned  int)(185 << 24 | 2 << 16)
+
+#define LER_ZOPC    (unsigned  int)(56 << 8)
+#define LEDBR_ZOPC  (unsigned  int)(179 << 24 | 68 << 16)
+#define LEXBR_ZOPC  (unsigned  int)(179 << 24 | 70 << 16)
+#define LDEBR_ZOPC  (unsigned  int)(179 << 24 | 4 << 16)
+#define LDR_ZOPC    (unsigned  int)(40 << 8)
+#define LDXBR_ZOPC  (unsigned  int)(179 << 24 | 69 << 16)
+#define LXEBR_ZOPC  (unsigned  int)(179 << 24 | 6 << 16)
+#define LXDBR_ZOPC  (unsigned  int)(179 << 24 | 5 << 16)
+#define LXR_ZOPC    (unsigned  int)(179 << 24 | 101 << 16)
+#define LTEBR_ZOPC  (unsigned  int)(179 << 24 | 2 << 16)
+#define LTDBR_ZOPC  (unsigned  int)(179 << 24 | 18 << 16)
+#define LTXBR_ZOPC  (unsigned  int)(179 << 24 | 66 << 16)
+
+#define LRVR_ZOPC   (unsigned  int)(0xb91f << 16)
+#define LRVGR_ZOPC  (unsigned  int)(0xb90f << 16)
+
+#define LDGR_ZOPC   (unsigned  int)(0xb3c1 << 16)                // z10
+#define LGDR_ZOPC   (unsigned  int)(0xb3cd << 16)                // z10
+
+#define LOCR_ZOPC   (unsigned  int)(0xb9f2 << 16)                // z196
+#define LOCGR_ZOPC  (unsigned  int)(0xb9e2 << 16)                // z196
+
+// immediate to register transfer
+#define IIHH_ZOPC   (unsigned  int)(165 << 24)
+#define IIHL_ZOPC   (unsigned  int)(165 << 24 | 1 << 16)
+#define IILH_ZOPC   (unsigned  int)(165 << 24 | 2 << 16)
+#define IILL_ZOPC   (unsigned  int)(165 << 24 | 3 << 16)
+#define IIHF_ZOPC   (unsigned long)(0xc0L << 40 | 8L << 32)
+#define IILF_ZOPC   (unsigned long)(0xc0L << 40 | 9L << 32)
+#define LLIHH_ZOPC  (unsigned  int)(165 << 24 | 12 << 16)
+#define LLIHL_ZOPC  (unsigned  int)(165 << 24 | 13 << 16)
+#define LLILH_ZOPC  (unsigned  int)(165 << 24 | 14 << 16)
+#define LLILL_ZOPC  (unsigned  int)(165 << 24 | 15 << 16)
+#define LLIHF_ZOPC  (unsigned long)(0xc0L << 40 | 14L << 32)
+#define LLILF_ZOPC  (unsigned long)(0xc0L << 40 | 15L << 32)
+#define LHI_ZOPC    (unsigned  int)(167 << 24 | 8 << 16)
+#define LGHI_ZOPC   (unsigned  int)(167 << 24 | 9 << 16)
+#define LGFI_ZOPC   (unsigned long)(0xc0L << 40 | 1L << 32)
+
+#define LZER_ZOPC   (unsigned  int)(0xb374 << 16)
+#define LZDR_ZOPC   (unsigned  int)(0xb375 << 16)
+
+// LOAD: memory to register transfer
+#define LB_ZOPC     (unsigned long)(227L << 40 | 118L)
+#define LH_ZOPC     (unsigned  int)(72 << 24)
+#define LHY_ZOPC    (unsigned long)(227L << 40 | 120L)
+#define L_ZOPC      (unsigned  int)(88 << 24)
+#define LY_ZOPC     (unsigned long)(227L << 40 | 88L)
+#define LT_ZOPC     (unsigned long)(0xe3L << 40 | 0x12L)
+#define LGB_ZOPC    (unsigned long)(227L << 40 | 119L)
+#define LGH_ZOPC    (unsigned long)(227L << 40 | 21L)
+#define LGF_ZOPC    (unsigned long)(227L << 40 | 20L)
+#define LG_ZOPC     (unsigned long)(227L << 40 | 4L)
+#define LTG_ZOPC    (unsigned long)(0xe3L << 40 | 0x02L)
+#define LTGF_ZOPC   (unsigned long)(0xe3L << 40 | 0x32L)
+
+#define LLC_ZOPC    (unsigned long)(0xe3L << 40 | 0x94L)
+#define LLH_ZOPC    (unsigned long)(0xe3L << 40 | 0x95L)
+#define LLGT_ZOPC   (unsigned long)(227L << 40 | 23L)
+#define LLGC_ZOPC   (unsigned long)(227L << 40 | 144L)
+#define LLGH_ZOPC   (unsigned long)(227L << 40 | 145L)
+#define LLGF_ZOPC   (unsigned long)(227L << 40 | 22L)
+
+#define IC_ZOPC     (unsigned  int)(0x43  << 24)
+#define ICY_ZOPC    (unsigned long)(0xe3L << 40 | 0x73L)
+#define ICM_ZOPC    (unsigned  int)(0xbf  << 24)
+#define ICMY_ZOPC   (unsigned long)(0xebL << 40 | 0x81L)
+#define ICMH_ZOPC   (unsigned long)(0xebL << 40 | 0x80L)
+
+#define LRVH_ZOPC   (unsigned long)(0xe3L << 40 | 0x1fL)
+#define LRV_ZOPC    (unsigned long)(0xe3L << 40 | 0x1eL)
+#define LRVG_ZOPC   (unsigned long)(0xe3L << 40 | 0x0fL)
+
+
+// LOAD relative: memory to register transfer
+#define LHRL_ZOPC   (unsigned long)(0xc4L << 40 | 0x05L << 32)  // z10
+#define LRL_ZOPC    (unsigned long)(0xc4L << 40 | 0x0dL << 32)  // z10
+#define LGHRL_ZOPC  (unsigned long)(0xc4L << 40 | 0x04L << 32)  // z10
+#define LGFRL_ZOPC  (unsigned long)(0xc4L << 40 | 0x0cL << 32)  // z10
+#define LGRL_ZOPC   (unsigned long)(0xc4L << 40 | 0x08L << 32)  // z10
+
+#define LLHRL_ZOPC  (unsigned long)(0xc4L << 40 | 0x02L << 32)  // z10
+#define LLGHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x06L << 32)  // z10
+#define LLGFRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0eL << 32)  // z10
+
+#define LOC_ZOPC    (unsigned long)(0xebL << 40 | 0xf2L)        // z196
+#define LOCG_ZOPC   (unsigned long)(0xebL << 40 | 0xe2L)        // z196
+
+#define LMG_ZOPC    (unsigned long)(235L << 40 | 4L)
+
+#define LE_ZOPC     (unsigned  int)(0x78 << 24)
+#define LEY_ZOPC    (unsigned long)(237L << 40 | 100L)
+#define LDEB_ZOPC   (unsigned long)(237L << 40 | 4)
+#define LD_ZOPC     (unsigned  int)(0x68 << 24)
+#define LDY_ZOPC    (unsigned long)(237L << 40 | 101L)
+#define LXEB_ZOPC   (unsigned long)(237L << 40 | 6)
+#define LXDB_ZOPC   (unsigned long)(237L << 40 | 5)
+
+// STORE: register to memory transfer
+#define STC_ZOPC    (unsigned  int)(0x42 << 24)
+#define STCY_ZOPC   (unsigned long)(227L << 40 | 114L)
+#define STH_ZOPC    (unsigned  int)(64 << 24)
+#define STHY_ZOPC   (unsigned long)(227L << 40 | 112L)
+#define ST_ZOPC     (unsigned  int)(80 << 24)
+#define STY_ZOPC    (unsigned long)(227L << 40 | 80L)
+#define STG_ZOPC    (unsigned long)(227L << 40 | 36L)
+
+#define STCM_ZOPC   (unsigned long)(0xbeL << 24)
+#define STCMY_ZOPC  (unsigned long)(0xebL << 40 | 0x2dL)
+#define STCMH_ZOPC  (unsigned long)(0xebL << 40 | 0x2cL)
+
+// STORE relative: register to memory transfer
+#define STHRL_ZOPC  (unsigned long)(0xc4L << 40 | 0x07L << 32)  // z10
+#define STRL_ZOPC   (unsigned long)(0xc4L << 40 | 0x0fL << 32)  // z10
+#define STGRL_ZOPC  (unsigned long)(0xc4L << 40 | 0x0bL << 32)  // z10
+
+#define STOC_ZOPC   (unsigned long)(0xebL << 40 | 0xf3L)        // z196
+#define STOCG_ZOPC  (unsigned long)(0xebL << 40 | 0xe3L)        // z196
+
+#define STMG_ZOPC   (unsigned long)(235L << 40 | 36L)
+
+#define STE_ZOPC    (unsigned  int)(0x70 << 24)
+#define STEY_ZOPC   (unsigned long)(237L << 40 | 102L)
+#define STD_ZOPC    (unsigned  int)(0x60 << 24)
+#define STDY_ZOPC   (unsigned long)(237L << 40 | 103L)
+
+// MOVE: immediate to memory transfer
+#define MVHHI_ZOPC  (unsigned long)(0xe5L << 40 | 0x44L << 32)   // z10
+#define MVHI_ZOPC   (unsigned long)(0xe5L << 40 | 0x4cL << 32)   // z10
+#define MVGHI_ZOPC  (unsigned long)(0xe5L << 40 | 0x48L << 32)   // z10
+
+
+//  ALU operations
+
+// Load Positive
+#define LPR_ZOPC    (unsigned  int)(16 << 8)
+#define LPGFR_ZOPC  (unsigned  int)(185 << 24 | 16 << 16)
+#define LPGR_ZOPC   (unsigned  int)(185 << 24)
+#define LPEBR_ZOPC  (unsigned  int)(179 << 24)
+#define LPDBR_ZOPC  (unsigned  int)(179 << 24 | 16 << 16)
+#define LPXBR_ZOPC  (unsigned  int)(179 << 24 | 64 << 16)
+
+// Load Negative
+#define LNR_ZOPC    (unsigned  int)(17 << 8)
+#define LNGFR_ZOPC  (unsigned  int)(185 << 24 | 17 << 16)
+#define LNGR_ZOPC   (unsigned  int)(185 << 24 | 1 << 16)
+#define LNEBR_ZOPC  (unsigned  int)(179 << 24 | 1 << 16)
+#define LNDBR_ZOPC  (unsigned  int)(179 << 24 | 17 << 16)
+#define LNXBR_ZOPC  (unsigned  int)(179 << 24 | 65 << 16)
+
+// Load Complement
+#define LCR_ZOPC    (unsigned  int)(19 << 8)
+#define LCGFR_ZOPC  (unsigned  int)(185 << 24 | 19 << 16)
+#define LCGR_ZOPC   (unsigned  int)(185 << 24 | 3 << 16)
+#define LCEBR_ZOPC  (unsigned  int)(179 << 24 | 3 << 16)
+#define LCDBR_ZOPC  (unsigned  int)(179 << 24 | 19 << 16)
+#define LCXBR_ZOPC  (unsigned  int)(179 << 24 | 67 << 16)
+
+// Add
+// RR, signed
+#define AR_ZOPC     (unsigned  int)(26 << 8)
+#define AGFR_ZOPC   (unsigned  int)(0xb9 << 24 | 0x18 << 16)
+#define AGR_ZOPC    (unsigned  int)(0xb9 << 24 | 0x08 << 16)
+// RRF, signed
+#define ARK_ZOPC    (unsigned  int)(0xb9 << 24 | 0x00f8 << 16)
+#define AGRK_ZOPC   (unsigned  int)(0xb9 << 24 | 0x00e8 << 16)
+// RI, signed
+#define AHI_ZOPC    (unsigned  int)(167 << 24 | 10 << 16)
+#define AFI_ZOPC    (unsigned long)(0xc2L << 40 | 9L << 32)
+#define AGHI_ZOPC   (unsigned  int)(167 << 24 | 11 << 16)
+#define AGFI_ZOPC   (unsigned long)(0xc2L << 40 | 8L << 32)
+// RIE, signed
+#define AHIK_ZOPC   (unsigned long)(0xecL << 40 | 0x00d8L)
+#define AGHIK_ZOPC  (unsigned long)(0xecL << 40 | 0x00d9L)
+#define AIH_ZOPC    (unsigned long)(0xccL << 40 | 0x08L << 32)
+// RM, signed
+#define AHY_ZOPC    (unsigned long)(227L << 40 | 122L)
+#define A_ZOPC      (unsigned  int)(90 << 24)
+#define AY_ZOPC     (unsigned long)(227L << 40 | 90L)
+#define AGF_ZOPC    (unsigned long)(227L << 40 | 24L)
+#define AG_ZOPC     (unsigned long)(227L << 40 | 8L)
+// In-memory arithmetic (add signed, add logical with signed immediate).
+// MI, signed
+#define ASI_ZOPC    (unsigned long)(0xebL << 40 | 0x6aL)
+#define AGSI_ZOPC   (unsigned long)(0xebL << 40 | 0x7aL)
+
+// RR, Logical
+#define ALR_ZOPC    (unsigned  int)(30 << 8)
+#define ALGFR_ZOPC  (unsigned  int)(185 << 24 | 26 << 16)
+#define ALGR_ZOPC   (unsigned  int)(185 << 24 | 10 << 16)
+#define ALCGR_ZOPC  (unsigned  int)(185 << 24 | 136 << 16)
+// RRF, Logical
+#define ALRK_ZOPC   (unsigned  int)(0xb9 << 24 | 0x00fa << 16)
+#define ALGRK_ZOPC  (unsigned  int)(0xb9 << 24 | 0x00ea << 16)
+// RI, Logical
+#define ALFI_ZOPC   (unsigned long)(0xc2L << 40 | 0x0bL << 32)
+#define ALGFI_ZOPC  (unsigned long)(0xc2L << 40 | 0x0aL << 32)
+// RIE, Logical
+#define ALHSIK_ZOPC (unsigned long)(0xecL << 40 | 0x00daL)
+#define ALGHSIK_ZOPC (unsigned long)(0xecL << 40 | 0x00dbL)
+// RM, Logical
+#define AL_ZOPC     (unsigned  int)(0x5e << 24)
+#define ALY_ZOPC    (unsigned long)(227L << 40 | 94L)
+#define ALGF_ZOPC   (unsigned long)(227L << 40 | 26L)
+#define ALG_ZOPC    (unsigned long)(227L << 40 | 10L)
+// In-memory arithmetic (add signed, add logical with signed immediate).
+// MI, Logical
+#define ALSI_ZOPC   (unsigned long)(0xebL << 40 | 0x6eL)
+#define ALGSI_ZOPC  (unsigned long)(0xebL << 40 | 0x7eL)
+
+// RR, BFP
+#define AEBR_ZOPC   (unsigned  int)(179 << 24 | 10 << 16)
+#define ADBR_ZOPC   (unsigned  int)(179 << 24 | 26 << 16)
+#define AXBR_ZOPC   (unsigned  int)(179 << 24 | 74 << 16)
+// RM, BFP
+#define AEB_ZOPC    (unsigned long)(237L << 40 | 10)
+#define ADB_ZOPC    (unsigned long)(237L << 40 | 26)
+
+// Subtract
+// RR, signed
+#define SR_ZOPC     (unsigned  int)(27 << 8)
+#define SGFR_ZOPC   (unsigned  int)(185 << 24 | 25 << 16)
+#define SGR_ZOPC    (unsigned  int)(185 << 24 | 9 << 16)
+// RRF, signed
+#define SRK_ZOPC    (unsigned  int)(0xb9 << 24 | 0x00f9 << 16)
+#define SGRK_ZOPC   (unsigned  int)(0xb9 << 24 | 0x00e9 << 16)
+//   RM, signed
+#define SH_ZOPC     (unsigned  int)(0x4b << 24)
+#define SHY_ZOPC    (unsigned long)(227L << 40 | 123L)
+#define S_ZOPC      (unsigned  int)(0x5B << 24)
+#define SY_ZOPC     (unsigned long)(227L << 40 | 91L)
+#define SGF_ZOPC    (unsigned long)(227L << 40 | 25)
+#define SG_ZOPC     (unsigned long)(227L << 40 | 9)
+// RR, Logical
+#define SLR_ZOPC    (unsigned  int)(31 << 8)
+#define SLGFR_ZOPC  (unsigned  int)(185 << 24 | 27 << 16)
+#define SLGR_ZOPC   (unsigned  int)(185 << 24 | 11 << 16)
+// RIL, Logical
+#define SLFI_ZOPC   (unsigned long)(0xc2L << 40 | 0x05L << 32)
+#define SLGFI_ZOPC  (unsigned long)(0xc2L << 40 | 0x04L << 32)
+// RRF, Logical
+#define SLRK_ZOPC   (unsigned  int)(0xb9 << 24 | 0x00fb << 16)
+#define SLGRK_ZOPC  (unsigned  int)(0xb9 << 24 | 0x00eb << 16)
+// RM, Logical
+#define SLY_ZOPC    (unsigned long)(227L << 40 | 95L)
+#define SLGF_ZOPC   (unsigned long)(227L << 40 | 27L)
+#define SLG_ZOPC    (unsigned long)(227L << 40 | 11L)
+
+// RR, BFP
+#define SEBR_ZOPC   (unsigned  int)(179 << 24 | 11 << 16)
+#define SDBR_ZOPC   (unsigned  int)(179 << 24 | 27 << 16)
+#define SXBR_ZOPC   (unsigned  int)(179 << 24 | 75 << 16)
+// RM, BFP
+#define SEB_ZOPC    (unsigned long)(237L << 40 | 11)
+#define SDB_ZOPC    (unsigned long)(237L << 40 | 27)
+
+// Multiply
+// RR, signed
+#define MR_ZOPC     (unsigned  int)(28 << 8)
+#define MSR_ZOPC    (unsigned  int)(178 << 24 | 82 << 16)
+#define MSGFR_ZOPC  (unsigned  int)(185 << 24 | 28 << 16)
+#define MSGR_ZOPC   (unsigned  int)(185 << 24 | 12 << 16)
+// RI, signed
+#define MHI_ZOPC    (unsigned  int)(167 << 24 | 12 << 16)
+#define MGHI_ZOPC   (unsigned  int)(167 << 24 | 13 << 16)
+#define MSFI_ZOPC   (unsigned long)(0xc2L << 40 | 0x01L << 32)   // z10
+#define MSGFI_ZOPC  (unsigned long)(0xc2L << 40 | 0x00L << 32)   // z10
+// RM, signed
+#define M_ZOPC      (unsigned  int)(92 << 24)
+#define MS_ZOPC     (unsigned  int)(0x71 << 24)
+#define MHY_ZOPC    (unsigned long)(0xe3L<< 40 | 0x7cL)
+#define MSY_ZOPC    (unsigned long)(227L << 40 | 81L)
+#define MSGF_ZOPC   (unsigned long)(227L << 40 | 28L)
+#define MSG_ZOPC    (unsigned long)(227L << 40 | 12L)
+// RR, unsigned
+#define MLR_ZOPC    (unsigned  int)(185 << 24 | 150 << 16)
+#define MLGR_ZOPC   (unsigned  int)(185 << 24 | 134 << 16)
+// RM, unsigned
+#define ML_ZOPC     (unsigned long)(227L << 40 | 150L)
+#define MLG_ZOPC    (unsigned long)(227L << 40 | 134L)
+
+// RR, BFP
+#define MEEBR_ZOPC  (unsigned  int)(179 << 24 | 23 << 16)
+#define MDEBR_ZOPC  (unsigned  int)(179 << 24 | 12 << 16)
+#define MDBR_ZOPC   (unsigned  int)(179 << 24 | 28 << 16)
+#define MXDBR_ZOPC  (unsigned  int)(179 << 24 | 7 << 16)
+#define MXBR_ZOPC   (unsigned  int)(179 << 24 | 76 << 16)
+// RM, BFP
+#define MEEB_ZOPC   (unsigned long)(237L << 40 | 23)
+#define MDEB_ZOPC   (unsigned long)(237L << 40 | 12)
+#define MDB_ZOPC    (unsigned long)(237L << 40 | 28)
+#define MXDB_ZOPC   (unsigned long)(237L << 40 | 7)
+
+// Divide
+// RR, signed
+#define DSGFR_ZOPC  (unsigned  int)(0xb91d << 16)
+#define DSGR_ZOPC   (unsigned  int)(0xb90d << 16)
+// RM, signed
+#define D_ZOPC      (unsigned  int)(93 << 24)
+#define DSGF_ZOPC   (unsigned long)(227L << 40 | 29L)
+#define DSG_ZOPC    (unsigned long)(227L << 40 | 13L)
+// RR, unsigned
+#define DLR_ZOPC    (unsigned  int)(185 << 24 | 151 << 16)
+#define DLGR_ZOPC   (unsigned  int)(185 << 24 | 135 << 16)
+// RM, unsigned
+#define DL_ZOPC     (unsigned long)(227L << 40 | 151L)
+#define DLG_ZOPC    (unsigned long)(227L << 40 | 135L)
+
+// RR, BFP
+#define DEBR_ZOPC   (unsigned  int)(179 << 24 | 13 << 16)
+#define DDBR_ZOPC   (unsigned  int)(179 << 24 | 29 << 16)
+#define DXBR_ZOPC   (unsigned  int)(179 << 24 | 77 << 16)
+// RM, BFP
+#define DEB_ZOPC    (unsigned long)(237L << 40 | 13)
+#define DDB_ZOPC    (unsigned long)(237L << 40 | 29)
+
+// Square Root
+// RR, BFP
+#define SQEBR_ZOPC  (unsigned  int)(0xb314 << 16)
+#define SQDBR_ZOPC  (unsigned  int)(0xb315 << 16)
+#define SQXBR_ZOPC  (unsigned  int)(0xb316 << 16)
+// RM, BFP
+#define SQEB_ZOPC   (unsigned long)(237L << 40 | 20)
+#define SQDB_ZOPC   (unsigned long)(237L << 40 | 21)
+
+// Compare and Test
+// RR, signed
+#define CR_ZOPC     (unsigned  int)(25 << 8)
+#define CGFR_ZOPC   (unsigned  int)(185 << 24 | 48 << 16)
+#define CGR_ZOPC    (unsigned  int)(185 << 24 | 32 << 16)
+// RI, signed
+#define CHI_ZOPC    (unsigned  int)(167 << 24 | 14 << 16)
+#define CFI_ZOPC    (unsigned long)(0xc2L << 40 | 0xdL << 32)
+#define CGHI_ZOPC   (unsigned  int)(167 << 24 | 15 << 16)
+#define CGFI_ZOPC   (unsigned long)(0xc2L << 40 | 0xcL << 32)
+// RM, signed
+#define CH_ZOPC     (unsigned  int)(0x49 << 24)
+#define CHY_ZOPC    (unsigned long)(227L << 40 | 121L)
+#define C_ZOPC      (unsigned  int)(0x59 << 24)
+#define CY_ZOPC     (unsigned long)(227L << 40 | 89L)
+#define CGF_ZOPC    (unsigned long)(227L << 40 | 48L)
+#define CG_ZOPC     (unsigned long)(227L << 40 | 32L)
+// RR, unsigned
+#define CLR_ZOPC    (unsigned  int)(21 << 8)
+#define CLGFR_ZOPC  (unsigned  int)(185 << 24 | 49 << 16)
+#define CLGR_ZOPC   (unsigned  int)(185 << 24 | 33 << 16)
+// RIL, unsigned
+#define CLFI_ZOPC   (unsigned long)(0xc2L << 40 | 0xfL << 32)
+#define CLGFI_ZOPC  (unsigned long)(0xc2L << 40 | 0xeL << 32)
+// RM, unsigned
+#define CL_ZOPC     (unsigned  int)(0x55 << 24)
+#define CLY_ZOPC    (unsigned long)(227L << 40 | 85L)
+#define CLGF_ZOPC   (unsigned long)(227L << 40 | 49L)
+#define CLG_ZOPC    (unsigned long)(227L << 40 | 33L)
+// RI, unsigned
+#define TMHH_ZOPC   (unsigned  int)(167 << 24 | 2 << 16)
+#define TMHL_ZOPC   (unsigned  int)(167 << 24 | 3 << 16)
+#define TMLH_ZOPC   (unsigned  int)(167 << 24)
+#define TMLL_ZOPC   (unsigned  int)(167 << 24 | 1 << 16)
+
+// RR, BFP
+#define CEBR_ZOPC   (unsigned  int)(179 << 24 | 9 << 16)
+#define CDBR_ZOPC   (unsigned  int)(179 << 24 | 25 << 16)
+#define CXBR_ZOPC   (unsigned  int)(179 << 24 | 73 << 16)
+// RM, BFP
+#define CEB_ZOPC    (unsigned long)(237L << 40 | 9)
+#define CDB_ZOPC    (unsigned long)(237L << 40 | 25)
+
+// Shift
+// arithmetic
+#define SLA_ZOPC    (unsigned  int)(139 << 24)
+#define SLAG_ZOPC   (unsigned long)(235L << 40 | 11L)
+#define SRA_ZOPC    (unsigned  int)(138 << 24)
+#define SRAG_ZOPC   (unsigned long)(235L << 40 | 10L)
+// logical
+#define SLL_ZOPC    (unsigned  int)(137 << 24)
+#define SLLG_ZOPC   (unsigned long)(235L << 40 | 13L)
+#define SRL_ZOPC    (unsigned  int)(136 << 24)
+#define SRLG_ZOPC   (unsigned long)(235L << 40 | 12L)
+
+// Rotate, then AND/XOR/OR/insert
+// rotate
+#define RLL_ZOPC    (unsigned long)(0xebL << 40 | 0x1dL)         // z10
+#define RLLG_ZOPC   (unsigned long)(0xebL << 40 | 0x1cL)         // z10
+// rotate and {AND|XOR|OR|INS}
+#define RNSBG_ZOPC  (unsigned long)(0xecL << 40 | 0x54L)         // z196
+#define RXSBG_ZOPC  (unsigned long)(0xecL << 40 | 0x57L)         // z196
+#define ROSBG_ZOPC  (unsigned long)(0xecL << 40 | 0x56L)         // z196
+#define RISBG_ZOPC  (unsigned long)(0xecL << 40 | 0x55L)         // z196
+
+// AND
+// RR, signed
+#define NR_ZOPC     (unsigned  int)(20 << 8)
+#define NGR_ZOPC    (unsigned  int)(185 << 24 | 128 << 16)
+// RRF, signed
+#define NRK_ZOPC    (unsigned  int)(0xb9 << 24 | 0x00f4 << 16)
+#define NGRK_ZOPC   (unsigned  int)(0xb9 << 24 | 0x00e4 << 16)
+// RI, signed
+#define NIHH_ZOPC   (unsigned  int)(165 << 24 | 4 << 16)
+#define NIHL_ZOPC   (unsigned  int)(165 << 24 | 5 << 16)
+#define NILH_ZOPC   (unsigned  int)(165 << 24 | 6 << 16)
+#define NILL_ZOPC   (unsigned  int)(165 << 24 | 7 << 16)
+#define NIHF_ZOPC   (unsigned long)(0xc0L << 40 | 10L << 32)
+#define NILF_ZOPC   (unsigned long)(0xc0L << 40 | 11L << 32)
+// RM, signed
+#define N_ZOPC      (unsigned  int)(0x54 << 24)
+#define NY_ZOPC     (unsigned long)(227L << 40 | 84L)
+#define NG_ZOPC     (unsigned long)(227L << 40 | 128L)
+
+// OR
+// RR, signed
+#define OR_ZOPC     (unsigned  int)(22 << 8)
+#define OGR_ZOPC    (unsigned  int)(185 << 24 | 129 << 16)
+// RRF, signed
+#define ORK_ZOPC    (unsigned  int)(0xb9 << 24 | 0x00f6 << 16)
+#define OGRK_ZOPC   (unsigned  int)(0xb9 << 24 | 0x00e6 << 16)
+// RI, signed
+#define OIHH_ZOPC   (unsigned  int)(165 << 24 | 8 << 16)
+#define OIHL_ZOPC   (unsigned  int)(165 << 24 | 9 << 16)
+#define OILH_ZOPC   (unsigned  int)(165 << 24 | 10 << 16)
+#define OILL_ZOPC   (unsigned  int)(165 << 24 | 11 << 16)
+#define OIHF_ZOPC   (unsigned long)(0xc0L << 40 | 12L << 32)
+#define OILF_ZOPC   (unsigned long)(0xc0L << 40 | 13L << 32)
+// RM, signed
+#define O_ZOPC      (unsigned  int)(0x56 << 24)
+#define OY_ZOPC     (unsigned long)(227L << 40 | 86L)
+#define OG_ZOPC     (unsigned long)(227L << 40 | 129L)
+
+// XOR
+// RR, signed
+#define XR_ZOPC     (unsigned  int)(23 << 8)
+#define XGR_ZOPC    (unsigned  int)(185 << 24 | 130 << 16)
+// RRF, signed
+#define XRK_ZOPC    (unsigned  int)(0xb9 << 24 | 0x00f7 << 16)
+#define XGRK_ZOPC   (unsigned  int)(0xb9 << 24 | 0x00e7 << 16)
+// RI, signed
+#define XIHF_ZOPC   (unsigned long)(0xc0L << 40 | 6L << 32)
+#define XILF_ZOPC   (unsigned long)(0xc0L << 40 | 7L << 32)
+// RM, signed
+#define X_ZOPC      (unsigned  int)(0x57 << 24)
+#define XY_ZOPC     (unsigned long)(227L << 40 | 87L)
+#define XG_ZOPC     (unsigned long)(227L << 40 | 130L)
+
+
+// Data Conversion
+
+// INT to BFP
+#define CEFBR_ZOPC  (unsigned  int)(179 << 24 | 148 << 16)
+#define CDFBR_ZOPC  (unsigned  int)(179 << 24 | 149 << 16)
+#define CXFBR_ZOPC  (unsigned  int)(179 << 24 | 150 << 16)
+#define CEGBR_ZOPC  (unsigned  int)(179 << 24 | 164 << 16)
+#define CDGBR_ZOPC  (unsigned  int)(179 << 24 | 165 << 16)
+#define CXGBR_ZOPC  (unsigned  int)(179 << 24 | 166 << 16)
+// BFP to INT
+#define CFEBR_ZOPC  (unsigned  int)(179 << 24 | 152 << 16)
+#define CFDBR_ZOPC  (unsigned  int)(179 << 24 | 153 << 16)
+#define CFXBR_ZOPC  (unsigned  int)(179 << 24 | 154 << 16)
+#define CGEBR_ZOPC  (unsigned  int)(179 << 24 | 168 << 16)
+#define CGDBR_ZOPC  (unsigned  int)(179 << 24 | 169 << 16)
+#define CGXBR_ZOPC  (unsigned  int)(179 << 24 | 170 << 16)
+// INT to DEC
+#define CVD_ZOPC    (unsigned  int)(0x4e << 24)
+#define CVDY_ZOPC   (unsigned long)(0xe3L << 40 | 0x26L)
+#define CVDG_ZOPC   (unsigned long)(0xe3L << 40 | 0x2eL)
+
+
+// BFP Control
+
+#define SRNM_ZOPC   (unsigned  int)(178 << 24 | 153 << 16)
+#define EFPC_ZOPC   (unsigned  int)(179 << 24 | 140 << 16)
+#define SFPC_ZOPC   (unsigned  int)(179 << 24 | 132 << 16)
+#define STFPC_ZOPC  (unsigned  int)(178 << 24 | 156 << 16)
+#define LFPC_ZOPC   (unsigned  int)(178 << 24 | 157 << 16)
+
+
+// Branch Instructions
+
+// Register
+#define BCR_ZOPC    (unsigned  int)(7 << 8)
+#define BALR_ZOPC   (unsigned  int)(5 << 8)
+#define BASR_ZOPC   (unsigned  int)(13 << 8)
+#define BCTGR_ZOPC  (unsigned long)(0xb946 << 16)
+// Absolute
+#define BC_ZOPC     (unsigned  int)(71 << 24)
+#define BAL_ZOPC    (unsigned  int)(69 << 24)
+#define BAS_ZOPC    (unsigned  int)(77 << 24)
+#define BXH_ZOPC    (unsigned  int)(134 << 24)
+#define BXHG_ZOPC   (unsigned long)(235L << 40 | 68)
+// Relative
+#define BRC_ZOPC    (unsigned  int)(167 << 24 | 4 << 16)
+#define BRCL_ZOPC   (unsigned long)(192L << 40 | 4L << 32)
+#define BRAS_ZOPC   (unsigned  int)(167 << 24 | 5 << 16)
+#define BRASL_ZOPC  (unsigned long)(192L << 40 | 5L << 32)
+#define BRCT_ZOPC   (unsigned  int)(167 << 24 | 6 << 16)
+#define BRCTG_ZOPC  (unsigned  int)(167 << 24 | 7 << 16)
+#define BRXH_ZOPC   (unsigned  int)(132 << 24)
+#define BRXHG_ZOPC  (unsigned long)(236L << 40 | 68)
+#define BRXLE_ZOPC  (unsigned  int)(133 << 24)
+#define BRXLG_ZOPC  (unsigned long)(236L << 40 | 69)
+
+
+// Compare and Branch Instructions
+
+// signed comp reg/reg, branch Absolute
+#define CRB_ZOPC    (unsigned long)(0xecL << 40 | 0xf6L)         // z10
+#define CGRB_ZOPC   (unsigned long)(0xecL << 40 | 0xe4L)         // z10
+// signed comp reg/reg, branch Relative
+#define CRJ_ZOPC    (unsigned long)(0xecL << 40 | 0x76L)         // z10
+#define CGRJ_ZOPC   (unsigned long)(0xecL << 40 | 0x64L)         // z10
+// signed comp reg/imm, branch absolute
+#define CIB_ZOPC    (unsigned long)(0xecL << 40 | 0xfeL)         // z10
+#define CGIB_ZOPC   (unsigned long)(0xecL << 40 | 0xfcL)         // z10
+// signed comp reg/imm, branch relative
+#define CIJ_ZOPC    (unsigned long)(0xecL << 40 | 0x7eL)         // z10
+#define CGIJ_ZOPC   (unsigned long)(0xecL << 40 | 0x7cL)         // z10
+
+// unsigned comp reg/reg, branch Absolute
+#define CLRB_ZOPC   (unsigned long)(0xecL << 40 | 0xf7L)         // z10
+#define CLGRB_ZOPC  (unsigned long)(0xecL << 40 | 0xe5L)         // z10
+// unsigned comp reg/reg, branch Relative
+#define CLRJ_ZOPC   (unsigned long)(0xecL << 40 | 0x77L)         // z10
+#define CLGRJ_ZOPC  (unsigned long)(0xecL << 40 | 0x65L)         // z10
+// unsigned comp reg/imm, branch absolute
+#define CLIB_ZOPC   (unsigned long)(0xecL << 40 | 0xffL)         // z10
+#define CLGIB_ZOPC  (unsigned long)(0xecL << 40 | 0xfdL)         // z10
+// unsigned comp reg/imm, branch relative
+#define CLIJ_ZOPC   (unsigned long)(0xecL << 40 | 0x7fL)         // z10
+#define CLGIJ_ZOPC  (unsigned long)(0xecL << 40 | 0x7dL)         // z10
+
+// comp reg/reg, trap
+#define CRT_ZOPC    (unsigned  int)(0xb972 << 16)                // z10
+#define CGRT_ZOPC   (unsigned  int)(0xb960 << 16)                // z10
+#define CLRT_ZOPC   (unsigned  int)(0xb973 << 16)                // z10
+#define CLGRT_ZOPC  (unsigned  int)(0xb961 << 16)                // z10
+// comp reg/imm, trap
+#define CIT_ZOPC    (unsigned long)(0xecL << 40 | 0x72L)         // z10
+#define CGIT_ZOPC   (unsigned long)(0xecL << 40 | 0x70L)         // z10
+#define CLFIT_ZOPC  (unsigned long)(0xecL << 40 | 0x73L)         // z10
+#define CLGIT_ZOPC  (unsigned long)(0xecL << 40 | 0x71L)         // z10
+
+
+// Direct Memory Operations
+
+// Compare
+#define CLI_ZOPC    (unsigned  int)(0x95  << 24)
+#define CLIY_ZOPC   (unsigned long)(0xebL << 40 | 0x55L)
+#define CLC_ZOPC    (unsigned long)(0xd5L << 40)
+#define CLCL_ZOPC   (unsigned  int)(0x0f  <<  8)
+#define CLCLE_ZOPC  (unsigned  int)(0xa9  << 24)
+#define CLCLU_ZOPC  (unsigned long)(0xebL << 40 | 0x8fL)
+
+// Move
+#define MVI_ZOPC    (unsigned  int)(0x92  << 24)
+#define MVIY_ZOPC   (unsigned long)(0xebL << 40 | 0x52L)
+#define MVC_ZOPC    (unsigned long)(0xd2L << 40)
+#define MVCL_ZOPC   (unsigned  int)(0x0e  <<  8)
+#define MVCLE_ZOPC  (unsigned  int)(0xa8  << 24)
+
+// Test
+#define TM_ZOPC     (unsigned  int)(0x91  << 24)
+#define TMY_ZOPC    (unsigned long)(0xebL << 40 | 0x51L)
+
+// AND
+#define NI_ZOPC     (unsigned  int)(0x94  << 24)
+#define NIY_ZOPC    (unsigned long)(0xebL << 40 | 0x54L)
+#define NC_ZOPC     (unsigned long)(0xd4L << 40)
+
+// OR
+#define OI_ZOPC     (unsigned  int)(0x96  << 24)
+#define OIY_ZOPC    (unsigned long)(0xebL << 40 | 0x56L)
+#define OC_ZOPC     (unsigned long)(0xd6L << 40)
+
+// XOR
+#define XI_ZOPC     (unsigned  int)(0x97  << 24)
+#define XIY_ZOPC    (unsigned long)(0xebL << 40 | 0x57L)
+#define XC_ZOPC     (unsigned long)(0xd7L << 40)
+
+// Search String
+#define SRST_ZOPC   (unsigned  int)(178 << 24 | 94 << 16)
+#define SRSTU_ZOPC  (unsigned  int)(185 << 24 | 190 << 16)
+
+// Translate characters
+#define TROO_ZOPC   (unsigned  int)(0xb9 << 24 | 0x93 << 16)
+#define TROT_ZOPC   (unsigned  int)(0xb9 << 24 | 0x92 << 16)
+#define TRTO_ZOPC   (unsigned  int)(0xb9 << 24 | 0x91 << 16)
+#define TRTT_ZOPC   (unsigned  int)(0xb9 << 24 | 0x90 << 16)
+
+
+// Miscellaneous Operations
+
+// Execute
+#define EX_ZOPC     (unsigned  int)(68L << 24)
+#define EXRL_ZOPC   (unsigned long)(0xc6L << 40 | 0x00L << 32)  // z10
+
+// Compare and Swap
+#define CS_ZOPC     (unsigned  int)(0xba << 24)
+#define CSY_ZOPC    (unsigned long)(0xebL << 40 | 0x14L)
+#define CSG_ZOPC    (unsigned long)(0xebL << 40 | 0x30L)
+
+// Interlocked-Update
+#define LAA_ZOPC    (unsigned long)(0xebL << 40 | 0xf8L)         // z196
+#define LAAG_ZOPC   (unsigned long)(0xebL << 40 | 0xe8L)         // z196
+#define LAAL_ZOPC   (unsigned long)(0xebL << 40 | 0xfaL)         // z196
+#define LAALG_ZOPC  (unsigned long)(0xebL << 40 | 0xeaL)         // z196
+#define LAN_ZOPC    (unsigned long)(0xebL << 40 | 0xf4L)         // z196
+#define LANG_ZOPC   (unsigned long)(0xebL << 40 | 0xe4L)         // z196
+#define LAX_ZOPC    (unsigned long)(0xebL << 40 | 0xf7L)         // z196
+#define LAXG_ZOPC   (unsigned long)(0xebL << 40 | 0xe7L)         // z196
+#define LAO_ZOPC    (unsigned long)(0xebL << 40 | 0xf6L)         // z196
+#define LAOG_ZOPC   (unsigned long)(0xebL << 40 | 0xe6L)         // z196
+
+// System Functions
+#define STCK_ZOPC   (unsigned  int)(0xb2 << 24 | 0x05 << 16)
+#define STCKF_ZOPC  (unsigned  int)(0xb2 << 24 | 0x7c << 16)
+#define STFLE_ZOPC  (unsigned  int)(0xb2 << 24 | 0xb0 << 16)
+#define ECTG_ZOPC   (unsigned long)(0xc8L <<40 | 0x01L << 32)    // z10
+#define ECAG_ZOPC   (unsigned long)(0xebL <<40 | 0x4cL)          // z10
+
+// Execution Prediction
+#define PFD_ZOPC    (unsigned long)(0xe3L <<40 | 0x36L)          // z10
+#define PFDRL_ZOPC  (unsigned long)(0xc6L <<40 | 0x02L << 32)    // z10
+#define BPP_ZOPC    (unsigned long)(0xc7L <<40)                  // branch prediction preload  -- EC12
+#define BPRP_ZOPC   (unsigned long)(0xc5L <<40)                  // branch prediction preload  -- EC12
+
+// Transaction Control
+#define TBEGIN_ZOPC  (unsigned long)(0xe560L << 32)              // tx begin                   -- EC12
+#define TBEGINC_ZOPC (unsigned long)(0xe561L << 32)              // tx begin (constrained)     -- EC12
+#define TEND_ZOPC    (unsigned  int)(0xb2f8  << 16)              // tx end                     -- EC12
+#define TABORT_ZOPC  (unsigned  int)(0xb2fc  << 16)              // tx abort                   -- EC12
+#define ETND_ZOPC    (unsigned  int)(0xb2ec  << 16)              // tx nesting depth           -- EC12
+#define PPA_ZOPC     (unsigned  int)(0xb2e8  << 16)              // tx processor assist        -- EC12
+
+// Crypto and Checksum
+#define CKSM_ZOPC   (unsigned  int)(0xb2 << 24 | 0x41 << 16)     // checksum. This is NOT CRC32
+#define KM_ZOPC     (unsigned  int)(0xb9 << 24 | 0x2e << 16)     // cipher
+#define KMC_ZOPC    (unsigned  int)(0xb9 << 24 | 0x2f << 16)     // cipher
+#define KIMD_ZOPC   (unsigned  int)(0xb9 << 24 | 0x3e << 16)     // SHA (msg digest)
+#define KLMD_ZOPC   (unsigned  int)(0xb9 << 24 | 0x3f << 16)     // SHA (msg digest)
+#define KMAC_ZOPC   (unsigned  int)(0xb9 << 24 | 0x1e << 16)     // Message Authentication Code
+
+// Various
+#define TCEB_ZOPC   (unsigned long)(237L << 40 | 16)
+#define TCDB_ZOPC   (unsigned long)(237L << 40 | 17)
+#define TAM_ZOPC    (unsigned long)(267)
+
+#define FLOGR_ZOPC  (unsigned  int)(0xb9 << 24 | 0x83 << 16)
+#define POPCNT_ZOPC (unsigned  int)(0xb9e1 << 16)
+#define AHHHR_ZOPC  (unsigned  int)(0xb9c8 << 16)
+#define AHHLR_ZOPC  (unsigned  int)(0xb9d8 << 16)
+
+
+// OpCode field masks
+
+#define RI_MASK     (unsigned  int)(0xff  << 24 | 0x0f << 16)
+#define RRE_MASK    (unsigned  int)(0xff  << 24 | 0xff << 16)
+#define RSI_MASK    (unsigned  int)(0xff  << 24)
+#define RIE_MASK    (unsigned long)(0xffL << 40 | 0xffL)
+#define RIL_MASK    (unsigned long)(0xffL << 40 | 0x0fL << 32)
+
+#define BASR_MASK   (unsigned  int)(0xff << 8)
+#define BCR_MASK    (unsigned  int)(0xff << 8)
+#define BRC_MASK    (unsigned  int)(0xff << 24 | 0x0f << 16)
+#define LGHI_MASK   (unsigned  int)(0xff << 24 | 0x0f << 16)
+#define LLI_MASK    (unsigned  int)(0xff << 24 | 0x0f << 16)
+#define II_MASK     (unsigned  int)(0xff << 24 | 0x0f << 16)
+#define LLIF_MASK   (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define IIF_MASK    (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define BRASL_MASK  (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define TM_MASK     (unsigned  int)(0xff << 24)
+#define TMY_MASK    (unsigned long)(0xffL << 40 | 0xffL)
+#define LB_MASK     (unsigned long)(0xffL << 40 | 0xffL)
+#define LH_MASK     (unsigned int)(0xff << 24)
+#define L_MASK      (unsigned int)(0xff << 24)
+#define LY_MASK     (unsigned long)(0xffL << 40 | 0xffL)
+#define LG_MASK     (unsigned long)(0xffL << 40 | 0xffL)
+#define LLGH_MASK   (unsigned long)(0xffL << 40 | 0xffL)
+#define LLGF_MASK   (unsigned long)(0xffL << 40 | 0xffL)
+#define SLAG_MASK   (unsigned long)(0xffL << 40 | 0xffL)
+#define LARL_MASK   (unsigned long)(0xff0fL << 32)
+#define LGRL_MASK   (unsigned long)(0xff0fL << 32)
+#define LE_MASK     (unsigned int)(0xff << 24)
+#define LD_MASK     (unsigned int)(0xff << 24)
+#define ST_MASK     (unsigned int)(0xff << 24)
+#define STC_MASK    (unsigned int)(0xff << 24)
+#define STG_MASK    (unsigned long)(0xffL << 40 | 0xffL)
+#define STH_MASK    (unsigned int)(0xff << 24)
+#define STE_MASK    (unsigned int)(0xff << 24)
+#define STD_MASK    (unsigned int)(0xff << 24)
+#define CMPBRANCH_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define REL_LONG_MASK  (unsigned long)(0xff0fL << 32)
+
+ public:
+  // Condition code masks. Details:
+  // - Mask bit#3 must be zero for all compare and branch/trap instructions to ensure
+  //   future compatibility.
+  // - For all arithmetic instructions which set the condition code, mask bit#3
+  //   indicates overflow ("unordered" in float operations).
+  // - "unordered" float comparison results have to be treated as low.
+  // - When overflow/unordered is detected, none of the branch conditions is true,
+  //   except for bcondOverflow/bcondNotOrdered and bcondAlways.
+  // - For INT comparisons, the inverse condition can be calculated as (14-cond).
+  // - For FLOAT comparisons, the inverse condition can be calculated as (15-cond).
+  enum branch_condition {
+    // 4-bit condition-code mask values (0..15). Value 8 selects CC0, 4 selects CC1,
+    // 2 selects CC2, 1 selects CC3 (see bcondEqual/bcondLow/bcondHigh/bcondOverflow).
+    bcondNever       =  0,
+    bcondAlways      = 15,
+
+    // Specific names. Make use of lightweight sync.
+    // Full and lightweight sync operation.
+    bcondFullSync    = 15,
+    bcondLightSync   = 14,
+    bcondNop         =  0,
+
+    // arithmetic compare instructions
+    // arithmetic load and test, insert instructions
+    // Mask bit#3 must be zero for future compatibility.
+    bcondEqual       =  8,
+    bcondNotEqual    =  6,
+    bcondLow         =  4,
+    bcondNotLow      = 10,
+    bcondHigh        =  2,
+    bcondNotHigh     = 12,
+    // arithmetic calculation instructions
+    // Mask bit#3 indicates overflow if detected by instr.
+    // Mask bit#3 = 0 (overflow is not handled by compiler).
+    bcondOverflow    =  1,
+    bcondNotOverflow = 14,
+    bcondZero        =  bcondEqual,
+    bcondNotZero     =  bcondNotEqual,
+    bcondNegative    =  bcondLow,
+    bcondNotNegative =  bcondNotLow,
+    bcondPositive    =  bcondHigh,
+    bcondNotPositive =  bcondNotHigh,
+    bcondNotOrdered  =  1,  // float comparisons ("unordered" maps to CC3)
+    bcondOrdered     = 14,  // float comparisons
+    bcondLowOrNotOrdered  =  bcondLow|bcondNotOrdered,  // float comparisons
+    bcondHighOrNotOrdered =  bcondHigh|bcondNotOrdered, // float comparisons
+    // unsigned arithmetic calculation instructions
+    // Mask bit#0 is not used by these instructions.
+    // There is no indication of overflow for these instr.
+    bcondLogZero             =  2,
+    bcondLogNotZero          =  5,
+    bcondLogNotZero_Borrow   =  4,
+    bcondLogNotZero_NoBorrow =  1,
+    // string search instructions (SRST, SRSTU)
+    bcondFound       =  4,
+    bcondNotFound    =  2,
+    bcondInterrupted =  1,
+    // bit test instructions (TM, TMY, TMLL, ...)
+    bcondAllZero     =  8,
+    bcondMixed       =  6,
+    bcondAllOne      =  1,
+    bcondNotAllZero  =  7 // for tmll
+  };
+
+  enum Condition {
+    // z/Architecture condition-code (CC) values as set by arithmetic
+    // and compare instructions. Two names per value: one for the
+    // sign/zero view, one for the comparison view.
+    negative         = 0,
+    less             = 0,
+    positive         = 1,
+    greater          = 1,
+    zero             = 2,
+    equal            = 2,
+    summary_overflow = 3   // No trailing comma: ill-formed in C++98/03, and the sibling enums omit it.
+  };
+
+  // Rounding mode for float-2-int conversions.
+  // Values 2 and 3 are not valid modes and are rejected by rounding_mode().
+  enum RoundingMode {
+    current_mode      = 0,   // Mode taken from FPC register.
+    biased_to_nearest = 1,
+    to_nearest        = 4,
+    to_zero           = 5,
+    to_plus_infinity  = 6,
+    to_minus_infinity = 7
+  };
+
+  // Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
+  static branch_condition inverse_condition(branch_condition cc);
+  static branch_condition inverse_float_condition(branch_condition cc);
+
+
+  //-----------------------------------------------
+  // instruction property getter methods
+  //-----------------------------------------------
+
+  // Calculate length of instruction (2, 4, or 6 bytes -- see instr_minlen/instr_maxlen).
+  static int instr_len(unsigned char *instr);
+
+  // Longest instructions are 6 bytes on z/Architecture.
+  static int instr_maxlen() { return 6; }
+
+  // Average instruction is 4 bytes on z/Architecture (just a guess).
+  static int instr_avglen() { return 4; }
+
+  // Shortest instructions are 2 bytes on z/Architecture.
+  static int instr_minlen() { return 2; }
+
+  // Move instruction at pc right-justified into passed long int.
+  // Return instr len in bytes as function result.
+  static unsigned int get_instruction(unsigned char *pc, unsigned long *instr);
+
+  // Move instruction in passed (long int) into storage at pc.
+  // The instruction is right-justified in 'instr'; copying the trailing
+  // 'len' bytes of the long therefore relies on big-endian byte order
+  // (which holds on s390).
+  // This code is _NOT_ MT-safe!!
+  static void set_instruction(unsigned char *pc, unsigned long instr, unsigned int len) {
+    memcpy(pc, ((unsigned char *)&instr)+sizeof(unsigned long)-len, len);
+  }
+
+
+  //------------------------------------------
+  // instruction field test methods
+  //------------------------------------------
+
+  // Can 'target' be reached from 'origin' with a 16-bit relative address?
+  // Delegates to RelAddr for the exact range check.
+  // Only used once in s390.ad to implement Matcher::is_short_branch_offset().
+  static bool is_within_range_of_RelAddr16(address target, address origin) {
+    return RelAddr::is_in_range_of_RelAddr16(target, origin);
+  }
+
+
+  //----------------------------------
+  // some diagnostic output
+  //----------------------------------
+
+  static void print_dbg_msg(outputStream* out, unsigned long inst, const char* msg, int ilen) PRODUCT_RETURN;
+  static void dump_code_range(outputStream* out, address pc, const unsigned int range, const char* msg = " ") PRODUCT_RETURN;
+
+ protected:
+
+  //-------------------------------------------------------
+  // instruction field helper methods (internal)
+  //-------------------------------------------------------
+
+  // Return a mask of 1s between hi_bit and lo_bit (inclusive).
+  // Bits are numbered right-to-left (lo_bit 0 = least significant bit).
+  // hi_bit must be < 48 because the longest instruction is 48 bits.
+  static long fmask(unsigned int hi_bit, unsigned int lo_bit) {
+    assert(hi_bit >= lo_bit && hi_bit < 48, "bad bits");
+    return ((1L<<(hi_bit-lo_bit+1)) - 1) << lo_bit;
+  }
+
+  // extract u_field
+  // unsigned value: mask out bits [hi_bit..lo_bit] and shift down to bit 0.
+  static long inv_u_field(long x, int hi_bit, int lo_bit) {
+    return (x & fmask(hi_bit, lo_bit)) >> lo_bit;
+  }
+
+  // extract s_field
+  // Signed value, may need sign extension.
+  // NOTE(review): (-1L)<<n left-shifts a negative value, which is not
+  // portable per the C++ standard (pre-C++20) -- fine for the compilers
+  // this port targets, but worth confirming if toolchains change.
+  static long inv_s_field(long x, int hi_bit, int lo_bit) {
+    x = inv_u_field(x, hi_bit, lo_bit);
+    // Highest extracted bit set -> sign extension.
+    return (x >= (1L<<(hi_bit-lo_bit)) ? x | ((-1L)<<(hi_bit-lo_bit)) : x);
+  }
+
+  // Extract primary opcode from instruction.
+  static int z_inv_op(int  x) { return inv_u_field(x, 31, 24); }
+  static int z_inv_op(long x) { return inv_u_field(x, 47, 40); }
+
+  // For the field extractors below, s is the field's start bit counted from
+  // the left and len is the instruction length in bits.
+  static int inv_reg( long x, int s, int len) { return inv_u_field(x, (len-s)-1, (len-s)-4); }  // Regs are encoded in 4 bits.
+  static int inv_mask(long x, int s, int len) { return inv_u_field(x, (len-s)-1, (len-s)-8); }  // Mask is 8 bits long.
+  static int inv_simm16_48(long x) { return (inv_s_field(x, 31, 16)); }                         // 6-byte instructions only
+  static int inv_simm16(long x)    { return (inv_s_field(x, 15,  0)); }                         // 4-byte instructions only
+  // simm20: DL (12 bits, unsigned here) plus DH (8 bits, sign-extended) shifted above it.
+  static int inv_simm20(long x)    { return (inv_u_field(x, 27, 16) |                           // 6-byte instructions only
+                                             inv_s_field(x, 15, 8)<<12); }
+  static int inv_simm32(long x)    { return (inv_s_field(x, 31,  0)); }                         // 6-byte instructions only
+  static int inv_uimm12(long x)    { return (inv_u_field(x, 11,  0)); }                         // 4-byte instructions only
+
+  // Encode u_field from long value.
+  // Inverse of inv_u_field: place x into bits [hi_bit..lo_bit], asserting
+  // that x fits the field (round-trip checked against inv_u_field).
+  static long u_field(long x, int hi_bit, int lo_bit) {
+    long r = x << lo_bit;
+    assert((r & ~fmask(hi_bit, lo_bit))   == 0, "value out of range");
+    assert(inv_u_field(r, hi_bit, lo_bit) == x, "just checking");
+    return r;
+  }
+
+ public:
+
+  //--------------------------------------------------
+  // instruction field construction methods
+  //--------------------------------------------------
+
+  // Compute relative address (32 bit) for branch from pc to dest.
+  // Delegates to RelAddr; only used once in nativeInst_s390.cpp.
+  static intptr_t z_pcrel_off(address dest, address pc) {
+    return RelAddr::pcrel_off32(dest, pc);
+  }
+
+  // Extract 20-bit signed displacement from the instruction at iLoc.
+  // Only used in disassembler_s390.cpp for temp enhancements.
+  static int inv_simm20_xx(address iLoc) {
+    unsigned long instr = 0;
+    get_instruction(iLoc, &instr);  // Return value (instr length) is not needed here.
+    return inv_simm20(instr);
+  }
+
+  // unsigned immediate, in low bits, nbits long
+  // Asserts x fits in nbits as an unsigned value, then masks to the field width.
+  static long uimm(long x, int nbits) {
+    assert(Immediate::is_uimm(x, nbits), "unsigned constant out of range");
+    return x & fmask(nbits - 1, 0);
+  }
+
+  // signed immediate, in low bits, nbits long
+  // (fmask builds the mask from a long '1', so nbits = 32 does not sign-extend.)
+  static long simm(long x, int nbits) {
+    assert(Immediate::is_simm(x, nbits), "value out of range");
+    return x & fmask(nbits - 1, 0);
+  }
+
+  // immediate (signed or unsigned interpretation), in low bits, nbits long
+  static long imm(int64_t x, int nbits) {
+    // Assert that x can be represented with nbits bits ignoring the sign bits,
+    // i.e. all higher-order bits must be all 0 or all 1.
+    assert((x >> nbits) == 0 || (x >> nbits) == -1, "value out of range");
+    return x & fmask(nbits-1, 0);
+  }
+
+  // A 20-bit displacement is only in instructions of the
+  // RSY, RXY, or SIY format. In these instructions, the D
+  // field consists of a DL (low) field in bit positions 20-31
+  // and of a DH (high) field in bit positions 32-39. The
+  // value of the displacement is formed by appending the
+  // contents of the DH field to the left of the contents of
+  // the DL field.
+  // The result below is right-justified for a 48-bit instruction:
+  // DL lands at bits 16-27, DH at bits 8-15.
+  static long simm20(int64_t ui20) {
+    assert(Immediate::is_simm(ui20, 20), "value out of range");
+    return ( ((ui20        & 0xfffL) << (48-32)) |  // DL
+            (((ui20 >> 12) &  0xffL) << (48-40)));  // DH
+  }
+
+  // Encode a 4-bit register field starting at bit s (from the left) of a len-bit instruction.
+  // regz additionally forbids Z_R0, which must not be used in memory accesses (see assert).
+  static long reg(Register r, int s, int len)  { return u_field(r->encoding(), (len-s)-1, (len-s)-4); }
+  static long reg(int r, int s, int len)       { return u_field(r,             (len-s)-1, (len-s)-4); }
+  static long regt(Register r, int s, int len) { return reg(r, s, len); }
+  static long regz(Register r, int s, int len) { assert(r != Z_R0, "cannot use register R0 in memory access"); return reg(r, s, len); }
+
+  // Operand encoders: range-check the immediate (via uimm/simm/imm) and
+  // place it at start bit s (from the left) of a len-bit instruction.
+  static long uimm4( int64_t ui4,  int s, int len) { return uimm(ui4,   4) << (len-s-4);  }
+  static long uimm6( int64_t ui6,  int s, int len) { return uimm(ui6,   6) << (len-s-6);  }
+  static long uimm8( int64_t ui8,  int s, int len) { return uimm(ui8,   8) << (len-s-8);  }
+  static long uimm12(int64_t ui12, int s, int len) { return uimm(ui12, 12) << (len-s-12); }
+  static long uimm16(int64_t ui16, int s, int len) { return uimm(ui16, 16) << (len-s-16); }
+  static long uimm32(int64_t ui32, int s, int len) { return uimm((unsigned)ui32, 32) << (len-s-32); } // prevent sign extension
+
+  static long simm8( int64_t si8,  int s, int len) { return simm(si8,   8) << (len-s-8);  }
+  static long simm12(int64_t si12, int s, int len) { return simm(si12, 12) << (len-s-12); }
+  static long simm16(int64_t si16, int s, int len) { return simm(si16, 16) << (len-s-16); }
+  static long simm24(int64_t si24, int s, int len) { return simm(si24, 24) << (len-s-24); }
+  static long simm32(int64_t si32, int s, int len) { return simm(si32, 32) << (len-s-32); }
+
+  static long imm8( int64_t i8,  int s, int len)   { return imm(i8,   8) << (len-s-8);  }
+  static long imm12(int64_t i12, int s, int len)   { return imm(i12, 12) << (len-s-12); }
+  static long imm16(int64_t i16, int s, int len)   { return imm(i16, 16) << (len-s-16); }
+  static long imm24(int64_t i24, int s, int len)   { return imm(i24, 24) << (len-s-24); }
+  static long imm32(int64_t i32, int s, int len)   { return imm(i32, 32) << (len-s-32); }
+
+  // Float register fields are 4 bits wide, encoded the same way as GP register fields.
+  static long fregt(FloatRegister r, int s, int len) { return freg(r,s,len); }
+  static long freg( FloatRegister r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); }
+
+  // Rounding mode for float-2-int conversions.
+  // Encodes m as a 4-bit field at start bit s; modes 2 and 3 do not exist
+  // in the RoundingMode enum and are rejected.
+  static long rounding_mode(RoundingMode m, int s, int len) {
+    assert(m != 2 && m != 3, "invalid mode");
+    return uimm(m, 4) << (len-s-4);
+  }
+
+  //--------------------------------------------
+  // instruction field getter methods
+  //--------------------------------------------
+
+  // Read the 32-bit immediate of the instruction_number-th 6-byte
+  // instruction at a (the operand starts after the 2 opcode bytes).
+  static int get_imm32(address a, int instruction_number) {
+    const int* imm_loc = (const int*)(a + 6 * instruction_number + 2);
+    return *imm_loc;
+  }
+
+  // Read the 16-bit immediate of the instruction_number-th 4-byte
+  // instruction at a: it occupies the second halfword of that instruction.
+  static short get_imm16(address a, int instruction_number) {
+    const short* halfwords = (const short*)a;
+    return halfwords[2 * instruction_number + 1];
+  }
+
+
+  //--------------------------------------------
+  // instruction field setter methods
+  //--------------------------------------------
+
+  // Patch the 32-bit immediate of the instruction at a (operand starts at
+  // byte offset 2, after the opcode). Accepts signed or unsigned 32-bit values.
+  static void set_imm32(address a, int64_t s) {
+    assert(Immediate::is_simm32(s) || Immediate::is_uimm32(s), "too big");  // Fixed typo in assert message.
+    int* p = (int *) (a + 2);
+    *p = s;
+  }
+
+  // Patch the 16-bit immediate in the second halfword of the instruction
+  // at instr. Accepts signed or unsigned 16-bit values.
+  static void set_imm16(int* instr, int64_t s) {
+    assert(Immediate::is_simm16(s) || Immediate::is_uimm16(s), "too big");  // Fixed typo in assert message.
+    short* p = ((short *)instr) + 1;
+    *p = s;
+  }
+
+ public:
+
+  // Round x up to the next multiple of a. The mask trick assumes a is a power of 2.
+  static unsigned int align(unsigned int x, unsigned int a) { return ((x + (a - 1)) & ~(a - 1)); }
+  // True if x is a multiple of a (works for any non-zero a, not just powers of 2).
+  static bool    is_aligned(unsigned int x, unsigned int a) { return (0 == x % a); }
+
+  inline void emit_16(int x);
+  inline void emit_32(int x);
+  inline void emit_48(long x);
+
+  // Compare and control flow instructions
+  // =====================================
+
+  // See also commodity routines compare64_and_branch(), compare32_and_branch().
+
+  // compare instructions
+  // compare register
+  inline void z_cr(  Register r1, Register r2);                          // compare (r1, r2)        ; int32
+  inline void z_cgr( Register r1, Register r2);                          // compare (r1, r2)        ; int64
+  inline void z_cgfr(Register r1, Register r2);                          // compare (r1, r2)        ; int64 <--> int32
+   // compare immediate
+  inline void z_chi( Register r1, int64_t i2);                           // compare (r1, i2_imm16)  ; int32
+  inline void z_cfi( Register r1, int64_t i2);                           // compare (r1, i2_imm32)  ; int32
+  inline void z_cghi(Register r1, int64_t i2);                           // compare (r1, i2_imm16)  ; int64
+  inline void z_cgfi(Register r1, int64_t i2);                           // compare (r1, i2_imm32)  ; int64
+   // compare memory
+  inline void z_ch(  Register r1, const Address &a);                     // compare (r1, *(a))               ; int32 <--> int16
+  inline void z_ch(  Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm12+x2+b2)) ; int32 <--> int16
+  inline void z_c(   Register r1, const Address &a);                     // compare (r1, *(a))               ; int32
+  inline void z_c(   Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm12+x2+b2)) ; int32
+  inline void z_cy(  Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm20+x2+b2)) ; int32
+  inline void z_cy(  Register r1, int64_t d2, Register b2);              // compare (r1, *(d2_uimm20+x2+b2)) ; int32
+  inline void z_cy(  Register r1, const Address& a);                     // compare (r1, *(a))               ; int32
+   //inline void z_cgf(Register r1,const Address &a);                    // compare (r1, *(a))               ; int64 <--> int32
+   //inline void z_cgf(Register r1,int64_t d2, Register x2, Register b2);// compare (r1, *(d2_uimm12+x2+b2)) ; int64 <--> int32
+  inline void z_cg(  Register r1, const Address &a);                     // compare (r1, *(a))               ; int64
+  inline void z_cg(  Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm20+x2+b2))  ; int64
+
+   // compare logical instructions
+   // compare register
+  inline void z_clr(  Register r1, Register r2);                         // compare (r1, r2)                 ; uint32
+  inline void z_clgr( Register r1, Register r2);                         // compare (r1, r2)                 ; uint64
+   // compare immediate
+  inline void z_clfi( Register r1, int64_t i2);                          // compare (r1, i2_uimm32)          ; uint32
+  inline void z_clgfi(Register r1, int64_t i2);                          // compare (r1, i2_uimm32)          ; uint64
+  inline void z_cl(   Register r1, const Address &a);                    // compare (r1, *(a))               ; uint32
+  inline void z_cl(   Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_uimm12+x2+b2)) ; uint32
+  inline void z_cly(  Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_uimm20+x2+b2)) ; uint32
+  inline void z_cly(  Register r1, int64_t d2, Register b2);             // compare (r1, *(d2_uimm20+x2+b2)) ; uint32
+  inline void z_cly(  Register r1, const Address& a);                    // compare (r1, *(a))               ; uint32
+  inline void z_clg(  Register r1, const Address &a);                    // compare (r1, *(a))               ; uint64
+  inline void z_clg(  Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_imm20+x2+b2))  ; uint64
+
+  // test under mask
+  inline void z_tmll(Register r1, int64_t i2);           // test under mask, see docu
+  inline void z_tmlh(Register r1, int64_t i2);           // test under mask, see docu
+  inline void z_tmhl(Register r1, int64_t i2);           // test under mask, see docu
+  inline void z_tmhh(Register r1, int64_t i2);           // test under mask, see docu
+
+  // branch instructions
+  inline void z_bc(  branch_condition m1, int64_t d2, Register x2, Register b2);// branch  m1 ? pc = (d2_uimm12+x2+b2)
+  inline void z_bcr( branch_condition m1, Register r2);                         // branch (m1 && r2!=R0) ? pc = r2
+  inline void z_brc( branch_condition i1, int64_t i2);                          // branch  i1 ? pc = pc + i2_imm16
+  inline void z_brc( branch_condition i1, address a);                           // branch  i1 ? pc = a
+  inline void z_brc( branch_condition i1, Label& L);                            // branch  i1 ? pc = Label
+  //inline void z_brcl(branch_condition i1, int64_t i2);                        // branch  i1 ? pc = pc + i2_imm32
+  inline void z_brcl(branch_condition i1, address a);                           // branch  i1 ? pc = a
+  inline void z_brcl(branch_condition i1, Label& L);                            // branch  i1 ? pc = Label
+  inline void z_bctgr(Register r1, Register r2);         // branch on count r1 -= 1; (r1!=0) ? pc = r2  ; r1 is int64
+
+  // branch unconditional / always
+  inline void z_br(Register r2);                         // branch to r2, nop if r2 == Z_R0
+
+
+  // See also commodity routines compare64_and_branch(), compare32_and_branch().
+  // signed comparison and branch
+  inline void z_crb( Register r1, Register r2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 r2) ? goto b4+d4      ; int32  -- z10
+  inline void z_cgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 r2) ? goto b4+d4      ; int64  -- z10
+  inline void z_crj( Register r1, Register r2, branch_condition m3, Label& L);                // (r1 m3 r2) ? goto L          ; int32  -- z10
+  inline void z_crj( Register r1, Register r2, branch_condition m3, address a4);              // (r1 m3 r2) ? goto (pc+a4<<1) ; int32  -- z10
+  inline void z_cgrj(Register r1, Register r2, branch_condition m3, Label& L);                // (r1 m3 r2) ? goto L          ; int64  -- z10
+  inline void z_cgrj(Register r1, Register r2, branch_condition m3, address a4);              // (r1 m3 r2) ? goto (pc+a4<<1) ; int64  -- z10
+  inline void z_cib( Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4);  // (r1 m3 i2_imm8) ? goto b4+d4      ; int32  -- z10
+  inline void z_cgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4);  // (r1 m3 i2_imm8) ? goto b4+d4      ; int64  -- z10
+  inline void z_cij( Register r1, int64_t i2, branch_condition m3, Label& L);                 // (r1 m3 i2_imm8) ? goto L          ; int32  -- z10
+  inline void z_cij( Register r1, int64_t i2, branch_condition m3, address a4);               // (r1 m3 i2_imm8) ? goto (pc+a4<<1) ; int32  -- z10
+  inline void z_cgij(Register r1, int64_t i2, branch_condition m3, Label& L);                 // (r1 m3 i2_imm8) ? goto L          ; int64  -- z10
+  inline void z_cgij(Register r1, int64_t i2, branch_condition m3, address a4);               // (r1 m3 i2_imm8) ? goto (pc+a4<<1) ; int64  -- z10
+  // unsigned comparison and branch
+  inline void z_clrb( Register r1, Register r2, branch_condition m3, int64_t d4, Register b4);// (r1 m3 r2) ? goto b4+d4      ; uint32  -- z10
+  inline void z_clgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4);// (r1 m3 r2) ? goto b4+d4      ; uint64  -- z10
+  inline void z_clrj( Register r1, Register r2, branch_condition m3, Label& L);               // (r1 m3 r2) ? goto L          ; uint32  -- z10
+  inline void z_clrj( Register r1, Register r2, branch_condition m3, address a4);             // (r1 m3 r2) ? goto (pc+a4<<1) ; uint32  -- z10
+  inline void z_clgrj(Register r1, Register r2, branch_condition m3, Label& L);               // (r1 m3 r2) ? goto L          ; uint64  -- z10
+  inline void z_clgrj(Register r1, Register r2, branch_condition m3, address a4);             // (r1 m3 r2) ? goto (pc+a4<<1) ; uint64  -- z10
+  inline void z_clib( Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_uimm8) ? goto b4+d4      ; uint32  -- z10
+  inline void z_clgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_uimm8) ? goto b4+d4      ; uint64  -- z10
+  inline void z_clij( Register r1, int64_t i2, branch_condition m3, Label& L);                // (r1 m3 i2_uimm8) ? goto L          ; uint32  -- z10
+  inline void z_clij( Register r1, int64_t i2, branch_condition m3, address a4);              // (r1 m3 i2_uimm8) ? goto (pc+a4<<1) ; uint32  -- z10
+  inline void z_clgij(Register r1, int64_t i2, branch_condition m3, Label& L);                // (r1 m3 i2_uimm8) ? goto L          ; uint64  -- z10
+  inline void z_clgij(Register r1, int64_t i2, branch_condition m3, address a4);              // (r1 m3 i2_uimm8) ? goto (pc+a4<<1) ; uint64  -- z10
+
+  // Compare and trap instructions.
+  // signed comparison
+  inline void z_crt(Register r1,  Register r2, int64_t m3);  // (r1 m3 r2)        ? trap ; int32  -- z10
+  inline void z_cgrt(Register r1, Register r2, int64_t m3);  // (r1 m3 r2)        ? trap ; int64  -- z10
+  inline void z_cit(Register r1,  int64_t i2, int64_t m3);   // (r1 m3 i2_imm16)  ? trap ; int32  -- z10
+  inline void z_cgit(Register r1, int64_t i2, int64_t m3);   // (r1 m3 i2_imm16)  ? trap ; int64  -- z10
+  // unsigned comparison
+  inline void z_clrt(Register r1,  Register r2, int64_t m3); // (r1 m3 r2)        ? trap ; uint32 -- z10
+  inline void z_clgrt(Register r1, Register r2, int64_t m3); // (r1 m3 r2)        ? trap ; uint64 -- z10
+  inline void z_clfit(Register r1,  int64_t i2, int64_t m3); // (r1 m3 i2_uimm16) ? trap ; uint32 -- z10
+  inline void z_clgit(Register r1, int64_t i2, int64_t m3);  // (r1 m3 i2_uimm16) ? trap ; uint64 -- z10
+
+  inline void z_illtrap();
+  inline void z_illtrap(int id);
+  inline void z_illtrap_eyecatcher(unsigned short xpattern, unsigned short pattern);
+
+
+  // load address, add for addresses
+  // ===============================
+
+  // The versions without suffix z assert that the base reg is != Z_R0.
+  // Z_R0 is interpreted as constant '0'. The variants with Address operand
+  // check this automatically, so no two versions are needed.
+  inline void z_layz(Register r1, int64_t d2, Register x2, Register b2); // Special version. Allows Z_R0 as base reg.
+  inline void z_lay(Register r1, const Address &a);                      // r1 = a
+  inline void z_lay(Register r1, int64_t d2, Register x2, Register b2);  // r1 = d2_imm20+x2+b2
+  inline void z_laz(Register r1, int64_t d2, Register x2, Register b2);  // Special version. Allows Z_R0 as base reg.
+  inline void z_la(Register r1, const Address &a);                       // r1 = a                ; unsigned immediate!
+  inline void z_la(Register r1, int64_t d2, Register x2, Register b2);   // r1 = d2_uimm12+x2+b2  ; unsigned immediate!
+  inline void z_larl(Register r1, int64_t i2);                           // r1 = pc + i2_imm32<<1;
+  inline void z_larl(Register r1, address a2);                           // r1 = pc + i2_imm32<<1;
+
+  // Load instructions for integers
+  // ==============================
+
+  // Address as base + index + offset
+  inline void z_lb( Register r1, const Address &a);                     // load r1 = *(a)              ; int32 <- int8
+  inline void z_lb( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32 <- int8
+  inline void z_lh( Register r1, const Address &a);                     // load r1 = *(a)              ; int32 <- int16
+  inline void z_lh( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2); int32 <- int16
+  inline void z_lhy(Register r1, const Address &a);                     // load r1 = *(a)              ; int32 <- int16
+  inline void z_lhy(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32 <- int16
+  inline void z_l(  Register r1, const Address& a);                     // load r1 = *(a)              ; int32
+  inline void z_l(  Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2); int32
+  inline void z_ly( Register r1, const Address& a);                     // load r1 = *(a)              ; int32
+  inline void z_ly( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32
+
+  inline void z_lgb(Register r1, const Address &a);                     // load r1 = *(a)              ; int64 <- int8
+  inline void z_lgb(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int8
+  inline void z_lgh(Register r1, const Address &a);                     // load r1 = *(a)              ; int64 <- int16
+  inline void z_lgh(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int16
+  inline void z_lgf(Register r1, const Address &a);                     // load r1 = *(a)              ; int64 <- int32
+  inline void z_lgf(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int32
+  inline void z_lg( Register r1, const Address& a);                     // load r1 = *(a)              ; int64 <- int64
+  inline void z_lg( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int64
+
+  // load and test (sets the condition code from the loaded value)
+  inline void z_lt(  Register r1, const Address &a);                    // load and test r1 = *(a)              ; int32
+  inline void z_lt(  Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int32
+  inline void z_ltg( Register r1, const Address &a);                    // load and test r1 = *(a)              ; int64
+  inline void z_ltg( Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int64
+  inline void z_ltgf(Register r1, const Address &a);                    // load and test r1 = *(a)              ; int64 <- int32
+  inline void z_ltgf(Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int64 <- int32
+
+  // load unsigned integer - zero extended
+  inline void z_llc( Register r1, const Address& a);                    // load r1 = *(a)              ; uint32 <- uint8
+  inline void z_llc( Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint32 <- uint8
+  inline void z_llh( Register r1, const Address& a);                    // load r1 = *(a)              ; uint32 <- uint16
+  inline void z_llh( Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint32 <- uint16
+  inline void z_llgc(Register r1, const Address& a);                    // load r1 = *(a)              ; uint64 <- uint8
+  inline void z_llgc(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint8
+  inline void z_llgc( Register r1, int64_t d2, Register b2);            // load r1 = *(d2_imm20+b2)    ; uint64 <- uint8
+  inline void z_llgh(Register r1, const Address& a);                    // load r1 = *(a)              ; uint64 <- uint16
+  inline void z_llgh(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint16
+  inline void z_llgf(Register r1, const Address& a);                    // load r1 = *(a)              ; uint64 <- uint32
+  inline void z_llgf(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint32
+
+  // pc relative addressing
+  inline void z_lhrl( Register r1, int64_t i2);   // load r1 = *(pc + i2_imm32<<1) ; int32 <- int16    -- z10
+  inline void z_lrl(  Register r1, int64_t i2);   // load r1 = *(pc + i2_imm32<<1) ; int32             -- z10
+  inline void z_lghrl(Register r1, int64_t i2);   // load r1 = *(pc + i2_imm32<<1) ; int64 <- int16    -- z10
+  inline void z_lgfrl(Register r1, int64_t i2);   // load r1 = *(pc + i2_imm32<<1) ; int64 <- int32    -- z10
+  inline void z_lgrl( Register r1, int64_t i2);   // load r1 = *(pc + i2_imm32<<1) ; int64             -- z10
+
+  inline void z_llhrl( Register r1, int64_t i2);  // load r1 = *(pc + i2_imm32<<1) ; uint32 <- uint16  -- z10
+  inline void z_llghrl(Register r1, int64_t i2);  // load r1 = *(pc + i2_imm32<<1) ; uint64 <- uint16  -- z10
+  inline void z_llgfrl(Register r1, int64_t i2);  // load r1 = *(pc + i2_imm32<<1) ; uint64 <- uint32  -- z10
+
+  // Store instructions for integers
+  // ===============================
+
+  // Address as base + index + offset
+  inline void z_stc( Register r1, const Address &d);                     // store *(a)               = r1  ; int8
+  inline void z_stc( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1  ; int8
+  inline void z_stcy(Register r1, const Address &d);                     // store *(a)               = r1  ; int8
+  inline void z_stcy(Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2)  = r1  ; int8
+  inline void z_sth( Register r1, const Address &d);                     // store *(a)               = r1  ; int16
+  inline void z_sth( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1  ; int16
+  inline void z_sthy(Register r1, const Address &d);                     // store *(a)               = r1  ; int16
+  inline void z_sthy(Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2)  = r1  ; int16
+  inline void z_st(  Register r1, const Address &d);                     // store *(a)               = r1  ; int32
+  inline void z_st(  Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1  ; int32
+  inline void z_sty( Register r1, const Address &d);                     // store *(a)               = r1  ; int32
+  inline void z_sty( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2)  = r1  ; int32
+  inline void z_stg( Register r1, const Address &d);                     // store *(a)               = r1  ; int64
+  inline void z_stg( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2)  = r1  ; int64
+
+  inline void z_stcm( Register r1, int64_t m3, int64_t d2, Register b2); // store characters under mask, d2_uimm12
+  inline void z_stcmy(Register r1, int64_t m3, int64_t d2, Register b2); // store characters under mask, d2_imm20
+  inline void z_stcmh(Register r1, int64_t m3, int64_t d2, Register b2); // store characters under mask (high word), d2_imm20
+
+  // pc relative addressing
+  inline void z_sthrl(Register r1, int64_t i2);   // store *(pc + i2_imm32<<1) = r1 ; int16  -- z10
+  inline void z_strl( Register r1, int64_t i2);   // store *(pc + i2_imm32<<1) = r1 ; int32  -- z10
+  inline void z_stgrl(Register r1, int64_t i2);   // store *(pc + i2_imm32<<1) = r1 ; int64  -- z10
+
+
+  // Load and store immediates
+  // =========================
+
+  // load immediate
+  inline void z_lhi( Register r1, int64_t i2);                  // r1 = i2_imm16    ; int32 <- int16
+  inline void z_lghi(Register r1, int64_t i2);                  // r1 = i2_imm16    ; int64 <- int16
+  inline void z_lgfi(Register r1, int64_t i2);                  // r1 = i2_imm32    ; int64 <- int32
+
+  inline void z_llihf(Register r1, int64_t i2);                 // r1 = i2_imm32    ; uint64 <- (uint32<<32)
+  inline void z_llilf(Register r1, int64_t i2);                 // r1 = i2_imm32    ; uint64 <- uint32
+  inline void z_llihh(Register r1, int64_t i2);                 // r1 = i2_imm16    ; uint64 <- (uint16<<48)
+  inline void z_llihl(Register r1, int64_t i2);                 // r1 = i2_imm16    ; uint64 <- (uint16<<32)
+  inline void z_llilh(Register r1, int64_t i2);                 // r1 = i2_imm16    ; uint64 <- (uint16<<16)
+  inline void z_llill(Register r1, int64_t i2);                 // r1 = i2_imm16    ; uint64 <- uint16
+
+  // insert immediate
+  inline void z_ic(  Register r1, int64_t d2, Register x2, Register b2); // insert character, d2_uimm12
+  inline void z_icy( Register r1, int64_t d2, Register x2, Register b2); // insert character, d2_imm20
+  inline void z_icm( Register r1, int64_t m3, int64_t d2, Register b2);  // insert characters under mask, d2_uimm12
+  inline void z_icmy(Register r1, int64_t m3, int64_t d2, Register b2);  // insert characters under mask, d2_imm20
+  inline void z_icmh(Register r1, int64_t m3, int64_t d2, Register b2);  // insert characters under mask (high word), d2_imm20
+
+  inline void z_iihh(Register r1, int64_t i2);                  // insert immediate  r1[ 0-15] = i2_imm16
+  inline void z_iihl(Register r1, int64_t i2);                  // insert immediate  r1[16-31] = i2_imm16
+  inline void z_iilh(Register r1, int64_t i2);                  // insert immediate  r1[32-47] = i2_imm16
+  inline void z_iill(Register r1, int64_t i2);                  // insert immediate  r1[48-63] = i2_imm16
+  inline void z_iihf(Register r1, int64_t i2);                  // insert immediate  r1[32-63] = i2_imm32
+  inline void z_iilf(Register r1, int64_t i2);                  // insert immediate  r1[ 0-31] = i2_imm32
+
+  // store immediate (sign-extended 16-bit immediate)
+  inline void z_mvhhi(const Address &d, int64_t i2);            // store *(d)           = i2_imm16 ; int16  -- z10
+  inline void z_mvhhi(int64_t d1, Register b1, int64_t i2);     // store *(d1_imm12+b1) = i2_imm16 ; int16  -- z10
+  inline void z_mvhi( const Address &d, int64_t i2);            // store *(d)           = i2_imm16 ; int32  -- z10
+  inline void z_mvhi( int64_t d1, Register b1, int64_t i2);     // store *(d1_imm12+b1) = i2_imm16 ; int32  -- z10
+  inline void z_mvghi(const Address &d, int64_t i2);            // store *(d)           = i2_imm16 ; int64  -- z10
+  inline void z_mvghi(int64_t d1, Register b1, int64_t i2);     // store *(d1_imm12+b1) = i2_imm16 ; int64  -- z10
+
+  // Move and Convert instructions
+  // =============================
+
+  // move, sign extend
+  inline void z_lbr(Register r1, Register r2);             // move r1 = r2 ; int32  <- int8
+  inline void z_lhr( Register r1, Register r2);            // move r1 = r2 ; int32  <- int16
+  inline void z_lr(Register r1, Register r2);              // move r1 = r2 ; int32, no sign extension
+  inline void z_lgbr(Register r1, Register r2);            // move r1 = r2 ; int64  <- int8
+  inline void z_lghr(Register r1, Register r2);            // move r1 = r2 ; int64  <- int16
+  inline void z_lgfr(Register r1, Register r2);            // move r1 = r2 ; int64  <- int32
+  inline void z_lgr(Register r1, Register r2);             // move r1 = r2 ; int64
+  // move, zero extend
+  inline void z_llhr( Register r1, Register r2);           // move r1 = r2 ; uint32 <- uint16
+  inline void z_llgcr(Register r1, Register r2);           // move r1 = r2 ; uint64 <- uint8
+  inline void z_llghr(Register r1, Register r2);           // move r1 = r2 ; uint64 <- uint16
+  inline void z_llgfr(Register r1, Register r2);           // move r1 = r2 ; uint64 <- uint32
+
+  // move and test register (sets the condition code from the moved value)
+  inline void z_ltr(Register r1, Register r2);             // load/move and test r1 = r2; int32
+  inline void z_ltgr(Register r1, Register r2);            // load/move and test r1 = r2; int64
+  inline void z_ltgfr(Register r1, Register r2);           // load/move and test r1 = r2; int64 <- int32
+
+  // move and byte-reverse
+  inline void z_lrvr( Register r1, Register r2);           // move and reverse byte order r1 = r2; int32
+  inline void z_lrvgr(Register r1, Register r2);           // move and reverse byte order r1 = r2; int64
+
+
+  // Arithmetic instructions (Integer only)
+  // ======================================
+  // For float arithmetic instructions scroll further down
+  // Add logical differs in the condition codes set!
+
+  // add registers
+  inline void z_ar(   Register r1, Register r2);                      // add         r1 = r1 + r2  ; int32
+  inline void z_agr(  Register r1, Register r2);                      // add         r1 = r1 + r2  ; int64
+  inline void z_agfr( Register r1, Register r2);                      // add         r1 = r1 + r2  ; int64 <- int32
+  inline void z_ark(  Register r1, Register r2, Register r3);         // add         r1 = r2 + r3  ; int32
+  inline void z_agrk( Register r1, Register r2, Register r3);         // add         r1 = r2 + r3  ; int64
+
+  inline void z_alr(  Register r1, Register r2);                      // add logical r1 = r1 + r2  ; int32
+  inline void z_algr( Register r1, Register r2);                      // add logical r1 = r1 + r2  ; int64
+  inline void z_algfr(Register r1, Register r2);                      // add logical r1 = r1 + r2  ; int64 <- int32
+  inline void z_alrk( Register r1, Register r2, Register r3);         // add logical r1 = r2 + r3  ; int32
+  inline void z_algrk(Register r1, Register r2, Register r3);         // add logical r1 = r2 + r3  ; int64
+  inline void z_alcgr(Register r1, Register r2);                      // add logical with carry r1 = r1 + r2 + c  ; int64
+
+  // add immediate
+  inline void z_ahi(  Register r1, int64_t i2);                       // add         r1 = r1 + i2_imm16 ; int32
+  inline void z_afi(  Register r1, int64_t i2);                       // add         r1 = r1 + i2_imm32 ; int32
+  inline void z_alfi( Register r1, int64_t i2);                       // add logical r1 = r1 + i2_imm32 ; int32
+  inline void z_aghi( Register r1, int64_t i2);                       // add         r1 = r1 + i2_imm16 ; int64
+  inline void z_agfi( Register r1, int64_t i2);                       // add         r1 = r1 + i2_imm32 ; int64
+  inline void z_algfi(Register r1, int64_t i2);                       // add logical r1 = r1 + i2_imm32 ; int64
+  inline void z_ahik( Register r1, Register r3, int64_t i2);          // add         r1 = r3 + i2_imm16 ; int32
+  inline void z_aghik(Register r1, Register r3, int64_t i2);          // add         r1 = r3 + i2_imm16 ; int64
+  inline void z_aih(  Register r1, int64_t i2);                       // add         r1 = r1 + i2_imm32 ; int32 (HiWord)
+
+  // add memory
+  inline void z_a( Register r1, int64_t d2, Register x2, Register b2);  // add r1 = r1 + *(d2_uimm12+x2+b2) ; int32
+  inline void z_ay(  Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2)  ; int32
+  inline void z_ag(  Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2)  ; int64
+  inline void z_agf( Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2)  ; int64 <- int32
+  inline void z_al(  Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_uimm12+x2+b2) ; int32
+  inline void z_aly( Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2)  ; int32
+  inline void z_alg( Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2)  ; int64
+  inline void z_algf(Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2)  ; int64 <- int32
+  inline void z_a(   Register r1, const Address& a);                  // add r1 = r1 + *(a)               ; int32
+  inline void z_ay(  Register r1, const Address& a);                  // add r1 = r1 + *(a)               ; int32
+  inline void z_al(  Register r1, const Address& a);                  // add r1 = r1 + *(a)               ; int32
+  inline void z_aly( Register r1, const Address& a);                  // add r1 = r1 + *(a)               ; int32
+  inline void z_ag(  Register r1, const Address& a);                  // add r1 = r1 + *(a)               ; int64
+  inline void z_agf( Register r1, const Address& a);                  // add r1 = r1 + *(a)               ; int64 <- int32
+  inline void z_alg( Register r1, const Address& a);                  // add r1 = r1 + *(a)               ; int64
+  inline void z_algf(Register r1, const Address& a);                  // add r1 = r1 + *(a)               ; int64 <- int32
+
+
+  inline void z_alhsik( Register r1, Register r3, int64_t i2);    // add logical r1 = r3 + i2_imm16   ; int32
+  inline void z_alghsik(Register r1, Register r3, int64_t i2);    // add logical r1 = r3 + i2_imm16   ; int64
+
+  inline void z_asi(  int64_t d1, Register b1, int64_t i2);       // add           *(d1_imm20+b1) += i2_imm8 ; int32   -- z10
+  inline void z_agsi( int64_t d1, Register b1, int64_t i2);       // add           *(d1_imm20+b1) += i2_imm8 ; int64   -- z10
+  inline void z_alsi( int64_t d1, Register b1, int64_t i2);       // add logical   *(d1_imm20+b1) += i2_imm8 ; uint32  -- z10
+  inline void z_algsi(int64_t d1, Register b1, int64_t i2);       // add logical   *(d1_imm20+b1) += i2_imm8 ; uint64  -- z10
+  inline void z_asi(  const Address& d, int64_t i2);              // add           *(d) += i2_imm8           ; int32   -- z10
+  inline void z_agsi( const Address& d, int64_t i2);              // add           *(d) += i2_imm8           ; int64   -- z10
+  inline void z_alsi( const Address& d, int64_t i2);              // add logical   *(d) += i2_imm8           ; uint32  -- z10
+  inline void z_algsi(const Address& d, int64_t i2);              // add logical   *(d) += i2_imm8           ; uint64  -- z10
+
+  // negate
+  // NOTE(review): r2 defaults to noreg; presumably the source then defaults
+  // to r1 (r1 = -r1) -- confirm against assembler_s390.inline.hpp.
+  inline void z_lcr(  Register r1, Register r2 = noreg);              // neg r1 = -r2   ; int32
+  inline void z_lcgr( Register r1, Register r2 = noreg);              // neg r1 = -r2   ; int64
+  inline void z_lcgfr(Register r1, Register r2);                      // neg r1 = -r2   ; int64 <- int32
+  inline void z_lnr(  Register r1, Register r2 = noreg);              // neg r1 = -|r2| ; int32
+  inline void z_lngr( Register r1, Register r2 = noreg);              // neg r1 = -|r2| ; int64
+  inline void z_lngfr(Register r1, Register r2);                      // neg r1 = -|r2| ; int64 <- int32
+
+  // Subtract instructions
+  // sub registers
+  inline void z_sr(   Register r1, Register r2);                      // sub         r1 = r1 - r2                ; int32
+  inline void z_sgr(  Register r1, Register r2);                      // sub         r1 = r1 - r2                ; int64
+  inline void z_sgfr( Register r1, Register r2);                      // sub         r1 = r1 - r2                ; int64 <- int32
+  inline void z_srk(  Register r1, Register r2, Register r3);         // sub         r1 = r2 - r3                ; int32
+  inline void z_sgrk( Register r1, Register r2, Register r3);         // sub         r1 = r2 - r3                ; int64
+
+  inline void z_slr(  Register r1, Register r2);                      // sub logical r1 = r1 - r2                ; int32
+  inline void z_slgr( Register r1, Register r2);                      // sub logical r1 = r1 - r2                ; int64
+  inline void z_slgfr(Register r1, Register r2);                      // sub logical r1 = r1 - r2                ; int64 <- int32
+  inline void z_slrk( Register r1, Register r2, Register r3);         // sub logical r1 = r2 - r3                ; int32
+  inline void z_slgrk(Register r1, Register r2, Register r3);         // sub logical r1 = r2 - r3                ; int64
+  inline void z_slfi( Register r1, int64_t i2);                       // sub logical r1 = r1 - i2_uimm32         ; int32
+  inline void z_slgfi(Register r1, int64_t i2);                       // sub logical r1 = r1 - i2_uimm32         ; int64
+
+  // sub memory
+  inline void z_s(   Register r1, int64_t d2, Register x2, Register b2);  // sub         r1 = r1 - *(d2_uimm12+x2+b2); int32
+  inline void z_sy(  Register r1, int64_t d2, Register x2, Register b2);  // sub         r1 = r1 - *(d2_imm20+x2+b2) ; int32
+  inline void z_sg(  Register r1, int64_t d2, Register x2, Register b2);  // sub         r1 = r1 - *(d2_imm20+x2+b2) ; int64
+  inline void z_sgf( Register r1, int64_t d2, Register x2, Register b2);  // sub         r1 = r1 - *(d2_imm20+x2+b2) ; int64 - int32
+  inline void z_slg( Register r1, int64_t d2, Register x2, Register b2);  // sub logical r1 = r1 - *(d2_imm20+x2+b2) ; uint64
+  inline void z_slgf(Register r1, int64_t d2, Register x2, Register b2);  // sub logical r1 = r1 - *(d2_imm20+x2+b2) ; uint64 - uint32
+  inline void z_s(   Register r1, const Address& a);                      // sub         r1 = r1 - *(a)              ; int32
+  inline void z_sy(  Register r1, const Address& a);                      // sub         r1 = r1 - *(a)              ; int32
+  inline void z_sg(  Register r1, const Address& a);                      // sub         r1 = r1 - *(a)              ; int64
+  inline void z_sgf( Register r1, const Address& a);                      // sub         r1 = r1 - *(a)              ; int64 - int32
+  inline void z_slg( Register r1, const Address& a);                      // sub         r1 = r1 - *(a)              ; uint64
+  inline void z_slgf(Register r1, const Address& a);                      // sub         r1 = r1 - *(a)              ; uint64 - uint32
+
+  inline void z_sh(  Register r1, int64_t d2, Register x2, Register b2);  // sub         r1 = r1 - *(d2_uimm12+x2+b2); int32 - int16
+  inline void z_shy( Register r1, int64_t d2, Register x2, Register b2);  // sub         r1 = r1 - *(d2_imm20+x2+b2) ; int32 - int16
+  inline void z_sh(  Register r1, const Address &a);                      // sub         r1 = r1 - *(a)              ; int32 - int16
+  inline void z_shy( Register r1, const Address &a);                      // sub         r1 = r1 - *(a)              ; int32 - int16
+
+  // Multiplication instructions
+  // mul registers
+  inline void z_msr(  Register r1, Register r2);                          // mul r1 = r1 * r2          ; int32
+  inline void z_msgr( Register r1, Register r2);                          // mul r1 = r1 * r2          ; int64
+  inline void z_msgfr(Register r1, Register r2);                          // mul r1 = r1 * r2          ; int64 <- int32
+  inline void z_mlr(  Register r1, Register r2);                          // mul r1 = r1 * r2          ; int32 unsigned, r1 is an even-odd reg pair
+  inline void z_mlgr( Register r1, Register r2);                          // mul r1 = r1 * r2          ; int64 unsigned, r1 is an even-odd reg pair
+  // mul register - memory
+  inline void z_mhy( Register r1, int64_t d2, Register x2, Register b2);  // mul r1 = r1 * *(d2+x2+b2) ; int32 * int16
+  inline void z_msy( Register r1, int64_t d2, Register x2, Register b2);  // mul r1 = r1 * *(d2+x2+b2)
+  inline void z_msg( Register r1, int64_t d2, Register x2, Register b2);  // mul r1 = r1 * *(d2+x2+b2)
+  inline void z_msgf(Register r1, int64_t d2, Register x2, Register b2);  // mul r1 = r1 * *(d2+x2+b2)
+  inline void z_ml(  Register r1, int64_t d2, Register x2, Register b2);  // mul r1 = r1 * *(d2+x2+b2) ; unsigned, r1 is an even-odd reg pair
+  inline void z_mlg( Register r1, int64_t d2, Register x2, Register b2);  // mul r1 = r1 * *(d2+x2+b2) ; unsigned, r1 is an even-odd reg pair
+  inline void z_mhy( Register r1, const Address& a);                      // mul r1 = r1 * *(a)
+  inline void z_msy( Register r1, const Address& a);                      // mul r1 = r1 * *(a)
+  inline void z_msg( Register r1, const Address& a);                      // mul r1 = r1 * *(a)
+  inline void z_msgf(Register r1, const Address& a);                      // mul r1 = r1 * *(a)
+  inline void z_ml(  Register r1, const Address& a);                      // mul r1 = r1 * *(a)
+  inline void z_mlg( Register r1, const Address& a);                      // mul r1 = r1 * *(a)
+
+  inline void z_msfi( Register r1, int64_t i2);   // mult r1 = r1 * i2_imm32;   int32  -- z10
+  inline void z_msgfi(Register r1, int64_t i2);   // mult r1 = r1 * i2_imm32;   int64  -- z10
+  inline void z_mhi(  Register r1, int64_t i2);   // mult r1 = r1 * i2_imm16;   int32
+  inline void z_mghi( Register r1, int64_t i2);   // mult r1 = r1 * i2_imm16;   int64
+
+  // Division instructions
+  inline void z_dsgr( Register r1, Register r2);      // div  r1 = r1 / r2               ; int64/int32 needs reg pair!
+  inline void z_dsgfr(Register r1, Register r2);      // div  r1 = r1 / r2               ; int64/int32 needs reg pair!
+
+  // Logic instructions
+  // ===================
+
+  // and
+  inline void z_n(   Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_uimm12+x2+b2) ; int32
+  inline void z_ny(  Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_imm20+x2+b2)  ; int32
+  inline void z_ng(  Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_imm20+x2+b2)  ; int64
+  inline void z_n(   Register r1, const Address& a);                     // and r1 = r1 & *(a)               ; int32
+  inline void z_ny(  Register r1, const Address& a);                     // and r1 = r1 & *(a)               ; int32
+  inline void z_ng(  Register r1, const Address& a);                     // and r1 = r1 & *(a)               ; int64
+
+  inline void z_nr(  Register r1, Register r2);               // and r1 = r1 & r2         ; int32
+  inline void z_ngr( Register r1, Register r2);               // and r1 = r1 & r2         ; int64
+  inline void z_nrk( Register r1, Register r2, Register r3);  // and r1 = r2 & r3         ; int32
+  inline void z_ngrk(Register r1, Register r2, Register r3);  // and r1 = r2 & r3         ; int64
+
+  inline void z_nihh(Register r1, int64_t i2);                // and r1 = r1 & i2_imm16   ; and only for bits  0-15
+  inline void z_nihl(Register r1, int64_t i2);                // and r1 = r1 & i2_imm16   ; and only for bits 16-31
+  inline void z_nilh(Register r1, int64_t i2);                // and r1 = r1 & i2_imm16   ; and only for bits 32-47
+  inline void z_nill(Register r1, int64_t i2);                // and r1 = r1 & i2_imm16   ; and only for bits 48-63
+  inline void z_nihf(Register r1, int64_t i2);                // and r1 = r1 & i2_imm32   ; and only for bits  0-31
+  inline void z_nilf(Register r1, int64_t i2);                // and r1 = r1 & i2_imm32   ; and only for bits 32-63  see also MacroAssembler::nilf.
+
+  // or
+  inline void z_o(   Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_uimm12+x2+b2) ; int32
+  inline void z_oy(  Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_imm20+x2+b2)  ; int32
+  inline void z_og(  Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_imm20+x2+b2)  ; int64
+  inline void z_o(   Register r1, const Address& a);                     // or r1 = r1 | *(a)               ; int32
+  inline void z_oy(  Register r1, const Address& a);                     // or r1 = r1 | *(a)               ; int32
+  inline void z_og(  Register r1, const Address& a);                     // or r1 = r1 | *(a)               ; int64
+
+  inline void z_or(  Register r1, Register r2);               // or r1 = r1 | r2; int32
+  inline void z_ogr( Register r1, Register r2);               // or r1 = r1 | r2; int64
+  inline void z_ork( Register r1, Register r2, Register r3);  // or r1 = r2 | r3         ; int32
+  inline void z_ogrk(Register r1, Register r2, Register r3);  // or r1 = r2 | r3         ; int64
+
+  inline void z_oihh(Register r1, int64_t i2);                // or r1 = r1 | i2_imm16   ; or only for bits  0-15
+  inline void z_oihl(Register r1, int64_t i2);                // or r1 = r1 | i2_imm16   ; or only for bits 16-31
+  inline void z_oilh(Register r1, int64_t i2);                // or r1 = r1 | i2_imm16   ; or only for bits 32-47
+  inline void z_oill(Register r1, int64_t i2);                // or r1 = r1 | i2_imm16   ; or only for bits 48-63
+  inline void z_oihf(Register r1, int64_t i2);                // or r1 = r1 | i2_imm32   ; or only for bits  0-31
+  inline void z_oilf(Register r1, int64_t i2);                // or r1 = r1 | i2_imm32   ; or only for bits 32-63
+
+  // xor
+  inline void z_x(   Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_uimm12+x2+b2) ; int32
+  inline void z_xy(  Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_imm20+x2+b2)  ; int32
+  inline void z_xg(  Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_imm20+x2+b2)  ; int64
+  inline void z_x(   Register r1, const Address& a);                     // xor r1 = r1 ^ *(a)               ; int32
+  inline void z_xy(  Register r1, const Address& a);                     // xor r1 = r1 ^ *(a)               ; int32
+  inline void z_xg(  Register r1, const Address& a);                     // xor r1 = r1 ^ *(a)               ; int64
+
+  inline void z_xr(  Register r1, Register r2);               // xor r1 = r1 ^ r2         ; int32
+  inline void z_xgr( Register r1, Register r2);               // xor r1 = r1 ^ r2         ; int64
+  inline void z_xrk( Register r1, Register r2, Register r3);  // xor r1 = r2 ^ r3         ; int32
+  inline void z_xgrk(Register r1, Register r2, Register r3);  // xor r1 = r2 ^ r3         ; int64
+
+  inline void z_xihf(Register r1, int64_t i2);                // xor r1 = r1 ^ i2_imm32   ; xor only for bits  0-31
+  inline void z_xilf(Register r1, int64_t i2);                // xor r1 = r1 ^ i2_imm32   ; xor only for bits 32-63
+
+  // shift
+  inline void z_sla( Register r1,              int64_t d2, Register b2=Z_R0); // shift left  r1 = r1 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved!
+  inline void z_slag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left  r1 = r3 << ((d2+b2)&0x3f) ; int64, only 63 bits shifted, sign preserved!
+  inline void z_sra( Register r1,              int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, sign extended
+  inline void z_srag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, sign extended
+  inline void z_sll( Register r1,              int64_t d2, Register b2=Z_R0); // shift left  r1 = r1 << ((d2+b2)&0x3f) ; int32, zeros added
+  inline void z_sllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left  r1 = r3 << ((d2+b2)&0x3f) ; int64, zeros added
+  inline void z_srl( Register r1,              int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, zero extended
+  inline void z_srlg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, zero extended
+
+  // rotate
+  inline void z_rll( Register r1, Register r3, int64_t d2, Register b2=Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int32  -- z10
+  inline void z_rllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int64  -- z10
+
+  // rotate then AND/XOR/OR/insert
+  inline void z_rnsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then AND selected bits  -- z196
+  inline void z_rxsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then XOR selected bits  -- z196
+  inline void z_rosbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then OR  selected bits  -- z196
+  inline void z_risbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool zero_rest = false); // rotate then INS selected bits  -- z196
+
+
+  // memory-immediate instructions (8-bit immediate)
+  // ===============================================
+  // The *y variants are SIY format with a signed 20-bit displacement.
+
+  inline void z_cli( int64_t d1, Register b1, int64_t i2); // compare *(d1_imm12+b1) with i2_imm8         ; int8
+  inline void z_mvi( int64_t d1, Register b1, int64_t i2); // store   *(d1_imm12+b1)  = i2_imm8           ; int8
+  inline void z_tm(  int64_t d1, Register b1, int64_t i2); // test    *(d1_imm12+b1) against mask i2_imm8 ; int8
+  inline void z_ni(  int64_t d1, Register b1, int64_t i2); // store   *(d1_imm12+b1) &= i2_imm8           ; int8
+  inline void z_oi(  int64_t d1, Register b1, int64_t i2); // store   *(d1_imm12+b1) |= i2_imm8           ; int8
+  inline void z_xi(  int64_t d1, Register b1, int64_t i2); // store   *(d1_imm12+b1) ^= i2_imm8           ; int8
+  inline void z_cliy(int64_t d1, Register b1, int64_t i2); // compare *(d1_imm20+b1) with i2_imm8         ; int8
+  inline void z_mviy(int64_t d1, Register b1, int64_t i2); // store   *(d1_imm20+b1)  = i2_imm8           ; int8
+  inline void z_tmy( int64_t d1, Register b1, int64_t i2); // test    *(d1_imm20+b1) against mask i2_imm8 ; int8
+  inline void z_niy( int64_t d1, Register b1, int64_t i2); // store   *(d1_imm20+b1) &= i2_imm8           ; int8
+  inline void z_oiy( int64_t d1, Register b1, int64_t i2); // store   *(d1_imm20+b1) |= i2_imm8           ; int8
+  inline void z_xiy( int64_t d1, Register b1, int64_t i2); // store   *(d1_imm20+b1) ^= i2_imm8           ; int8
+  inline void z_cli( const Address& a, int64_t imm8);      // compare *(a)           with imm8            ; int8
+  inline void z_mvi( const Address& a, int64_t imm8);      // store   *(a)            = imm8              ; int8
+  inline void z_tm(  const Address& a, int64_t imm8);      // test    *(a)           against mask imm8    ; int8
+  inline void z_ni(  const Address& a, int64_t imm8);      // store   *(a)           &= imm8              ; int8
+  inline void z_oi(  const Address& a, int64_t imm8);      // store   *(a)           |= imm8              ; int8
+  inline void z_xi(  const Address& a, int64_t imm8);      // store   *(a)           ^= imm8              ; int8
+  inline void z_cliy(const Address& a, int64_t imm8);      // compare *(a)           with imm8            ; int8
+  inline void z_mviy(const Address& a, int64_t imm8);      // store   *(a)            = imm8              ; int8
+  inline void z_tmy( const Address& a, int64_t imm8);      // test    *(a)           against mask imm8    ; int8
+  inline void z_niy( const Address& a, int64_t imm8);      // store   *(a)           &= imm8              ; int8
+  inline void z_oiy( const Address& a, int64_t imm8);      // store   *(a)           |= imm8              ; int8
+  inline void z_xiy( const Address& a, int64_t imm8);      // store   *(a)           ^= imm8              ; int8
+
+
+  //------------------------------
+  // Interlocked-Update
+  //------------------------------
+  inline void z_laa(  Register r1, Register r3, int64_t d2, Register b2);   // load and add    int32, signed   -- z196
+  inline void z_laag( Register r1, Register r3, int64_t d2, Register b2);   // load and add    int64, signed   -- z196
+  inline void z_laal( Register r1, Register r3, int64_t d2, Register b2);   // load and add    int32, unsigned -- z196
+  inline void z_laalg(Register r1, Register r3, int64_t d2, Register b2);   // load and add    int64, unsigned -- z196
+  inline void z_lan(  Register r1, Register r3, int64_t d2, Register b2);   // load and and    int32           -- z196
+  inline void z_lang( Register r1, Register r3, int64_t d2, Register b2);   // load and and    int64           -- z196
+  inline void z_lax(  Register r1, Register r3, int64_t d2, Register b2);   // load and xor    int32           -- z196
+  inline void z_laxg( Register r1, Register r3, int64_t d2, Register b2);   // load and xor    int64           -- z196
+  inline void z_lao(  Register r1, Register r3, int64_t d2, Register b2);   // load and or     int32           -- z196
+  inline void z_laog( Register r1, Register r3, int64_t d2, Register b2);   // load and or     int64           -- z196
+
+  inline void z_laa(  Register r1, Register r3, const Address& a);          // load and add    int32, signed   -- z196
+  inline void z_laag( Register r1, Register r3, const Address& a);          // load and add    int64, signed   -- z196
+  inline void z_laal( Register r1, Register r3, const Address& a);          // load and add    int32, unsigned -- z196
+  inline void z_laalg(Register r1, Register r3, const Address& a);          // load and add    int64, unsigned -- z196
+  inline void z_lan(  Register r1, Register r3, const Address& a);          // load and and    int32           -- z196
+  inline void z_lang( Register r1, Register r3, const Address& a);          // load and and    int64           -- z196
+  inline void z_lax(  Register r1, Register r3, const Address& a);          // load and xor    int32           -- z196
+  inline void z_laxg( Register r1, Register r3, const Address& a);          // load and xor    int64           -- z196
+  inline void z_lao(  Register r1, Register r3, const Address& a);          // load and or     int32           -- z196
+  inline void z_laog( Register r1, Register r3, const Address& a);          // load and or     int64           -- z196
+
+  //--------------------------------
+  // Execution Prediction
+  //--------------------------------
+  inline void z_pfd(  int64_t m1, int64_t d2, Register x2, Register b2);  // prefetch
+  inline void z_pfd(  int64_t m1, Address a);                             // prefetch
+  inline void z_pfdrl(int64_t m1, int64_t i2);                            // prefetch
+  inline void z_bpp(  int64_t m1, int64_t i2, int64_t d3, Register b3);   // branch prediction    -- EC12
+  inline void z_bprp( int64_t m1, int64_t i2, int64_t i3);                // branch prediction    -- EC12
+
+  //-------------------------------
+  // Transaction Control
+  //-------------------------------
+  inline void z_tbegin(int64_t d1, Register b1, int64_t i2);          // begin transaction               -- EC12
+  inline void z_tbeginc(int64_t d1, Register b1, int64_t i2);         // begin transaction (constrained) -- EC12
+  inline void z_tend();                                               // end transaction                 -- EC12
+  inline void z_tabort(int64_t d2, Register b2);                      // abort transaction               -- EC12
+  inline void z_etnd(Register r1);                                    // extract tx nesting depth        -- EC12
+  inline void z_ppa(Register r1, Register r2, int64_t m3);            // perform processor assist        -- EC12
+
+  //---------------------------------
+  // Conditional Execution
+  //---------------------------------
+  inline void z_locr( Register r1, Register r2, branch_condition cc);             // if (cc) load r1 = r2               ; int32 -- z196
+  inline void z_locgr(Register r1, Register r2, branch_condition cc);             // if (cc) load r1 = r2               ; int64 -- z196
+  inline void z_loc(  Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) load r1 = *(d2_simm20+b2)  ; int32 -- z196
+  inline void z_locg( Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) load r1 = *(d2_simm20+b2)  ; int64 -- z196
+  inline void z_loc(  Register r1, const Address& a, branch_condition cc);        // if (cc) load r1 = *(a)             ; int32 -- z196
+  inline void z_locg( Register r1, const Address& a, branch_condition cc);        // if (cc) load r1 = *(a)             ; int64 -- z196
+  inline void z_stoc( Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) store *(d2_simm20+b2) = r1 ; int32 -- z196
+  inline void z_stocg(Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) store *(d2_simm20+b2) = r1 ; int64 -- z196
+
+
+  // Complex CISC instructions
+  // ==========================
+
+  inline void z_cksm(Register r1, Register r2);                       // checksum. This is NOT CRC32
+  inline void z_km(  Register r1, Register r2);                       // cipher message
+  inline void z_kmc( Register r1, Register r2);                       // cipher message with chaining
+  inline void z_kimd(Register r1, Register r2);                       // msg digest (SHA)
+  inline void z_klmd(Register r1, Register r2);                       // msg digest (SHA)
+  inline void z_kmac(Register r1, Register r2);                       // msg authentication code
+
+  inline void z_ex(Register r1, int64_t d2, Register x2, Register b2);// execute
+  inline void z_exrl(Register r1, int64_t i2);                        // execute relative long         -- z10
+  inline void z_exrl(Register r1, address a2);                        // execute relative long         -- z10
+
+  inline void z_ectg(int64_t d1, Register b1, int64_t d2, Register b2, Register r3);  // extract cpu time
+  inline void z_ecag(Register r1, Register r3, int64_t d2, Register b2);              // extract CPU attribute
+
+  inline void z_srst(Register r1, Register r2);                       // search string
+  inline void z_srstu(Register r1, Register r2);                      // search string unicode
+
+  inline void z_mvc(const Address& d, const Address& s, int64_t l);               // move l bytes
+  inline void z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // move l+1 bytes
+  inline void z_mvcle(Register r1, Register r3, int64_t d2, Register b2=Z_R0);    // move region of memory
+
+  inline void z_stfle(int64_t d2, Register b2);                            // store facility list extended
+
+  inline void z_nc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);// and *(d1+b1) = *(d1+l+b1) & *(d2+b2) ; d1, d2: uimm12, ands l+1 bytes
+  inline void z_oc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);//  or *(d1+b1) = *(d1+l+b1) | *(d2+b2) ; d1, d2: uimm12,  ors l+1 bytes
+  inline void z_xc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);// xor *(d1+b1) = *(d1+l+b1) ^ *(d2+b2) ; d1, d2: uimm12, xors l+1 bytes
+  inline void z_nc(Address dst, int64_t len, Address src2);                     // and *dst = *dst & *src2, ands len bytes in memory
+  inline void z_oc(Address dst, int64_t len, Address src2);                     //  or *dst = *dst | *src2,  ors len bytes in memory
+  inline void z_xc(Address dst, int64_t len, Address src2);                     // xor *dst = *dst ^ *src2, xors len bytes in memory
+
+  // compare instructions
+  inline void z_clc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);  // compare (*(d1_uimm12+b1), *(d2_uimm12+b2)) ; compare l bytes
+  inline void z_clcle(Register r1, Register r3, int64_t d2, Register b2);  // compare logical long extended, see docu
+  inline void z_clclu(Register r1, Register r3, int64_t d2, Register b2);  // compare logical long unicode, see docu
+
+  // Translate characters
+  inline void z_troo(Register r1, Register r2, int64_t m3);  // translate one to one
+  inline void z_trot(Register r1, Register r2, int64_t m3);  // translate one to two
+  inline void z_trto(Register r1, Register r2, int64_t m3);  // translate two to one
+  inline void z_trtt(Register r1, Register r2, int64_t m3);  // translate two to two
+
+
+  // Floatingpoint instructions
+  // ==========================
+
+  // compare instructions
+  inline void z_cebr(FloatRegister r1, FloatRegister r2);                     // compare (r1, r2)                ; float
+  inline void z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2);  // compare (r1, *(d2_imm12+x2+b2)) ; float
+  inline void z_ceb(FloatRegister r1, const Address &a);                      // compare (r1, *(a))              ; float
+  inline void z_cdbr(FloatRegister r1, FloatRegister r2);                     // compare (r1, r2)                ; double
+  inline void z_cdb(FloatRegister r1, int64_t d2, Register x2, Register b2);  // compare (r1, *(d2_imm12+x2+b2)) ; double
+  inline void z_cdb(FloatRegister r1, const Address &a);                      // compare (r1, *(a))              ; double
+
+  // load instructions
+  inline void z_le( FloatRegister r1, int64_t d2, Register x2, Register b2);   // load r1 = *(d2_uimm12+x2+b2) ; float
+  inline void z_ley(FloatRegister r1, int64_t d2, Register x2, Register b2);   // load r1 = *(d2_imm20+x2+b2)  ; float
+  inline void z_ld( FloatRegister r1, int64_t d2, Register x2, Register b2);   // load r1 = *(d2_uimm12+x2+b2) ; double
+  inline void z_ldy(FloatRegister r1, int64_t d2, Register x2, Register b2);   // load r1 = *(d2_imm20+x2+b2)  ; double
+  inline void z_le( FloatRegister r1, const Address &a);                       // load r1 = *(a)               ; float
+  inline void z_ley(FloatRegister r1, const Address &a);                       // load r1 = *(a)               ; float
+  inline void z_ld( FloatRegister r1, const Address &a);                       // load r1 = *(a)               ; double
+  inline void z_ldy(FloatRegister r1, const Address &a);                       // load r1 = *(a)               ; double
+
+  // store instructions
+  inline void z_ste( FloatRegister r1, int64_t d2, Register x2, Register b2);  // store *(d2_uimm12+x2+b2) = r1  ; float
+  inline void z_stey(FloatRegister r1, int64_t d2, Register x2, Register b2);  // store *(d2_imm20+x2+b2)  = r1  ; float
+  inline void z_std( FloatRegister r1, int64_t d2, Register x2, Register b2);  // store *(d2_uimm12+x2+b2) = r1  ; double
+  inline void z_stdy(FloatRegister r1, int64_t d2, Register x2, Register b2);  // store *(d2_imm20+x2+b2)  = r1  ; double
+  inline void z_ste( FloatRegister r1, const Address &a);                      // store *(a)               = r1  ; float
+  inline void z_stey(FloatRegister r1, const Address &a);                      // store *(a)               = r1  ; float
+  inline void z_std( FloatRegister r1, const Address &a);                      // store *(a)               = r1  ; double
+  inline void z_stdy(FloatRegister r1, const Address &a);                      // store *(a)               = r1  ; double
+
+  // load and store immediates
+  inline void z_lzer(FloatRegister r1);                                 // r1 = 0     ; single
+  inline void z_lzdr(FloatRegister r1);                                 // r1 = 0     ; double
+
+  // Move and Convert instructions
+  inline void z_ler(FloatRegister r1, FloatRegister r2);                // move         r1 = r2 ; float
+  inline void z_ldr(FloatRegister r1, FloatRegister r2);                // move         r1 = r2 ; double
+  inline void z_ledbr(FloatRegister r1, FloatRegister r2);              // conv / round r1 = r2 ; float <- double
+  inline void z_ldebr(FloatRegister r1, FloatRegister r2);              // conv         r1 = r2 ; double <- float
+
+  // move between integer and float registers
+  inline void z_cefbr( FloatRegister r1, Register r2);                  // r1 = r2; float  <-- int32
+  inline void z_cdfbr( FloatRegister r1, Register r2);                  // r1 = r2; double <-- int32
+  inline void z_cegbr( FloatRegister r1, Register r2);                  // r1 = r2; float  <-- int64
+  inline void z_cdgbr( FloatRegister r1, Register r2);                  // r1 = r2; double <-- int64
+
+  // rounding mode for float-2-int conversions
+  inline void z_cfebr(Register r1, FloatRegister r2, RoundingMode m);   // conv r1 = r2  ; int32 <-- float
+  inline void z_cfdbr(Register r1, FloatRegister r2, RoundingMode m);   // conv r1 = r2  ; int32 <-- double
+  inline void z_cgebr(Register r1, FloatRegister r2, RoundingMode m);   // conv r1 = r2  ; int64 <-- float
+  inline void z_cgdbr(Register r1, FloatRegister r2, RoundingMode m);   // conv r1 = r2  ; int64 <-- double
+
+  inline void z_ldgr(FloatRegister r1, Register r2);   // fr1 = r2  ; what kind of conversion?  -- z10
+  inline void z_lgdr(Register r1, FloatRegister r2);   // r1  = fr2 ; what kind of conversion?  -- z10
+
+
+  // ADD
+  inline void z_aebr(FloatRegister f1, FloatRegister f2);                      // f1 = f1 + f2               ; float
+  inline void z_adbr(FloatRegister f1, FloatRegister f2);                      // f1 = f1 + f2               ; double
+  inline void z_aeb( FloatRegister f1, int64_t d2, Register x2, Register b2);  // f1 = f1 + *(d2+x2+b2)      ; float
+  inline void z_adb( FloatRegister f1, int64_t d2, Register x2, Register b2);  // f1 = f1 + *(d2+x2+b2)      ; double
+  inline void z_aeb( FloatRegister f1, const Address& a);                      // f1 = f1 + *(a)             ; float
+  inline void z_adb( FloatRegister f1, const Address& a);                      // f1 = f1 + *(a)             ; double
+
+  // SUB
+  inline void z_sebr(FloatRegister f1, FloatRegister f2);                      // f1 = f1 - f2               ; float
+  inline void z_sdbr(FloatRegister f1, FloatRegister f2);                      // f1 = f1 - f2               ; double
+  inline void z_seb( FloatRegister f1, int64_t d2, Register x2, Register b2);  // f1 = f1 - *(d2+x2+b2)      ; float
+  inline void z_sdb( FloatRegister f1, int64_t d2, Register x2, Register b2);  // f1 = f1 - *(d2+x2+b2)      ; double
+  inline void z_seb( FloatRegister f1, const Address& a);                      // f1 = f1 - *(a)             ; float
+  inline void z_sdb( FloatRegister f1, const Address& a);                      // f1 = f1 - *(a)             ; double
+  // negate
+  inline void z_lcebr(FloatRegister r1, FloatRegister r2);                     // neg r1 = -r2   ; float
+  inline void z_lcdbr(FloatRegister r1, FloatRegister r2);                     // neg r1 = -r2   ; double
+
+  // Absolute value, monadic if fr2 == fnoreg.
+  inline void z_lpdbr( FloatRegister fr1, FloatRegister fr2 = fnoreg);         // fr1 = |fr2|
+
+
+  // MUL
+  inline void z_meebr(FloatRegister f1, FloatRegister f2);                      // f1 = f1 * f2               ; float
+  inline void z_mdbr( FloatRegister f1, FloatRegister f2);                      // f1 = f1 * f2               ; double
+  inline void z_meeb( FloatRegister f1, int64_t d2, Register x2, Register b2);  // f1 = f1 * *(d2+x2+b2)      ; float
+  inline void z_mdb(  FloatRegister f1, int64_t d2, Register x2, Register b2);  // f1 = f1 * *(d2+x2+b2)      ; double
+  inline void z_meeb( FloatRegister f1, const Address& a);                      // f1 = f1 * *(a)             ; float
+  inline void z_mdb(  FloatRegister f1, const Address& a);                      // f1 = f1 * *(a)             ; double
+
+  // DIV
+  inline void z_debr( FloatRegister f1, FloatRegister f2);                      // f1 = f1 / f2               ; float
+  inline void z_ddbr( FloatRegister f1, FloatRegister f2);                      // f1 = f1 / f2               ; double
+  inline void z_deb(  FloatRegister f1, int64_t d2, Register x2, Register b2);  // f1 = f1 / *(d2+x2+b2)      ; float
+  inline void z_ddb(  FloatRegister f1, int64_t d2, Register x2, Register b2);  // f1 = f1 / *(d2+x2+b2)      ; double
+  inline void z_deb(  FloatRegister f1, const Address& a);                      // f1 = f1 / *(a)             ; float
+  inline void z_ddb(  FloatRegister f1, const Address& a);                      // f1 = f1 / *(a)             ; double
+
+  // square root
+  inline void z_sqdbr(FloatRegister fr1, FloatRegister fr2);                    // fr1 = sqrt(fr2)            ; double
+  inline void z_sqdb( FloatRegister fr1, int64_t d2, Register x2, Register b2); // fr1 = sqrt(*(d2+x2+b2))    ; double
+  inline void z_sqdb( FloatRegister fr1, int64_t d2, Register b2);              // fr1 = sqrt(*(d2+b2))       ; double
+
+  // Nop instruction
+  // ===============
+
+  // branch never (nop)
+  inline void z_nop();
+
+  // ===============================================================================================
+
+  // Simplified emitters:
+  // ====================
+
+
+  // Some memory instructions without index register (just convenience).
+  inline void z_layz(Register r1, int64_t d2, Register b2 = Z_R0);
+  inline void z_lay(Register r1, int64_t d2, Register b2);
+  inline void z_laz(Register r1, int64_t d2, Register b2);
+  inline void z_la(Register r1, int64_t d2, Register b2);
+  inline void z_l(Register r1, int64_t d2, Register b2);
+  inline void z_ly(Register r1, int64_t d2, Register b2);
+  inline void z_lg(Register r1, int64_t d2, Register b2);
+  inline void z_st(Register r1, int64_t d2, Register b2);
+  inline void z_sty(Register r1, int64_t d2, Register b2);
+  inline void z_stg(Register r1, int64_t d2, Register b2);
+  inline void z_lgf(Register r1, int64_t d2, Register b2);
+  inline void z_lgh(Register r1, int64_t d2, Register b2);
+  inline void z_llgh(Register r1, int64_t d2, Register b2);
+  inline void z_llgf(Register r1, int64_t d2, Register b2);
+  inline void z_lgb(Register r1, int64_t d2, Register b2);
+  inline void z_cl( Register r1, int64_t d2, Register b2);
+  inline void z_c(Register r1, int64_t d2, Register b2);
+  inline void z_cg(Register r1, int64_t d2, Register b2);
+  inline void z_sh(Register r1, int64_t d2, Register b2);
+  inline void z_shy(Register r1, int64_t d2, Register b2);
+  inline void z_ste(FloatRegister r1, int64_t d2, Register b2);
+  inline void z_std(FloatRegister r1, int64_t d2, Register b2);
+  inline void z_stdy(FloatRegister r1, int64_t d2, Register b2);
+  inline void z_stey(FloatRegister r1, int64_t d2, Register b2);
+  inline void z_ld(FloatRegister r1, int64_t d2, Register b2);
+  inline void z_ldy(FloatRegister r1, int64_t d2, Register b2);
+  inline void z_le(FloatRegister r1, int64_t d2, Register b2);
+  inline void z_ley(FloatRegister r1, int64_t d2, Register b2);
+
+  inline void z_agf(Register r1, int64_t d2, Register b2);
+
+  // Branch and branch-relative emitters taking a Label or plain address.
+  inline void z_exrl(Register r1, Label& L);
+  inline void z_larl(Register r1, Label& L);
+  inline void z_bru( Label& L);
+  inline void z_brul(Label& L);
+  inline void z_brul(address a);
+  inline void z_brh( Label& L);
+  inline void z_brl( Label& L);
+  inline void z_bre( Label& L);
+  inline void z_brnh(Label& L);
+  inline void z_brnl(Label& L);
+  inline void z_brne(Label& L);
+  inline void z_brz( Label& L);
+  inline void z_brnz(Label& L);
+  inline void z_brnaz(Label& L);
+  inline void z_braz(Label& L);
+  inline void z_brnp(Label& L);
+
+  inline void z_btrue( Label& L);
+  inline void z_bfalse(Label& L);
+
+  inline void z_brno( Label& L);
+
+
+  inline void z_basr(Register r1, Register r2);
+  inline void z_brasl(Register r1, address a);
+  inline void z_brct(Register r1, address a);
+  inline void z_brct(Register r1, Label& L);
+
+  inline void z_brxh(Register r1, Register r3, address a);
+  inline void z_brxh(Register r1, Register r3, Label& L);
+
+  inline void z_brxle(Register r1, Register r3, address a);
+  inline void z_brxle(Register r1, Register r3, Label& L);
+
+  inline void z_brxhg(Register r1, Register r3, address a);
+  inline void z_brxhg(Register r1, Register r3, Label& L);
+
+  inline void z_brxlg(Register r1, Register r3, address a);
+  inline void z_brxlg(Register r1, Register r3, Label& L);
+
+  // Population count intrinsics.
+  inline void z_flogr(Register r1, Register r2);    // find leftmost one
+  inline void z_popcnt(Register r1, Register r2);   // population count
+  inline void z_ahhhr(Register r1, Register r2, Register r3);   // ADD halfword high high
+  inline void z_ahhlr(Register r1, Register r2, Register r3);   // ADD halfword high low
+
+  inline void z_tam();
+  inline void z_stck(int64_t d2, Register b2);
+  inline void z_stckf(int64_t d2, Register b2);
+  inline void z_stmg(Register r1, Register r3, int64_t d2, Register b2);
+  inline void z_lmg(Register r1, Register r3, int64_t d2, Register b2);
+
+  inline void z_cs( Register r1, Register r3, int64_t d2, Register b2);
+  inline void z_csy(Register r1, Register r3, int64_t d2, Register b2);
+  inline void z_csg(Register r1, Register r3, int64_t d2, Register b2);
+  inline void z_cs( Register r1, Register r3, const Address& a);
+  inline void z_csy(Register r1, Register r3, const Address& a);
+  inline void z_csg(Register r1, Register r3, const Address& a);
+
+  inline void z_cvd(Register r1, int64_t d2, Register x2, Register b2);
+  inline void z_cvdg(Register r1, int64_t d2, Register x2, Register b2);
+  inline void z_cvd(Register r1, int64_t d2, Register b2);
+  inline void z_cvdg(Register r1, int64_t d2, Register b2);
+
+  // Instruction queries:
+  // instruction properties and recognize emitted instructions
+  // ===========================================================
+
+  static int nop_size() { return 2; }  // z_nop emits a 2-byte bcr (branch never), see is_z_nop() below.
+
+  static int z_brul_size() { return 6; }  // size (in bytes) of the instruction emitted by z_brul.
+
+  // Recognizers: each matches the (right-justified) instruction bits in x against one opcode pattern.
+  static bool is_z_basr(short x) {
+    return (BASR_ZOPC == (x & BASR_MASK));
+  }
+  static bool is_z_algr(long x) {
+    return (ALGR_ZOPC == (x & RRE_MASK));
+  }
+  static bool is_z_lb(long x) {
+    return (LB_ZOPC == (x & LB_MASK));
+  }
+  static bool is_z_lh(int x) {
+    return (LH_ZOPC == (x & LH_MASK));
+  }
+  static bool is_z_l(int x) {
+    return (L_ZOPC == (x & L_MASK));
+  }
+  static bool is_z_lgr(long x) {
+    return (LGR_ZOPC == (x & RRE_MASK));
+  }
+  static bool is_z_ly(long x) {
+    return (LY_ZOPC == (x & LY_MASK));
+  }
+  static bool is_z_lg(long x) {
+    return (LG_ZOPC == (x & LG_MASK));
+  }
+  static bool is_z_llgh(long x) {
+    return (LLGH_ZOPC == (x & LLGH_MASK));
+  }
+  static bool is_z_llgf(long x) {
+    return (LLGF_ZOPC == (x & LLGF_MASK));
+  }
+  static bool is_z_le(int x) {
+    return (LE_ZOPC == (x & LE_MASK));
+  }
+  static bool is_z_ld(int x) {
+    return (LD_ZOPC == (x & LD_MASK));
+  }
+  static bool is_z_st(int x) {
+    return (ST_ZOPC == (x & ST_MASK));
+  }
+  static bool is_z_stc(int x) {
+    return (STC_ZOPC == (x & STC_MASK));
+  }
+  static bool is_z_stg(long x) {
+    return (STG_ZOPC == (x & STG_MASK));
+  }
+  static bool is_z_sth(int x) {
+    return (STH_ZOPC == (x & STH_MASK));
+  }
+  static bool is_z_ste(int x) {
+    return (STE_ZOPC == (x & STE_MASK));
+  }
+  static bool is_z_std(int x) {
+    return (STD_ZOPC == (x & STD_MASK));
+  }
+  static bool is_z_slag(long x) {
+    return (SLAG_ZOPC == (x & SLAG_MASK));
+  }
+  static bool is_z_tmy(long x) {
+    return (TMY_ZOPC == (x & TMY_MASK));
+  }
+  static bool is_z_tm(long x) {
+    // Casts avoid sign extension when comparing the 32-bit pattern against a long.
+    return ((unsigned int)TM_ZOPC == (x & (unsigned int)TM_MASK));
+  }
+  static bool is_z_bcr(long x) {
+    return (BCR_ZOPC == (x & BCR_MASK));
+  }
+  static bool is_z_nop(long x) {
+    return is_z_bcr(x) && ((x & 0x00ff) == 0);  // bcr 0,0: mask and target register both zero.
+  }
+  static bool is_z_nop(address x) {
+    return is_z_nop(* (short *) x);  // bcr is a 2-byte instruction.
+  }
+  static bool is_z_br(long x) {
+    return is_z_bcr(x) && ((x & 0x00f0) == 0x00f0);  // mask 0xf: branch always.
+  }
+  static bool is_z_brc(long x, int cond) {
+    return ((unsigned int)BRC_ZOPC == (x & BRC_MASK)) && ((cond<<20) == (x & 0x00f00000U));
+  }
+  // Memory barriers are emitted as bcr with a special sync mask and r2 == 0 (lightweight sync).
+  static bool is_z_sync_full(long x) {
+    return is_z_bcr(x) && (((x & 0x00f0)>>4)==bcondFullSync) && ((x & 0x000f)==0x0000);
+  }
+  static bool is_z_sync_light(long x) {
+    return is_z_bcr(x) && (((x & 0x00f0)>>4)==bcondLightSync) && ((x & 0x000f)==0x0000);
+  }
+  static bool is_z_sync(long x) {
+    return is_z_sync_full(x) || is_z_sync_light(x);
+  }
+
+  static bool is_z_brasl(long x) {
+    return (BRASL_ZOPC == (x & BRASL_MASK));
+  }
+  static bool is_z_brasl(address a) {
+    // BRASL is a 6-byte instruction: read 8 bytes at a and shift out the
+    // two trailing bytes to right-justify the instruction (big-endian).
+    long x = (*((long *)a))>>16;
+    return is_z_brasl(x);
+  }
+  static bool is_z_larl(long x) {
+    return (LARL_ZOPC == (x & LARL_MASK));
+  }
+  static bool is_z_lgrl(long x) {
+    return (LGRL_ZOPC == (x & LGRL_MASK));
+  }
+  static bool is_z_lgrl(address a) {
+    // LGRL is a 6-byte instruction; right-justify it as above.
+    long x = (*((long *)a))>>16;
+    return is_z_lgrl(x);
+  }
+
+  static bool is_z_lghi(unsigned long x) {
+    return (unsigned int)LGHI_ZOPC == (x & (unsigned int)LGHI_MASK);
+  }
+
+  // Recognizers for the 'load logical immediate' family (LLI..).
+  static bool is_z_llill(unsigned long x) {
+    return (unsigned int)LLILL_ZOPC == (x & (unsigned int)LLI_MASK);
+  }
+  static bool is_z_llilh(unsigned long x) {
+    return (unsigned int)LLILH_ZOPC == (x & (unsigned int)LLI_MASK);
+  }
+  static bool is_z_llihl(unsigned long x) {
+    return (unsigned int)LLIHL_ZOPC == (x & (unsigned int)LLI_MASK);
+  }
+  static bool is_z_llihh(unsigned long x) {
+    return (unsigned int)LLIHH_ZOPC == (x & (unsigned int)LLI_MASK);
+  }
+  static bool is_z_llilf(unsigned long x) {
+    return LLILF_ZOPC == (x & LLIF_MASK);
+  }
+  static bool is_z_llihf(unsigned long x) {
+    return LLIHF_ZOPC == (x & LLIF_MASK);
+  }
+
+  // Recognizers for the 'insert immediate' family (II..).
+  static bool is_z_iill(unsigned long x) {
+    return (unsigned int)IILL_ZOPC == (x & (unsigned int)II_MASK);
+  }
+  static bool is_z_iilh(unsigned long x) {
+    return (unsigned int)IILH_ZOPC == (x & (unsigned int)II_MASK);
+  }
+  static bool is_z_iihl(unsigned long x) {
+    return (unsigned int)IIHL_ZOPC == (x & (unsigned int)II_MASK);
+  }
+  static bool is_z_iihh(unsigned long x) {
+    return (unsigned int)IIHH_ZOPC == (x & (unsigned int)II_MASK);
+  }
+  static bool is_z_iilf(unsigned long x) {
+    return IILF_ZOPC == (x & IIF_MASK);
+  }
+  static bool is_z_iihf(unsigned long x) {
+    return IIHF_ZOPC == (x & IIF_MASK);
+  }
+
+  // Generic matchers: compare an instruction (passed in bits, or read from iloc)
+  // against a pattern idef, optionally under an explicit mask.
+  static inline bool is_equal(unsigned long inst, unsigned long idef);
+  static inline bool is_equal(unsigned long inst, unsigned long idef, unsigned long imask);
+  static inline bool is_equal(address iloc, unsigned long idef);
+  static inline bool is_equal(address iloc, unsigned long idef, unsigned long imask);
+
+  // Recognize the trapping instructions used for implicit range/zero checks (reported via SIGTRAP).
+  static inline bool is_sigtrap_range_check(address pc);
+  static inline bool is_sigtrap_zero_check(address pc);
+
+  //-----------------
+  // memory barriers
+  //-----------------
+  // machine barrier instructions:
+  //
+  // - z_sync            Two-way memory barrier, aka fence.
+  //                     Only load-after-store-order is not guaranteed in the
+  //                     z/Architecture memory model, i.e. only 'fence' is needed.
+  //
+  // semantic barrier instructions:
+  // (as defined in orderAccess.hpp)
+  //
+  // - z_release         orders Store|Store,   empty implementation
+  //                            Load|Store
+  // - z_acquire         orders Load|Store,    empty implementation
+  //                            Load|Load
+  // - z_fence           orders Store|Store,   implemented as z_sync.
+  //                            Load|Store,
+  //                            Load|Load,
+  //                            Store|Load
+  //
+  // For this implementation to be correct, we need H/W fixes on (very) old H/W:
+  //          For z990, it is Driver-55:  MCL232 in the J13484 (i390/ML) Stream.
+  //          For z9,   it is Driver-67:  MCL065 in the G40963 (i390/ML) Stream.
+  // These drivers are a prereq. Otherwise, memory synchronization will not work.
+
+  inline void z_sync();
+  inline void z_release();
+  inline void z_acquire();
+  inline void z_fence();
+
+  // Creation
+  Assembler(CodeBuffer* code) : AbstractAssembler(code) { }
+
+};
+
+#endif // CPU_S390_VM_ASSEMBLER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/assembler_s390.inline.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP
+#define CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+
+// Convention: Use Z_R0 and Z_R1 instead of Z_scratch_* in all
+// assembler_s390.* files.
+
+// Local implementation of byte emitters to help inlining.
+// Each emitter stores the instruction word at the current pc and advances
+// the code section's end pointer by the instruction length (2/4/6 bytes).
+
+// Emit a 2-byte instruction.
+inline void Assembler::emit_16(int x) {
+  CodeSection*       cs = code_section();
+  address      code_pos = pc();
+  *(unsigned short*)code_pos = (unsigned short)x;
+  cs->set_end( code_pos + sizeof(unsigned short));
+}
+
+// Emit a 4-byte instruction.
+inline void Assembler::emit_32(int x) {
+  CodeSection*       cs = code_section();
+  address      code_pos = pc();
+  *(jint*)code_pos = (jint)x;
+  cs->set_end( code_pos + sizeof( jint));
+}
+
+// Emit a 6-byte instruction: the operand's upper 16 bits (x>>32) are stored
+// at the lower address, the low 32 bits follow.
+inline void Assembler::emit_48(long x) {
+  CodeSection*       cs = code_section();
+  address      code_pos = pc();
+  *(unsigned short*)code_pos = (unsigned short)(x>>32);
+  *(jint*)(code_pos+sizeof(unsigned short)) = (jint)x;
+  cs->set_end( code_pos + sizeof( jint) + sizeof( unsigned short));
+}
+
+// Support lightweight sync (from z196). Experimental as of now. For explanation see *.hpp file.
+// Pick the lightweight serialization form of BCR when the hardware supports
+// it, the classic full-serialization form otherwise. Both are emitted as a
+// BCR with Z_R0, differing only in the condition mask.
+inline void Assembler::z_sync() {
+  z_bcr(VM_Version::has_FastSync() ? bcondLightSync : bcondFullSync, Z_R0);
+}
+// Release and acquire need no instruction on z/Architecture (see the barrier
+// discussion in the *.hpp file); only the full fence does.
+inline void Assembler::z_release() { }
+inline void Assembler::z_acquire() { }
+inline void Assembler::z_fence()   { z_sync(); }
+
+// Emit an all-zero halfword; 0x0000 is not a valid instruction and traps.
+inline void Assembler::z_illtrap() {
+  emit_16(0);
+}
+// Same, but carry an 8-bit id in the low byte to identify the trap site.
+inline void Assembler::z_illtrap(int id) {
+  emit_16(id & 0x00ff);
+}
+// Load a recognizable bit pattern into Z_R0 before trapping, so the register
+// state at the trap site identifies the emitter ("eyecatcher").
+inline void Assembler::z_illtrap_eyecatcher(unsigned short xpattern, unsigned short pattern) {
+  z_llill(Z_R0, xpattern);
+  z_iilh(Z_R0, pattern);
+  z_illtrap((unsigned int)xpattern);
+}
+
+//------------------------------
+// PC-relative loads and stores. i2 is the pc-relative offset operand
+// (the address-taking overloads compute it via RelAddr::pcrel_off32).
+//------------------------------
+inline void Assembler::z_lhrl(Register r1, int64_t i2)  { emit_48( LHRL_ZOPC   | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lrl(Register r1, int64_t i2)   { emit_48( LRL_ZOPC    | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lghrl(Register r1, int64_t i2) { emit_48( LGHRL_ZOPC  | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lgfrl(Register r1, int64_t i2) { emit_48( LGFRL_ZOPC  | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lgrl(Register r1, int64_t i2)  { emit_48( LGRL_ZOPC   | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_llhrl(Register r1, int64_t i2) { emit_48( LLHRL_ZOPC  | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_llghrl(Register r1, int64_t i2){ emit_48( LLGHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_llgfrl(Register r1, int64_t i2){ emit_48( LLGFRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+
+inline void Assembler::z_sthrl(Register r1, int64_t i2) { emit_48( STHRL_ZOPC  | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_strl(Register r1, int64_t i2)  { emit_48( STRL_ZOPC   | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_stgrl(Register r1, int64_t i2) { emit_48( STGRL_ZOPC  | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+
+// Checksum and CPACF message-security instructions (r1/r2 are even-odd pairs
+// per the hardware spec - not checked here).
+inline void Assembler::z_cksm(Register r1, Register r2) { emit_32( CKSM_ZOPC   | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_km(  Register r1, Register r2) { emit_32( KM_ZOPC     | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_kmc( Register r1, Register r2) { emit_32( KMC_ZOPC    | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_kimd(Register r1, Register r2) { emit_32( KIMD_ZOPC   | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_klmd(Register r1, Register r2) { emit_32( KLMD_ZOPC   | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_kmac(Register r1, Register r2) { emit_32( KMAC_ZOPC   | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+
+// Execute-relative: run the instruction at the given pc-relative target.
+inline void Assembler::z_exrl(Register r1, int64_t i2)  { emit_48( EXRL_ZOPC   | regt(r1, 8, 48) | simm32(i2, 16, 48)); }                             // z10
+inline void Assembler::z_exrl(Register r1, address a2)  { emit_48( EXRL_ZOPC   | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a2, pc()), 16, 48)); } // z10
+
+inline void Assembler::z_ectg(int64_t d1, Register b1, int64_t d2, Register b2, Register r3) { emit_48( ECTG_ZOPC | reg(r3, 8, 48) | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm12(d2, 36, 48) | reg(b2, 32, 48)); }
+inline void Assembler::z_ecag(Register r1, Register r3, int64_t d2, Register b2)             { emit_48( ECAG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+
+
+//------------------------------
+// Interlocked-Update
+// Atomic load-and-op instructions (z196 interlocked-access facility).
+// The Address-form wrappers below assert no index register because these
+// instructions have no index field.
+//------------------------------
+inline void Assembler::z_laa(  Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAA_ZOPC   | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laag( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAG_ZOPC  | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laal( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAL_ZOPC  | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laalg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAALG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_lan(  Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAN_ZOPC   | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_lang( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LANG_ZOPC  | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_lax(  Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAX_ZOPC   | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laxg( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAXG_ZOPC  | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_lao(  Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAO_ZOPC   | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laog( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAOG_ZOPC  | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+
+inline void Assembler::z_laa(  Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laa(  r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laag( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laag( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laal( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laal( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laalg(Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laalg(r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_lan(  Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lan(  r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_lang( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lang( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_lax(  Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lax(  r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laxg( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laxg( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_lao(  Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lao(  r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laog( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laog( r1, r3, a.disp12(), a.base()); }
+
+//--------------------------------
+// Execution Prediction
+// Prefetch (PFD/PFDRL) and branch-prediction hint (BPP/BPRP) instructions.
+//--------------------------------
+inline void Assembler::z_pfd(  int64_t m1, int64_t d2, Register x2, Register b2) { emit_48( PFD_ZOPC   | uimm4(m1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_pfd(  int64_t m1, Address a)                            { z_pfd(m1, a.disp(), a.indexOrR0(), a.base()); }
+inline void Assembler::z_pfdrl(int64_t m1, int64_t i2)                           { emit_48( PFDRL_ZOPC | uimm4(m1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_bpp(  int64_t m1, int64_t i2, int64_t d3, Register b3)  { emit_48( BPP_ZOPC   | uimm4(m1, 8, 48) | uimm12(d3, 20, 48) | reg(b3, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_bprp( int64_t m1, int64_t i2, int64_t i3)               { emit_48( BPRP_ZOPC  | uimm4(m1, 8, 48) | simm12(i2, 12, 48) | simm24(i3, 24, 48)); }
+
+//-------------------------------
+// Transaction Control
+// Transactional-execution facility (zEC12).
+//-------------------------------
+inline void Assembler::z_tbegin( int64_t d1, Register b1, int64_t i2) { emit_48( TBEGIN_ZOPC  | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm16(i2, 32, 48)); }
+inline void Assembler::z_tbeginc(int64_t d1, Register b1, int64_t i2) { emit_48( TBEGINC_ZOPC | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm16(i2, 32, 48)); }
+inline void Assembler::z_tend()                                       { emit_32( TEND_ZOPC); }
+inline void Assembler::z_tabort( int64_t d2, Register b2)             { emit_32( TABORT_ZOPC  | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_etnd(Register r1)                            { emit_32( ETND_ZOPC    | regt(r1, 24, 32)); }
+inline void Assembler::z_ppa(Register r1, Register r2, int64_t m3)    { emit_32( PPA_ZOPC     | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+
+//---------------------------------
+// Conditional Execution
+// Load/store-on-condition; cc is the branch condition mask under which the
+// operation takes effect.
+//---------------------------------
+inline void Assembler::z_locr(  Register r1, Register r2, branch_condition cc)             { emit_32( LOCR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); }               // z196
+inline void Assembler::z_locgr( Register r1, Register r2, branch_condition cc)             { emit_32( LOCGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); }               // z196
+inline void Assembler::z_loc(   Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOC_ZOPC   | regt(r1,  8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_locg(  Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOCG_ZOPC  | regt(r1,  8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_loc(   Register r1, const Address &a, branch_condition cc)        { z_loc(r1, a.disp(), a.base(), cc); }
+inline void Assembler::z_locg(  Register r1, const Address &a, branch_condition cc)        { z_locg(r1, a.disp(), a.base(), cc); }
+inline void Assembler::z_stoc(  Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOC_ZOPC  | regt(r1,  8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_stocg( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOCG_ZOPC | regt(r1,  8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+
+// Search string (forward scan between the addresses in r1/r2).
+inline void Assembler::z_srst( Register r1, Register r2) { emit_32( SRST_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_srstu(Register r1, Register r2) { emit_32( SRSTU_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+//---------------------------------
+// Address calculation
+// NOTE(review): the *z variants pass the base through reg() while the plain
+// forms use regz() - confirm the intended difference w.r.t. Z_R0 handling.
+//---------------------------------
+inline void Assembler::z_layz(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | reg(b2, 16, 48)); }
+inline void Assembler::z_lay( Register r1, const Address &a)                     { z_layz(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_laz( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_la(  Register r1, const Address &a)                     { z_laz(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_la(  Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32));}
+inline void Assembler::z_larl(Register r1, int64_t i2)    { emit_48( LARL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_larl(Register r1, address a)     { emit_48( LARL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
+
+// Basic register-register and base+index+displacement loads.
+inline void Assembler::z_lr(Register r1, Register r2)                          { emit_16( LR_ZOPC | regt(r1,8,16) | reg(r2,12,16)); }
+inline void Assembler::z_lgr(Register r1, Register r2)                         { emit_32( LGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lh(Register r1, int64_t d2, Register x2, Register b2) { emit_32( LH_ZOPC | 0 << 16 | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_lh(Register r1, const Address &a)                     { z_lh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_l(Register r1, int64_t d2, Register x2, Register b2)  { emit_32( L_ZOPC | 0 << 16 | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_l(Register r1, const Address &a)                      { z_l(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lg(Register r1, const Address &a)                     { z_lg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+// Register-register loads with sign extension (lbr/lhr/...) and
+// zero extension (llhr/llgcr/...).
+inline void Assembler::z_lbr(  Register r1, Register r2) { emit_32( LBR_ZOPC   | regt(r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_lhr(  Register r1, Register r2) { emit_32( LHR_ZOPC   | regt(r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_lgbr( Register r1, Register r2) { emit_32( LGBR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lghr( Register r1, Register r2) { emit_32( LGHR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lgfr( Register r1, Register r2) { emit_32( LGFR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_llhr( Register r1, Register r2) { emit_32( LLHR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_llgcr(Register r1, Register r2) { emit_32( LLGCR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_llghr(Register r1, Register r2) { emit_32( LLGHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_llgfr(Register r1, Register r2) { emit_32( LLGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+// Stores.
+inline void Assembler::z_sth(Register r1, const Address &a)                     { z_sth(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sth(Register r1, int64_t d2, Register x2, Register b2) { emit_32( STH_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_st( Register r1, const Address& d)                     { z_st(r1, d.disp(), d.indexOrR0(), d.base()); }
+inline void Assembler::z_st( Register r1, int64_t d2, Register x2, Register b2) { emit_32( ST_ZOPC  | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stg(Register r1, const Address& d)                     { z_stg(r1, d.disp(), d.indexOrR0(), d.base()); }
+inline void Assembler::z_stg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( STG_ZOPC | reg(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+
+// Store characters under mask (m3 selects which bytes of r1 are stored).
+inline void Assembler::z_stcm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( STCM_ZOPC  | regt(r1, 8, 32) | uimm4(m3, 12, 32) | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stcmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2)         | regz(b2, 16, 48)); }
+inline void Assembler::z_stcmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2)         | regz(b2, 16, 48)); }
+
+// memory-immediate instructions (8-bit immediate)
+// The plain forms take a 12-bit unsigned displacement, the *y forms a
+// 20-bit signed displacement. None of these instructions has an index field.
+inline void Assembler::z_cli( int64_t d1, Register b1, int64_t i2) { emit_32( CLI_ZOPC  | uimm12(d1, 20, 32) | regz(b1, 16, 32) | uimm8(i2, 8, 32)); }
+inline void Assembler::z_mvi( int64_t d1, Register b1, int64_t i2) { emit_32( MVI_ZOPC  | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_tm(  int64_t d1, Register b1, int64_t i2) { emit_32( TM_ZOPC   | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_ni(  int64_t d1, Register b1, int64_t i2) { emit_32( NI_ZOPC   | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_oi(  int64_t d1, Register b1, int64_t i2) { emit_32( OI_ZOPC   | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_xi(  int64_t d1, Register b1, int64_t i2) { emit_32( XI_ZOPC   | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_cliy(int64_t d1, Register b1, int64_t i2) { emit_48( CLIY_ZOPC | simm20(d1)         | regz(b1, 16, 48) | uimm8(i2, 8, 48)); }
+inline void Assembler::z_mviy(int64_t d1, Register b1, int64_t i2) { emit_48( MVIY_ZOPC | simm20(d1)         | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_tmy( int64_t d1, Register b1, int64_t i2) { emit_48( TMY_ZOPC  | simm20(d1)         | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_niy( int64_t d1, Register b1, int64_t i2) { emit_48( NIY_ZOPC  | simm20(d1)         | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_oiy( int64_t d1, Register b1, int64_t i2) { emit_48( OIY_ZOPC  | simm20(d1)         | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_xiy( int64_t d1, Register b1, int64_t i2) { emit_48( XIY_ZOPC  | simm20(d1)         | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+
+// Address-form wrappers for the memory-immediate instructions above.
+// These instructions have no index field, hence the assertions. The assert
+// messages name the actual instruction being emitted (previously they all
+// said "CLI" due to copy/paste).
+inline void Assembler::z_cli( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLI");  z_cli( a.disp12(), a.base(), imm); }
+inline void Assembler::z_mvi( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in MVI");  z_mvi( a.disp12(), a.base(), imm); }
+inline void Assembler::z_tm(  const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in TM");   z_tm(  a.disp12(), a.base(), imm); }
+inline void Assembler::z_ni(  const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in NI");   z_ni(  a.disp12(), a.base(), imm); }
+inline void Assembler::z_oi(  const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in OI");   z_oi(  a.disp12(), a.base(), imm); }
+inline void Assembler::z_xi(  const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in XI");   z_xi(  a.disp12(), a.base(), imm); }
+inline void Assembler::z_cliy(const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLIY"); z_cliy(a.disp20(), a.base(), imm); }
+inline void Assembler::z_mviy(const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in MVIY"); z_mviy(a.disp20(), a.base(), imm); }
+inline void Assembler::z_tmy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in TMY");  z_tmy( a.disp20(), a.base(), imm); }
+inline void Assembler::z_niy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in NIY");  z_niy( a.disp20(), a.base(), imm); }
+inline void Assembler::z_oiy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in OIY");  z_oiy( a.disp20(), a.base(), imm); }
+inline void Assembler::z_xiy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in XIY");  z_xiy( a.disp20(), a.base(), imm); }
+
+
+// MVC: the Address form takes the actual byte count l and encodes l-1,
+// as required by the instruction's length field. The raw form below takes
+// the already-encoded (bytes-1) length.
+inline void Assembler::z_mvc(const Address& d, const Address& s, int64_t l) {
+  assert(!d.has_index() && !s.has_index(), "Address operand can not be encoded.");
+  z_mvc(d.disp(), l-1, d.base(), s.disp(), s.base());
+}
+inline void Assembler::z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( MVC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_mvcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( MVCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+
+// Move a 16-bit signed immediate to memory as a halfword (MVHHI), a word
+// (MVHI), or a doubleword (MVGHI).
+inline void Assembler::z_mvhhi( int64_t d1, Register b1, int64_t i2) { emit_48( MVHHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_mvhi ( int64_t d1, Register b1, int64_t i2) { emit_48( MVHI_ZOPC  | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_mvghi( int64_t d1, Register b1, int64_t i2) { emit_48( MVGHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
+// Address-form wrappers. Fixed copy/paste bug: z_mvhhi and z_mvhi previously
+// delegated to z_mvghi, emitting the 64-bit MVGHI opcode (and a doubleword
+// store) for the 16-/32-bit variants.
+inline void Assembler::z_mvhhi( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVHHI"); z_mvhhi( d.disp(), d.baseOrR0(), i2); }
+inline void Assembler::z_mvhi ( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVHI");  z_mvhi ( d.disp(), d.baseOrR0(), i2); }
+inline void Assembler::z_mvghi( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVGHI"); z_mvghi( d.disp(), d.baseOrR0(), i2); }
+
+// Execute the instruction at d2(x2,b2), modified by r1.
+inline void Assembler::z_ex(Register r1, int64_t d2, Register x2, Register b2) { emit_32( EX_ZOPC | regz(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+
+// Insert character (byte) / characters under mask into r1.
+inline void Assembler::z_ic  (Register r1, int64_t d2, Register x2, Register b2) { emit_32( IC_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_icy (Register r1, int64_t d2, Register x2, Register b2) { emit_48( ICY_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_icm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( ICM_ZOPC  | regt(r1, 8, 32) | uimm4(m3, 12, 32) | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_icmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2)         | regz(b2, 16, 48)); }
+inline void Assembler::z_icmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2)         | regz(b2, 16, 48)); }
+// Insert immediate into the named 16-bit (hh/hl/lh/ll) or 32-bit (hf/lf)
+// portion of r1, leaving the rest of the register unchanged.
+inline void Assembler::z_iihh(Register r1, int64_t i2) { emit_32( IIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_iihl(Register r1, int64_t i2) { emit_32( IIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_iilh(Register r1, int64_t i2) { emit_32( IILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_iill(Register r1, int64_t i2) { emit_32( IILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_iihf(Register r1, int64_t i2) { emit_48( IIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_iilf(Register r1, int64_t i2) { emit_48( IILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+// Memory loads with 20-bit signed displacement (sign-extending, load-and-test,
+// and zero-extending variants).
+inline void Assembler::z_lgf(Register r1, const Address& a) { z_lgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lgf(Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lhy(Register r1, const Address &a) { z_lhy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lhy(Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lgh(Register r1, const Address &a) { z_lgh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lgh(Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LGH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lt(Register r1, const Address &a) { z_lt(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lt (Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LT_ZOPC  | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ltg(Register r1, const Address &a) { z_ltg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ltg(Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LTG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ltgf(Register r1, const Address &a) { z_ltgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ltgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTGF_ZOPC| regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lb(Register r1, const Address &a) { z_lb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lb (Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LB_ZOPC  | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lgb(Register r1, const Address &a) { z_lgb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lgb(Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LGB_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ly(Register r1, const Address &a) { z_ly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ly(Register r1, int64_t d2, Register x2, Register b2)   { emit_48( LY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llc(Register r1, const Address& a) { z_llc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llc(Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LLC_ZOPC  | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llh(Register r1, const Address &a) { z_llh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llh(Register r1, int64_t d2, Register x2, Register b2)  { emit_48( LLH_ZOPC  | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llgf(Register r1, const Address &a) { z_llgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llgh(Register r1, const Address &a) { z_llgh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llgh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llgc(Register r1, const Address &a) { z_llgc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llgc(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGC_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llgc(Register r1, int64_t d2, Register b2)              { z_llgc( r1, d2, Z_R0, b2); }
+// Load-immediate forms (lhi/lghi sign-extend 16 bits, lgfi sign-extends
+// 32 bits, the lli* variants zero the rest of the register).
+inline void Assembler::z_lhi(Register r1, int64_t i2) { emit_32( LHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_lghi(Register r1, int64_t i2) { emit_32( LGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_lgfi(Register r1, int64_t i2) { emit_48( LGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_llihf(Register r1, int64_t i2) { emit_48( LLIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_llilf(Register r1, int64_t i2) { emit_48( LLILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_llihh(Register r1, int64_t i2) { emit_32( LLIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_llihl(Register r1, int64_t i2) { emit_32( LLIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_llilh(Register r1, int64_t i2) { emit_32( LLILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_llill(Register r1, int64_t i2) { emit_32( LLILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+
+// allow "monadic" use
+// Load-complement / load-negative: passing noreg for r2 makes the operation
+// act on r1 itself (r1 <- op(r1)).
+inline void Assembler::z_lcr(  Register r1, Register r2) { emit_16( LCR_ZOPC   | regt( r1,  8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
+inline void Assembler::z_lcgr( Register r1, Register r2) { emit_32( LCGR_ZOPC  | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lcgfr(Register r1, Register r2) { emit_32( LCGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lnr(  Register r1, Register r2) { emit_16( LNR_ZOPC   | regt( r1,  8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
+inline void Assembler::z_lngr( Register r1, Register r2) { emit_32( LNGR_ZOPC  | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lngfr(Register r1, Register r2) { emit_32( LNGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+
+// Load reversed (byte-swap).
+inline void Assembler::z_lrvr( Register r1, Register r2) { emit_32( LRVR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lrvgr(Register r1, Register r2) { emit_32( LRVGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+// Load and test (sets the condition code).
+inline void Assembler::z_ltr(  Register r1, Register r2) { emit_16( LTR_ZOPC   | regt(r1,  8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_ltgr( Register r1, Register r2) { emit_32( LTGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ltgfr(Register r1, Register r2) { emit_32( LTGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+// Byte/halfword/word stores (the *y forms take a 20-bit signed displacement).
+inline void Assembler::z_stc(  Register r1, const Address &a) { z_stc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stc(  Register r1, int64_t d2, Register x2, Register b2) { emit_32( STC_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stcy( Register r1, const Address &a) { z_stcy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stcy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STCY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sthy( Register r1, const Address &a) { z_sthy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sthy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sty(  Register r1, const Address &a) { z_sty(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sty(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( STY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+// Store facility list extended.
+inline void Assembler::z_stfle(int64_t d2, Register b2) { emit_32(STFLE_ZOPC | uimm12(d2,20,32) | regz(b2,16,32)); }
+
+
+//-----------------------------------
+// SHIFT/ROTATE OPERATIONS
+//-----------------------------------
+// Shifts. The 32-bit forms (sla/sra/sll/srl) are two-address (r1 shifted in
+// place) with a 12-bit unsigned displacement; the 64-bit *g forms are
+// three-address (r1 <- r3 shifted) with a signed 20-bit displacement.
+// The shift amount is the effective address d2(b2) (low bits).
+inline void Assembler::z_sla( Register r1,              int64_t d2, Register b2) { emit_32( SLA_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_slag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAG_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_sra( Register r1,              int64_t d2, Register b2) { emit_32( SRA_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_srag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAG_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_sll( Register r1,              int64_t d2, Register b2) { emit_32( SLL_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_sllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLG_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_srl( Register r1,              int64_t d2, Register b2) { emit_32( SRL_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_srlg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLG_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+
+// Rotate left: rll (32-bit) / rllg (64-bit); r1 <- r3 rotated left by the
+// effective address d2(b2).
+inline void Assembler::z_rll( Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLL_ZOPC  | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+inline void Assembler::z_rllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLLG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+
+// Rotate then AND/XOR/OR/insert selected bits (z196).
+// r1 = destination, r2 = source. r2 is rotated left by nrot5 and the bit range
+// spos3..epos4 of the result is ANDed into r1. All three immediates are 6-bit
+// fields encoded two bits into their respective bytes (16+2/24+2/32+2).
+// test_only presumably sets the instruction's T bit so only the condition code
+// is set and r1 is left unchanged -- TODO confirm against z/Arch POP.
+inline void Assembler::z_rnsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // Rotate then AND selected bits.  -- z196
+  const int64_t len = 48;
+  assert(Immediate::is_uimm(spos3, 6), "range start out of range");   // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(epos4, 6), "range end   out of range");   // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction.
+  emit_48( RNSBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(test_only ? 1 : 0, len-16-1, len-16-1));
+}
+// Same field layout as z_rnsbg, but the selected rotated bits are XORed into r1.
+inline void Assembler::z_rxsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // Rotate then XOR selected bits.  -- z196
+  const int64_t len = 48;
+  assert(Immediate::is_uimm(spos3, 6), "range start out of range");   // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(epos4, 6), "range end   out of range");   // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction.
+  emit_48( RXSBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(test_only ? 1 : 0, len-16-1, len-16-1));
+}
+// Same field layout as z_rnsbg, but the selected rotated bits are ORed into r1.
+inline void Assembler::z_rosbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // Rotate then OR selected bits.  -- z196
+  const int64_t len = 48;
+  assert(Immediate::is_uimm(spos3, 6), "range start out of range");   // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(epos4, 6), "range end   out of range");   // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction.
+  emit_48( ROSBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(test_only ? 1 : 0, len-16-1, len-16-1));
+}
+// Rotate-then-insert. Note the control flag sits at a different bit position
+// (len-24-1) than the T bit of the RxSBG forms (len-16-1); zero_rest
+// presumably sets the Z bit, zeroing r1 bits outside spos3..epos4 instead of
+// preserving them -- TODO confirm against z/Arch POP.
+inline void Assembler::z_risbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool zero_rest) { // Rotate then INS selected bits.  -- z196
+  const int64_t len = 48;
+  assert(Immediate::is_uimm(spos3, 6), "range start out of range");   // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(epos4, 6), "range end   out of range");   // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction.
+  emit_48( RISBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(zero_rest ? 1 : 0, len-24-1, len-24-1));
+}
+
+
+//------------------------------
+// LOGICAL OPERATIONS
+//------------------------------
+// AND: register-storage forms (n = uimm12 disp, ny/ng = simm20 disp),
+// Address convenience wrappers, register-register forms, three-operand
+// z196 *k forms, and immediate forms ANDing one halfword (nihh..nill,
+// 16-bit immediate) or one word (nihf/nilf, 32-bit immediate) of r1.
+inline void Assembler::z_n(   Register r1, int64_t d2, Register x2, Register b2) { emit_32( N_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_ny(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( NY_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ng(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( NG_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_n(   Register r1, const Address& a) { z_n( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ny(  Register r1, const Address& a) { z_ny(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ng(  Register r1, const Address& a) { z_ng(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_nr(  Register r1, Register r2)              { emit_16( NR_ZOPC   | regt(r1,  8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_ngr( Register r1, Register r2)              { emit_32( NGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_nrk( Register r1, Register r2, Register r3) { emit_32( NRK_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_ngrk(Register r1, Register r2, Register r3) { emit_32( NGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_nihh(Register r1, int64_t i2) { emit_32( NIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nihl(Register r1, int64_t i2) { emit_32( NIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nilh(Register r1, int64_t i2) { emit_32( NILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nill(Register r1, int64_t i2) { emit_32( NILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nihf(Register r1, int64_t i2) { emit_48( NIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_nilf(Register r1, int64_t i2) { emit_48( NILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+// OR: same variant structure as the AND group above (storage, Address
+// wrappers, register, z196 three-operand *k, halfword/word immediates).
+inline void Assembler::z_o(   Register r1, int64_t d2, Register x2, Register b2) { emit_32( O_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_oy(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( OY_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_og(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( OG_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_o(   Register r1, const Address& a) { z_o( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_oy(  Register r1, const Address& a) { z_oy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_og(  Register r1, const Address& a) { z_og(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_or(  Register r1, Register r2)              { emit_16( OR_ZOPC   | regt(r1,  8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_ogr( Register r1, Register r2)              { emit_32( OGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ork( Register r1, Register r2, Register r3) { emit_32( ORK_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_ogrk(Register r1, Register r2, Register r3) { emit_32( OGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_oihh(Register r1, int64_t i2) { emit_32( OIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oihl(Register r1, int64_t i2) { emit_32( OIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oilh(Register r1, int64_t i2) { emit_32( OILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oill(Register r1, int64_t i2) { emit_32( OILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oihf(Register r1, int64_t i2) { emit_48( OIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_oilf(Register r1, int64_t i2) { emit_48( OILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+// XOR: same variant structure as AND/OR above, except that only 32-bit
+// immediate forms (xihf/xilf) are provided -- no 16-bit halfword immediates.
+inline void Assembler::z_x(   Register r1, int64_t d2, Register x2, Register b2) { emit_32( X_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_xy(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( XY_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_xg(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( XG_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_x(   Register r1, const Address& a) { z_x( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_xy(  Register r1, const Address& a) { z_xy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_xg(  Register r1, const Address& a) { z_xg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_xr(  Register r1, Register r2)              { emit_16( XR_ZOPC   | regt(r1,  8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_xgr( Register r1, Register r2)              { emit_32( XGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_xrk( Register r1, Register r2, Register r3) { emit_32( XRK_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_xgrk(Register r1, Register r2, Register r3) { emit_32( XGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_xihf(Register r1, int64_t i2) { emit_48( XIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_xilf(Register r1, int64_t i2) { emit_48( XILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+// Storage-to-storage AND/OR/XOR over a byte range. The raw forms take the
+// hardware length code l (operand length minus one, 8-bit field at bit 8);
+// the Address wrappers take the byte count `len` and encode len-1. Both
+// operands must be base+displacement only (no index register encodable).
+inline void Assembler::z_nc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( NC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_oc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( OC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_xc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( XC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_nc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_nc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
+inline void Assembler::z_oc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_oc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
+inline void Assembler::z_xc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_xc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
+
+
+//---------------
+// ADD
+//---------------
+// Signed add (a/ay/ag/agf), add logical (al/aly/alg/algf) from storage;
+// register forms; z196 three-operand *k forms; immediate forms
+// (ahi/aghi = 16-bit, afi/agfi/aih = 32-bit, ahik/aghik = r1 <- r3 + imm16).
+inline void Assembler::z_a(   Register r1, int64_t d2, Register x2, Register b2) { emit_32( A_ZOPC    | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_ay(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( AY_ZOPC   | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_al(  Register r1, int64_t d2, Register x2, Register b2) { emit_32( AL_ZOPC   | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_aly( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALY_ZOPC  | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ag(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( AG_ZOPC   | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_agf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AGF_ZOPC  | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_alg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALG_ZOPC  | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_algf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALGF_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_a(   Register r1, const Address& a) { z_a(   r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ay(  Register r1, const Address& a) { z_ay(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_al(  Register r1, const Address& a) { z_al(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_aly( Register r1, const Address& a) { z_aly( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ag(  Register r1, const Address& a) { z_ag(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_agf( Register r1, const Address& a) { z_agf( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_alg( Register r1, const Address& a) { z_alg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_algf(Register r1, const Address& a) { z_algf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_ar(  Register r1, Register r2) { emit_16( AR_ZOPC   | regt(r1,  8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_agr( Register r1, Register r2) { emit_32( AGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_agfr(Register r1, Register r2) { emit_32( AGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ark( Register r1, Register r2, Register r3) { emit_32( ARK_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_agrk(Register r1, Register r2, Register r3) { emit_32( AGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_ahi(  Register r1, int64_t i2) { emit_32( AHI_ZOPC  | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_afi(  Register r1, int64_t i2) { emit_48( AFI_ZOPC  | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_aghi( Register r1, int64_t i2) { emit_32( AGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_agfi( Register r1, int64_t i2) { emit_48( AGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_aih(  Register r1, int64_t i2) { emit_48( AIH_ZOPC  | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_ahik( Register r1, Register r3, int64_t i2) { emit_48( AHIK_ZOPC  | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+inline void Assembler::z_aghik(Register r1, Register r3, int64_t i2) { emit_48( AGHIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+
+
+//-----------------------
+// ADD LOGICAL
+//-----------------------
+// Unsigned (logical) add: register forms, z196 three-operand *k forms,
+// alcgr (add with carry), 32-bit unsigned immediates (alfi/algfi), and
+// three-operand signed-immediate forms (alhsik/alghsik: r1 <- r3 + imm16).
+inline void Assembler::z_alr(  Register r1, Register r2) { emit_16( ALR_ZOPC   | regt(r1,  8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_algr( Register r1, Register r2) { emit_32( ALGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_algfr(Register r1, Register r2) { emit_32( ALGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_alrk( Register r1, Register r2, Register r3) { emit_32( ALRK_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_algrk(Register r1, Register r2, Register r3) { emit_32( ALGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_alcgr(Register r1, Register r2) { emit_32( ALCGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_alfi( Register r1, int64_t i2) { emit_48( ALFI_ZOPC |  regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_algfi(Register r1, int64_t i2) { emit_48( ALGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+
+inline void Assembler::z_alhsik( Register r1, Register r3, int64_t i2) { emit_48( ALHSIK_ZOPC  | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+inline void Assembler::z_alghsik(Register r1, Register r3, int64_t i2) { emit_48( ALGHSIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+
+// In-memory arithmetic (add signed, add logical with signed immediate):
+// the 8-bit signed immediate i2 is added directly to the storage operand at
+// d1(b1) (20-bit signed displacement). The Address wrappers reject an index
+// register, since none is encodable in this format.
+inline void Assembler::z_asi(  int64_t d1, Register b1, int64_t i2) { emit_48( ASI_ZOPC   | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_agsi( int64_t d1, Register b1, int64_t i2) { emit_48( AGSI_ZOPC  | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_alsi( int64_t d1, Register b1, int64_t i2) { emit_48( ALSI_ZOPC  | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_algsi(int64_t d1, Register b1, int64_t i2) { emit_48( ALGSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_asi(  const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ASI");   z_asi(  d.disp(), d.base(), i2); }
+inline void Assembler::z_agsi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in AGSI");  z_agsi( d.disp(), d.base(), i2); }
+inline void Assembler::z_alsi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ALSI");  z_alsi( d.disp(), d.base(), i2); }
+inline void Assembler::z_algsi(const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ALGSI"); z_algsi(d.disp(), d.base(), i2); }
+
+
+//--------------------
+// SUBTRACT
+//--------------------
+// Signed subtract (s/sy/sg/sgf) and logical subtract (slg/slgf) from storage,
+// register forms, z196 three-operand *k forms, and halfword subtract (sh/shy).
+inline void Assembler::z_s(   Register r1, int64_t d2, Register x2, Register b2) { emit_32( S_ZOPC    | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_sy(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( SY_ZOPC   | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sg(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( SG_ZOPC   | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sgf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SGF_ZOPC  | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_slg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLG_ZOPC  | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_slgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLGF_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_s(   Register r1, const Address& a) { z_s(   r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sy(  Register r1, const Address& a) { z_sy(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sg(  Register r1, const Address& a) { z_sg(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sgf( Register r1, const Address& a) { z_sgf( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_slg( Register r1, const Address& a) { z_slg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_slgf(Register r1, const Address& a) { z_slgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_sr(  Register r1, Register r2) { emit_16( SR_ZOPC   | regt(r1,  8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_sgr( Register r1, Register r2) { emit_32( SGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_sgfr(Register r1, Register r2) { emit_32( SGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_srk( Register r1, Register r2, Register r3) { emit_32( SRK_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_sgrk(Register r1, Register r2, Register r3) { emit_32( SGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_sh(  Register r1, int64_t d2, Register x2, Register b2) { emit_32( SH_ZOPC  | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_shy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SHY_ZOPC | regt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+// NOTE(review): these two wrappers use a.base() where every other Address
+// overload in this file uses a.baseOrR0() -- a base-less Address would behave
+// differently here. Verify this asymmetry is intentional.
+inline void Assembler::z_sh(  Register r1, const Address &a) { z_sh( r1, a.disp(), a.indexOrR0(), a.base()); }
+inline void Assembler::z_shy( Register r1, const Address &a) { z_shy(r1, a.disp(), a.indexOrR0(), a.base()); }
+
+
+//----------------------------
+// SUBTRACT LOGICAL
+//----------------------------
+// Unsigned (logical) subtract: register forms, z196 three-operand *k forms,
+// and 32-bit unsigned immediate forms (slfi/slgfi).
+inline void Assembler::z_slr(  Register r1, Register r2) { emit_16( SLR_ZOPC   | regt(r1,  8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_slgr( Register r1, Register r2) { emit_32( SLGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_slgfr(Register r1, Register r2) { emit_32( SLGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_slrk( Register r1, Register r2, Register r3) { emit_32(SLRK_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_slgrk(Register r1, Register r2, Register r3) { emit_32(SLGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_slfi( Register r1, int64_t i2) { emit_48( SLFI_ZOPC  | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_slgfi(Register r1, int64_t i2) { emit_48( SLGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+
+
+//--------------------
+// MULTIPLY
+//--------------------
+// Register forms: ms* = multiply single (result truncated to r1's width);
+// ml/mlgr = multiply logical -- per z/Arch these presumably produce a
+// double-width result in an even/odd register pair (confirm against POP).
+inline void Assembler::z_msr(  Register r1, Register r2) { emit_32( MSR_ZOPC   | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_msgr( Register r1, Register r2) { emit_32( MSGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_msgfr(Register r1, Register r2) { emit_32( MSGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_mlr(  Register r1, Register r2) { emit_32( MLR_ZOPC   | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_mlgr( Register r1, Register r2) { emit_32( MLGR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+// Storage forms, all with 20-bit signed displacement.
+inline void Assembler::z_mhy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MHY_ZOPC  | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_msy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSY_ZOPC  | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_msg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSG_ZOPC  | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_msgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ml(  Register r1, int64_t d2, Register x2, Register b2) { emit_48( ML_ZOPC   | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_mlg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MLG_ZOPC  | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+
+inline void Assembler::z_mhy( Register r1, const Address& a) { z_mhy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msy( Register r1, const Address& a) { z_msy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msg( Register r1, const Address& a) { z_msg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msgf(Register r1, const Address& a) { z_msgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ml(  Register r1, const Address& a) { z_ml(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_mlg( Register r1, const Address& a) { z_mlg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+// Immediate forms: 32-bit (msfi/msgfi) and 16-bit (mhi/mghi) signed.
+inline void Assembler::z_msfi( Register r1, int64_t i2) { emit_48( MSFI_ZOPC  | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_msgfi(Register r1, int64_t i2) { emit_48( MSGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_mhi(  Register r1, int64_t i2) { emit_32( MHI_ZOPC   | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_mghi( Register r1, int64_t i2) { emit_32( MGHI_ZOPC  | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+
+
+//------------------
+// DIVIDE
+//------------------
+// dsgr/dsgfr: divide single. Per z/Arch r1 presumably designates an even/odd
+// register pair receiving remainder/quotient -- confirm against POP.
+inline void Assembler::z_dsgr( Register r1, Register r2) { emit_32( DSGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_dsgfr(Register r1, Register r2) { emit_32( DSGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+
+//-------------------
+// COMPARE
+//-------------------
+// Signed compares: register, immediate, and storage forms.
+// NOTE(review): z_cr/z_cgr/z_cgfr (and several storage forms below) encode r1
+// with reg() while most emitters in this file use regt() for the first
+// operand -- verify the reg/regt distinction is irrelevant here.
+// NOTE(review): z_cfi/z_cgfi use uimm32 for the immediate although CFI/CGFI
+// compare signed values (cf. z_afi, which uses simm32) -- verify that negative
+// immediates are accepted as intended.
+inline void Assembler::z_cr(  Register r1, Register r2) { emit_16( CR_ZOPC   | reg(r1,  8, 16) | reg(r2,12,16)); }
+inline void Assembler::z_cgr( Register r1, Register r2) { emit_32( CGR_ZOPC  | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_cgfr(Register r1, Register r2) { emit_32( CGFR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_chi( Register r1, int64_t i2)  { emit_32( CHI_ZOPC  | reg(r1,  8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_cghi(Register r1, int64_t i2)  { emit_32( CGHI_ZOPC | reg(r1,  8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_cfi( Register r1, int64_t i2)  { emit_48( CFI_ZOPC  | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_cgfi(Register r1, int64_t i2)  { emit_48( CGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_ch(Register r1, const Address &a) { z_ch(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ch(Register r1, int64_t d2, Register x2, Register b2) { emit_32( CH_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_c(Register r1, const Address &a) { z_c(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_c(Register r1,  int64_t d2, Register x2, Register b2) { emit_32( C_ZOPC  | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_cy(Register r1, const Address &a) { z_cy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cy(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+// Convenience: no index register (Z_R0 encodes "no index").
+inline void Assembler::z_cy(Register r1, int64_t d2, Register b2) { z_cy(r1, d2, Z_R0, b2); }
+inline void Assembler::z_cg(Register r1, const Address &a) { z_cg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_clr(Register r1, Register r2) { emit_16( CLR_ZOPC | reg(r1,8,16) | reg(r2,12,16)); }
+inline void Assembler::z_clgr(Register r1, Register r2) { emit_32( CLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+
+// Unsigned (logical) compares: 32-bit unsigned immediates, storage forms,
+// storage-storage clc (length code l = byte count - 1), and the interruptible
+// long/unicode variants clcle/clclu operating on register pairs r1/r3.
+// NOTE(review): z_cl encodes r1 with regt() but z_clg uses reg() -- verify
+// the inconsistency is harmless.
+// NOTE(review): z_clclu emits a 6-byte instruction but only encodes a 12-bit
+// displacement (uimm12 at bit 20) -- the long displacement field, if this is
+// an RSY-format instruction, would be left zero. Verify against z/Arch POP.
+inline void Assembler::z_clfi(Register r1, int64_t i2)  { emit_48( CLFI_ZOPC  | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_clgfi(Register r1, int64_t i2) { emit_48( CLGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_cl(Register r1, const Address &a) { z_cl(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cl(Register r1, int64_t d2, Register x2, Register b2) { emit_32( CL_ZOPC | regt(r1, 8, 32) | uimm12(d2,20,32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_cly(Register r1, const Address &a) { z_cly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cly(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CLY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_cly(Register r1, int64_t d2, Register b2) { z_cly(r1, d2, Z_R0, b2); }
+inline void Assembler::z_clg(Register r1, const Address &a) { z_clg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_clg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CLG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_clc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( CLC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_clcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( CLCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_clclu(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CLCLU_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | uimm12(d2, 20, 48) | reg(b2, 16, 48)); }
+
+// Test under mask: set the condition code from the bits of one halfword of r1
+// selected by the 16-bit mask i2 (ll/lh/hl/hh select the halfword, per
+// mnemonic). r1 is not modified.
+inline void Assembler::z_tmll(Register r1, int64_t i2) { emit_32( TMLL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_tmlh(Register r1, int64_t i2) { emit_32( TMLH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_tmhl(Register r1, int64_t i2) { emit_32( TMHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_tmhh(Register r1, int64_t i2) { emit_32( TMHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+
+// translate characters
+inline void Assembler::z_troo(Register r1, Register r2, int64_t m3) { emit_32( TROO_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_trot(Register r1, Register r2, int64_t m3) { emit_32( TROT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_trto(Register r1, Register r2, int64_t m3) { emit_32( TRTO_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_trtt(Register r1, Register r2, int64_t m3) { emit_32( TRTT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+
+// signed comparison
+// Compare-and-branch (*b forms branch to base+displacement) and
+// compare-and-branch-relative (*j forms use a 16-bit pc-relative halfword offset).
+// m3 is the branch condition mask.
+inline void Assembler::z_crb(Register r1,  Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CRB_ZOPC  | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_crj(Register r1,  Register r2, branch_condition m3, address a4)              { emit_48( CRJ_ZOPC  | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cgrj(Register r1, Register r2, branch_condition m3, address a4)              { emit_48( CGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cib(Register r1,  int64_t i2, branch_condition m3, int64_t d4, Register b4)  { emit_48( CIB_ZOPC  | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4)  { emit_48( CGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cij(Register r1,  int64_t i2, branch_condition m3, address a4)               { emit_48( CIJ_ZOPC  | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cgij(Register r1, int64_t i2, branch_condition m3, address a4)               { emit_48( CGIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
+// unsigned comparison
+// Same encodings as above, but the immediate operand is unsigned (uimm8).
+inline void Assembler::z_clrb(Register r1,  Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLRB_ZOPC  | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clrj(Register r1,  Register r2, branch_condition m3, address a4)              { emit_48( CLRJ_ZOPC  | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clgrj(Register r1, Register r2, branch_condition m3, address a4)              { emit_48( CLGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clib(Register r1,  int64_t i2, branch_condition m3, int64_t d4, Register b4)  { emit_48( CLIB_ZOPC  | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4)  { emit_48( CLGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clij(Register r1,  int64_t i2, branch_condition m3, address a4)               { emit_48( CLIJ_ZOPC  | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clgij(Register r1, int64_t i2, branch_condition m3, address a4)               { emit_48( CLGIJ_ZOPC | uimm4(m3, 12, 48) | reg(r1, 8, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
+
+// Compare and trap instructions (signed).
+// Trap (rather than branch) when the comparison satisfies mask m3;
+// used to implement implicit checks (see is_sigtrap_* below).
+inline void Assembler::z_crt(Register  r1, Register r2, int64_t m3)  { emit_32( CRT_ZOPC   | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_cgrt(Register r1, Register r2, int64_t m3)  { emit_32( CGRT_ZOPC  | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_cit(Register  r1, int64_t i2, int64_t m3)   { emit_48( CIT_ZOPC   | reg(r1,  8, 48) | simm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cgit(Register r1, int64_t i2, int64_t m3)   { emit_48( CGIT_ZOPC  | reg(r1,  8, 48) | simm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
+
+// Compare and trap instructions (unsigned).
+inline void Assembler::z_clrt(Register  r1, Register r2, int64_t m3) { emit_32( CLRT_ZOPC  | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_clgrt(Register r1, Register r2, int64_t m3) { emit_32( CLGRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_clfit(Register r1, int64_t i2, int64_t m3)  { emit_48( CLFIT_ZOPC | reg(r1,  8, 48) | uimm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clgit(Register r1, int64_t i2, int64_t m3)  { emit_48( CLGIT_ZOPC | reg(r1,  8, 48) | uimm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
+
+// Branch on condition: z_bc branches to base+index+displacement,
+// z_bcr branches to the address in r2, z_brc/z_brcl are pc-relative
+// (16-bit resp. 32-bit halfword offset).
+inline void Assembler::z_bc(  branch_condition m1, int64_t d2, Register x2, Register b2) { emit_32( BC_ZOPC | 0 << 16 | uimm4(m1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_bcr( branch_condition m1, Register r2) { emit_16( BCR_ZOPC | uimm4(m1,8,16) | reg(r2,12,16)); }
+inline void Assembler::z_brc( branch_condition i1, int64_t i2)  { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_brc( branch_condition i1, address a)   { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); }
+inline void Assembler::z_brcl(branch_condition i1, address a)   { emit_48( BRCL_ZOPC | uimm4(i1, 8, 48)| simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
+inline void Assembler::z_bctgr(Register r1, Register r2)        { emit_32( BCTGR_ZOPC | reg( r1, 24, 32) | reg( r2, 28, 32)); };
+
+// Branch and save: call via register, return address goes to r1.
+inline void Assembler::z_basr(Register r1, Register r2) { emit_16( BASR_ZOPC | regt(r1,8,16) | reg(r2,12,16)); }
+
+// Branch relative and save long: pc-relative call, return address goes to r1.
+inline void Assembler::z_brasl(Register r1, address a) { emit_48( BRASL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
+
+// Branch relative on count: decrement r1, branch while non-zero.
+inline void Assembler::z_brct(Register r1, address a) { emit_32( BRCT_ZOPC | regt(r1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); }
+inline void Assembler::z_brct(Register r1, Label& L) {z_brct(r1, target(L)); }
+
+// Branch relative on index high/low-or-equal (32- and 64-bit forms).
+inline void Assembler::z_brxh(Register r1, Register r3, address a) {emit_32( BRXH_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32)  | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32));}
+inline void Assembler::z_brxh(Register r1, Register r3, Label& L) {z_brxh(r1, r3, target(L)); }
+
+inline void Assembler::z_brxle(Register r1, Register r3, address a) {emit_32( BRXLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32));}
+inline void Assembler::z_brxle(Register r1, Register r3, Label& L) {z_brxle(r1, r3, target(L)); }
+
+inline void Assembler::z_brxhg(Register r1, Register r3, address a) {emit_48( BRXHG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 48));}
+inline void Assembler::z_brxhg(Register r1, Register r3, Label& L) {z_brxhg(r1, r3, target(L)); }
+
+inline void Assembler::z_brxlg(Register r1, Register r3, address a) {emit_48( BRXLG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 48));}
+inline void Assembler::z_brxlg(Register r1, Register r3, Label& L) {z_brxlg(r1, r3, target(L)); }
+
+// Bit counting and high-word add instructions.
+inline void Assembler::z_flogr(Register r1, Register r2) { emit_32( FLOGR_ZOPC  | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_popcnt(Register r1, Register r2) { emit_32( POPCNT_ZOPC  | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ahhhr(Register r1, Register r2, Register r3) { emit_32( AHHHR_ZOPC  | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ahhlr(Register r1, Register r2, Register r3) { emit_32( AHHLR_ZOPC  | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+
+// Test addressing mode, store clock, and multiple-register load/store.
+inline void Assembler::z_tam() { emit_16( TAM_ZOPC); }
+inline void Assembler::z_stck(int64_t d2, Register b2)  { emit_32( STCK_ZOPC  | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stckf(int64_t d2, Register b2) { emit_32( STCKF_ZOPC | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMG_ZOPC | simm20(d2) | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) ); }
+inline void Assembler::z_lmg(Register r1, Register r3, int64_t d2, Register b2)  { emit_48( LMG_ZOPC  | simm20(d2) | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) ); }
+
+// Compare and swap (32-bit, 32-bit long-displacement, 64-bit).
+// The Address forms cannot encode an index register (RS/RSY format).
+inline void Assembler::z_cs(Register r1, Register r3, int64_t d2, Register b2)  { emit_32( CS_ZOPC  | regt(r1, 8, 32) | reg(r3, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_csy(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSY_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_csg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_cs( Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_cs( r1, r3, a.disp(), a.baseOrR0()); }
+inline void Assembler::z_csy(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_csy(r1, r3, a.disp(), a.baseOrR0()); }
+inline void Assembler::z_csg(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_csg(r1, r3, a.disp(), a.baseOrR0()); }
+
+// Convert to decimal (32- and 64-bit source register).
+inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2)  { emit_32( CVD_ZOPC  | regt(r1, 8, 32) | reg(x2, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
+
+
+//-------------------------------
+// FLOAT INSTRUCTIONS
+//-------------------------------
+
+//----------------
+// LOAD
+//----------------
+// Register-register and register-storage loads for short (E) and long (D)
+// BFP operands; *y forms take a signed 20-bit long displacement.
+inline void Assembler::z_ler(  FloatRegister r1, FloatRegister r2) { emit_16( LER_ZOPC   | fregt(r1,8,16)    | freg(r2,12,16));   }
+inline void Assembler::z_ldr(  FloatRegister r1, FloatRegister r2) { emit_16( LDR_ZOPC   | fregt(r1,8,16)    | freg(r2,12,16));   }
+// Lengthen short->long and round long->short.
+inline void Assembler::z_ldebr(FloatRegister r1, FloatRegister r2) { emit_32( LDEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_ledbr(FloatRegister r1, FloatRegister r2) { emit_32( LEDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_le( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( LE_ZOPC  | fregt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_ley(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( LEY_ZOPC | fregt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ld( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( LD_ZOPC  | fregt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_ldy(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( LDY_ZOPC | fregt(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_le( FloatRegister r1, const Address &a) { z_le( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ley(FloatRegister r1, const Address &a) { z_ley(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ld( FloatRegister r1, const Address &a) { z_ld( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ldy(FloatRegister r1, const Address &a) { z_ldy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+// Load zero into a float register (long resp. short form).
+inline void Assembler::z_lzdr(FloatRegister r1) { emit_32( LZDR_ZOPC | fregt(r1, 24, 32)); }
+inline void Assembler::z_lzer(FloatRegister f1) { emit_32( LZER_ZOPC | fregt(f1, 24, 32)); }
+
+
+//-----------------
+// STORE
+//-----------------
+inline void Assembler::z_ste( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( STE_ZOPC  | freg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stey(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( STEY_ZOPC | freg(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_std( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( STD_ZOPC  | freg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stdy(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( STDY_ZOPC | freg(r1, 8, 48) | simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ste( FloatRegister r1, const Address &a) { z_ste( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stey(FloatRegister r1, const Address &a) { z_stey(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_std( FloatRegister r1, const Address &a) { z_std( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stdy(FloatRegister r1, const Address &a) { z_stdy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// ADD
+//---------------
+// BFP add: register-register (*br) and register-storage forms,
+// short (e) and long (d) precision.
+inline void Assembler::z_aebr( FloatRegister f1, FloatRegister f2)                  { emit_32( AEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_adbr( FloatRegister f1, FloatRegister f2)                  { emit_32( ADBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_aeb(  FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( AEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_adb(  FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( ADB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_aeb(  FloatRegister r1, const Address& a)                   { z_aeb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_adb(  FloatRegister r1, const Address& a)                   { z_adb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// SUB
+//---------------
+inline void Assembler::z_sebr( FloatRegister f1, FloatRegister f2)                  { emit_32( SEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_sdbr( FloatRegister f1, FloatRegister f2)                  { emit_32( SDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_seb(  FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( SEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_sdb(  FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( SDB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_seb(  FloatRegister r1, const Address& a)                   { z_seb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sdb(  FloatRegister r1, const Address& a)                   { z_sdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+// Load complement (negate).
+inline void Assembler::z_lcebr(FloatRegister r1, FloatRegister r2)                   { emit_32( LCEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_lcdbr(FloatRegister r1, FloatRegister r2)                   { emit_32( LCDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+
+// Load positive (absolute value). Passing fnoreg as fr2 operates on fr1 in place.
+inline void Assembler::z_lpdbr( FloatRegister fr1, FloatRegister fr2) { emit_32( LPDBR_ZOPC | fregt( fr1, 24,32) | freg((fr2 == fnoreg) ? fr1:fr2, 28, 32)); }
+
+
+//---------------
+// MUL
+//---------------
+inline void Assembler::z_meebr(FloatRegister f1, FloatRegister f2)                  { emit_32( MEEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_mdbr( FloatRegister f1, FloatRegister f2)                  { emit_32( MDBR_ZOPC  | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_meeb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( MEEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_mdb(  FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( MDB_ZOPC  | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_meeb( FloatRegister r1, const Address& a)                   { z_meeb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_mdb(  FloatRegister r1, const Address& a)                   { z_mdb(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// DIV
+//---------------
+inline void Assembler::z_debr( FloatRegister f1, FloatRegister f2)                      { emit_32( DEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_ddbr( FloatRegister f1, FloatRegister f2)                      { emit_32( DDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_deb(  FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( DEB_ZOPC  | fregt( f1, 8, 48)  | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_ddb(  FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( DDB_ZOPC  | fregt( f1, 8, 48)  | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_deb(  FloatRegister r1, const Address& a)                      { z_deb(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ddb(  FloatRegister r1, const Address& a)                      { z_ddb(  r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// square root
+//---------------
+inline void Assembler::z_sqdbr(FloatRegister f1, FloatRegister f2)                       { emit_32(SQDBR_ZOPC | fregt(f1, 24, 32)  | freg(f2, 28, 32)); }
+inline void Assembler::z_sqdb( FloatRegister fr1, int64_t d2, Register x2, Register b2 ) { emit_48( SQDB_ZOPC | fregt( fr1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_sqdb( FloatRegister fr1, int64_t d2, Register b2)               { z_sqdb( fr1, d2, Z_R0, b2);}
+
+
+//---------------
+// CMP
+//---------------
+// BFP compare: sets the condition code only.
+inline void Assembler::z_cebr(FloatRegister r1, FloatRegister r2)                    { emit_32( CEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( CEB_ZOPC  | fregt(r1, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ceb(FloatRegister r1, const Address &a)                     { z_ceb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cdbr(FloatRegister r1, FloatRegister r2)                    { emit_32( CDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cdb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( CDB_ZOPC  | fregt(r1, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_cdb(FloatRegister r1, const Address &a)                     { z_cdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//------------------------------------
+// FLOAT <-> INT conversion
+//------------------------------------
+// Bit-exact moves between general and float registers (no conversion).
+inline void Assembler::z_ldgr(FloatRegister r1, Register r2)                  { emit_32( LDGR_ZOPC  | fregt(r1, 24, 32)  | reg(r2, 28, 32));  }
+inline void Assembler::z_lgdr(Register r1, FloatRegister r2)                  { emit_32( LGDR_ZOPC  | regt( r1, 24, 32)  | freg(r2, 28, 32)); }
+
+// Convert from fixed (32/64-bit int) to short/long BFP.
+inline void Assembler::z_cefbr( FloatRegister r1, Register r2)                { emit_32( CEFBR_ZOPC | fregt( r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_cdfbr( FloatRegister r1, Register r2)                { emit_32( CDFBR_ZOPC | fregt( r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_cegbr( FloatRegister r1, Register r2)                { emit_32( CEGBR_ZOPC | fregt( r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_cdgbr( FloatRegister r1, Register r2)                { emit_32( CDGBR_ZOPC | fregt( r1, 24, 32) | reg( r2, 28, 32)); }
+
+// Convert to fixed: BFP to 32/64-bit int, with explicit rounding mode m.
+inline void Assembler::z_cfebr(Register r1, FloatRegister r2, RoundingMode m) { emit_32( CFEBR_ZOPC | regt(r1, 24, 32) | rounding_mode(m, 16, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cfdbr(Register r1, FloatRegister r2, RoundingMode m) { emit_32( CFDBR_ZOPC | regt(r1, 24, 32) | rounding_mode(m, 16, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cgebr(Register r1, FloatRegister r2, RoundingMode m) { emit_32( CGEBR_ZOPC | regt(r1, 24, 32) | rounding_mode(m, 16, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cgdbr(Register r1, FloatRegister r2, RoundingMode m) { emit_32( CGDBR_ZOPC | regt(r1, 24, 32) | rounding_mode(m, 16, 32) | freg(r2, 28, 32)); }
+
+
+// Convenience overloads without an index register:
+// all delegate to the full form with Z_R0 as index (encodes "no index").
+  inline void Assembler::z_layz(Register r1, int64_t d2, Register b2)      { z_layz(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lay(Register r1, int64_t d2, Register b2)       { z_lay( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_laz(Register r1, int64_t d2, Register b2)       { z_laz( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_la(Register r1, int64_t d2, Register b2)        { z_la(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_l(Register r1, int64_t d2, Register b2)         { z_l(   r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ly(Register r1, int64_t d2, Register b2)        { z_ly(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lg(Register r1, int64_t d2, Register b2)        { z_lg(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_st(Register r1, int64_t d2, Register b2)        { z_st(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_sty(Register r1, int64_t d2, Register b2)       { z_sty( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_stg(Register r1, int64_t d2, Register b2)       { z_stg( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lgf(Register r1, int64_t d2, Register b2)       { z_lgf( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lgh(Register r1, int64_t d2, Register b2)       { z_lgh( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_llgh(Register r1, int64_t d2, Register b2)      { z_llgh(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_llgf(Register r1, int64_t d2, Register b2)      { z_llgf(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lgb(Register r1, int64_t d2, Register b2)       { z_lgb( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_cl( Register r1, int64_t d2, Register b2)       { z_cl(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_c(Register r1, int64_t d2, Register b2)         { z_c(   r1, d2, Z_R0, b2); }
+  inline void Assembler::z_cg(Register r1, int64_t d2, Register b2)        { z_cg(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_sh(Register r1, int64_t d2, Register b2)        { z_sh(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_shy(Register r1, int64_t d2, Register b2)       { z_shy( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ste(FloatRegister r1, int64_t d2, Register b2)  { z_ste( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_std(FloatRegister r1, int64_t d2, Register b2)  { z_std( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_stdy(FloatRegister r1, int64_t d2, Register b2) { z_stdy(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_stey(FloatRegister r1, int64_t d2, Register b2) { z_stey(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ld(FloatRegister r1, int64_t d2, Register b2)   { z_ld(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ldy(FloatRegister r1, int64_t d2, Register b2)  { z_ldy( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_le(FloatRegister r1, int64_t d2, Register b2)   { z_le(  r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ley(FloatRegister r1, int64_t d2, Register b2)  { z_ley( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_agf(Register r1, int64_t d2, Register b2)       { z_agf( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_cvd(Register r1, int64_t d2, Register b2)       { z_cvd( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_cvdg(Register r1, int64_t d2, Register b2)      { z_cvdg(r1, d2, Z_R0, b2); }
+
+// signed comparison
+// Label-taking overloads: resolve the Label to an address via target()
+// and delegate to the pc-relative forms above.
+inline void Assembler::z_crj(Register r1, Register r2, branch_condition m3, Label& L)   { z_crj(  r1, r2, m3, target(L)); }
+inline void Assembler::z_cgrj(Register r1, Register r2, branch_condition m3, Label& L)  { z_cgrj( r1, r2, m3, target(L)); }
+inline void Assembler::z_cij(Register r1, int64_t i2, branch_condition m3, Label& L)    { z_cij(  r1, i2, m3, target(L)); }
+inline void Assembler::z_cgij(Register r1, int64_t i2, branch_condition m3, Label& L)   { z_cgij( r1, i2, m3, target(L)); }
+// unsigned comparison
+inline void Assembler::z_clrj(Register r1, Register r2, branch_condition m3, Label& L)  { z_clrj( r1, r2, m3, target(L)); }
+inline void Assembler::z_clgrj(Register r1, Register r2, branch_condition m3, Label& L) { z_clgrj(r1, r2, m3, target(L)); }
+inline void Assembler::z_clij(Register r1, int64_t i2, branch_condition m3, Label& L)   { z_clij( r1, i2, m3, target(L)); }
+inline void Assembler::z_clgij(Register r1, int64_t i2, branch_condition m3, Label& L)  { z_clgij(r1, i2, m3, target(L)); }
+
+// branch never (nop), branch always
+inline void Assembler::z_nop() { z_bcr(bcondNop, Z_R0); }
+inline void Assembler::z_br(Register r2) { assert(r2 != Z_R0, "nop if target is Z_R0, use z_nop() instead"); z_bcr(bcondAlways, r2 ); }
+
+inline void Assembler::z_exrl(Register r1, Label& L) { z_exrl(r1, target(L)); }  // z10
+inline void Assembler::z_larl(Register r1, Label& L) { z_larl(r1, target(L)); }
+// Condition-specific branch mnemonics, all expressed via z_brc/z_brcl
+// with the matching branch condition mask.
+inline void Assembler::z_bru(   Label& L) { z_brc(bcondAlways,target(L)); }
+inline void Assembler::z_brul(  Label& L) { z_brcl(bcondAlways,target(L)); }
+inline void Assembler::z_brul( address a) { z_brcl(bcondAlways,a); }
+inline void Assembler::z_brh(   Label& L) { z_brc(bcondHigh,target(L)); }
+inline void Assembler::z_brl(   Label& L) { z_brc(bcondLow,target(L)); }
+inline void Assembler::z_bre(   Label& L) { z_brc(bcondEqual,target(L)); }
+inline void Assembler::z_brnh(  Label& L) { z_brc(bcondNotHigh,target(L)); }
+inline void Assembler::z_brnl(  Label& L) { z_brc(bcondNotLow,target(L)); }
+inline void Assembler::z_brne(  Label& L) { z_brc(bcondNotEqual,target(L)); }
+inline void Assembler::z_brz(   Label& L) { z_brc(bcondZero,target(L)); }
+inline void Assembler::z_brnz(  Label& L) { z_brc(bcondNotZero,target(L)); }
+inline void Assembler::z_braz(  Label& L) { z_brc(bcondAllZero,target(L)); }
+inline void Assembler::z_brnaz( Label& L) { z_brc(bcondNotAllZero,target(L)); }
+inline void Assembler::z_brnp(  Label& L) { z_brc( bcondNotPositive, target( L)); }
+inline void Assembler::z_btrue( Label& L) { z_brc(bcondAllOne,target(L)); }
+inline void Assembler::z_bfalse(Label& L) { z_brc(bcondAllZero,target(L)); }
+inline void Assembler::z_brno(  Label& L) { z_brc(bcondNotOrdered,target(L)); }
+inline void Assembler::z_brc( branch_condition m, Label& L) { z_brc(m, target(L)); }
+inline void Assembler::z_brcl(branch_condition m, Label& L) { z_brcl(m, target(L)); }
+
+
+// Instruction must start at passed address.
+// Extra check for illtraps with ID.
+// Returns the instruction length in bytes (2, 4, or 6), derived from the
+// two leftmost bits of the first opcode byte: 00 -> 2, 01/10 -> 4, 11 -> 6.
+inline int Assembler::instr_len(unsigned char *instr) {
+  switch ((*instr) >> 6) {
+    case 0: return 2;
+    case 1: // fallthru
+    case 2: return 4;
+    case 3: return 6;
+    default:
+      // Control can't reach here.
+      // The switch expression examines just the leftmost two bits
+      // of the main opcode. So the range of values is just [0..3].
+      // Having a default clause makes the compiler happy.
+      ShouldNotReachHere();
+      return 0;
+  }
+}
+
+// Move instr at pc right-justified into passed long int.
+// Return instr len in bytes as function result.
+// Note: 2-byte instr don't really need to be accessed unsigned
+// because leftmost two bits are always zero. We use
+// unsigned here for reasons of uniformity.
+inline unsigned int Assembler::get_instruction(unsigned char *pc, unsigned long *instr) {
+  unsigned int len = instr_len(pc);
+  switch (len) {
+    case 2:
+      *instr = *(unsigned short*) pc; break;
+    case 4:
+      *instr = *(unsigned int*) pc; break;
+    case 6:
+      // Must compose this case. Can't read 8 bytes and then cut off
+      // the rightmost two bytes. Could potentially access
+      // unallocated storage.
+      *instr = ((unsigned long)(*(unsigned int*)   pc)) << 16 |
+               ((unsigned long)*(unsigned short*) (pc + 4)); break;
+    default:
+      // Control can't reach here.
+      // The length as returned from instr_len() can only be 2, 4, or 6 bytes.
+      // Having a default clause makes the compiler happy.
+      ShouldNotReachHere();
+      break;
+  }
+  return len;
+}
+
+// Check if instruction is the expected one.
+// Instruction is passed right-justified in inst.
+// The opcode mask to compare under is selected from the main opcode of idef;
+// only opcodes used by callers of this function are covered, any other
+// opcode hits ShouldNotReachHere().
+inline bool Assembler::is_equal(unsigned long inst, unsigned long idef) {
+  unsigned long imask;
+
+  if ((idef >> 32) != 0) { // 6byte instructions
+    switch (idef >> 40) {  // select mask by main opcode
+      case 0xc0:
+      case 0xc2:
+      case 0xc4:
+      case 0xc6: imask = RIL_MASK; break;
+      case 0xec:
+        if ((idef & 0x00ffL) < 0x0080L) {
+          imask = RIE_MASK;
+          break;
+        }
+        // Fallthru for other sub opcodes.
+      default:
+#ifdef ASSERT
+        tty->print_cr("inst = %16.16lx, idef = %16.16lx, imask unspecified\n", inst, idef);
+        tty->flush();
+#endif
+        ShouldNotReachHere();
+        return 0;
+    }
+  } else {                 // 4-byte instructions
+    switch (idef >> 24) {  // Select mask by main opcode.
+      case 0x84:
+      case 0x85: imask = RSI_MASK; break;
+      case 0xa5:
+      case 0xa7: imask =  RI_MASK; break;
+      case 0xb9: imask = RRE_MASK; break; // RRE_MASK or RRF_MASK. Opcode fields are at same bit positions.
+      default: {
+#ifdef ASSERT
+        tty->print_cr("inst = %16.16lx, idef = %16.16lx, imask unspecified\n", inst, idef);
+        tty->flush();
+#endif
+        ShouldNotReachHere();
+        return 0;
+      }
+    }
+  }
+  return (inst & imask) == idef;
+}
+
+// Check if instruction matches idef under an explicitly supplied opcode mask.
+inline bool Assembler::is_equal(unsigned long inst, unsigned long idef, unsigned long imask) {
+  assert(imask != 0, "valid instruction mask required");
+  return (inst & imask) == idef;
+}
+
+// Check if instruction is the expected one.
+// Instruction is passed left-justified at inst.
+// Fetches the instruction at iloc (right-justified) and compares it
+// against idef with the mask derived from idef's opcode.
+inline bool Assembler::is_equal(address iloc, unsigned long idef) {
+  unsigned long inst;
+  get_instruction(iloc, &inst);
+  return is_equal(inst, idef);
+}
+
+// Same as above, but compares under the explicitly supplied mask.
+inline bool Assembler::is_equal(address iloc, unsigned long idef, unsigned long imask) {
+  unsigned long inst;
+  get_instruction(iloc, &inst);
+  return is_equal(inst, idef, imask);
+}
+
+// Recognize the compare-and-trap instructions used for implicit range checks
+// (CLFIT/CLRT, unsigned compare) at a SIGTRAP fault pc.
+inline bool Assembler::is_sigtrap_range_check(address pc) {
+  return (is_equal(pc, CLFIT_ZOPC, RIE_MASK) || is_equal(pc, CLRT_ZOPC, RRE_MASK));
+}
+
+// Recognize the compare-and-trap instructions used for implicit zero checks
+// (CGIT/CIT, signed compare against immediate) at a SIGTRAP fault pc.
+inline bool Assembler::is_sigtrap_zero_check(address pc) {
+  return (is_equal(pc, CGIT_ZOPC, RIE_MASK) || is_equal(pc, CIT_ZOPC, RIE_MASK));
+}
+
+#endif // CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/bytes_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_BYTES_S390_HPP
+#define CPU_S390_VM_BYTES_S390_HPP
+
+#include "memory/allocation.hpp"
+
+class Bytes: AllStatic {
+ public:
+  // Efficient reading and writing of unaligned unsigned data in
+  // platform-specific byte ordering.
+
+  // Use regular load and store for unaligned access.
+  //
+  // On z/Architecture, unaligned loads and stores are supported when using the
+  // "traditional" load (LH, L/LY, LG) and store (STH, ST/STY, STG) instructions.
+  // The penalty for unaligned access is just very few (two or three) ticks,
+  // plus another few (two or three) ticks if the access crosses a cache line boundary.
+  //
+  // In short, it makes no sense on z/Architecture to piecemeal get or put unaligned data.
+
+  // Returns true if the byte ordering used by Java is different from
+  // the native byte ordering of the underlying machine.
+  // z/Arch is big endian, thus, a swap between native and Java ordering
+  // is always a no-op.
+  static inline bool is_Java_byte_ordering_different() { return false; }
+
+  // Only swap on little endian machines => suffix `_le'.
+  // On this big-endian platform these are identity functions.
+  static inline u2   swap_u2_le(u2 x) { return x; }
+  static inline u4   swap_u4_le(u4 x) { return x; }
+  static inline u8   swap_u8_le(u8 x) { return x; }
+
+  // Plain (possibly unaligned) loads/stores; see hardware note above.
+  static inline u2   get_native_u2(address p) { return *(u2*)p; }
+  static inline u4   get_native_u4(address p) { return *(u4*)p; }
+  static inline u8   get_native_u8(address p) { return *(u8*)p; }
+
+  static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; }
+  static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; }
+  static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; }
+
+  // Textually included inside the class body so the OS-specific helpers
+  // defined there become members of Bytes.
+#include "bytes_linux_s390.inline.hpp"
+
+  // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
+  // Java ordering == native ordering on s390, so these just forward.
+  static inline u2   get_Java_u2(address p) { return get_native_u2(p); }
+  static inline u4   get_Java_u4(address p) { return get_native_u4(p); }
+  static inline u8   get_Java_u8(address p) { return get_native_u8(p); }
+
+  static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, x); }
+  static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, x); }
+  static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, x); }
+};
+
+#endif // CPU_S390_VM_BYTES_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_CodeStubs_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_s390.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_s390.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif // INCLUDE_ALL_GCS
+
+#define __ ce->masm()->
+#undef  CHECK_BAILOUT
+#define CHECK_BAILOUT() { if (ce->compilation()->bailed_out()) return; }
+
+// Slow-path stub for array range checks. Captures a private copy of the
+// CodeEmitInfo so later emission does not depend on the caller's copy.
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+                               bool throw_index_out_of_bounds_exception) :
+  _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception),
+  _index(index) {
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_info->deoptimize_on_exception()) {
+    // Deoptimize instead of throwing; the interpreter will reexecute and throw.
+    address a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+    ce->emit_call_c(a);
+    CHECK_BAILOUT();
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ should_not_reach_here());
+    return;
+  }
+
+  // Pass the array index in Z_R1_scratch which is not managed by linear scan.
+  if (_index->is_cpu_register()) {
+    __ lgr_if_needed(Z_R1_scratch, _index->as_register());
+  } else {
+    __ load_const_optimized(Z_R1_scratch, _index->as_jint());
+  }
+
+  // Select the exception kind requested at stub-creation time.
+  Runtime1::StubID stub_id;
+  if (_throw_index_out_of_bounds_exception) {
+    stub_id = Runtime1::throw_index_exception_id;
+  } else {
+    stub_id = Runtime1::throw_range_check_failed_id;
+  }
+  ce->emit_call_c(Runtime1::entry_for (stub_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  // The runtime call throws; control never returns here.
+  debug_only(__ should_not_reach_here());
+}
+
+// Deoptimization stub taken when a loop predicate fails; keeps its own
+// copy of the debug info.
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+  _info = new CodeEmitInfo(info);
+}
+
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  // Unconditionally deoptimize; the runtime call does not return.
+  address a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  // Materialize the Method* as a constant-section load into Z_R1_scratch.
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  bool success = __ set_metadata_constant(m, Z_R1_scratch);
+  if (!success) {
+    // No room left in the constant section; abort this compilation.
+    ce->compilation()->bailout("const section overflow");
+    return;
+  }
+  // Runtime expects (method, bci) as stub parameters.
+  ce->store_parameter(/*_method->as_register()*/ Z_R1_scratch, 1);
+  ce->store_parameter(_bci, 0);
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::counter_overflow_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+  if (_offset != -1) {
+    // Record the faulting instruction so the signal handler can map it to this stub.
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::throw_div0_exception_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  // The runtime call throws; control never returns here.
+  debug_only(__ should_not_reach_here());
+}
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+  address a;
+  if (_info->deoptimize_on_exception()) {
+    // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
+    a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+  } else {
+    a = Runtime1::entry_for (Runtime1::throw_null_pointer_exception_id);
+  }
+
+  // Map the faulting pc to this stub's entry for the signal handler.
+  ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  __ bind(_entry);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+// Note: pass object in Z_R1_scratch
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_obj->is_valid()) {
+    __ z_lgr(Z_R1_scratch, _obj->as_register()); // _obj contains the optional argument to the stub
+  }
+  // _stub selects which Runtime1 throw entry is called.
+  address a = Runtime1::entry_for (_stub);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  // The runtime call throws; control never returns here.
+  debug_only(__ should_not_reach_here());
+}
+
+// Slow-path allocation of a plain object instance. stub_id selects one of the
+// three new-instance Runtime1 entries (with/without fast path and init check).
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::new_instance_id                 ||
+         stub_id == Runtime1::fast_new_instance_id            ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id = stub_id;
+}
+
+// Slow path: call the Runtime1 allocation stub selected in the constructor.
+// Calling convention (fixed by the runtime stub): klass in Z_R11, result in Z_R2.
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+  address a = Runtime1::entry_for (_stub_id);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == Z_R2, "callee returns result in Z_R2");
+  __ z_brul(_continuation);
+}
+
+// Slow-path allocation of a primitive-type array; keeps a private copy of the debug info.
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+// Slow path: allocate a primitive-type array via Runtime1::new_type_array_id.
+// Calling convention (fixed by the runtime stub): klass in Z_R11, length in Z_R13,
+// result returned in Z_R2.
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+  __ lgr_if_needed(Z_R13, _length->as_register());
+  address a = Runtime1::entry_for (Runtime1::new_type_array_id);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == Z_R2, "callee returns result in Z_R2");
+  __ z_brul(_continuation);
+}
+
+// Slow-path allocation of an object array; keeps a private copy of the debug info.
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+// Slow path: allocate an object array via Runtime1::new_object_array_id.
+// Calling convention (fixed by the runtime stub): klass in Z_R11, length in Z_R13,
+// result returned in Z_R2.
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+  __ lgr_if_needed(Z_R13, _length->as_register());
+  address a = Runtime1::entry_for (Runtime1::new_object_array_id);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == Z_R2, "callee returns result in Z_R2");
+  __ z_brul(_continuation);
+}
+
+// Slow-path monitor enter; keeps a private copy of the debug info.
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+  : MonitorAccessStub(obj_reg, lock_reg) {
+  _info = new CodeEmitInfo(info);
+}
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  // The _nofpu variant skips FPU state save/restore in the runtime stub.
+  Runtime1::StubID enter_id;
+  if (ce->compilation()->has_fpu_code()) {
+    enter_id = Runtime1::monitorenter_id;
+  } else {
+    enter_id = Runtime1::monitorenter_nofpu_id;
+  }
+  // Pass object and BasicObjectLock in registers not managed by linear scan.
+  __ lgr_if_needed(Z_R1_scratch, _obj_reg->as_register());
+  __ lgr_if_needed(Z_R13, _lock_reg->as_register()); // See LIRGenerator::syncTempOpr().
+  ce->emit_call_c(Runtime1::entry_for (enter_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  // Move address of the BasicObjectLock into Z_R1_scratch.
+  if (_compute_lock) {
+    // Lock_reg was destroyed by fast unlocking attempt => recompute it.
+    ce->monitor_address(_monitor_ix, FrameMap::as_opr(Z_R1_scratch));
+  } else {
+    __ lgr_if_needed(Z_R1_scratch, _lock_reg->as_register());
+  }
+  // Note: non-blocking leaf routine => no call info needed.
+  // The _nofpu variant skips FPU state save/restore in the runtime stub.
+  Runtime1::StubID exit_id;
+  if (ce->compilation()->has_fpu_code()) {
+    exit_id = Runtime1::monitorexit_id;
+  } else {
+    exit_id = Runtime1::monitorexit_nofpu_id;
+  }
+  ce->emit_call_c(Runtime1::entry_for (exit_id));
+  CHECK_BAILOUT();
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes).
+// - Replace original code with a call to the stub.
+// At Runtime:
+// - call to stub, jump to runtime.
+// - in runtime: Preserve all registers (especially objects, i.e., source and destination object).
+// - in runtime: After initializing class, restore original code, reexecute instruction.
+
+// Negative distance from the patch-stub call's return address back to the start
+// of the call sequence emitted in PatchingStub::emit_code (load_const + BASR).
+int PatchingStub::_patch_info_offset = - (12 /* load const */ + 2 /*BASR*/);
+
+// Align the patch site so the jump that later replaces the original code can be
+// installed atomically; also emit a block comment identifying the patch kind.
+void PatchingStub::align_patch_site(MacroAssembler* masm) {
+#ifndef PRODUCT
+  const char* bc;
+  switch (_id) {
+  case access_field_id: bc = "patch site (access_field)"; break;
+  case load_klass_id: bc = "patch site (load_klass)"; break;
+  case load_mirror_id: bc = "patch site (load_mirror)"; break;
+  case load_appendix_id: bc = "patch site (load_appendix)"; break;
+  default: bc = "patch site (unknown patch id)"; break;
+  }
+  masm->block_comment(bc);
+#endif
+
+  masm->align(round_to(NativeGeneralJump::instruction_size, wordSize));
+}
+
+// Emit the out-of-line patching stub:
+// - a copy of the code being patched (so the runtime can restore it),
+// - for load_mirror: an initializing-thread check,
+// - a 4-byte patch record describing offsets and sizes,
+// - the call into the Runtime1 patching routine.
+// The original code at _pc_start is replaced by a jump to this stub.
+void PatchingStub::emit_code(LIR_Assembler* ce) {
+  // Copy original code here.
+  assert(NativeGeneralJump::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF,
+         "not enough room for call");
+
+  NearLabel call_patch;
+
+  int being_initialized_entry = __ offset();
+
+  if (_id == load_klass_id) {
+    // Produce a copy of the load klass instruction for use by the case being initialized.
+#ifdef ASSERT
+    address start = __ pc();
+#endif
+    AddressLiteral addrlit((intptr_t)0, metadata_Relocation::spec(_index));
+    __ load_const(_obj, addrlit);
+
+#ifdef ASSERT
+    // The freshly emitted copy must be byte-identical to the patched-over original.
+    for (int i = 0; i < _bytes_to_copy; i++) {
+      address ptr = (address)(_pc_start + i);
+      int a_byte = (*ptr) & 0xFF;
+      assert(a_byte == *start++, "should be the same code");
+    }
+#endif
+  } else if (_id == load_mirror_id || _id == load_appendix_id) {
+    // Produce a copy of the load mirror instruction for use by the case being initialized.
+#ifdef ASSERT
+    address start = __ pc();
+#endif
+    AddressLiteral addrlit((intptr_t)0, oop_Relocation::spec(_index));
+    __ load_const(_obj, addrlit);
+
+#ifdef ASSERT
+    // The freshly emitted copy must be byte-identical to the patched-over original.
+    for (int i = 0; i < _bytes_to_copy; i++) {
+      address ptr = (address)(_pc_start + i);
+      int a_byte = (*ptr) & 0xFF;
+      assert(a_byte == *start++, "should be the same code");
+    }
+#endif
+  } else {
+    // Make a copy of the code which is going to be patched.
+    for (int i = 0; i < _bytes_to_copy; i++) {
+      address ptr = (address)(_pc_start + i);
+      int a_byte = (*ptr) & 0xFF;
+      __ emit_int8 (a_byte);
+    }
+  }
+
+  address end_of_patch = __ pc();
+  int bytes_to_skip = 0;
+  if (_id == load_mirror_id) {
+    int offset = __ offset();
+    if (CommentedAssembly) {
+      __ block_comment(" being_initialized check");
+    }
+
+    // Static field accesses have special semantics while the class
+    // initializer is being run, so we emit a test which can be used to
+    // check that this code is being executed by the initializing
+    // thread.
+    assert(_obj != noreg, "must be a valid register");
+    assert(_index >= 0, "must have oop index");
+    __ z_lg(Z_R1_scratch, java_lang_Class::klass_offset_in_bytes(), _obj);
+    __ z_cg(Z_thread, Address(Z_R1_scratch, InstanceKlass::init_thread_offset()));
+    __ branch_optimized(Assembler::bcondNotEqual, call_patch);
+
+    // Load_klass patches may execute the patched code before it's
+    // copied back into place so we need to jump back into the main
+    // code of the nmethod to continue execution.
+    __ branch_optimized(Assembler::bcondAlways, _patch_site_continuation);
+
+    // Make sure this extra code gets skipped.
+    bytes_to_skip += __ offset() - offset;
+  }
+
+  // Now emit the patch record telling the runtime how to find the
+  // pieces of the patch. We only need 3 bytes but to help the disassembler
+  // we make the data look like the following add instruction:
+  //   A R1, D2(X2, B2)
+  // which requires 4 bytes.
+  int sizeof_patch_record = 4;
+  bytes_to_skip += sizeof_patch_record;
+
+  // Emit the offsets needed to find the code to patch.
+  int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
+
+  // Emit the patch record: opcode of the add followed by 3 bytes patch record data.
+  __ emit_int8((int8_t)(A_ZOPC>>24));
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
+  address patch_info_pc = __ pc();
+  assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
+
+  // Redirect the original code to this stub's entry point.
+  address entry = __ pc();
+  NativeGeneralJump::insert_unconditional((address)_pc_start, entry);
+  address target = NULL;
+  relocInfo::relocType reloc_type = relocInfo::none;
+  switch (_id) {
+    case access_field_id:  target = Runtime1::entry_for (Runtime1::access_field_patching_id); break;
+    case load_klass_id:    target = Runtime1::entry_for (Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
+    case load_mirror_id:   target = Runtime1::entry_for (Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
+    case load_appendix_id: target = Runtime1::entry_for (Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
+    default: ShouldNotReachHere();
+  }
+  __ bind(call_patch);
+
+  if (CommentedAssembly) {
+    __ block_comment("patch entry point");
+  }
+  // Cannot use call_c_opt() because its size is not constant.
+  // This sequence (12-byte load_const + 2-byte BASR) must match _patch_info_offset.
+  __ load_const(Z_R1_scratch, target); // Must not optimize in order to keep constant _patch_info_offset constant.
+  __ z_basr(Z_R14, Z_R1_scratch);
+  assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
+  ce->add_call_info_here(_info);
+  __ z_brcl(Assembler::bcondAlways, _patch_site_entry);
+  if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) {
+    // Retag the patch site so GC/relocation treats the patched constant correctly.
+    CodeSection* cs = __ code_section();
+    address pc = (address)_pc_start;
+    RelocIterator iter(cs, pc, pc + 1);
+    relocInfo::change_reloc_info_for_address(&iter, (address) pc, reloc_type, relocInfo::none);
+  }
+}
+
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  __ load_const_optimized(Z_R1_scratch, _trap_request); // Pass trap request in Z_R1_scratch.
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::deoptimize_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  // Deoptimization does not return to this stub.
+  DEBUG_ONLY(__ should_not_reach_here());
+}
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+  // Slow case: call to native.
+  // Marshal (src, src_pos, dst, dst_pos, length) into the argument registers.
+  __ bind(_entry);
+  __ lgr_if_needed(Z_ARG1, src()->as_register());
+  __ lgr_if_needed(Z_ARG2, src_pos()->as_register());
+  __ lgr_if_needed(Z_ARG3, dst()->as_register());
+  __ lgr_if_needed(Z_ARG4, dst_pos()->as_register());
+  __ lgr_if_needed(Z_ARG5, length()->as_register());
+
+  // Must align calls sites, otherwise they can't be updated atomically on MP hardware.
+  ce->align_call(lir_static_call);
+
+  assert((__ offset() + NativeCall::call_far_pcrelative_displacement_offset) % NativeCall::call_far_pcrelative_displacement_alignment == 0,
+         "must be aligned");
+
+  ce->emit_static_call_stub();
+
+  // Prepend each BRASL with a nop.
+  __ relocate(relocInfo::static_call_type);
+  __ z_nop();
+  __ z_brasl(Z_R14, SharedRuntime::get_resolve_static_call_stub());
+  ce->add_call_info_here(info());
+  ce->verify_oop_map(info());
+
+#ifndef PRODUCT
+  // Count slow-case arraycopy invocations (diagnostics only).
+  __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_slowcase_cnt);
+  __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+#endif
+
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+  __ bind(_entry);
+  ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+
+  __ z_ltgr(Z_R1_scratch, pre_val_reg); // Pass oop in Z_R1_scratch to Runtime1::g1_pre_barrier_slow_id.
+  // A NULL previous value needs no SATB recording.
+  __ branch_optimized(Assembler::bcondZero, _continuation);
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::g1_pre_barrier_slow_id));
+  CHECK_BAILOUT();
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register new_val_reg = new_val()->as_register();
+  // Storing NULL needs no card marking.
+  __ z_ltgr(new_val_reg, new_val_reg);
+  __ branch_optimized(Assembler::bcondZero, _continuation);
+  // Pass the store address in Z_R1_scratch to the runtime barrier stub.
+  __ z_lgr(Z_R1_scratch, addr()->as_pointer_register());
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::g1_post_barrier_slow_id));
+  CHECK_BAILOUT();
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+#endif // INCLUDE_ALL_GCS
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_Defs_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_DEFS_S390_HPP
+#define CPU_S390_VM_C1_DEFS_S390_HPP
+
+// Native word offsets from memory address (big endian).
+// The low word of a 64-bit value lives at the higher address.
+enum {
+  pd_lo_word_offset_in_bytes = BytesPerInt,
+  pd_hi_word_offset_in_bytes = 0
+};
+
+// Explicit rounding operations are not required to implement the strictFP mode.
+enum {
+  pd_strict_fp_requires_explicit_rounding = false
+};
+
+// registers
+enum {
+  pd_nof_cpu_regs_frame_map = 16,  // Number of registers used during code emission.
+  // Treat all registers as caller save (values of callee save are hard to find if caller is in runtime).
+  // unallocated: Z_thread, Z_fp, Z_SP, Z_R0_scratch, Z_R1_scratch, Z_R14
+  pd_nof_cpu_regs_unallocated = 6,
+  pd_nof_caller_save_cpu_regs_frame_map = pd_nof_cpu_regs_frame_map - pd_nof_cpu_regs_unallocated,  // Number of cpu registers killed by calls.
+  pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map,  // Number of registers that are visible to register allocator.
+  pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map,// Number of registers visible linear scan.
+  pd_first_cpu_reg = 0,
+  pd_last_cpu_reg  = 9, // Others are unallocated (see FrameMap::initialize()).
+
+  pd_nof_fpu_regs_frame_map = 16,  // Number of registers used during code emission.
+  pd_nof_fcpu_regs_unallocated = 1, // Leave Z_F15 unallocated and use it as scratch register.
+  pd_nof_caller_save_fpu_regs_frame_map = pd_nof_fpu_regs_frame_map - pd_nof_fcpu_regs_unallocated,  // Number of fpu registers killed by calls.
+  pd_nof_fpu_regs_reg_alloc = pd_nof_caller_save_fpu_regs_frame_map,  // Number of registers that are visible to register allocator.
+  pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // Number of registers visible to linear scan.
+  pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
+  pd_last_fpu_reg =  pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - pd_nof_fcpu_regs_unallocated - 1,
+
+  // No xmm registers on s390.
+  pd_nof_xmm_regs_linearscan = 0,
+  pd_nof_caller_save_xmm_regs = 0,
+  pd_first_xmm_reg = -1,
+  pd_last_xmm_reg = -1
+};
+
+// For debug info: a float value in a register is saved in single precision by runtime stubs.
+enum {
+  pd_float_saved_as_double = false
+};
+
+#endif // CPU_S390_VM_C1_DEFS_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_FpuStackSim_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP
+#define CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP
+
+// No FPU stack on ZARCH_64
+class FpuStackSim;
+
+#endif // CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_s390.inline.hpp"
+
+
+// Argument area reserved for calls into the C runtime.
+// NOTE(review): presumably counted in stack slots/words as on other platforms -- confirm against users.
+const int FrameMap::pd_c_runtime_reserved_arg_size = 7;
+
+// Map a location assigned by the calling convention (a VMRegPair) to a C1 LIR operand.
+//   type     - Java type of the value; selects the operand flavor (long, oop, metadata, float, ...).
+//   reg      - the assigned location: stack slot, general register, or float register.
+//   outgoing - unused in this implementation (stack offsets are formed SP-relative either way).
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool outgoing) {
+  LIR_Opr opr = LIR_OprFact::illegalOpr;
+  VMReg r_1 = reg->first();
+  VMReg r_2 = reg->second();
+  if (r_1->is_stack()) {
+    // Convert stack slot to an SP offset.
+    // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
+    // so we must add it in here.
+    int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+    opr = LIR_OprFact::address(new LIR_Address(Z_SP_opr, st_off, type));
+  } else if (r_1->is_Register()) {
+    Register reg = r_1->as_Register();
+    // A 64-bit GPR holds a complete long/double-word value, so r_2 naming the same
+    // register is expected for T_LONG (see as_long_opr below).
+    if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
+      opr = as_long_opr(reg);
+    } else if (type == T_OBJECT || type == T_ARRAY) {
+      opr = as_oop_opr(reg);
+    } else if (type == T_METADATA) {
+      opr = as_metadata_opr(reg);
+    } else {
+      opr = as_opr(reg);
+    }
+  } else if (r_1->is_FloatRegister()) {
+    assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
+    FloatRegister f = r_1->as_FloatRegister();
+    if (type == T_FLOAT) {
+      opr = as_float_opr(f);
+    } else {
+      opr = as_double_opr(f);
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+  return opr;
+}
+
+//               FrameMap
+//--------------------------------------------------------
+
+FloatRegister FrameMap::_fpu_rnr2reg [FrameMap::nof_fpu_regs]; // mapping c1 regnr. -> FloatRegister
+int           FrameMap::_fpu_reg2rnr [FrameMap::nof_fpu_regs]; // mapping assembler encoding -> c1 regnr.
+
+// Some useful constant RInfo's:
+// All of the LIR_Opr statics below are filled in by FrameMap::initialize().
+LIR_Opr FrameMap::Z_R0_opr;
+LIR_Opr FrameMap::Z_R1_opr;
+LIR_Opr FrameMap::Z_R2_opr;
+LIR_Opr FrameMap::Z_R3_opr;
+LIR_Opr FrameMap::Z_R4_opr;
+LIR_Opr FrameMap::Z_R5_opr;
+LIR_Opr FrameMap::Z_R6_opr;
+LIR_Opr FrameMap::Z_R7_opr;
+LIR_Opr FrameMap::Z_R8_opr;
+LIR_Opr FrameMap::Z_R9_opr;
+LIR_Opr FrameMap::Z_R10_opr;
+LIR_Opr FrameMap::Z_R11_opr;
+LIR_Opr FrameMap::Z_R12_opr;
+LIR_Opr FrameMap::Z_R13_opr;
+LIR_Opr FrameMap::Z_R14_opr;
+LIR_Opr FrameMap::Z_R15_opr;
+
+// Same registers typed as oop operands (tracked by oop maps).
+LIR_Opr FrameMap::Z_R0_oop_opr;
+LIR_Opr FrameMap::Z_R1_oop_opr;
+LIR_Opr FrameMap::Z_R2_oop_opr;
+LIR_Opr FrameMap::Z_R3_oop_opr;
+LIR_Opr FrameMap::Z_R4_oop_opr;
+LIR_Opr FrameMap::Z_R5_oop_opr;
+LIR_Opr FrameMap::Z_R6_oop_opr;
+LIR_Opr FrameMap::Z_R7_oop_opr;
+LIR_Opr FrameMap::Z_R8_oop_opr;
+LIR_Opr FrameMap::Z_R9_oop_opr;
+LIR_Opr FrameMap::Z_R10_oop_opr;
+LIR_Opr FrameMap::Z_R11_oop_opr;
+LIR_Opr FrameMap::Z_R12_oop_opr;
+LIR_Opr FrameMap::Z_R13_oop_opr;
+LIR_Opr FrameMap::Z_R14_oop_opr;
+LIR_Opr FrameMap::Z_R15_oop_opr;
+
+// Same registers typed as metadata operands (Klass*/Method* values).
+LIR_Opr FrameMap::Z_R0_metadata_opr;
+LIR_Opr FrameMap::Z_R1_metadata_opr;
+LIR_Opr FrameMap::Z_R2_metadata_opr;
+LIR_Opr FrameMap::Z_R3_metadata_opr;
+LIR_Opr FrameMap::Z_R4_metadata_opr;
+LIR_Opr FrameMap::Z_R5_metadata_opr;
+LIR_Opr FrameMap::Z_R6_metadata_opr;
+LIR_Opr FrameMap::Z_R7_metadata_opr;
+LIR_Opr FrameMap::Z_R8_metadata_opr;
+LIR_Opr FrameMap::Z_R9_metadata_opr;
+LIR_Opr FrameMap::Z_R10_metadata_opr;
+LIR_Opr FrameMap::Z_R11_metadata_opr;
+LIR_Opr FrameMap::Z_R12_metadata_opr;
+LIR_Opr FrameMap::Z_R13_metadata_opr;
+LIR_Opr FrameMap::Z_R14_metadata_opr;
+LIR_Opr FrameMap::Z_R15_metadata_opr;
+
+LIR_Opr FrameMap::Z_SP_opr;
+LIR_Opr FrameMap::Z_FP_opr;
+
+LIR_Opr FrameMap::Z_R2_long_opr;
+LIR_Opr FrameMap::Z_R10_long_opr;
+LIR_Opr FrameMap::Z_R11_long_opr;
+
+LIR_Opr FrameMap::Z_F0_opr;
+LIR_Opr FrameMap::Z_F0_double_opr;
+
+
+// Caller-save operand tables, indexed by c1 register number; filled in by initialize().
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
+
+
+// c1 rnr -> FloatRegister
+// Reverse lookup of map_float_register(); only valid after initialize() has run.
+FloatRegister FrameMap::nr2floatreg (int rnr) {
+  assert(_init_done, "tables not initialized");
+  debug_only(fpu_range_check(rnr);)
+  return _fpu_rnr2reg[rnr];
+}
+
+// Record the bidirectional mapping between a c1 register number and a float register.
+// Both tables are kept consistent so either direction can be queried.
+void FrameMap::map_float_register(int rnr, FloatRegister reg) {
+  debug_only(fpu_range_check(rnr);)
+  debug_only(fpu_range_check(reg->encoding());)
+  _fpu_rnr2reg[rnr] = reg;              // mapping c1 regnr. -> FloatRegister
+  _fpu_reg2rnr[reg->encoding()] = rnr;  // mapping assembler encoding -> c1 regnr.
+}
+
+// One-time setup of the c1 register tables and the constant LIR_Opr statics.
+// Allocatable registers get the low c1 numbers (0..pd_last_cpu_reg); scratch and
+// reserved registers get the high numbers so the register allocator never sees them.
+void FrameMap::initialize() {
+  assert(!_init_done, "once");
+
+  DEBUG_ONLY(int allocated   = 0;)
+  DEBUG_ONLY(int unallocated = 0;)
+
+  // Register usage:
+  // Z_thread (Z_R8)
+  // Z_fp     (Z_R9)
+  // Z_SP     (Z_R15)
+  DEBUG_ONLY(allocated++); map_register(0, Z_R2);
+  DEBUG_ONLY(allocated++); map_register(1, Z_R3);
+  DEBUG_ONLY(allocated++); map_register(2, Z_R4);
+  DEBUG_ONLY(allocated++); map_register(3, Z_R5);
+  DEBUG_ONLY(allocated++); map_register(4, Z_R6);
+  DEBUG_ONLY(allocated++); map_register(5, Z_R7);
+  DEBUG_ONLY(allocated++); map_register(6, Z_R10);
+  DEBUG_ONLY(allocated++); map_register(7, Z_R11);
+  DEBUG_ONLY(allocated++); map_register(8, Z_R12);
+  DEBUG_ONLY(allocated++); map_register(9, Z_R13);     // <- last register visible in RegAlloc
+  // Note: mapped out of numeric order -- rnr 10 is Z_R14, rnrs 11/12 are the scratch regs.
+  DEBUG_ONLY(unallocated++); map_register(11, Z_R0);   // Z_R0_scratch
+  DEBUG_ONLY(unallocated++); map_register(12, Z_R1);   // Z_R1_scratch
+  DEBUG_ONLY(unallocated++); map_register(10, Z_R14);  // return pc; TODO: Try to let c1/c2 allocate R14.
+
+  // The following registers are usually unavailable.
+  DEBUG_ONLY(unallocated++); map_register(13, Z_R8);
+  DEBUG_ONLY(unallocated++); map_register(14, Z_R9);
+  DEBUG_ONLY(unallocated++); map_register(15, Z_R15);
+  assert(allocated-1 == pd_last_cpu_reg, "wrong number/mapping of allocated CPU registers");
+  assert(unallocated == pd_nof_cpu_regs_unallocated, "wrong number of unallocated CPU registers");
+  assert(nof_cpu_regs == allocated+unallocated, "wrong number of CPU registers");
+
+  // Map float registers in order, but give Z_fscratch_1 the highest c1 number
+  // so it is never handed out by the allocator.
+  int j = 0;
+  for (int i = 0; i < nof_fpu_regs; i++) {
+    if (as_FloatRegister(i) == Z_fscratch_1) continue; // unallocated
+    map_float_register(j++, as_FloatRegister(i));
+  }
+  assert(j == nof_fpu_regs-1, "missed one fpu reg?");
+  map_float_register(j++, Z_fscratch_1);
+
+  // Must be set before the as_*_opr() calls below (they assert _init_done).
+  _init_done = true;
+
+  Z_R0_opr = as_opr(Z_R0);
+  Z_R1_opr = as_opr(Z_R1);
+  Z_R2_opr = as_opr(Z_R2);
+  Z_R3_opr = as_opr(Z_R3);
+  Z_R4_opr = as_opr(Z_R4);
+  Z_R5_opr = as_opr(Z_R5);
+  Z_R6_opr = as_opr(Z_R6);
+  Z_R7_opr = as_opr(Z_R7);
+  Z_R8_opr = as_opr(Z_R8);
+  Z_R9_opr = as_opr(Z_R9);
+  Z_R10_opr = as_opr(Z_R10);
+  Z_R11_opr = as_opr(Z_R11);
+  Z_R12_opr = as_opr(Z_R12);
+  Z_R13_opr = as_opr(Z_R13);
+  Z_R14_opr = as_opr(Z_R14);
+  Z_R15_opr = as_opr(Z_R15);
+
+  Z_R0_oop_opr = as_oop_opr(Z_R0);
+  Z_R1_oop_opr = as_oop_opr(Z_R1);
+  Z_R2_oop_opr = as_oop_opr(Z_R2);
+  Z_R3_oop_opr = as_oop_opr(Z_R3);
+  Z_R4_oop_opr = as_oop_opr(Z_R4);
+  Z_R5_oop_opr = as_oop_opr(Z_R5);
+  Z_R6_oop_opr = as_oop_opr(Z_R6);
+  Z_R7_oop_opr = as_oop_opr(Z_R7);
+  Z_R8_oop_opr = as_oop_opr(Z_R8);
+  Z_R9_oop_opr = as_oop_opr(Z_R9);
+  Z_R10_oop_opr = as_oop_opr(Z_R10);
+  Z_R11_oop_opr = as_oop_opr(Z_R11);
+  Z_R12_oop_opr = as_oop_opr(Z_R12);
+  Z_R13_oop_opr = as_oop_opr(Z_R13);
+  Z_R14_oop_opr = as_oop_opr(Z_R14);
+  Z_R15_oop_opr = as_oop_opr(Z_R15);
+
+  Z_R0_metadata_opr = as_metadata_opr(Z_R0);
+  Z_R1_metadata_opr = as_metadata_opr(Z_R1);
+  Z_R2_metadata_opr = as_metadata_opr(Z_R2);
+  Z_R3_metadata_opr = as_metadata_opr(Z_R3);
+  Z_R4_metadata_opr = as_metadata_opr(Z_R4);
+  Z_R5_metadata_opr = as_metadata_opr(Z_R5);
+  Z_R6_metadata_opr = as_metadata_opr(Z_R6);
+  Z_R7_metadata_opr = as_metadata_opr(Z_R7);
+  Z_R8_metadata_opr = as_metadata_opr(Z_R8);
+  Z_R9_metadata_opr = as_metadata_opr(Z_R9);
+  Z_R10_metadata_opr = as_metadata_opr(Z_R10);
+  Z_R11_metadata_opr = as_metadata_opr(Z_R11);
+  Z_R12_metadata_opr = as_metadata_opr(Z_R12);
+  Z_R13_metadata_opr = as_metadata_opr(Z_R13);
+  Z_R14_metadata_opr = as_metadata_opr(Z_R14);
+  Z_R15_metadata_opr = as_metadata_opr(Z_R15);
+
+  // TODO: needed? Or can we make Z_R9 available for linear scan allocation.
+  Z_FP_opr = as_pointer_opr(Z_fp);
+  Z_SP_opr = as_pointer_opr(Z_SP);
+
+  Z_R2_long_opr = LIR_OprFact::double_cpu(cpu_reg2rnr(Z_R2), cpu_reg2rnr(Z_R2));
+  Z_R10_long_opr = LIR_OprFact::double_cpu(cpu_reg2rnr(Z_R10), cpu_reg2rnr(Z_R10));
+  Z_R11_long_opr = LIR_OprFact::double_cpu(cpu_reg2rnr(Z_R11), cpu_reg2rnr(Z_R11));
+
+  Z_F0_opr = as_float_opr(Z_F0);
+  Z_F0_double_opr = as_double_opr(Z_F0);
+
+  // All allocated cpu regs are caller saved.
+  for (int c1rnr = 0; c1rnr < max_nof_caller_save_cpu_regs; c1rnr++) {
+    _caller_save_cpu_regs[c1rnr] = as_opr(cpu_rnr2reg(c1rnr));
+  }
+
+  // All allocated fpu regs are caller saved.
+  for (int c1rnr = 0; c1rnr < nof_caller_save_fpu_regs; c1rnr++) {
+    _caller_save_fpu_regs[c1rnr] = as_float_opr(nr2floatreg(c1rnr));
+  }
+}
+
+// Form an SP-relative address for a frame slot at the given byte offset.
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+  return Address(Z_SP, sp_offset);
+}
+
+// c1 fpu register number -> VMReg (for debug info / oop map encoding).
+VMReg FrameMap::fpu_regname (int n) {
+  return nr2floatreg(n)->as_VMReg();
+}
+
+// The stack pointer as a LIR operand (Z_R15 on s390).
+LIR_Opr FrameMap::stack_pointer() {
+  return Z_SP_opr;
+}
+
+// JSR 292
+// On ZARCH_64, there is no need to save the SP, because neither
+// method handle intrinsics nor compiled lambda forms modify it.
+// Returning illegalOpr signals the shared code that no save slot is required.
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
+  return LIR_OprFact::illegalOpr;
+}
+
+// No platform-specific frame constraints to check on s390.
+bool FrameMap::validate_frame() {
+  return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_FRAMEMAP_S390_HPP
+#define CPU_S390_VM_C1_FRAMEMAP_S390_HPP
+
+ public:
+
+  enum {
+    nof_reg_args = 5,   // Registers Z_ARG1 - Z_ARG5 are available for parameter passing.
+    first_available_sp_in_frame = frame::z_abi_16_size,  // First usable SP offset, past the ABI header.
+    frame_pad_in_bytes = 0                               // No extra padding appended to the frame.
+  };
+
+  // Defined in c1_FrameMap_s390.cpp (value 7): argument area reserved for C runtime calls.
+  static const int pd_c_runtime_reserved_arg_size;
+
+  // Constant LIR operands for each GPR, initialized by FrameMap::initialize().
+  static LIR_Opr Z_R0_opr;
+  static LIR_Opr Z_R1_opr;
+  static LIR_Opr Z_R2_opr;
+  static LIR_Opr Z_R3_opr;
+  static LIR_Opr Z_R4_opr;
+  static LIR_Opr Z_R5_opr;
+  static LIR_Opr Z_R6_opr;
+  static LIR_Opr Z_R7_opr;
+  static LIR_Opr Z_R8_opr;
+  static LIR_Opr Z_R9_opr;
+  static LIR_Opr Z_R10_opr;
+  static LIR_Opr Z_R11_opr;
+  static LIR_Opr Z_R12_opr;
+  static LIR_Opr Z_R13_opr;
+  static LIR_Opr Z_R14_opr;
+  static LIR_Opr Z_R15_opr;
+
+  // Oop-typed variants (tracked by oop maps).
+  static LIR_Opr Z_R0_oop_opr;
+  static LIR_Opr Z_R1_oop_opr;
+  static LIR_Opr Z_R2_oop_opr;
+  static LIR_Opr Z_R3_oop_opr;
+  static LIR_Opr Z_R4_oop_opr;
+  static LIR_Opr Z_R5_oop_opr;
+  static LIR_Opr Z_R6_oop_opr;
+  static LIR_Opr Z_R7_oop_opr;
+  static LIR_Opr Z_R8_oop_opr;
+  static LIR_Opr Z_R9_oop_opr;
+  static LIR_Opr Z_R10_oop_opr;
+  static LIR_Opr Z_R11_oop_opr;
+  static LIR_Opr Z_R12_oop_opr;
+  static LIR_Opr Z_R13_oop_opr;
+  static LIR_Opr Z_R14_oop_opr;
+  static LIR_Opr Z_R15_oop_opr;
+
+  // Metadata-typed variants (Klass*/Method* values).
+  static LIR_Opr Z_R0_metadata_opr;
+  static LIR_Opr Z_R1_metadata_opr;
+  static LIR_Opr Z_R2_metadata_opr;
+  static LIR_Opr Z_R3_metadata_opr;
+  static LIR_Opr Z_R4_metadata_opr;
+  static LIR_Opr Z_R5_metadata_opr;
+  static LIR_Opr Z_R6_metadata_opr;
+  static LIR_Opr Z_R7_metadata_opr;
+  static LIR_Opr Z_R8_metadata_opr;
+  static LIR_Opr Z_R9_metadata_opr;
+  static LIR_Opr Z_R10_metadata_opr;
+  static LIR_Opr Z_R11_metadata_opr;
+  static LIR_Opr Z_R12_metadata_opr;
+  static LIR_Opr Z_R13_metadata_opr;
+  static LIR_Opr Z_R14_metadata_opr;
+  static LIR_Opr Z_R15_metadata_opr;
+
+  static LIR_Opr Z_SP_opr;  // Stack pointer (Z_R15) as pointer operand.
+  static LIR_Opr Z_FP_opr;  // Frame pointer (Z_fp) as pointer operand.
+
+  // Long-typed operands for the registers that carry long results/arguments.
+  static LIR_Opr Z_R2_long_opr;
+  static LIR_Opr Z_R10_long_opr;
+  static LIR_Opr Z_R11_long_opr;
+
+  // Float return register, in single- and double-precision flavors.
+  static LIR_Opr Z_F0_opr;
+  static LIR_Opr Z_F0_double_opr;
+
+ private:
+  static FloatRegister _fpu_rnr2reg [FrameMap::nof_fpu_regs]; // mapping c1 regnr. -> FloatRegister
+  static int           _fpu_reg2rnr [FrameMap::nof_fpu_regs]; // mapping assembler encoding -> c1 regnr.
+
+  // Records both directions of the rnr <-> FloatRegister mapping (see .cpp).
+  static void map_float_register(int rnr, FloatRegister reg);
+
+  // FloatRegister -> c1 rnr
+  // Inverse of nr2floatreg(); only valid after initialize() has run.
+  static int fpu_reg2rnr (FloatRegister reg) {
+    assert(_init_done, "tables not initialized");
+    int c1rnr = _fpu_reg2rnr[reg->encoding()];
+    debug_only(fpu_range_check(c1rnr);)
+    return c1rnr;
+  }
+
+ public:
+
+  // A 64-bit GPR holds a complete long, so both halves of the double_cpu
+  // operand name the same register.
+  static LIR_Opr as_long_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+  // Pointers are 64 bit as well; same representation as longs.
+  static LIR_Opr as_pointer_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+
+  static LIR_Opr as_float_opr(FloatRegister r) {
+    return LIR_OprFact::single_fpu(fpu_reg2rnr(r));
+  }
+  static LIR_Opr as_double_opr(FloatRegister r) {
+    return LIR_OprFact::double_fpu(fpu_reg2rnr(r));
+  }
+
+  // c1 fpu register number -> FloatRegister (defined in the .cpp).
+  static FloatRegister nr2floatreg (int rnr);
+
+  // c1 fpu register number -> VMReg, for debug info encoding.
+  static VMReg fpu_regname (int n);
+
+  // No callee saved registers (saved values are not accessible if callee is in runtime).
+  static bool is_caller_save_register (LIR_Opr opr) { return true; }
+  static bool is_caller_save_register (Register r) { return true; }
+
+  static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
+  static int last_cpu_reg()             { return pd_last_cpu_reg; }
+
+#endif // CPU_S390_VM_C1_FRAMEMAP_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,3037 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "ci/ciInstance.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_s390.inline.hpp"
+
+// Shorthand for emitting through the macro assembler.
+#define __ _masm->
+
+#ifndef PRODUCT
+#undef __
+// In debug builds with -XX:+Verbose, tag every emitted instruction with its
+// source file/line as a block comment for easier disassembly reading.
+#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm) : _masm)->
+#endif
+
+//------------------------------------------------------------
+
+// Platform hook; the s390 backend never queries it, hence the hard stop.
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
+  // Not used on ZARCH_64
+  ShouldNotCallThis();
+  return false;
+}
+
+// The receiver is passed in the first argument register (Z_R2), typed as an oop.
+LIR_Opr LIR_Assembler::receiverOpr() {
+  return FrameMap::Z_R2_oop_opr;
+}
+
+// The OSR buffer pointer arrives in the first argument register (Z_R2).
+LIR_Opr LIR_Assembler::osrBufferPointer() {
+  return FrameMap::Z_R2_opr;
+}
+
+// Frame size as computed by the frame map, in bytes.
+int LIR_Assembler::initial_frame_size_in_bytes() const {
+  return in_bytes(frame_map()->framesize_in_bytes());
+}
+
+// Inline cache check: done before the frame is built.
+// The inline cached class is in Z_inline_cache(Z_R9).
+// We fetch the class of the receiver and compare it with the cached class.
+// If they do not match we jump to the slow case.
+// Returns the code offset of the check, which callers record as the verified entry point.
+int LIR_Assembler::check_icache() {
+  Register receiver = receiverOpr()->as_register();
+  int offset = __ offset();
+  __ inline_cache_check(receiver, Z_inline_cache);
+  return offset;
+}
+
+void LIR_Assembler::osr_entry() {
+  // On-stack-replacement entry sequence:
+  //
+  //   1. Create a new compiled activation.
+  //   2. Initialize local variables in the compiled activation. The expression stack must be empty
+  //      at the osr_bci; it is not initialized.
+  //   3. Jump to the continuation address in compiled code to resume execution.
+
+  // OSR entry point
+  offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
+  BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+  ValueStack* entry_state = osr_entry->end()->state();
+  int number_of_locks = entry_state->locks_size();
+
+  // Create a frame for the compiled activation.
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+
+  // OSR buffer is
+  //
+  // locals[nlocals-1..0]
+  // monitors[number_of_locks-1..0]
+  //
+  // Locals is a direct copy of the interpreter frame so in the osr buffer
+  // the first slot in the local array is the last local from the interpreter
+  // and the last slot is local[0] (receiver) from the interpreter
+  //
+  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
+  // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
+  // in the interpreter frame (the method lock if a sync method)
+
+  // Initialize monitors in the compiled activation.
+  //   OSR_buf (Z_R2, see osrBufferPointer()): pointer to osr buffer
+  //
+  // All other registers are dead at this point and the locals will be
+  // copied into place by code emitted in the IR.
+
+  Register OSR_buf = osrBufferPointer()->as_register();
+  { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+    // Offset of the first (oldest) monitor's entry in the OSR buffer.
+    int monitor_offset = BytesPerWord * method()->max_locals() +
+      (2 * BytesPerWord) * (number_of_locks - 1);
+    // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
+    // the OSR buffer using 2 word entries: first the lock and then
+    // the oop.
+    for (int i = 0; i < number_of_locks; i++) {
+      int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
+      // Verify the interpreter's monitor has a non-null object.
+      __ asm_assert_mem8_isnot_zero(slot_offset + 1*BytesPerWord, OSR_buf, "locked object is NULL", __LINE__);
+      // Copy the lock field into the compiled activation.
+      __ z_lg(Z_R1_scratch, slot_offset + 0, OSR_buf);
+      __ z_stg(Z_R1_scratch, frame_map()->address_for_monitor_lock(i));
+      __ z_lg(Z_R1_scratch, slot_offset + 1*BytesPerWord, OSR_buf);
+      __ z_stg(Z_R1_scratch, frame_map()->address_for_monitor_object(i));
+    }
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+
+// Emit an aligned, far-patchable call to C code at address a.
+// Returns the call address, or bails out (returning NULL from call_c_opt)
+// if the constant section overflowed.
+address LIR_Assembler::emit_call_c(address a) {
+  __ align_call_far_patchable(__ pc());
+  address call_addr = __ call_c_opt(a);
+  if (call_addr == NULL) {
+    bailout("const section overflow");
+  }
+  return call_addr;
+}
+
+// Emit the out-of-line exception handler stub. Returns its code offset, or -1 on bailout.
+int LIR_Assembler::emit_exception_handler() {
+  // If the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci. => Add a nop.
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+
+  // Generate code for exception handler.
+  address handler_base = __ start_a_stub(exception_handler_size);
+  if (handler_base == NULL) {
+    // Not enough space left for the handler.
+    bailout("exception handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+
+  address a = Runtime1::entry_for (Runtime1::handle_exception_from_callee_id);
+  // NOTE(review): call_addr is unused here; the runtime stub does not return.
+  address call_addr = emit_call_c(a);
+  CHECK_BAILOUT_(-1);
+  __ should_not_reach_here();
+  guarantee(code_offset() - offset <= exception_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+// Emit the code to remove the frame from the stack in the exception
+// unwind path. Returns the handler's code offset.
+int LIR_Assembler::emit_unwind_handler() {
+#ifndef PRODUCT
+  if (CommentedAssembly) {
+    _masm->block_comment("Unwind handler");
+  }
+#endif
+
+  int offset = code_offset();
+  Register exception_oop_callee_saved = Z_R10; // Z_R10 is callee-saved.
+  Register Rtmp1                      = Z_R11;
+  Register Rtmp2                      = Z_R12;
+
+  // Fetch the exception from TLS and clear out exception related thread state.
+  Address exc_oop_addr = Address(Z_thread, JavaThread::exception_oop_offset());
+  Address exc_pc_addr  = Address(Z_thread, JavaThread::exception_pc_offset());
+  __ z_lg(Z_EXC_OOP, exc_oop_addr);
+  __ clear_mem(exc_oop_addr, sizeof(oop));
+  __ clear_mem(exc_pc_addr, sizeof(intptr_t));
+
+  __ bind(_unwind_handler_entry);
+  __ verify_not_null_oop(Z_EXC_OOP);
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    // The unlock/dtrace code below may clobber Z_EXC_OOP; keep it in a callee-saved reg.
+    __ lgr_if_needed(exception_oop_callee_saved, Z_EXC_OOP); // Preserve the exception.
+  }
+
+  // Perform needed unlocking.
+  MonitorExitStub* stub = NULL;
+  if (method()->is_synchronized()) {
+    // Runtime1::monitorexit_id expects lock address in Z_R1_scratch.
+    LIR_Opr lock = FrameMap::as_opr(Z_R1_scratch);
+    monitor_address(0, lock);
+    stub = new MonitorExitStub(lock, true, 0);
+    __ unlock_object(Rtmp1, Rtmp2, lock->as_register(), *stub->entry());
+    __ bind(*stub->continuation());
+  }
+
+  if (compilation()->env()->dtrace_method_probes()) {
+    ShouldNotReachHere(); // Not supported.
+#if 0
+    // Disabled x86 reference code, kept to show what a dtrace exit probe would do.
+    __ mov(rdi, r15_thread);
+    __ mov_metadata(rsi, method()->constant_encoding());
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit)));
+#endif
+  }
+
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ lgr_if_needed(Z_EXC_OOP, exception_oop_callee_saved);  // Restore the exception.
+  }
+
+  // Remove the activation and dispatch to the unwind handler.
+  __ pop_frame();
+  __ z_lg(Z_EXC_PC, _z_abi16(return_pc), Z_SP);
+
+  // Z_EXC_OOP: exception oop
+  // Z_EXC_PC: exception pc
+
+  // Dispatch to the unwind logic.
+  __ load_const_optimized(Z_R5, Runtime1::entry_for (Runtime1::unwind_exception_id));
+  __ z_br(Z_R5);
+
+  // Emit the slow path assembly.
+  if (stub != NULL) {
+    stub->emit_code(this);
+  }
+
+  return offset;
+}
+
+// Emit the out-of-line deoptimization handler stub. Returns its code offset,
+// or -1 if there was not enough stub space (bailout).
+int LIR_Assembler::emit_deopt_handler() {
+  // If the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci. => Add a nop.
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+
+  // Generate code for deopt handler.
+  address handler_base = __ start_a_stub(deopt_handler_size);
+  if (handler_base == NULL) {
+    // Not enough space left for the handler.
+    bailout("deopt handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+  // Size must be constant (see HandlerImpl::emit_deopt_handler).
+  __ load_const(Z_R1_scratch, SharedRuntime::deopt_blob()->unpack());
+  __ call(Z_R1_scratch);
+  guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+// Load the jobject constant o into reg; NULL becomes a plain zero.
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
+  if (o == NULL) {
+    __ clear_reg(reg, true/*64bit*/, false/*set cc*/); // Must not kill cc set by cmove.
+  } else {
+    // Non-NULL oops are materialized via the constant table (TOC).
+    AddressLiteral a = __ allocate_oop_address(o);
+    bool success = __ load_oop_from_toc(reg, a, reg);
+    if (!success) {
+      bailout("const section overflow");
+    }
+  }
+}
+
+// Load an oop constant that is not yet known: emit a patchable load of NULL
+// that the runtime rewrites once the oop is resolved.
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
+  // Allocate a new index in table to hold the object once it's been patched.
+  int oop_index = __ oop_recorder()->allocate_oop_index(NULL);
+  PatchingStub* patch = new PatchingStub(_masm, patching_id(info), oop_index);
+
+  AddressLiteral addrlit((intptr_t)0, oop_Relocation::spec(oop_index));
+  assert(addrlit.rspec().type() == relocInfo::oop_type, "must be an oop reloc");
+  // The NULL will be dynamically patched later so the sequence to
+  // load the address literal must not be optimized.
+  __ load_const(reg, addrlit);
+
+  patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+// Load the metadata constant md into reg via the constant table; bail out on overflow.
+void LIR_Assembler::metadata2reg(Metadata* md, Register reg) {
+  bool success = __ set_metadata_constant(md, reg);
+  if (!success) {
+    bailout("const section overflow");
+    return;
+  }
+}
+
+// Load a klass constant that is not yet known: emit a patchable load of NULL
+// that the runtime rewrites once the klass is resolved.
+void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo *info) {
+  // Allocate a new index in table to hold the klass once it's been patched.
+  int index = __ oop_recorder()->allocate_metadata_index(NULL);
+  PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index);
+  AddressLiteral addrlit((intptr_t)0, metadata_Relocation::spec(index));
+  assert(addrlit.rspec().type() == relocInfo::metadata_type, "must be a metadata reloc");
+  // The NULL will be dynamically patched later so the sequence to
+  // load the address literal must not be optimized.
+  __ load_const(reg, addrlit);
+
+  patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+// Three-operand LIR ops: only integer divide/remainder occur on this platform.
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+  switch (op->code()) {
+    case lir_idiv:
+    case lir_irem:
+      arithmetic_idiv(op->code(),
+                      op->in_opr1(),
+                      op->in_opr2(),
+                      op->in_opr3(),
+                      op->result_opr(),
+                      op->info());
+      break;
+    default: ShouldNotReachHere(); break;
+  }
+}
+
+
+// Emit a conditional or unconditional branch for a LIR branch op.
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+#ifdef ASSERT
+  assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
+  if (op->block() != NULL)  { _branch_target_blocks.append(op->block()); }
+  if (op->ublock() != NULL) { _branch_target_blocks.append(op->ublock()); }
+#endif
+
+  if (op->cond() == lir_cond_always) {
+    if (op->info() != NULL) { add_debug_info_for_branch(op->info()); }
+    __ branch_optimized(Assembler::bcondAlways, *(op->label()));
+  } else {
+    Assembler::branch_condition acond = Assembler::bcondZero;
+    if (op->code() == lir_cond_float_branch) {
+      // Float compares can leave the CC "unordered" (NaN); route that to ublock first.
+      assert(op->ublock() != NULL, "must have unordered successor");
+      __ branch_optimized(Assembler::bcondNotOrdered, *(op->ublock()->label()));
+    }
+    // NOTE: the unsigned conditions (belowEqual/aboveEqual) map to the same branch
+    // conditions as the signed ones; the preceding compare instruction (signed vs.
+    // logical compare) determines how the CC is to be interpreted.
+    switch (op->cond()) {
+      case lir_cond_equal:        acond = Assembler::bcondEqual;     break;
+      case lir_cond_notEqual:     acond = Assembler::bcondNotEqual;  break;
+      case lir_cond_less:         acond = Assembler::bcondLow;       break;
+      case lir_cond_lessEqual:    acond = Assembler::bcondNotHigh;   break;
+      case lir_cond_greaterEqual: acond = Assembler::bcondNotLow;    break;
+      case lir_cond_greater:      acond = Assembler::bcondHigh;      break;
+      case lir_cond_belowEqual:   acond = Assembler::bcondNotHigh;   break;
+      case lir_cond_aboveEqual:   acond = Assembler::bcondNotLow;    break;
+      default:                         ShouldNotReachHere();
+    }
+    __ branch_optimized(acond,*(op->label()));
+  }
+}
+
+
+// Emit code for a primitive type conversion (the Bytecodes::_x2y family).
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+  LIR_Opr src  = op->in_opr();
+  LIR_Opr dest = op->result_opr();
+
+  switch (op->bytecode()) {
+    case Bytecodes::_i2l:
+      __ move_reg_if_needed(dest->as_register_lo(), T_LONG, src->as_register(), T_INT);
+      break;
+
+    case Bytecodes::_l2i:
+      __ move_reg_if_needed(dest->as_register(), T_INT, src->as_register_lo(), T_LONG);
+      break;
+
+    case Bytecodes::_i2b:
+      __ move_reg_if_needed(dest->as_register(), T_BYTE, src->as_register(), T_INT);
+      break;
+
+    case Bytecodes::_i2c:
+      __ move_reg_if_needed(dest->as_register(), T_CHAR, src->as_register(), T_INT);
+      break;
+
+    case Bytecodes::_i2s:
+      __ move_reg_if_needed(dest->as_register(), T_SHORT, src->as_register(), T_INT);
+      break;
+
+    case Bytecodes::_f2d:
+      assert(dest->is_double_fpu(), "check");
+      __ move_freg_if_needed(dest->as_double_reg(), T_DOUBLE, src->as_float_reg(), T_FLOAT);
+      break;
+
+    case Bytecodes::_d2f:
+      assert(dest->is_single_fpu(), "check");
+      __ move_freg_if_needed(dest->as_float_reg(), T_FLOAT, src->as_double_reg(), T_DOUBLE);
+      break;
+
+    // int/long -> float/double conversions use the CONVERT FROM FIXED instructions.
+    case Bytecodes::_i2f:
+      __ z_cefbr(dest->as_float_reg(), src->as_register());
+      break;
+
+    case Bytecodes::_i2d:
+      __ z_cdfbr(dest->as_double_reg(), src->as_register());
+      break;
+
+    case Bytecodes::_l2f:
+      __ z_cegbr(dest->as_float_reg(), src->as_register_lo());
+      break;
+    case Bytecodes::_l2d:
+      __ z_cdgbr(dest->as_double_reg(), src->as_register_lo());
+      break;
+
+    // float -> int/long: Java requires NaN to convert to 0, so pre-clear the
+    // destination, detect NaN via a self-compare, and skip the conversion then.
+    case Bytecodes::_f2i:
+    case Bytecodes::_f2l: {
+      Label done;
+      FloatRegister Rsrc = src->as_float_reg();
+      Register Rdst = (op->bytecode() == Bytecodes::_f2i ? dest->as_register() : dest->as_register_lo());
+      __ clear_reg(Rdst, true, false);
+      __ z_cebr(Rsrc, Rsrc);
+      __ z_brno(done); // NaN -> 0
+      if (op->bytecode() == Bytecodes::_f2i) {
+        __ z_cfebr(Rdst, Rsrc, Assembler::to_zero);
+      } else { // op->bytecode() == Bytecodes::_f2l
+        __ z_cgebr(Rdst, Rsrc, Assembler::to_zero);
+      }
+      __ bind(done);
+    }
+    break;
+
+    // Same NaN -> 0 scheme for the double-precision sources.
+    case Bytecodes::_d2i:
+    case Bytecodes::_d2l: {
+      Label done;
+      FloatRegister Rsrc = src->as_double_reg();
+      Register Rdst = (op->bytecode() == Bytecodes::_d2i ? dest->as_register() : dest->as_register_lo());
+      __ clear_reg(Rdst, true, false);  // Don't set CC.
+      __ z_cdbr(Rsrc, Rsrc);
+      __ z_brno(done); // NaN -> 0
+      if (op->bytecode() == Bytecodes::_d2i) {
+        __ z_cfdbr(Rdst, Rsrc, Assembler::to_zero);
+      } else { // Bytecodes::_d2l
+        __ z_cgdbr(Rdst, Rsrc, Assembler::to_zero);
+      }
+      __ bind(done);
+    }
+    break;
+
+    default: ShouldNotReachHere();
+  }
+}
+
+// Pad the code position so that the displacement field of the BRASL
+// emitted by call() ends up 4-byte aligned, which is required for
+// MT-safe patching of the call target.
+void LIR_Assembler::align_call(LIR_Code code) {
+  // End of call instruction must be 4 byte aligned.
+  int offset = __ offset();
+  switch (code) {
+    case lir_icvirtual_call:
+      // ic_call() first materializes the inline cache via a TOC load;
+      // account for its size before the call instruction proper.
+      offset += MacroAssembler::load_const_from_toc_size();
+      // no break
+    case lir_static_call:
+    case lir_optvirtual_call:
+    case lir_dynamic_call:
+      offset += NativeCall::call_far_pcrelative_displacement_offset;
+      break;
+    case lir_virtual_call:   // currently, sparc-specific for niagara
+    default: ShouldNotReachHere();
+  }
+  // Instructions are at least 2-byte aligned, so a single nop suffices
+  // to reach 4-byte alignment when needed.
+  if ((offset & (NativeCall::call_far_pcrelative_displacement_alignment-1)) != 0) {
+    __ nop();
+  }
+}
+
+// Emit a direct Java call (nop + BRASL). align_call() must have been
+// emitted before so the BRASL displacement is 4-byte aligned.
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
+  assert((__ offset() + NativeCall::call_far_pcrelative_displacement_offset) % NativeCall::call_far_pcrelative_displacement_alignment == 0,
+         "must be aligned (offset=%d)", __ offset());
+  assert(rtype == relocInfo::none ||
+         rtype == relocInfo::opt_virtual_call_type ||
+         rtype == relocInfo::static_call_type, "unexpected rtype");
+  // Prepend each BRASL with a nop.
+  __ relocate(rtype);
+  __ z_nop();
+  __ z_brasl(Z_R14, op->addr());
+  // Record debug info at the return address of the call.
+  add_call_info(code_offset(), op->info());
+}
+
+// Emit an inline-cache call: load the IC register with the "empty"
+// sentinel (non_oop_word) from the TOC, then call. The IC load site is
+// registered via a virtual_call_Relocation so it can be patched later.
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
+  address virtual_call_oop_addr = NULL;
+  AddressLiteral empty_ic((address) Universe::non_oop_word());
+  virtual_call_oop_addr = __ pc();
+  bool success = __ load_const_from_toc(Z_inline_cache, empty_ic);
+  if (!success) {
+    bailout("const section overflow");
+    return;
+  }
+
+  // CALL to fixup routine. Fixup routine uses ScopeDesc info
+  // to determine who we intended to call.
+  __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr));
+  call(op, relocInfo::none);
+}
+
+// not supported
+// Vtable-dispatched calls (lir_virtual_call) are not used on this
+// platform; see align_call() where the case also aborts.
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
+  ShouldNotReachHere();
+}
+
+// Copy one 64-bit GPR into another; no instruction is emitted when
+// source and destination are already the same register.
+void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
+  if (from_reg == to_reg) {
+    return; // Nothing to do.
+  }
+  __ z_lgr(to_reg, from_reg);
+}
+
+// Store a constant into a stack slot.
+// lmem = width of the memory access, lcon = width of the constant
+// handed to store_const.
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_stack(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+
+  unsigned int lmem = 0;
+  unsigned int lcon = 0;
+  int64_t cbits = 0;
+  Address dest_addr;
+  switch (c->type()) {
+    case T_INT:  // fall through
+    case T_FLOAT:
+      dest_addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      lmem = 4; lcon = 4; cbits = c->as_jint_bits();
+      break;
+
+    case T_ADDRESS:
+      // Pointer-sized slot, constant itself is a 32-bit value.
+      dest_addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      lmem = 8; lcon = 4; cbits = c->as_jint_bits();
+      break;
+
+    case T_OBJECT:
+      dest_addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      if (c->as_jobject() == NULL) {
+        __ store_const(dest_addr, (int64_t)NULL_WORD, 8, 8);
+      } else {
+        // Non-null oop constants go through a register so the oop
+        // gets recorded/relocated properly.
+        jobject2reg(c->as_jobject(), Z_R1_scratch);
+        __ reg2mem_opt(Z_R1_scratch, dest_addr, true);
+      }
+      return;
+
+    case T_LONG:  // fall through
+    case T_DOUBLE:
+      dest_addr = frame_map()->address_for_slot(dest->double_stack_ix());
+      lmem = 8; lcon = 8; cbits = (int64_t)(c->as_jlong_bits());
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+
+  __ store_const(dest_addr, cbits, lmem, lcon);
+}
+
+// Store a constant to memory. Most cases are delegated to store_const
+// at the end (store_offset == -1); the oop cases and the T_BYTE
+// with-index case (card mark) emit their store directly and record
+// store_offset themselves so a null-check debug info entry can be
+// attached to the precise store instruction.
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_address(), "should not call otherwise");
+  // See special case in LIRGenerator::do_StoreIndexed.
+  // T_BYTE: Special case for card mark store.
+  assert(type == T_BYTE || !dest->as_address_ptr()->index()->is_valid(), "not supported");
+  LIR_Const* c = src->as_constant_ptr();
+  Address addr = as_Address(dest->as_address_ptr());
+
+  int store_offset = -1;
+  unsigned int lmem = 0;
+  unsigned int lcon = 0;
+  int64_t cbits = 0;
+  switch (type) {
+    case T_INT:    // fall through
+    case T_FLOAT:
+      lmem = 4; lcon = 4; cbits = c->as_jint_bits();
+      break;
+
+    case T_ADDRESS:
+      // Pointer-sized access, 32-bit constant value.
+      lmem = 8; lcon = 4; cbits = c->as_jint_bits();
+      break;
+
+    case T_OBJECT:  // fall through
+    case T_ARRAY:
+      if (c->as_jobject() == NULL) {
+        // Null can be stored as an immediate; width depends on
+        // whether the field holds a compressed oop.
+        if (UseCompressedOops && !wide) {
+          store_offset = __ store_const(addr, (int32_t)NULL_WORD, 4, 4);
+        } else {
+          store_offset = __ store_const(addr, (int64_t)NULL_WORD, 8, 8);
+        }
+      } else {
+        jobject2reg(c->as_jobject(), Z_R1_scratch);
+        if (UseCompressedOops && !wide) {
+          __ encode_heap_oop(Z_R1_scratch);
+          store_offset = __ reg2mem_opt(Z_R1_scratch, addr, false);
+        } else {
+          store_offset = __ reg2mem_opt(Z_R1_scratch, addr, true);
+        }
+      }
+      assert(store_offset >= 0, "check");
+      break;
+
+    case T_LONG:    // fall through
+    case T_DOUBLE:
+      lmem = 8; lcon = 8; cbits = (int64_t)(c->as_jlong_bits());
+      break;
+
+    case T_BOOLEAN: // fall through
+    case T_BYTE:
+      lmem = 1; lcon = 1; cbits = (int8_t)(c->as_jint());
+      break;
+
+    case T_CHAR:    // fall through
+    case T_SHORT:
+      lmem = 2; lcon = 2; cbits = (int16_t)(c->as_jint());
+      break;
+
+    default:
+      ShouldNotReachHere();
+  };
+
+  // Index register is normally not supported, but for
+  // LIRGenerator::CardTableModRef_post_barrier we make an exception.
+  if (type == T_BYTE && dest->as_address_ptr()->index()->is_valid()) {
+    __ load_const_optimized(Z_R0_scratch, (int8_t)(c->as_jint()));
+    store_offset = __ offset();
+    if (Immediate::is_uimm12(addr.disp())) {
+      __ z_stc(Z_R0_scratch, addr);
+    } else {
+      __ z_stcy(Z_R0_scratch, addr);
+    }
+  }
+
+  // Common path: emit the constant store now if no case above did.
+  if (store_offset == -1) {
+    store_offset = __ store_const(addr, cbits, lmem, lcon);
+    assert(store_offset >= 0, "check");
+  }
+
+  if (info != NULL) {
+    add_debug_info_for_null_check(store_offset, info);
+  }
+}
+
+// Load a constant into a register. Float/double constants are placed
+// in the constant section (TOC) and loaded via a TOC-relative access;
+// oop/metadata constants may require patching.
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+
+  switch (c->type()) {
+    case T_INT: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register(), c->as_jint());
+      break;
+    }
+
+    case T_ADDRESS: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register(), c->as_jint());
+      break;
+    }
+
+    case T_LONG: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register_lo(), (intptr_t)c->as_jlong());
+      break;
+    }
+
+    case T_OBJECT: {
+      if (patch_code != lir_patch_none) {
+        jobject2reg_with_patching(dest->as_register(), info);
+      } else {
+        jobject2reg(c->as_jobject(), dest->as_register());
+      }
+      break;
+    }
+
+    case T_METADATA: {
+      if (patch_code != lir_patch_none) {
+        klass2reg_with_patching(dest->as_register(), info);
+      } else {
+        metadata2reg(c->as_metadata(), dest->as_register());
+      }
+      break;
+    }
+
+    case T_FLOAT: {
+      Register toc_reg = Z_R1_scratch;
+      __ load_toc(toc_reg);
+      address const_addr = __ float_constant(c->as_jfloat());
+      if (const_addr == NULL) {
+        bailout("const section overflow");
+        break;
+      }
+      // TOC-relative displacement of the emitted constant.
+      int displ = const_addr - _masm->code()->consts()->start();
+      if (dest->is_single_fpu()) {
+        __ z_ley(dest->as_float_reg(), displ, toc_reg);
+      } else {
+        // Float bits requested in a GPR (e.g. for a bitwise move).
+        assert(dest->is_single_cpu(), "Must be a cpu register.");
+        __ z_ly(dest->as_register(), displ, toc_reg);
+      }
+    }
+    break;
+
+    case T_DOUBLE: {
+      Register toc_reg = Z_R1_scratch;
+      __ load_toc(toc_reg);
+      address const_addr = __ double_constant(c->as_jdouble());
+      if (const_addr == NULL) {
+        bailout("const section overflow");
+        break;
+      }
+      // TOC-relative displacement of the emitted constant.
+      int displ = const_addr - _masm->code()->consts()->start();
+      if (dest->is_double_fpu()) {
+        __ z_ldy(dest->as_double_reg(), displ, toc_reg);
+      } else {
+        assert(dest->is_double_cpu(), "Must be a long register.");
+        __ z_lg(dest->as_register_lo(), displ, toc_reg);
+      }
+    }
+    break;
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Convert a LIR_Address into an assembler Address.
+// Handles base+disp, base+index+disp, and base+(constant index)+disp;
+// an address without a base register is not supported.
+Address LIR_Assembler::as_Address(LIR_Address* addr) {
+  if (addr->base()->is_illegal()) {
+    Unimplemented();
+  }
+
+  Register base_reg = addr->base()->as_pointer_register();
+  LIR_Opr  index    = addr->index();
+
+  if (index->is_illegal()) {
+    // Plain base + displacement.
+    return Address(base_reg, addr->disp());
+  }
+  if (index->is_cpu_register()) {
+    // Base + index register + displacement.
+    return Address(base_reg, index->as_pointer_register(), addr->disp());
+  }
+  if (index->is_constant()) {
+    // Fold the constant index into the displacement.
+    intptr_t folded_disp = index->as_constant_ptr()->as_jint() + addr->disp();
+    return Address(base_reg, folded_disp);
+  }
+
+  ShouldNotReachHere();
+  return Address();
+}
+
+// Copy one stack slot to another through scratch register Z_R1.
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  Register tmp = Z_R1_scratch;
+  Address  from;
+  Address  to;
+  bool     wide;  // true -> 8-byte transfer, false -> 4-byte transfer.
+
+  switch (type) {
+    case T_INT:
+    case T_FLOAT:
+      from = frame_map()->address_for_slot(src->single_stack_ix());
+      to   = frame_map()->address_for_slot(dest->single_stack_ix());
+      wide = false;
+      break;
+
+    case T_ADDRESS:
+    case T_OBJECT:
+      // Pointer-sized single slots.
+      from = frame_map()->address_for_slot(src->single_stack_ix());
+      to   = frame_map()->address_for_slot(dest->single_stack_ix());
+      wide = true;
+      break;
+
+    case T_LONG:
+    case T_DOUBLE:
+      from = frame_map()->address_for_double_slot(src->double_stack_ix());
+      to   = frame_map()->address_for_double_slot(dest->double_stack_ix());
+      wide = true;
+      break;
+
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+
+  __ mem2reg_opt(tmp, from, wide);
+  __ reg2mem_opt(tmp, to, wide);
+}
+
+// 4-byte accesses only! Don't use it to access 8 bytes!
+// Not used on this 64-bit platform; longs/doubles live in one register.
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
+  ShouldNotCallThis();
+  return 0; // unused
+}
+
+// 4-byte accesses only! Don't use it to access 8 bytes!
+// Not used on this 64-bit platform; longs/doubles live in one register.
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
+  ShouldNotCallThis();
+  return 0; // unused
+}
+
+// Load from memory (LIR_Address) into a register.
+// Displacements which do not fit the signed 20-bit immediate field of
+// the RXY-format instructions are folded into scratch register Z_R1.
+// If patching is needed, the displacement is loaded as a placeholder
+// constant so the PatchingStub can rewrite it later.
+void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code,
+                            CodeEmitInfo* info, bool wide, bool unaligned) {
+
+  assert(type != T_METADATA, "load of metadata ptr not supported");
+  LIR_Address* addr = src_opr->as_address_ptr();
+  LIR_Opr to_reg = dest;
+
+  Register src = addr->base()->as_pointer_register();
+  Register disp_reg = Z_R0;
+  int disp_value = addr->disp();
+  bool needs_patching = (patch_code != lir_patch_none);
+
+  if (addr->base()->type() == T_OBJECT) {
+    __ verify_oop(src);
+  }
+
+  PatchingStub* patch = NULL;
+  if (needs_patching) {
+    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+    assert(!to_reg->is_double_cpu() ||
+           patch_code == lir_patch_none ||
+           patch_code == lir_patch_normal, "patching doesn't match register");
+  }
+
+  if (addr->index()->is_illegal()) {
+    if (!Immediate::is_simm20(disp_value)) {
+      if (needs_patching) {
+        __ load_const(Z_R1_scratch, (intptr_t)0);  // Placeholder, patched later.
+      } else {
+        __ load_const_optimized(Z_R1_scratch, disp_value);
+      }
+      disp_reg = Z_R1_scratch;
+      disp_value = 0;
+    }
+  } else {
+    if (!Immediate::is_simm20(disp_value)) {
+      // Fold displacement and index: Z_R1 = disp + index.
+      __ load_const_optimized(Z_R1_scratch, disp_value);
+      __ z_la(Z_R1_scratch, 0, Z_R1_scratch, addr->index()->as_register());
+      disp_reg = Z_R1_scratch;
+      disp_value = 0;
+    } else {
+      // Fix: only use the index register directly when the displacement
+      // fits. The previous unconditional assignment clobbered the folded
+      // address in Z_R1 above, silently dropping the displacement.
+      disp_reg = addr->index()->as_pointer_register();
+    }
+  }
+
+  // Remember the offset of the load. The patching_epilog must be done
+  // before the call to add_debug_info, otherwise the PcDescs don't get
+  // entered in increasing order.
+  int offset = code_offset();
+
+  assert(disp_reg != Z_R0 || Immediate::is_simm20(disp_value), "should have set this up");
+
+  bool short_disp = Immediate::is_uimm12(disp_value);
+
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE  :  __ z_lb(dest->as_register(),   disp_value, disp_reg, src); break;
+    case T_CHAR  :  __ z_llgh(dest->as_register(), disp_value, disp_reg, src); break;
+    case T_SHORT :
+      if (short_disp) {
+                    __ z_lh(dest->as_register(),   disp_value, disp_reg, src);
+      } else {
+                    __ z_lhy(dest->as_register(),  disp_value, disp_reg, src);
+      }
+      break;
+    case T_INT   :
+      if (short_disp) {
+                    __ z_l(dest->as_register(),    disp_value, disp_reg, src);
+      } else {
+                    __ z_ly(dest->as_register(),   disp_value, disp_reg, src);
+      }
+      break;
+    case T_ADDRESS:
+      // A klass-field load must decompress the narrow klass pointer.
+      if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
+        __ z_llgf(dest->as_register(), disp_value, disp_reg, src);
+        __ decode_klass_not_null(dest->as_register());
+      } else {
+        __ z_lg(dest->as_register(), disp_value, disp_reg, src);
+      }
+      break;
+    case T_ARRAY : // fall through
+    case T_OBJECT:
+    {
+      if (UseCompressedOops && !wide) {
+        __ z_llgf(dest->as_register(), disp_value, disp_reg, src);
+        __ oop_decoder(dest->as_register(), dest->as_register(), true);
+      } else {
+        __ z_lg(dest->as_register(), disp_value, disp_reg, src);
+      }
+      break;
+    }
+    case T_FLOAT:
+      if (short_disp) {
+                    __ z_le(dest->as_float_reg(),  disp_value, disp_reg, src);
+      } else {
+                    __ z_ley(dest->as_float_reg(), disp_value, disp_reg, src);
+      }
+      break;
+    case T_DOUBLE:
+      if (short_disp) {
+                    __ z_ld(dest->as_double_reg(),  disp_value, disp_reg, src);
+      } else {
+                    __ z_ldy(dest->as_double_reg(), disp_value, disp_reg, src);
+      }
+      break;
+    case T_LONG  :  __ z_lg(dest->as_register_lo(), disp_value, disp_reg, src); break;
+    default      : ShouldNotReachHere();
+  }
+  if (type == T_ARRAY || type == T_OBJECT) {
+    __ verify_oop(dest->as_register());
+  }
+
+  if (patch != NULL) {
+    patching_epilog(patch, patch_code, src, info);
+  }
+  if (info != NULL) add_debug_info_for_null_check(offset, info);
+}
+
+// Load a stack slot into a register.
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  assert(src->is_stack(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+
+  if (dest->is_single_cpu()) {
+    const Address from   = frame_map()->address_for_slot(src->single_stack_ix());
+    const bool    is_oop = (type == T_ARRAY || type == T_OBJECT);
+    // Oops and metadata are pointer-sized; all other single-cpu values
+    // are loaded as 32 bit.
+    __ mem2reg_opt(dest->as_register(), from, is_oop || type == T_METADATA);
+    if (is_oop) {
+      __ verify_oop(dest->as_register());
+    }
+  } else if (dest->is_double_cpu()) {
+    __ mem2reg_opt(dest->as_register_lo(),
+                   frame_map()->address_for_slot(src->double_stack_ix()), true);
+  } else if (dest->is_single_fpu()) {
+    __ mem2freg_opt(dest->as_float_reg(),
+                    frame_map()->address_for_slot(src->single_stack_ix()), false);
+  } else if (dest->is_double_fpu()) {
+    __ mem2freg_opt(dest->as_double_reg(),
+                    frame_map()->address_for_slot(src->double_stack_ix()), true);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Store a register into a stack slot.
+void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
+  assert(src->is_register(), "should not call otherwise");
+  assert(dest->is_stack(), "should not call otherwise");
+
+  if (src->is_single_cpu()) {
+    const Address dst    = frame_map()->address_for_slot(dest->single_stack_ix());
+    const bool    is_oop = (type == T_OBJECT || type == T_ARRAY);
+    if (is_oop) {
+      __ verify_oop(src->as_register());
+    }
+    // Oops and metadata are stored pointer-sized; all other single-cpu
+    // values are stored as 32 bit.
+    __ reg2mem_opt(src->as_register(), dst, is_oop || type == T_METADATA);
+  } else if (src->is_double_cpu()) {
+    __ reg2mem_opt(src->as_register_lo(),
+                   frame_map()->address_for_slot(dest->double_stack_ix()), true);
+  } else if (src->is_single_fpu()) {
+    __ freg2mem_opt(src->as_float_reg(),
+                    frame_map()->address_for_slot(dest->single_stack_ix()), false);
+  } else if (src->is_double_fpu()) {
+    __ freg2mem_opt(src->as_double_reg(),
+                    frame_map()->address_for_slot(dest->double_stack_ix()), true);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Register-to-register move. Float <-> integer register moves are not
+// supported; both operands must be of the same kind.
+void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) {
+  if (from_reg->is_float_kind() && to_reg->is_float_kind()) {
+    if (from_reg->is_double_fpu()) {
+      // double to double moves
+      assert(to_reg->is_double_fpu(), "should match");
+      __ z_ldr(to_reg->as_double_reg(), from_reg->as_double_reg());
+    } else {
+      // float to float moves
+      assert(to_reg->is_single_fpu(), "should match");
+      __ z_ler(to_reg->as_float_reg(), from_reg->as_float_reg());
+    }
+  } else if (!from_reg->is_float_kind() && !to_reg->is_float_kind()) {
+    if (from_reg->is_double_cpu()) {
+      __ z_lgr(to_reg->as_pointer_register(), from_reg->as_pointer_register());
+    } else if (to_reg->is_double_cpu()) {
+      // int to int moves
+      __ z_lgr(to_reg->as_register_lo(), from_reg->as_register());
+    } else {
+      // int to int moves
+      __ z_lgr(to_reg->as_register(), from_reg->as_register());
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+  if (to_reg->type() == T_OBJECT || to_reg->type() == T_ARRAY) {
+    __ verify_oop(to_reg->as_register());
+  }
+}
+
+// Store a register to memory (LIR_Address).
+// Mirrors mem2reg: large displacements are folded into Z_R1, and a
+// PatchingStub with a placeholder displacement is emitted when the
+// field offset is not yet known.
+void LIR_Assembler::reg2mem(LIR_Opr from, LIR_Opr dest_opr, BasicType type,
+                            LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack,
+                            bool wide, bool unaligned) {
+  assert(type != T_METADATA, "store of metadata ptr not supported");
+  LIR_Address* addr = dest_opr->as_address_ptr();
+
+  Register dest = addr->base()->as_pointer_register();
+  Register disp_reg = Z_R0;
+  int disp_value = addr->disp();
+  bool needs_patching = (patch_code != lir_patch_none);
+
+  if (addr->base()->is_oop_register()) {
+    __ verify_oop(dest);
+  }
+
+  PatchingStub* patch = NULL;
+  if (needs_patching) {
+    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+    assert(!from->is_double_cpu() ||
+           patch_code == lir_patch_none ||
+           patch_code == lir_patch_normal, "patching doesn't match register");
+  }
+
+  assert(!needs_patching || (!Immediate::is_simm20(disp_value) && addr->index()->is_illegal()), "assumption");
+  if (addr->index()->is_illegal()) {
+    if (!Immediate::is_simm20(disp_value)) {
+      if (needs_patching) {
+        __ load_const(Z_R1_scratch, (intptr_t)0);  // Placeholder, patched later.
+      } else {
+        __ load_const_optimized(Z_R1_scratch, disp_value);
+      }
+      disp_reg = Z_R1_scratch;
+      disp_value = 0;
+    }
+  } else {
+    if (!Immediate::is_simm20(disp_value)) {
+      // Fold displacement and index: Z_R1 = disp + index.
+      __ load_const_optimized(Z_R1_scratch, disp_value);
+      __ z_la(Z_R1_scratch, 0, Z_R1_scratch, addr->index()->as_register());
+      disp_reg = Z_R1_scratch;
+      disp_value = 0;
+    } else {
+      // Fix: only use the index register directly when the displacement
+      // fits. The previous unconditional assignment clobbered the folded
+      // address in Z_R1 above, silently dropping the displacement.
+      disp_reg = addr->index()->as_pointer_register();
+    }
+  }
+
+  assert(disp_reg != Z_R0 || Immediate::is_simm20(disp_value), "should have set this up");
+
+  if (type == T_ARRAY || type == T_OBJECT) {
+    __ verify_oop(from->as_register());
+  }
+
+  bool short_disp = Immediate::is_uimm12(disp_value);
+
+  // Remember the offset of the store. The patching_epilog must be done
+  // before the call to add_debug_info_for_null_check, otherwise the PcDescs don't get
+  // entered in increasing order.
+  int offset = code_offset();
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE  :
+      if (short_disp) {
+                    __ z_stc(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_stcy(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_CHAR  : // fall through
+    case T_SHORT :
+      if (short_disp) {
+                    __ z_sth(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_sthy(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_INT   :
+      if (short_disp) {
+                    __ z_st(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_sty(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    // Stray duplicate 'break;' after T_ADDRESS removed (dead code).
+    case T_LONG  :  __ z_stg(from->as_register_lo(), disp_value, disp_reg, dest); break;
+    case T_ADDRESS: __ z_stg(from->as_register(),    disp_value, disp_reg, dest); break;
+    case T_ARRAY : // fall through
+    case T_OBJECT:
+      {
+        if (UseCompressedOops && !wide) {
+          // NOTE(review): Z_R14 (return pc register) is used as scratch
+          // for the compressed oop — presumably safe here because the
+          // return pc is reloaded from the frame on exit; confirm.
+          Register compressed_src = Z_R14;
+          __ z_lgr(compressed_src, from->as_register());
+          __ encode_heap_oop(compressed_src);
+          // The store itself is the instruction that may trap.
+          offset = code_offset();
+          if (short_disp) {
+            __ z_st(compressed_src,  disp_value, disp_reg, dest);
+          } else {
+            __ z_sty(compressed_src, disp_value, disp_reg, dest);
+          }
+        } else {
+          __ z_stg(from->as_register(), disp_value, disp_reg, dest);
+        }
+        break;
+      }
+    case T_FLOAT :
+      if (short_disp) {
+                    __ z_ste(from->as_float_reg(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_stey(from->as_float_reg(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_DOUBLE:
+      if (short_disp) {
+                    __ z_std(from->as_double_reg(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_stdy(from->as_double_reg(), disp_value, disp_reg, dest);
+      }
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  if (patch != NULL) {
+    patching_epilog(patch, patch_code, dest, info);
+  }
+
+  if (info != NULL) add_debug_info_for_null_check(offset, info);
+}
+
+
+// Emit the method epilogue: pop the C1 frame, perform the return-poll
+// safepoint check, and branch to the caller.
+void LIR_Assembler::return_op(LIR_Opr result) {
+  assert(result->is_illegal() ||
+         (result->is_single_cpu() && result->as_register() == Z_R2) ||
+         (result->is_double_cpu() && result->as_register_lo() == Z_R2) ||
+         (result->is_single_fpu() && result->as_float_reg() == Z_F0) ||
+         (result->is_double_fpu() && result->as_double_reg() == Z_F0), "convention");
+
+  AddressLiteral pp(os::get_polling_page());
+  __ load_const_optimized(Z_R1_scratch, pp);
+
+  // Pop the frame before the safepoint code.
+  int retPC_offset = initial_frame_size_in_bytes() + _z_abi16(return_pc);
+  if (Displacement::is_validDisp(retPC_offset)) {
+    // Return pc reachable with a single displacement: load it first,
+    // then pop the frame.
+    __ z_lg(Z_R14, retPC_offset, Z_SP);
+    __ add2reg(Z_SP, initial_frame_size_in_bytes());
+  } else {
+    // Otherwise pop the frame first so the return pc slot is in range.
+    __ add2reg(Z_SP, initial_frame_size_in_bytes());
+    __ restore_return_pc();
+  }
+
+  // We need to mark the code position where the load from the safepoint
+  // polling page was emitted as relocInfo::poll_return_type here.
+  __ relocate(relocInfo::poll_return_type);
+  __ load_from_polling_page(Z_R1_scratch);
+
+  __ z_br(Z_R14); // Return to caller.
+}
+
+// Emit a safepoint poll (read from the polling page) and return the
+// code offset of the poll instruction for debug info recording.
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+  AddressLiteral pp(os::get_polling_page());
+  __ load_const_optimized(tmp->as_register_lo(), pp);
+  guarantee(info != NULL, "Shouldn't be NULL");
+  add_debug_info_for_branch(info);
+  int offset = __ offset();
+  __ relocate(relocInfo::poll_type);
+  __ load_from_polling_page(tmp->as_register_lo());
+  return offset;
+}
+
+// Emit the out-of-line static call stub: loads Z_method and branches
+// to a destination, both loaded from the TOC so they can be patched.
+void LIR_Assembler::emit_static_call_stub() {
+
+  // Stub is fixed up when the corresponding call is converted from calling
+  // compiled code to calling interpreted code.
+
+  address call_pc = __ pc();
+  address stub = __ start_a_stub(call_stub_size);
+  if (stub == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  int start = __ offset();
+
+  // Associate this stub with its call site.
+  __ relocate(static_stub_Relocation::spec(call_pc));
+
+  // See also Matcher::interpreter_method_oop_reg().
+  AddressLiteral meta = __ allocate_metadata_address(NULL);
+  bool success = __ load_const_from_toc(Z_method, meta);
+
+  __ set_inst_mark();
+  // Branch target placeholder (-1), patched when the stub is resolved.
+  AddressLiteral a((address)-1);
+  success = success && __ load_const_from_toc(Z_R1, a);
+  if (!success) {
+    bailout("const section overflow");
+    return;
+  }
+
+  __ z_br(Z_R1);
+  assert(__ offset() - start <= call_stub_size, "stub too big");
+  __ end_a_stub(); // Update current stubs pointer and restore insts_end.
+}
+
+// Emit a compare setting the condition code; the consumer (branch or
+// cmove) interprets it. Oops always compare unsigned full-width.
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
+  bool unsigned_comp = condition == lir_cond_belowEqual || condition == lir_cond_aboveEqual;
+  if (opr1->is_single_cpu()) {
+    Register reg1 = opr1->as_register();
+    if (opr2->is_single_cpu()) {
+      // cpu register - cpu register
+      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+        __ z_clgr(reg1, opr2->as_register());
+      } else {
+        assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
+        if (unsigned_comp) {
+          __ z_clr(reg1, opr2->as_register());
+        } else {
+          __ z_cr(reg1, opr2->as_register());
+        }
+      }
+    } else if (opr2->is_stack()) {
+      // cpu register - stack
+      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+        // NOTE(review): signed z_cg here vs unsigned z_clgr for the
+        // register-register oop compare above — confirm intended.
+        __ z_cg(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+      } else {
+        if (unsigned_comp) {
+          __ z_cly(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+        } else {
+          __ z_cy(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+        }
+      }
+    } else if (opr2->is_constant()) {
+      // cpu register - constant
+      LIR_Const* c = opr2->as_constant_ptr();
+      if (c->type() == T_INT) {
+        if (unsigned_comp) {
+          __ z_clfi(reg1, c->as_jint());
+        } else {
+          __ z_cfi(reg1, c->as_jint());
+        }
+      } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
+        // In 64bit oops are single register.
+        jobject o = c->as_jobject();
+        if (o == NULL) {
+          // Compare against null: just test reg1 for zero.
+          __ z_ltgr(reg1, reg1);
+        } else {
+          jobject2reg(o, Z_R1_scratch);
+          __ z_cgr(reg1, Z_R1_scratch);
+        }
+      } else {
+        fatal("unexpected type: %s", basictype_to_str(c->type()));
+      }
+      // cpu register - address
+    } else if (opr2->is_address()) {
+      if (op->info() != NULL) {
+        add_debug_info_for_null_check_here(op->info());
+      }
+      if (unsigned_comp) {
+        __ z_cly(reg1, as_Address(opr2->as_address_ptr()));
+      } else {
+        __ z_cy(reg1, as_Address(opr2->as_address_ptr()));
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (opr1->is_double_cpu()) {
+    assert(!unsigned_comp, "unexpected");
+    Register xlo = opr1->as_register_lo();
+    Register xhi = opr1->as_register_hi();
+    if (opr2->is_double_cpu()) {
+      __ z_cgr(xlo, opr2->as_register_lo());
+    } else if (opr2->is_constant()) {
+      // cpu register - constant 0
+      assert(opr2->as_jlong() == (jlong)0, "only handles zero");
+      __ z_ltgr(xlo, xlo);
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (opr1->is_single_fpu()) {
+    if (opr2->is_single_fpu()) {
+      __ z_cebr(opr1->as_float_reg(), opr2->as_float_reg());
+    } else {
+      // stack slot
+      Address addr = frame_map()->address_for_slot(opr2->single_stack_ix());
+      if (Immediate::is_uimm12(addr.disp())) {
+        __ z_ceb(opr1->as_float_reg(), addr);
+      } else {
+        // Displacement too large for CEB; load into scratch FPR first.
+        __ z_ley(Z_fscratch_1, addr);
+        __ z_cebr(opr1->as_float_reg(), Z_fscratch_1);
+      }
+    }
+  } else if (opr1->is_double_fpu()) {
+    if (opr2->is_double_fpu()) {
+    __ z_cdbr(opr1->as_double_reg(), opr2->as_double_reg());
+    } else {
+      // stack slot
+      Address addr = frame_map()->address_for_slot(opr2->double_stack_ix());
+      if (Immediate::is_uimm12(addr.disp())) {
+        __ z_cdb(opr1->as_double_reg(), addr);
+      } else {
+        // Displacement too large for CDB; load into scratch FPR first.
+        __ z_ldy(Z_fscratch_1, addr);
+        __ z_cdbr(opr1->as_double_reg(), Z_fscratch_1);
+      }
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Materialize a three-way comparison result (-1/0/1) into dst.
+// For float/double compares, lir_ucmp_fd2i (fcmpl) maps unordered to -1
+// and lir_cmp_fd2i (fcmpg) maps unordered to +1. Uses branchless LOCGR
+// when the Load/Store-on-Condition facility is available.
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
+  Label    done;
+  Register dreg = dst->as_register();
+
+  if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
+    assert((left->is_single_fpu() && right->is_single_fpu()) ||
+           (left->is_double_fpu() && right->is_double_fpu()), "unexpected operand types");
+    bool is_single = left->is_single_fpu();
+    bool is_unordered_less = (code == lir_ucmp_fd2i);
+    FloatRegister lreg = is_single ? left->as_float_reg() : left->as_double_reg();
+    FloatRegister rreg = is_single ? right->as_float_reg() : right->as_double_reg();
+    if (is_single) {
+      __ z_cebr(lreg, rreg);
+    } else {
+      __ z_cdbr(lreg, rreg);
+    }
+    if (VM_Version::has_LoadStoreConditional()) {
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one,  1);
+      __ z_lghi(dreg, 0);
+      // Unordered goes with +1 (cmpg) or -1 (cmpl), selected via the
+      // NotOrdered condition mask.
+      __ z_locgr(dreg, one,       is_unordered_less ? Assembler::bcondHigh            : Assembler::bcondHighOrNotOrdered);
+      __ z_locgr(dreg, minus_one, is_unordered_less ? Assembler::bcondLowOrNotOrdered : Assembler::bcondLow);
+    } else {
+      __ clear_reg(dreg, true, false);
+      __ z_bre(done); // if (left == right) dst = 0
+
+      // if (left > right || ((code ~= cmpg) && (left <> right)) dst := 1
+      __ z_lhi(dreg, 1);
+      __ z_brc(is_unordered_less ? Assembler::bcondHigh : Assembler::bcondHighOrNotOrdered, done);
+
+      // if (left < right || ((code ~= cmpl) && (left <> right)) dst := -1
+      __ z_lhi(dreg, -1);
+    }
+  } else {
+    assert(code == lir_cmp_l2i, "check");
+    if (VM_Version::has_LoadStoreConditional()) {
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_cgr(left->as_register_lo(), right->as_register_lo());
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one,  1);
+      __ z_lghi(dreg, 0);
+      __ z_locgr(dreg, one, Assembler::bcondHigh);
+      __ z_locgr(dreg, minus_one, Assembler::bcondLow);
+    } else {
+      __ z_cgr(left->as_register_lo(), right->as_register_lo());
+      __ z_lghi(dreg,  0);     // eq value
+      __ z_bre(done);
+      __ z_lghi(dreg,  1);     // gt value
+      __ z_brh(done);
+      __ z_lghi(dreg, -1);     // lt value
+    }
+  }
+  __ bind(done);
+}
+
+// result = condition ? opr1 : opr2
+//
+// The integer condition code must already be set by a preceding compare.
+// 'type' is not used on this platform.
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+  // acond: CC mask under which opr1 is kept; ncond: its negation, i.e. the
+  // mask under which opr2 must be selected instead.
+  Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual;
+  switch (condition) {
+    case lir_cond_equal:        acond = Assembler::bcondEqual;    ncond = Assembler::bcondNotEqual; break;
+    case lir_cond_notEqual:     acond = Assembler::bcondNotEqual; ncond = Assembler::bcondEqual;    break;
+    case lir_cond_less:         acond = Assembler::bcondLow;      ncond = Assembler::bcondNotLow;   break;
+    case lir_cond_lessEqual:    acond = Assembler::bcondNotHigh;  ncond = Assembler::bcondHigh;     break;
+    case lir_cond_greaterEqual: acond = Assembler::bcondNotLow;   ncond = Assembler::bcondLow;      break;
+    case lir_cond_greater:      acond = Assembler::bcondHigh;     ncond = Assembler::bcondNotHigh;  break;
+    // Unsigned conditions reuse the same CC masks (the preceding compare is
+    // presumed to be an unsigned one — confirm at the LIR level).
+    case lir_cond_belowEqual:   acond = Assembler::bcondNotHigh;  ncond = Assembler::bcondHigh;     break;
+    case lir_cond_aboveEqual:   acond = Assembler::bcondNotLow;   ncond = Assembler::bcondLow;      break;
+    default:                    ShouldNotReachHere();
+  }
+
+  // Move opr1 into result unconditionally; opr2 overwrites it below if the
+  // condition does not hold.
+  if (opr1->is_cpu_register()) {
+    reg2reg(opr1, result);
+  } else if (opr1->is_stack()) {
+    stack2reg(opr1, result, result->type());
+  } else if (opr1->is_constant()) {
+    const2reg(opr1, result, lir_patch_none, NULL);
+  } else {
+    ShouldNotReachHere();
+  }
+
+  // With the load-on-condition facility (and a non-constant opr2, which the
+  // LOC forms cannot encode) select opr2 branch-free under ncond.
+  if (VM_Version::has_LoadStoreConditional() && !opr2->is_constant()) {
+    // Optimized version that does not require a branch.
+    if (opr2->is_single_cpu()) {
+      assert(opr2->cpu_regnr() != result->cpu_regnr(), "opr2 already overwritten by previous move");
+      __ z_locgr(result->as_register(), opr2->as_register(), ncond);
+    } else if (opr2->is_double_cpu()) {
+      assert(opr2->cpu_regnrLo() != result->cpu_regnrLo() && opr2->cpu_regnrLo() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
+      assert(opr2->cpu_regnrHi() != result->cpu_regnrLo() && opr2->cpu_regnrHi() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
+      __ z_locgr(result->as_register_lo(), opr2->as_register_lo(), ncond);
+    } else if (opr2->is_single_stack()) {
+      __ z_loc(result->as_register(), frame_map()->address_for_slot(opr2->single_stack_ix()), ncond);
+    } else if (opr2->is_double_stack()) {
+      __ z_locg(result->as_register_lo(), frame_map()->address_for_slot(opr2->double_stack_ix()), ncond);
+    } else {
+      ShouldNotReachHere();
+    }
+  } else {
+    // Fallback: branch over the opr2 move when the condition holds.
+    Label skip;
+    __ z_brc(acond, skip);
+    if (opr2->is_cpu_register()) {
+      reg2reg(opr2, result);
+    } else if (opr2->is_stack()) {
+      stack2reg(opr2, result, result->type());
+    } else if (opr2->is_constant()) {
+      const2reg(opr2, result, lir_patch_none, NULL);
+    } else {
+      ShouldNotReachHere();
+    }
+    __ bind(skip);
+  }
+}
+
+// Emit an arithmetic LIR op (add/sub/mul, plus div for FP operands) on
+// integer, float, or memory operands. The machine instructions used are
+// two-operand forms, so the result is computed in place: 'left' must be the
+// same operand as 'dest' (asserted in every branch).
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
+                             CodeEmitInfo* info, bool pop_fpu_stack) {
+  assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
+
+  if (left->is_single_cpu()) {
+    assert(left == dest, "left and dest must be equal");
+    Register lreg = left->as_register();
+
+    if (right->is_single_cpu()) {
+      // cpu register - cpu register
+      Register rreg = right->as_register();
+      switch (code) {
+        case lir_add: __ z_ar (lreg, rreg); break;
+        case lir_sub: __ z_sr (lreg, rreg); break;
+        case lir_mul: __ z_msr(lreg, rreg); break;
+        default: ShouldNotReachHere();
+      }
+
+    } else if (right->is_stack()) {
+      // cpu register - stack
+      Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
+      switch (code) {
+        case lir_add: __ z_ay(lreg, raddr); break;
+        case lir_sub: __ z_sy(lreg, raddr); break;
+        default: ShouldNotReachHere();
+      }
+
+    } else if (right->is_constant()) {
+      // cpu register - constant
+      // 64-bit add/sub forms are used so that negating the constant for
+      // lir_sub stays correct even for min_jint (see note below).
+      jint c = right->as_constant_ptr()->as_jint();
+      switch (code) {
+        case lir_add: __ z_agfi(lreg, c);  break;
+        case lir_sub: __ z_agfi(lreg, -c); break; // note: -min_jint == min_jint
+        case lir_mul: __ z_msfi(lreg, c);  break;
+        default: ShouldNotReachHere();
+      }
+
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (left->is_double_cpu()) {
+    assert(left == dest, "left and dest must be equal");
+    Register lreg_lo = left->as_register_lo();
+    Register lreg_hi = left->as_register_hi();
+
+    if (right->is_double_cpu()) {
+      // cpu register - cpu register
+      Register rreg_lo = right->as_register_lo();
+      Register rreg_hi = right->as_register_hi();
+      assert_different_registers(lreg_lo, rreg_lo);
+      switch (code) {
+        case lir_add:
+          __ z_agr(lreg_lo, rreg_lo);
+          break;
+        case lir_sub:
+          __ z_sgr(lreg_lo, rreg_lo);
+          break;
+        case lir_mul:
+          __ z_msgr(lreg_lo, rreg_lo);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+
+    } else if (right->is_constant()) {
+      // cpu register - constant
+      // NOTE(review): z_agfi/z_msgfi encode a signed 32-bit immediate;
+      // presumably the LIRGenerator only hands us constants that fit — confirm.
+      jlong c = right->as_constant_ptr()->as_jlong_bits();
+      switch (code) {
+        case lir_add: __ z_agfi(lreg_lo, c); break;
+        case lir_sub:
+          if (c != min_jint) {
+                      __ z_agfi(lreg_lo, -c);
+          } else {
+            // -min_jint cannot be represented as simm32 in z_agfi
+            // min_jint sign extended:      0xffffffff80000000
+            // -min_jint as 64 bit integer: 0x0000000080000000
+            // 0x80000000 can be represented as uimm32 in z_algfi
+            // lreg_lo := lreg_lo + -min_jint == lreg_lo + 0x80000000
+                      __ z_algfi(lreg_lo, UCONST64(0x80000000));
+          }
+          break;
+        case lir_mul: __ z_msgfi(lreg_lo, c); break;
+        default:
+          ShouldNotReachHere();
+      }
+
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (left->is_single_fpu()) {
+    assert(left == dest, "left and dest must be equal");
+    FloatRegister lreg = left->as_float_reg();
+    FloatRegister rreg = right->is_single_fpu() ? right->as_float_reg() : fnoreg;
+    Address raddr;
+
+    // Right operand not in a register: it must be a stack slot. If the slot's
+    // displacement does not fit the uimm12 of the RX-form instructions below,
+    // load it into the scratch FPR (note: rreg is re-assigned inside the call).
+    if (rreg == fnoreg) {
+      assert(right->is_single_stack(), "constants should be loaded into register");
+      raddr = frame_map()->address_for_slot(right->single_stack_ix());
+      if (!Immediate::is_uimm12(raddr.disp())) {
+        __ mem2freg_opt(rreg = Z_fscratch_1, raddr, false);
+      }
+    }
+
+    if (rreg != fnoreg) {
+      // register - register forms
+      switch (code) {
+        case lir_add: __ z_aebr(lreg, rreg);  break;
+        case lir_sub: __ z_sebr(lreg, rreg);  break;
+        case lir_mul_strictfp: // fall through
+        case lir_mul: __ z_meebr(lreg, rreg); break;
+        case lir_div_strictfp: // fall through
+        case lir_div: __ z_debr(lreg, rreg);  break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      // register - memory forms
+      switch (code) {
+        case lir_add: __ z_aeb(lreg, raddr);  break;
+        case lir_sub: __ z_seb(lreg, raddr);  break;
+        case lir_mul_strictfp: // fall through
+        case lir_mul: __ z_meeb(lreg, raddr);  break;
+        case lir_div_strictfp: // fall through
+        case lir_div: __ z_deb(lreg, raddr);  break;
+        default: ShouldNotReachHere();
+      }
+    }
+  } else if (left->is_double_fpu()) {
+    assert(left == dest, "left and dest must be equal");
+    FloatRegister lreg = left->as_double_reg();
+    FloatRegister rreg = right->is_double_fpu() ? right->as_double_reg() : fnoreg;
+    Address raddr;
+
+    // Same stack-slot handling as the single-FP case above.
+    if (rreg == fnoreg) {
+      assert(right->is_double_stack(), "constants should be loaded into register");
+      raddr = frame_map()->address_for_slot(right->double_stack_ix());
+      if (!Immediate::is_uimm12(raddr.disp())) {
+        __ mem2freg_opt(rreg = Z_fscratch_1, raddr, true);
+      }
+    }
+
+    if (rreg != fnoreg) {
+      switch (code) {
+        case lir_add: __ z_adbr(lreg, rreg); break;
+        case lir_sub: __ z_sdbr(lreg, rreg); break;
+        case lir_mul_strictfp: // fall through
+        case lir_mul: __ z_mdbr(lreg, rreg); break;
+        case lir_div_strictfp: // fall through
+        case lir_div: __ z_ddbr(lreg, rreg); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      switch (code) {
+        case lir_add: __ z_adb(lreg, raddr); break;
+        case lir_sub: __ z_sdb(lreg, raddr); break;
+        case lir_mul_strictfp: // fall through
+        case lir_mul: __ z_mdb(lreg, raddr); break;
+        case lir_div_strictfp: // fall through
+        case lir_div: __ z_ddb(lreg, raddr); break;
+        default: ShouldNotReachHere();
+      }
+    }
+  } else if (left->is_address()) {
+    // memory op= constant: only 'add constant to memory' is supported.
+    assert(left == dest, "left and dest must be equal");
+    assert(code == lir_add, "unsupported operation");
+    assert(right->is_constant(), "unsupported operand");
+    jint c = right->as_constant_ptr()->as_jint();
+    LIR_Address* lir_addr = left->as_address_ptr();
+    Address addr = as_Address(lir_addr);
+    switch (lir_addr->type()) {
+      case T_INT:
+        __ add2mem_32(addr, c, Z_R1_scratch);
+        break;
+      case T_LONG:
+        __ add2mem_64(addr, c, Z_R1_scratch);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// C1 on s390 does not simulate an x87-style FPU stack, so there is
+// nothing to pop here.
+void LIR_Assembler::fpop() {
+  // do nothing
+}
+
+// Emit code for a math intrinsic. Only the double-precision cases are
+// handled here (operands are fetched with as_double_reg()); 'thread' must be
+// invalid and 'op' is unused.
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
+  switch (code) {
+    case lir_sqrt: {
+      assert(!thread->is_valid(), "there is no need for a thread_reg for dsqrt");
+      FloatRegister src_reg = value->as_double_reg();
+      FloatRegister dst_reg = dest->as_double_reg();
+      __ z_sqdbr(dst_reg, src_reg); // square root, long BFP
+      break;
+    }
+    case lir_abs: {
+      assert(!thread->is_valid(), "there is no need for a thread_reg for fabs");
+      FloatRegister src_reg = value->as_double_reg();
+      FloatRegister dst_reg = dest->as_double_reg();
+      __ z_lpdbr(dst_reg, src_reg); // load positive (absolute value), long BFP
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+      break;
+    }
+  }
+}
+
+// Emit a bitwise and/or/xor: 32 bit when 'left' is a single cpu register,
+// 64 bit otherwise. The operation is performed on 'left' in place and the
+// value is then copied to 'dst' (a no-op when they are the same register).
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
+  if (left->is_single_cpu()) {
+    Register reg = left->as_register();
+    if (right->is_constant()) {
+      // 32-bit immediate forms.
+      int val = right->as_constant_ptr()->as_jint();
+      switch (code) {
+        case lir_logic_and: __ z_nilf(reg, val); break;
+        case lir_logic_or:  __ z_oilf(reg, val); break;
+        case lir_logic_xor: __ z_xilf(reg, val); break;
+        default: ShouldNotReachHere();
+      }
+    } else if (right->is_stack()) {
+      // register op= stack slot.
+      Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
+      switch (code) {
+        case lir_logic_and: __ z_ny(reg, raddr); break;
+        case lir_logic_or:  __ z_oy(reg, raddr); break;
+        case lir_logic_xor: __ z_xy(reg, raddr); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      Register rright = right->as_register();
+      switch (code) {
+        case lir_logic_and: __ z_nr(reg, rright); break;
+        case lir_logic_or : __ z_or(reg, rright); break;
+        case lir_logic_xor: __ z_xr(reg, rright); break;
+        default: ShouldNotReachHere();
+      }
+    }
+    move_regs(reg, dst->as_register());
+  } else {
+    Register l_lo = left->as_register_lo();
+    if (right->is_constant()) {
+      // 64-bit constants: materialize in the scratch register first.
+      __ load_const_optimized(Z_R1_scratch, right->as_constant_ptr()->as_jlong());
+      switch (code) {
+        case lir_logic_and:
+          __ z_ngr(l_lo, Z_R1_scratch);
+          break;
+        case lir_logic_or:
+          __ z_ogr(l_lo, Z_R1_scratch);
+          break;
+        case lir_logic_xor:
+          __ z_xgr(l_lo, Z_R1_scratch);
+          break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      Register r_lo;
+      // Oop/array operands report their register via as_register().
+      if (right->type() == T_OBJECT || right->type() == T_ARRAY) {
+        r_lo = right->as_register();
+      } else {
+        r_lo = right->as_register_lo();
+      }
+      switch (code) {
+        case lir_logic_and:
+          __ z_ngr(l_lo, r_lo);
+          break;
+        case lir_logic_or:
+          __ z_ogr(l_lo, r_lo);
+          break;
+        case lir_logic_xor:
+          __ z_xgr(l_lo, r_lo);
+          break;
+        default: ShouldNotReachHere();
+      }
+    }
+
+    Register dst_lo = dst->as_register_lo();
+
+    move_regs(l_lo, dst_lo);
+  }
+}
+
+// See operand selection in LIRGenerator::do_ArithmeticOp_Int().
+//
+// Emit lir_idiv / lir_irem for 32 bit (single cpu) or 64 bit (double cpu)
+// integer operands. Division by a power-of-2 constant is strength-reduced to
+// shift/mask sequences; otherwise the fixed register assignment required by
+// DSGR/DSGFR is asserted (see [l]divInOpr()/[l]divOutOpr()/[l]remOutOpr())
+// and the hardware divide instruction is used.
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
+  if (left->is_double_cpu()) {
+    // 64 bit integer case
+    assert(left->is_double_cpu(), "left must be register");
+    assert(right->is_double_cpu() || is_power_of_2_long(right->as_jlong()),
+           "right must be register or power of 2 constant");
+    assert(result->is_double_cpu(), "result must be register");
+
+    Register lreg = left->as_register_lo();
+    Register dreg = result->as_register_lo();
+
+    if (right->is_constant()) {
+      // Convert division by a power of two into some shifts and logical operations.
+      Register treg1 = Z_R0_scratch;
+      Register treg2 = Z_R1_scratch;
+      jlong divisor = right->as_jlong();
+      jlong log_divisor = log2_long(right->as_jlong());
+
+      if (divisor == min_jlong) {
+        // Min_jlong is special. Result is '0' except for min_jlong/min_jlong = 1.
+        if (dreg == lreg) {
+          NearLabel done;
+          __ load_const_optimized(treg2, min_jlong);
+          __ z_cgr(lreg, treg2);
+          __ z_lghi(dreg, 0);           // Preserves condition code.
+          __ z_brne(done);
+          __ z_lghi(dreg, 1);           // min_jlong / min_jlong = 1
+          __ bind(done);
+        } else {
+          assert_different_registers(dreg, lreg);
+          NearLabel done;
+          __ z_lghi(dreg, 0);
+          __ compare64_and_branch(lreg, min_jlong, Assembler::bcondNotEqual, done);
+          __ z_lghi(dreg, 1);
+          __ bind(done);
+        }
+        return;
+      }
+      __ move_reg_if_needed(dreg, T_LONG, lreg, T_LONG);
+      // Round towards zero: add (divisor - 1) to negative dividends before shifting.
+      if (divisor == 2) {
+        __ z_srlg(treg2, dreg, 63);     // dividend < 0 ? 1 : 0
+      } else {
+        __ z_srag(treg2, dreg, 63);     // dividend < 0 ? -1 : 0
+        __ and_imm(treg2, divisor - 1, treg1, true);
+      }
+      if (code == lir_idiv) {
+        __ z_agr(dreg, treg2);
+        __ z_srag(dreg, dreg, log_divisor);
+      } else {
+        assert(code == lir_irem, "check");
+        __ z_agr(treg2, dreg);
+        __ and_imm(treg2, ~(divisor - 1), treg1, true);
+        __ z_sgr(dreg, treg2);
+      }
+      return;
+    }
+
+    // Divisor is not a power of 2 constant.
+    Register rreg = right->as_register_lo();
+    Register treg = temp->as_register_lo();
+    assert(right->is_double_cpu(), "right must be register");
+    assert(lreg == Z_R11, "see ldivInOpr()");
+    assert(rreg != lreg, "right register must not be same as left register");
+    assert((code == lir_idiv && dreg == Z_R11 && treg == Z_R10) ||
+           (code == lir_irem && dreg == Z_R10 && treg == Z_R11), "see ldivInOpr(), ldivOutOpr(), lremOutOpr()");
+
+    Register R1 = lreg->predecessor();
+    Register R2 = rreg;
+    assert(code != lir_idiv || lreg==dreg, "see code below");
+    if (code == lir_idiv) {
+      // Precompute x / -1 == -x; reverted below if the divisor is not -1.
+      __ z_lcgr(lreg, lreg);
+    } else {
+      // x % -1 == 0.
+      __ clear_reg(dreg, true, false);
+    }
+    NearLabel done;
+    // Skip the divide for divisor == -1: the result is already in place
+    // (presumably this also sidesteps the fixed-point-divide overflow for
+    // min_jlong / -1 — confirm).
+    __ compare64_and_branch(R2, -1, Assembler::bcondEqual, done);
+    if (code == lir_idiv) {
+      __ z_lcgr(lreg, lreg); // Revert lcgr above.
+    }
+    if (ImplicitDiv0Checks) {
+      // Record debug info at the divide so an implicit (hardware trap)
+      // divide-by-zero check can be resolved back to this site.
+      // NOTE(review): the original comment here claimed no debug info was
+      // needed, contradicting the call below — confirm the intended
+      // interaction with ImplicitDiv0Checks.
+      add_debug_info_for_div0(__ offset(), info);
+    }
+    __ z_dsgr(R1, R2);
+    __ bind(done);
+    return;
+  }
+
+  // 32 bit integer case
+
+  assert(left->is_single_cpu(), "left must be register");
+  assert(right->is_single_cpu() || is_power_of_2(right->as_jint()), "right must be register or power of 2 constant");
+  assert(result->is_single_cpu(), "result must be register");
+
+  Register lreg = left->as_register();
+  Register dreg = result->as_register();
+
+  if (right->is_constant()) {
+    // Convert division by a power of two into some shifts and logical operations.
+    Register treg1 = Z_R0_scratch;
+    Register treg2 = Z_R1_scratch;
+    jlong divisor = right->as_jint();
+    jlong log_divisor = log2_long(right->as_jint());
+    __ move_reg_if_needed(dreg, T_LONG, lreg, T_INT); // sign extend
+    if (divisor == 2) {
+      __ z_srlg(treg2, dreg, 63);     // dividend < 0 ?  1 : 0
+    } else {
+      __ z_srag(treg2, dreg, 63);     // dividend < 0 ? -1 : 0
+      __ and_imm(treg2, divisor - 1, treg1, true);
+    }
+    if (code == lir_idiv) {
+      __ z_agr(dreg, treg2);
+      __ z_srag(dreg, dreg, log_divisor);
+    } else {
+      assert(code == lir_irem, "check");
+      __ z_agr(treg2, dreg);
+      __ and_imm(treg2, ~(divisor - 1), treg1, true);
+      __ z_sgr(dreg, treg2);
+    }
+    return;
+  }
+
+  // Divisor is not a power of 2 constant.
+  Register rreg = right->as_register();
+  Register treg = temp->as_register();
+  assert(right->is_single_cpu(), "right must be register");
+  assert(lreg == Z_R11, "see divInOpr()");
+  assert(rreg != lreg, "right register must not be same as left register");
+  assert((code == lir_idiv && dreg == Z_R11 && treg == Z_R10)
+      || (code == lir_irem && dreg == Z_R10 && treg == Z_R11), "see divInOpr(), divOutOpr(), remOutOpr()");
+
+  Register R1 = lreg->predecessor();
+  Register R2 = rreg;
+  __ move_reg_if_needed(lreg, T_LONG, lreg, T_INT); // sign extend
+  if (ImplicitDiv0Checks) {
+    // See the NOTE(review) in the 64 bit path above.
+    add_debug_info_for_div0(__ offset(), info);
+  }
+  __ z_dsgfr(R1, R2);
+}
+
+// Emit an explicit athrow: capture the current PC into Z_EXC_PC, record
+// call info, and call the Runtime1 exception handler stub. The exception
+// oop is expected in Z_EXC_OOP (asserted).
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  assert(exceptionOop->as_register() == Z_EXC_OOP, "should match");
+  assert(exceptionPC->as_register() == Z_EXC_PC, "should match");
+
+  // Exception object is not added to oop map by LinearScan
+  // (LinearScan assumes that no oops are in fixed registers).
+  info->add_register_oop(exceptionOop);
+
+  // Reuse the debug info from the safepoint poll for the throw op itself.
+  __ get_PC(Z_EXC_PC);
+  add_call_info(__ offset(), info); // for exception handler
+  // The nofpu variant skips FPU state saving when the method has no FPU code.
+  address stub = Runtime1::entry_for (compilation()->has_fpu_code() ? Runtime1::handle_exception_id
+                                                                    : Runtime1::handle_exception_nofpu_id);
+  emit_call_c(stub);
+}
+
+// Emit the unwind path: jump to the method's unwind handler with the
+// exception oop already in Z_EXC_OOP (asserted).
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  assert(exceptionOop->as_register() == Z_EXC_OOP, "should match");
+
+  __ branch_optimized(Assembler::bcondAlways, _unwind_handler_entry);
+}
+
+// Emit the LIR arraycopy operation. Depending on what is statically known,
+// this either calls the generic arraycopy stub / Runtime1::arraycopy
+// (unknown element type), performs inline type checks plus a per-element
+// checkcast stub call for object arrays of unrelated types, or dispatches
+// directly to a type-specialized copy stub. Failing checks branch to the
+// slow-path stub (op->stub()).
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+  ciArrayKlass* default_type = op->expected_type();
+  Register src = op->src()->as_register();
+  Register dst = op->dst()->as_register();
+  Register src_pos = op->src_pos()->as_register();
+  Register dst_pos = op->dst_pos()->as_register();
+  Register length  = op->length()->as_register();
+  Register tmp = op->tmp()->as_register();
+
+  CodeStub* stub = op->stub();
+  int flags = op->flags();
+  BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+  if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+
+  // If we don't know anything, just go through the generic arraycopy.
+  if (default_type == NULL) {
+    Label done;
+    // Save outgoing arguments in callee saved registers (C convention) in case
+    // a call to System.arraycopy is needed.
+    Register callee_saved_src     = Z_R10;
+    Register callee_saved_src_pos = Z_R11;
+    Register callee_saved_dst     = Z_R12;
+    Register callee_saved_dst_pos = Z_R13;
+    Register callee_saved_length  = Z_ARG5; // Z_ARG5 == Z_R6 is callee saved.
+
+    __ lgr_if_needed(callee_saved_src, src);
+    __ lgr_if_needed(callee_saved_src_pos, src_pos);
+    __ lgr_if_needed(callee_saved_dst, dst);
+    __ lgr_if_needed(callee_saved_dst_pos, dst_pos);
+    __ lgr_if_needed(callee_saved_length, length);
+
+    // C function requires 64 bit values.
+    __ z_lgfr(src_pos, src_pos);
+    __ z_lgfr(dst_pos, dst_pos);
+    __ z_lgfr(length, length);
+
+    address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
+
+    address copyfunc_addr = StubRoutines::generic_arraycopy();
+
+    // Pass arguments: may push as this is not a safepoint; SP must be fix at each safepoint.
+
+    // The arguments are in the corresponding registers.
+    assert(Z_ARG1 == src,     "assumption");
+    assert(Z_ARG2 == src_pos, "assumption");
+    assert(Z_ARG3 == dst,     "assumption");
+    assert(Z_ARG4 == dst_pos, "assumption");
+    assert(Z_ARG5 == length,  "assumption");
+    if (copyfunc_addr == NULL) { // Use C version if stub was not generated.
+      emit_call_c(C_entry);
+    } else {
+#ifndef PRODUCT
+      if (PrintC1Statistics) {
+        __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_generic_arraycopystub_cnt);
+        __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+      }
+#endif
+      emit_call_c(copyfunc_addr);
+    }
+    CHECK_BAILOUT();
+
+    // Z_RET == 0 means the whole copy succeeded.
+    __ compare32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondEqual, *stub->continuation());
+
+    // On partial copy the stub apparently returns ~(elements copied) in
+    // Z_RET; recover the count (confirm against generic_arraycopy stub).
+    if (copyfunc_addr != NULL) {
+      __ z_lgr(tmp, Z_RET);
+      __ z_xilf(tmp, -1);
+    }
+
+    // Restore values from callee saved registers so they are where the stub
+    // expects them.
+    __ lgr_if_needed(src, callee_saved_src);
+    __ lgr_if_needed(src_pos, callee_saved_src_pos);
+    __ lgr_if_needed(dst, callee_saved_dst);
+    __ lgr_if_needed(dst_pos, callee_saved_dst_pos);
+    __ lgr_if_needed(length, callee_saved_length);
+
+    // Advance past the already-copied prefix before retrying via the stub.
+    if (copyfunc_addr != NULL) {
+      __ z_sr(length, tmp);
+      __ z_ar(src_pos, tmp);
+      __ z_ar(dst_pos, tmp);
+    }
+    __ branch_optimized(Assembler::bcondAlways, *stub->entry());
+
+    __ bind(*stub->continuation());
+    return;
+  }
+
+  assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point");
+
+  // Element size -> index shift amount.
+  int elem_size = type2aelembytes(basic_type);
+  int shift_amount;
+
+  switch (elem_size) {
+    case 1 :
+      shift_amount = 0;
+      break;
+    case 2 :
+      shift_amount = 1;
+      break;
+    case 4 :
+      shift_amount = 2;
+      break;
+    case 8 :
+      shift_amount = 3;
+      break;
+    default:
+      shift_amount = -1;
+      ShouldNotReachHere();
+  }
+
+  Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
+  Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
+  Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
+  Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());
+
+  // Length and pos's are all sign extended at this point on 64bit.
+
+  // test for NULL
+  if (flags & LIR_OpArrayCopy::src_null_check) {
+    __ compareU64_and_branch(src, (intptr_t)0, Assembler::bcondZero, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_null_check) {
+    __ compareU64_and_branch(dst, (intptr_t)0, Assembler::bcondZero, *stub->entry());
+  }
+
+  // Check if negative.
+  if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
+    __ compare32_and_branch(src_pos, (intptr_t)0, Assembler::bcondLow, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
+    __ compare32_and_branch(dst_pos, (intptr_t)0, Assembler::bcondLow, *stub->entry());
+  }
+
+  // If the compiler was not able to prove that exact type of the source or the destination
+  // of the arraycopy is an array type, check at runtime if the source or the destination is
+  // an instance type.
+  if (flags & LIR_OpArrayCopy::type_check) {
+    assert(Klass::_lh_neutral_value == 0, "or replace z_lt instructions");
+
+    // Array klasses have a negative layout helper; instance types do not.
+    if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+      __ load_klass(tmp, dst);
+      __ z_lt(tmp, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+      __ branch_optimized(Assembler::bcondNotLow, *stub->entry());
+    }
+
+    if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+      __ load_klass(tmp, src);
+      __ z_lt(tmp, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+      __ branch_optimized(Assembler::bcondNotLow, *stub->entry());
+    }
+  }
+
+  // Range checks: pos + length must not exceed the array length.
+  if (flags & LIR_OpArrayCopy::src_range_check) {
+    __ z_la(tmp, Address(src_pos, length));
+    __ z_cl(tmp, src_length_addr);
+    __ branch_optimized(Assembler::bcondHigh, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_range_check) {
+    __ z_la(tmp, Address(dst_pos, length));
+    __ z_cl(tmp, dst_length_addr);
+    __ branch_optimized(Assembler::bcondHigh, *stub->entry());
+  }
+
+  if (flags & LIR_OpArrayCopy::length_positive_check) {
+    __ z_ltr(length, length);
+    __ branch_optimized(Assembler::bcondNegative, *stub->entry());
+  }
+
+  // Stubs require 64 bit values.
+  __ z_lgfr(src_pos, src_pos); // int -> long
+  __ z_lgfr(dst_pos, dst_pos); // int -> long
+  __ z_lgfr(length, length);   // int -> long
+
+  if (flags & LIR_OpArrayCopy::type_check) {
+    // We don't know the array types are compatible.
+    if (basic_type != T_OBJECT) {
+      // Simple test for basic type arrays.
+      if (UseCompressedClassPointers) {
+        __ z_l(tmp, src_klass_addr);
+        __ z_c(tmp, dst_klass_addr);
+      } else {
+        __ z_lg(tmp, src_klass_addr);
+        __ z_cg(tmp, dst_klass_addr);
+      }
+      __ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
+    } else {
+      // For object arrays, if src is a sub class of dst then we can
+      // safely do the copy.
+      NearLabel cont, slow;
+      Register src_klass = Z_R1_scratch;
+      Register dst_klass = Z_R10;
+
+      __ load_klass(src_klass, src);
+      __ load_klass(dst_klass, dst);
+
+      __ check_klass_subtype_fast_path(src_klass, dst_klass, tmp, &cont, &slow, NULL);
+
+      store_parameter(src_klass, 0); // sub
+      store_parameter(dst_klass, 1); // super
+      emit_call_c(Runtime1::entry_for (Runtime1::slow_subtype_check_id));
+      CHECK_BAILOUT();
+      // Sets condition code 0 for match (2 otherwise).
+      __ branch_optimized(Assembler::bcondEqual, cont);
+
+      __ bind(slow);
+
+      address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+      if (copyfunc_addr != NULL) { // use stub if available
+        // Src is not a sub class of dst so we have to do a
+        // per-element check.
+
+        int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
+        if ((flags & mask) != mask) {
+          // Check that at least both of them object arrays.
+          assert(flags & mask, "one of the two should be known to be an object array");
+
+          if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+            __ load_klass(tmp, src);
+          } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+            __ load_klass(tmp, dst);
+          }
+          Address klass_lh_addr(tmp, Klass::layout_helper_offset());
+          jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+          __ load_const_optimized(Z_R1_scratch, objArray_lh);
+          __ z_c(Z_R1_scratch, klass_lh_addr);
+          __ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
+        }
+
+        // Save outgoing arguments in callee saved registers (C convention) in case
+        // a call to System.arraycopy is needed.
+        Register callee_saved_src     = Z_R10;
+        Register callee_saved_src_pos = Z_R11;
+        Register callee_saved_dst     = Z_R12;
+        Register callee_saved_dst_pos = Z_R13;
+        Register callee_saved_length  = Z_ARG5; // Z_ARG5 == Z_R6 is callee saved.
+
+        __ lgr_if_needed(callee_saved_src, src);
+        __ lgr_if_needed(callee_saved_src_pos, src_pos);
+        __ lgr_if_needed(callee_saved_dst, dst);
+        __ lgr_if_needed(callee_saved_dst_pos, dst_pos);
+        __ lgr_if_needed(callee_saved_length, length);
+
+        __ z_llgfr(length, length); // Higher 32bits must be null.
+
+        __ z_sllg(Z_ARG1, src_pos, shift_amount); // index -> byte offset
+        __ z_sllg(Z_ARG2, dst_pos, shift_amount); // index -> byte offset
+
+        __ z_la(Z_ARG1, Address(src, Z_ARG1, arrayOopDesc::base_offset_in_bytes(basic_type)));
+        assert_different_registers(Z_ARG1, dst, dst_pos, length);
+        __ z_la(Z_ARG2, Address(dst, Z_ARG2, arrayOopDesc::base_offset_in_bytes(basic_type)));
+        assert_different_registers(Z_ARG2, dst, length);
+
+        __ z_lgr(Z_ARG3, length);
+        assert_different_registers(Z_ARG3, dst);
+
+        // checkcast stub extra args: element klass and its super check offset.
+        __ load_klass(Z_ARG5, dst);
+        __ z_lg(Z_ARG5, Address(Z_ARG5, ObjArrayKlass::element_klass_offset()));
+        __ z_lg(Z_ARG4, Address(Z_ARG5, Klass::super_check_offset_offset()));
+        emit_call_c(copyfunc_addr);
+        CHECK_BAILOUT();
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          NearLabel failed;
+          __ compareU32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondNotEqual, failed);
+          __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_checkcast_cnt);
+          __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+          __ bind(failed);
+        }
+#endif
+
+        __ compareU32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondEqual, *stub->continuation());
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt);
+          __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+        }
+#endif
+
+        // On partial copy the stub apparently returns ~(elements copied);
+        // recover the count (confirm against checkcast_arraycopy stub).
+        __ z_lgr(tmp, Z_RET);
+        __ z_xilf(tmp, -1);
+
+        // Restore previously spilled arguments
+        __ lgr_if_needed(src, callee_saved_src);
+        __ lgr_if_needed(src_pos, callee_saved_src_pos);
+        __ lgr_if_needed(dst, callee_saved_dst);
+        __ lgr_if_needed(dst_pos, callee_saved_dst_pos);
+        __ lgr_if_needed(length, callee_saved_length);
+
+        // Advance past the already-copied prefix before the slow path.
+        __ z_sr(length, tmp);
+        __ z_ar(src_pos, tmp);
+        __ z_ar(dst_pos, tmp);
+      }
+
+      __ branch_optimized(Assembler::bcondAlways, *stub->entry());
+
+      __ bind(cont);
+    }
+  }
+
+#ifdef ASSERT
+  if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+    // Sanity check the known type with the incoming class. For the
+    // primitive case the types must match exactly with src.klass and
+    // dst.klass each exactly matching the default type. For the
+    // object array case, if no type check is needed then either the
+    // dst type is exactly the expected type and the src type is a
+    // subtype which we can't check or src is the same array as dst
+    // but not necessarily exactly of type default_type.
+    NearLabel known_ok, halt;
+    metadata2reg(default_type->constant_encoding(), tmp);
+    if (UseCompressedClassPointers) {
+      __ encode_klass_not_null(tmp);
+    }
+
+    if (basic_type != T_OBJECT) {
+      if (UseCompressedClassPointers)         { __ z_c (tmp, dst_klass_addr); }
+      else                                    { __ z_cg(tmp, dst_klass_addr); }
+      __ branch_optimized(Assembler::bcondNotEqual, halt);
+      if (UseCompressedClassPointers)         { __ z_c (tmp, src_klass_addr); }
+      else                                    { __ z_cg(tmp, src_klass_addr); }
+      __ branch_optimized(Assembler::bcondEqual, known_ok);
+    } else {
+      if (UseCompressedClassPointers)         { __ z_c (tmp, dst_klass_addr); }
+      else                                    { __ z_cg(tmp, dst_klass_addr); }
+      __ branch_optimized(Assembler::bcondEqual, known_ok);
+      __ compareU64_and_branch(src, dst, Assembler::bcondEqual, known_ok);
+    }
+    __ bind(halt);
+    __ stop("incorrect type information in arraycopy");
+    __ bind(known_ok);
+  }
+#endif
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    __ load_const_optimized(Z_R1_scratch, Runtime1::arraycopy_count_address(basic_type));
+    __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+  }
+#endif
+
+  // Fast path: call the type-specialized copy stub directly.
+  __ z_sllg(tmp, src_pos, shift_amount); // index -> byte offset
+  __ z_sllg(Z_R1_scratch, dst_pos, shift_amount); // index -> byte offset
+
+  assert_different_registers(Z_ARG1, dst, dst_pos, length);
+  __ z_la(Z_ARG1, Address(src, tmp, arrayOopDesc::base_offset_in_bytes(basic_type)));
+  assert_different_registers(Z_ARG2, length);
+  __ z_la(Z_ARG2, Address(dst, Z_R1_scratch, arrayOopDesc::base_offset_in_bytes(basic_type)));
+  __ lgr_if_needed(Z_ARG3, length);
+
+  bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
+  bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
+  const char *name;
+  address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
+  __ call_VM_leaf(entry);
+
+  __ bind(*stub->continuation());
+}
+
+// Emit a shift where the shift count comes from a register (tmp is unused
+// on s390). The z two-operand right-shift forms SRA/SRL shift the first
+// operand in place, hence the left == dest requirement below.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  if (dest->is_single_cpu()) {
+    if (left->type() == T_OBJECT) {
+      // Object pointers are 64-bit: use the 3-operand 64-bit shift forms.
+      switch (code) {
+        case lir_shl:  __ z_sllg (dest->as_register(), left->as_register(), 0, count->as_register()); break;
+        case lir_shr:  __ z_srag (dest->as_register(), left->as_register(), 0, count->as_register()); break;
+        case lir_ushr: __ z_srlg (dest->as_register(), left->as_register(), 0, count->as_register()); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      assert(code == lir_shl || left == dest, "left and dest must be equal for 2 operand form right shifts");
+      // Java int shifts: mask the count to 0..31 as required by the JLS.
+      Register masked_count = Z_R1_scratch;
+      __ z_lr(masked_count, count->as_register());
+      __ z_nill(masked_count, 31);
+      switch (code) {
+        // SLLG is the 64-bit 3-operand form; for ints only the low 32 bits
+        // of dest are significant, so this is safe for lir_shl.
+        case lir_shl:  __ z_sllg (dest->as_register(), left->as_register(), 0, masked_count); break;
+        case lir_shr:  __ z_sra  (dest->as_register(), 0, masked_count); break;
+        case lir_ushr: __ z_srl  (dest->as_register(), 0, masked_count); break;
+        default: ShouldNotReachHere();
+      }
+    }
+  } else {
+    // Long shifts: 64-bit 3-operand forms, count taken from the register.
+    switch (code) {
+      case lir_shl:  __ z_sllg (dest->as_register_lo(), left->as_register_lo(), 0, count->as_register()); break;
+      case lir_shr:  __ z_srag (dest->as_register_lo(), left->as_register_lo(), 0, count->as_register()); break;
+      case lir_ushr: __ z_srlg (dest->as_register_lo(), left->as_register_lo(), 0, count->as_register()); break;
+      default: ShouldNotReachHere();
+    }
+  }
+}
+
+// Emit a shift by a compile-time constant count. The count is masked to
+// the operand width per the Java spec before emission.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+  if (left->type() == T_OBJECT) {
+    count = count & 63;  // Object pointers are 64-bit; at most 63 bit positions.
+    Register l = left->as_register();
+    Register d = dest->as_register_lo();
+    switch (code) {
+      case lir_shl:  __ z_sllg (d, l, count); break;
+      case lir_shr:  __ z_srag (d, l, count); break;
+      case lir_ushr: __ z_srlg (d, l, count); break;
+      default: ShouldNotReachHere();
+    }
+    return;
+  }
+  if (dest->is_single_cpu()) {
+    // Two-operand SRA/SRL shift in place, hence left == dest for right shifts.
+    assert(code == lir_shl || left == dest, "left and dest must be equal for 2 operand form right shifts");
+    count = count & 0x1F; // Java spec
+    switch (code) {
+      case lir_shl:  __ z_sllg (dest->as_register(), left->as_register(), count); break;
+      case lir_shr:  __ z_sra  (dest->as_register(), count); break;
+      case lir_ushr: __ z_srl  (dest->as_register(), count); break;
+      default: ShouldNotReachHere();
+    }
+  } else if (dest->is_double_cpu()) {
+    count = count & 63; // Java spec
+    Register l = left->as_pointer_register();
+    Register d = dest->as_pointer_register();
+    switch (code) {
+      case lir_shl:  __ z_sllg (d, l, count); break;
+      case lir_shr:  __ z_srag (d, l, count); break;
+      case lir_ushr: __ z_srlg (d, l, count); break;
+      default: ShouldNotReachHere();
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Allocate an instance object. Branches to the slow-path stub when the
+// klass is not fully initialized or when inline allocation fails.
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+  if (op->init_check()) {
+    // Make sure klass is initialized & doesn't have finalizer.
+    const int state_offset = in_bytes(InstanceKlass::init_state_offset());
+    Register iklass = op->klass()->as_register();
+    add_debug_info_for_null_check_here(op->stub()->info());
+    // CLI only has a 12-bit unsigned displacement; use CLIY (20-bit) otherwise.
+    if (Immediate::is_uimm12(state_offset)) {
+      __ z_cli(state_offset, iklass, InstanceKlass::fully_initialized);
+    } else {
+      __ z_cliy(state_offset, iklass, InstanceKlass::fully_initialized);
+    }
+    __ branch_optimized(Assembler::bcondNotEqual, *op->stub()->entry()); // Use long branch, because slow_case might be far.
+  }
+  __ allocate_object(op->obj()->as_register(),
+                     op->tmp1()->as_register(),
+                     op->tmp2()->as_register(),
+                     op->header_size(),
+                     op->object_size(),
+                     op->klass()->as_register(),
+                     *op->stub()->entry());
+  // Slow path returns here; the result must be a valid oop.
+  __ bind(*op->stub()->continuation());
+  __ verify_oop(op->obj()->as_register());
+}
+
+// Allocate an array. The length register is sign-extended to 64 bits first.
+// Goes straight to the slow-path stub when fast allocation is disabled for
+// this element type (UseSlowPath / UseFastNewObjectArray / UseFastNewTypeArray).
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+  Register len = op->len()->as_register();
+  __ move_reg_if_needed(len, T_LONG, len, T_INT); // sign extend
+
+  if (UseSlowPath ||
+      (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
+      (!UseFastNewTypeArray   && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
+    __ z_brul(*op->stub()->entry());
+  } else {
+    __ allocate_array(op->obj()->as_register(),
+                      op->len()->as_register(),
+                      op->tmp1()->as_register(),
+                      op->tmp2()->as_register(),
+                      arrayOopDesc::header_size(op->type()),
+                      type2aelembytes(op->type()),
+                      op->klass()->as_register(),
+                      *op->stub()->entry());
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+// Record the receiver klass in the ReceiverTypeData rows of the MDO.
+// Pass 1: look for a row already holding recv and bump its counter.
+// Pass 2: claim the first empty row for recv with an initial count.
+// Falls through when every row is occupied by a different type
+// (the caller then records the polymorphic case).
+void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data,
+                                        Register recv, Register tmp1, Label* update_done) {
+  uint i;
+  for (i = 0; i < VirtualCallData::row_limit(); i++) {
+    Label next_test;
+    // See if the receiver is receiver[n].
+    Address receiver_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
+    __ z_cg(recv, receiver_addr);
+    __ z_brne(next_test);
+    Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
+    __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1);
+    __ branch_optimized(Assembler::bcondAlways, *update_done);
+    __ bind(next_test);
+  }
+
+  // Didn't find receiver; find next empty slot and fill it in.
+  for (i = 0; i < VirtualCallData::row_limit(); i++) {
+    Label next_test;
+    Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
+    // Load-and-test: a zero row is an empty slot we can claim.
+    __ z_ltg(Z_R0_scratch, recv_addr);
+    __ z_brne(next_test);
+    __ z_stg(recv, recv_addr);
+    __ load_const_optimized(tmp1, DataLayout::counter_increment);
+    __ z_stg(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)), mdo);
+    __ branch_optimized(Assembler::bcondAlways, *update_done);
+    __ bind(next_test);
+  }
+}
+
+// NOTE(review): shared-interface hook for computing an MDO offset bias;
+// apparently not needed by the s390 code paths in this file — confirm no
+// platform-independent caller reaches it before relying on that.
+void LIR_Assembler::setup_md_access(ciMethod* method, int bci,
+                                    ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) {
+  Unimplemented();
+}
+
+// Spill a register argument into the reserved (outgoing) argument area at
+// the top of the current frame, at 8-byte slot param_num. Used to pass
+// parameters to runtime stubs (see emit_typecheck_helper).
+void LIR_Assembler::store_parameter(Register r, int param_num) {
+  assert(param_num >= 0, "invalid num");
+  int offset_in_bytes = param_num * BytesPerWord + FrameMap::first_available_sp_in_frame;
+  assert(offset_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ z_stg(r, offset_in_bytes, Z_SP);
+}
+
+// Same as above for an immediate: store constant c into argument slot param_num.
+void LIR_Assembler::store_parameter(jint c, int param_num) {
+  assert(param_num >= 0, "invalid num");
+  int offset_in_bytes = param_num * BytesPerWord + FrameMap::first_available_sp_in_frame;
+  assert(offset_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ store_const(Address(Z_SP, offset_in_bytes), c, Z_R1_scratch, true);
+}
+
+// Common type-check code for checkcast/instanceof. Control flow contract:
+// branches to *obj_is_null for a null object, to *failure when the check
+// fails, and to *success (or falls through to it) when it succeeds. When
+// profiling is enabled, the MDO is updated on null, success and failure
+// paths before taking the caller-provided exits.
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
+  // We always need a stub for the failure case.
+  CodeStub* stub = op->stub();
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register dst = op->result_opr()->as_register();
+  Register Rtmp1 = Z_R1_scratch;
+  ciKlass* k = op->klass();
+
+  assert(!op->tmp3()->is_valid(), "tmp3's not needed");
+
+  // Check if it needs to be profiled.
+  ciMethodData* md = NULL;
+  ciProfileData* data = NULL;
+
+  if (op->should_profile()) {
+    ciMethod* method = op->profiled_method();
+    assert(method != NULL, "Should have method");
+    int bci = op->profiled_bci();
+    md = method->method_data_or_null();
+    assert(md != NULL, "Sanity");
+    data = md->bci_to_data(bci);
+    assert(data != NULL,                "need data for type check");
+    assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+  }
+
+  // Temp operands do not overlap with inputs, if this is their last
+  // use (end of range is exclusive), so a register conflict is possible.
+  if (obj == k_RInfo) {
+    k_RInfo = dst;
+  } else if (obj == klass_RInfo) {
+    klass_RInfo = dst;
+  }
+  assert_different_registers(obj, k_RInfo, klass_RInfo);
+
+  if (op->should_profile()) {
+    NearLabel not_null;
+    __ compareU64_and_branch(obj, (intptr_t) 0, Assembler::bcondNotEqual, not_null);
+    // Object is null; update MDO and exit.
+    Register mdo = klass_RInfo;
+    metadata2reg(md->constant_encoding(), mdo);
+    // Set the null_seen flag in the MDO header byte of this profile slot.
+    Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
+    int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+    __ or2mem_8(data_addr, header_bits);
+    __ branch_optimized(Assembler::bcondAlways, *obj_is_null);
+    __ bind(not_null);
+  } else {
+    __ compareU64_and_branch(obj, (intptr_t) 0, Assembler::bcondEqual, *obj_is_null);
+  }
+
+  // When profiling, detour through local labels so the MDO can be updated
+  // before branching to the caller's success/failure exits.
+  NearLabel profile_cast_failure, profile_cast_success;
+  Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
+  Label *success_target = op->should_profile() ? &profile_cast_success : success;
+
+  // Patching may screw with our temporaries,
+  // so let's do it before loading the class.
+  if (k->is_loaded()) {
+    metadata2reg(k->constant_encoding(), k_RInfo);
+  } else {
+    klass2reg_with_patching(k_RInfo, op->info_for_patch());
+  }
+  assert(obj != k_RInfo, "must be different");
+
+  __ verify_oop(obj);
+
+  // Get object class.
+  // Not a safepoint as obj null check happens earlier.
+  if (op->fast_check()) {
+    // Fast check: just compare the object's klass with the expected klass.
+    if (UseCompressedClassPointers) {
+      __ load_klass(klass_RInfo, obj);
+      __ compareU64_and_branch(k_RInfo, klass_RInfo, Assembler::bcondNotEqual, *failure_target);
+    } else {
+      __ z_cg(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
+      __ branch_optimized(Assembler::bcondNotEqual, *failure_target);
+    }
+    // Successful cast, fall through to profile or jump.
+  } else {
+    // Full subtype check; the slow path is needed when the super-check
+    // offset is not statically known to be a direct hit.
+    bool need_slow_path = !k->is_loaded() ||
+                          ((int) k->super_check_offset() == in_bytes(Klass::secondary_super_cache_offset()));
+    intptr_t super_check_offset = k->is_loaded() ? k->super_check_offset() : -1L;
+    __ load_klass(klass_RInfo, obj);
+    // Perform the fast part of the checking logic.
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1,
+                                     (need_slow_path ? success_target : NULL),
+                                     failure_target, NULL,
+                                     RegisterOrConstant(super_check_offset));
+    if (need_slow_path) {
+      // Call out-of-line instance of __ check_klass_subtype_slow_path(...):
+      address a = Runtime1::entry_for (Runtime1::slow_subtype_check_id);
+      store_parameter(klass_RInfo, 0); // sub
+      store_parameter(k_RInfo, 1);     // super
+      emit_call_c(a); // Sets condition code 0 for match (2 otherwise).
+      CHECK_BAILOUT();
+      __ branch_optimized(Assembler::bcondNotEqual, *failure_target);
+      // Fall through to success case.
+    }
+  }
+
+  if (op->should_profile()) {
+    Register mdo = klass_RInfo, recv = k_RInfo;
+    assert_different_registers(obj, mdo, recv);
+    // Success: record the receiver type, then take the caller's success exit.
+    __ bind(profile_cast_success);
+    metadata2reg(md->constant_encoding(), mdo);
+    __ load_klass(recv, obj);
+    type_profile_helper(mdo, md, data, recv, Rtmp1, success);
+    __ branch_optimized(Assembler::bcondAlways, *success);
+
+    // Failure: decrement the call-site counter, then take the failure exit.
+    __ bind(profile_cast_failure);
+    metadata2reg(md->constant_encoding(), mdo);
+    __ add2mem_64(Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())), -(int)DataLayout::counter_increment, Rtmp1);
+    __ branch_optimized(Assembler::bcondAlways, *failure);
+  } else {
+    __ branch_optimized(Assembler::bcondAlways, *success);
+  }
+}
+
+// Dispatch a LIR type-check op: inline array store check (lir_store_check),
+// or checkcast/instanceof via emit_typecheck_helper.
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+  LIR_Code code = op->code();
+  if (code == lir_store_check) {
+    Register value = op->object()->as_register();
+    Register array = op->array()->as_register();
+    Register k_RInfo = op->tmp1()->as_register();
+    Register klass_RInfo = op->tmp2()->as_register();
+    Register Rtmp1 = Z_R1_scratch;
+
+    CodeStub* stub = op->stub();
+
+    // Check if it needs to be profiled.
+    ciMethodData* md = NULL;
+    ciProfileData* data = NULL;
+
+    assert_different_registers(value, k_RInfo, klass_RInfo);
+
+    if (op->should_profile()) {
+      ciMethod* method = op->profiled_method();
+      assert(method != NULL, "Should have method");
+      int bci = op->profiled_bci();
+      md = method->method_data_or_null();
+      assert(md != NULL, "Sanity");
+      data = md->bci_to_data(bci);
+      assert(data != NULL,                "need data for type check");
+      assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+    }
+    // When profiling, detour through local labels to update the MDO first.
+    NearLabel profile_cast_success, profile_cast_failure, done;
+    Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+    Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
+    if (op->should_profile()) {
+      NearLabel not_null;
+      __ compareU64_and_branch(value, (intptr_t) 0, Assembler::bcondNotEqual, not_null);
+      // Object is null; update MDO and exit.
+      Register mdo = klass_RInfo;
+      metadata2reg(md->constant_encoding(), mdo);
+      // Set the null_seen flag in the MDO header byte of this profile slot.
+      Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
+      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+      __ or2mem_8(data_addr, header_bits);
+      __ branch_optimized(Assembler::bcondAlways, done);
+      __ bind(not_null);
+    } else {
+      // Storing null is always legal; nothing to check.
+      __ compareU64_and_branch(value, (intptr_t) 0, Assembler::bcondEqual, done);
+    }
+
+    add_debug_info_for_null_check_here(op->info_for_exception());
+    __ load_klass(k_RInfo, array);
+    __ load_klass(klass_RInfo, value);
+
+    // Get instance klass (it's already uncompressed).
+    __ z_lg(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
+    // Perform the fast part of the checking logic.
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+    // Call out-of-line instance of __ check_klass_subtype_slow_path(...):
+    address a = Runtime1::entry_for (Runtime1::slow_subtype_check_id);
+    store_parameter(klass_RInfo, 0); // sub
+    store_parameter(k_RInfo, 1);     // super
+    emit_call_c(a); // Sets condition code 0 for match (2 otherwise).
+    CHECK_BAILOUT();
+    __ branch_optimized(Assembler::bcondNotEqual, *failure_target);
+    // Fall through to success case.
+
+    if (op->should_profile()) {
+      Register mdo = klass_RInfo, recv = k_RInfo;
+      assert_different_registers(value, mdo, recv);
+      // Success: record the stored value's type in the MDO.
+      __ bind(profile_cast_success);
+      metadata2reg(md->constant_encoding(), mdo);
+      __ load_klass(recv, value);
+      type_profile_helper(mdo, md, data, recv, Rtmp1, &done);
+      __ branch_optimized(Assembler::bcondAlways, done);
+
+      // Failure: decrement the counter, then go to the ArrayStoreException stub.
+      __ bind(profile_cast_failure);
+      metadata2reg(md->constant_encoding(), mdo);
+      __ add2mem_64(Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())), -(int)DataLayout::counter_increment, Rtmp1);
+      __ branch_optimized(Assembler::bcondAlways, *stub->entry());
+    }
+
+    __ bind(done);
+  } else {
+    if (code == lir_checkcast) {
+      Register obj = op->object()->as_register();
+      Register dst = op->result_opr()->as_register();
+      NearLabel success;
+      // checkcast: null passes; failure goes to the exception stub.
+      emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
+      __ bind(success);
+      __ lgr_if_needed(dst, obj);
+    } else {
+      if (code == lir_instanceof) {
+        Register obj = op->object()->as_register();
+        Register dst = op->result_opr()->as_register();
+        NearLabel success, failure, done;
+        // instanceof: null counts as failure; result is 0 or 1 in dst.
+        emit_typecheck_helper(op, &success, &failure, &failure);
+        __ bind(failure);
+        __ clear_reg(dst);
+        __ branch_optimized(Assembler::bcondAlways, done);
+        __ bind(success);
+        __ load_const_optimized(dst, 1);
+        __ bind(done);
+      } else {
+        ShouldNotReachHere();
+      }
+    }
+  }
+}
+
+// Emit compare-and-swap. The compare value is first copied into a scratch
+// register because CS/CSG overwrite the first operand with the memory value
+// on mismatch; the outcome is reported via the condition code set by CS/CSG.
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+  Register addr = op->addr()->as_pointer_register();
+  Register t1_cmp = Z_R1_scratch;
+  if (op->code() == lir_cas_long) {
+    assert(VM_Version::supports_cx8(), "wrong machine");
+    Register cmp_value_lo = op->cmp_value()->as_register_lo();
+    Register new_value_lo = op->new_value()->as_register_lo();
+    __ z_lgr(t1_cmp, cmp_value_lo);
+    // Perform the compare and swap operation.
+    __ z_csg(t1_cmp, new_value_lo, 0, addr);
+  } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj) {
+    Register cmp_value = op->cmp_value()->as_register();
+    Register new_value = op->new_value()->as_register();
+    if (op->code() == lir_cas_obj) {
+      if (UseCompressedOops) {
+        // Compress both oops into temps, then do a 32-bit CAS on the
+        // narrow-oop field.
+                 t1_cmp = op->tmp1()->as_register();
+        Register t2_new = op->tmp2()->as_register();
+        assert_different_registers(cmp_value, new_value, addr, t1_cmp, t2_new);
+        __ oop_encoder(t1_cmp, cmp_value, true /*maybe null*/);
+        __ oop_encoder(t2_new, new_value, true /*maybe null*/);
+        __ z_cs(t1_cmp, t2_new, 0, addr);
+      } else {
+        // Uncompressed oops: 64-bit CAS.
+        __ z_lgr(t1_cmp, cmp_value);
+        __ z_csg(t1_cmp, new_value, 0, addr);
+      }
+    } else {
+      // 32-bit integer CAS.
+      __ z_lr(t1_cmp, cmp_value);
+      __ z_cs(t1_cmp, new_value, 0, addr);
+    }
+  } else {
+    ShouldNotReachHere(); // new lir_cas_??
+  }
+}
+
+// The following callbacks exist only for x86's x87 FPU-stack handling or
+// are unused by the shared LIR assembler on this platform; s390 has flat
+// FP registers, so none of them may ever be reached.
+void LIR_Assembler::set_24bit_FPU() {
+  ShouldNotCallThis(); // x86 only
+}
+
+void LIR_Assembler::reset_FPU() {
+  ShouldNotCallThis(); // x86 only
+}
+
+void LIR_Assembler::breakpoint() {
+  Unimplemented();
+  //  __ breakpoint_trap();
+}
+
+void LIR_Assembler::push(LIR_Opr opr) {
+  ShouldNotCallThis(); // unused
+}
+
+void LIR_Assembler::pop(LIR_Opr opr) {
+  ShouldNotCallThis(); // unused
+}
+
+// Compute the address of the in-frame monitor slot monitor_no into dst_opr.
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst_opr) {
+  Address addr = frame_map()->address_for_monitor_lock(monitor_no);
+  __ add2reg(dst_opr->as_register(), addr.disp(), addr.base());
+}
+
+// Emit monitorenter/monitorexit. With -UseFastLocking everything goes
+// through the slow-path stub; otherwise the inlined fast path is emitted
+// and the stub is only the contended/slow case.
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+  Register obj = op->obj_opr()->as_register();  // May not be an oop.
+  Register hdr = op->hdr_opr()->as_register();
+  Register lock = op->lock_opr()->as_register();
+  if (!UseFastLocking) {
+    __ branch_optimized(Assembler::bcondAlways, *op->stub()->entry());
+  } else if (op->code() == lir_lock) {
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    // Add debug info for NullPointerException only if one is possible.
+    if (op->info() != NULL) {
+      add_debug_info_for_null_check_here(op->info());
+    }
+    __ lock_object(hdr, obj, lock, *op->stub()->entry());
+    // done
+  } else if (op->code() == lir_unlock) {
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    __ unlock_object(hdr, obj, lock, *op->stub()->entry());
+  } else {
+    ShouldNotReachHere();
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+// Update the MDO counters for a call site. For invokevirtual/invokeinterface
+// with C1ProfileVirtualCalls, additionally record receiver types: statically
+// (patching the MDO at compile time) when the receiver type is known, or
+// dynamically via type_profile_helper otherwise. All other call types just
+// bump the plain invocation counter.
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+  ciMethod* method = op->profiled_method();
+  int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
+
+  // Update counter for all call types.
+  ciMethodData* md = method->method_data_or_null();
+  assert(md != NULL, "Sanity");
+  ciProfileData* data = md->bci_to_data(bci);
+  assert(data->is_CounterData(), "need CounterData for calls");
+  assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
+  Register mdo  = op->mdo()->as_register();
+  assert(op->tmp1()->is_double_cpu(), "tmp1 must be allocated");
+  Register tmp1 = op->tmp1()->as_register_lo();
+  metadata2reg(md->constant_encoding(), mdo);
+
+  Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+  Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes.
+  if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // Required for optimized MH invokes.
+      C1ProfileVirtualCalls) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, tmp1, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type.
+
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations.
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      // If the known klass already occupies a row, just bump its counter.
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data. Select an empty slot.
+
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time.
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+          metadata2reg(known_klass->constant_encoding(), tmp1);
+          __ z_stg(tmp1, recv_addr);
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1);
+          return;
+        }
+      }
+    } else {
+      // Receiver type unknown at compile time: profile it dynamically.
+      __ load_klass(recv, recv);
+      NearLabel update_done;
+      type_profile_helper(mdo, md, data, recv, tmp1, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+      __ add2mem_64(counter_addr, DataLayout::counter_increment, tmp1);
+      __ bind(update_done);
+    }
+  } else {
+    // static call
+    __ add2mem_64(counter_addr, DataLayout::counter_increment, tmp1);
+  }
+}
+
+// Align backward-branch targets (loop headers) to OptoLoopAlignment.
+void LIR_Assembler::align_backward_branch_target() {
+  __ align(OptoLoopAlignment);
+}
+
+void LIR_Assembler::emit_delay(LIR_OpDelay* op) {
+  ShouldNotCallThis(); // There are no delay slots on ZARCH_64.
+}
+
+// Arithmetic negation, selecting the load-complement instruction matching
+// the operand kind: LCR (int), LCEBR (float), LCDBR (double), LCGR (long).
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+  assert(left->is_register(), "can only handle registers");
+
+  if (left->is_single_cpu()) {
+    __ z_lcr(dest->as_register(), left->as_register());
+  } else if (left->is_single_fpu()) {
+    __ z_lcebr(dest->as_float_reg(), left->as_float_reg());
+  } else if (left->is_double_fpu()) {
+    __ z_lcdbr(dest->as_double_reg(), left->as_double_reg());
+  } else {
+    assert(left->is_double_cpu(), "Must be a long");
+    __ z_lcgr(dest->as_register_lo(), left->as_register_lo());
+  }
+}
+
+// x87 FPU-stack operations: only meaningful on x86, never called on s390.
+void LIR_Assembler::fxch(int i) {
+  ShouldNotCallThis(); // x86 only
+}
+
+void LIR_Assembler::fld(int i) {
+  ShouldNotCallThis(); // x86 only
+}
+
+void LIR_Assembler::ffree(int i) {
+  ShouldNotCallThis(); // x86 only
+}
+
+// Call a C runtime routine at dest. The args list is not consulted here
+// (arguments are already in place) and no temp register is needed on s390.
+// Debug info is attached when the call can trap/deoptimize (info != NULL).
+void LIR_Assembler::rt_call(LIR_Opr result, address dest,
+                            const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+  assert(!tmp->is_valid(), "don't need temporary");
+  emit_call_c(dest);
+  CHECK_BAILOUT();
+  if (info != NULL) {
+    add_call_info_here(info);
+  }
+}
+
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+  ShouldNotCallThis(); // not needed on ZARCH_64
+}
+
+// Memory barriers. Only a full fence (storeload included) emits code via
+// z_fence(); the weaker barriers map to z_acquire()/z_release().
+void LIR_Assembler::membar() {
+  __ z_fence();
+}
+
+void LIR_Assembler::membar_acquire() {
+  __ z_acquire();
+}
+
+void LIR_Assembler::membar_release() {
+  __ z_release();
+}
+
+void LIR_Assembler::membar_loadload() {
+  __ z_acquire();
+}
+
+void LIR_Assembler::membar_storestore() {
+  __ z_release();
+}
+
+void LIR_Assembler::membar_loadstore() {
+  __ z_acquire();
+}
+
+// storeload ordering needs the full fence.
+void LIR_Assembler::membar_storeload() {
+  __ z_fence();
+}
+
+void LIR_Assembler::on_spin_wait() {
+  Unimplemented();
+}
+
+// Load effective address: dest = base + index + disp (no scaling on s390).
+void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
+  LIR_Address* addr = addr_opr->as_address_ptr();
+  assert(addr->scale() == LIR_Address::times_1, "scaling unsupported");
+  __ load_address(dest->as_pointer_register(), as_Address(addr));
+}
+
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+  ShouldNotCallThis(); // unused
+}
+
+#ifdef ASSERT
+// Emit run-time assertion.
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+  Unimplemented();
+}
+#endif
+
+// Platform-specific peephole optimization over the LIR list; none on s390.
+void LIR_Assembler::peephole(LIR_List*) {
+  // Do nothing for now.
+}
+
+// Emit an atomic fetch-and-add (lir_xadd) using LAA (int) or LAAG (long):
+// dest receives the old memory value, memory is incremented by data.
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
+  assert(code == lir_xadd, "lir_xchg not supported");
+  Address src_addr = as_Address(src->as_address_ptr());
+  Register base = src_addr.base();
+  intptr_t disp = src_addr.disp();
+  if (src_addr.index()->is_valid()) {
+    // LAA and LAAG do not support index register.
+    __ load_address(Z_R1_scratch, src_addr);
+    base = Z_R1_scratch;
+    disp = 0;
+  }
+  if (data->type() == T_INT) {
+    __ z_laa(dest->as_register(), data->as_register(), disp, base);
+  } else if (data->type() == T_LONG) {
+    assert(data->as_register_lo() == data->as_register_hi(), "should be a single register");
+    __ z_laag(dest->as_register_lo(), data->as_register_lo(), disp, base);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// MethodData speculative type profiling: merge the observed type of obj
+// into the type cell at mdo_addr. The cell encodes a klass pointer plus
+// the TypeEntries flag bits (null_seen, type_unknown); current_klass and
+// exact_klass describe what the compiler already knows statically, which
+// lets whole sub-checks be elided.
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
+  Register obj = op->obj()->as_register();
+  Register tmp1 = op->tmp()->as_pointer_register();
+  Register tmp2 = Z_R1_scratch;
+  Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label update, next, none, null_seen, init_klass;
+
+  bool do_null = !not_null;
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+
+  __ verify_oop(obj);
+
+  // Copy obj into tmp1 (setting the condition code for the null test);
+  // in debug builds always copy so the asserts below can use tmp1.
+  if (do_null || tmp1 != obj DEBUG_ONLY(|| true)) {
+    __ z_ltgr(tmp1, obj);
+  }
+  if (do_null) {
+    __ z_brnz(update);
+    // Null observed: set the null_seen bit unless already statically known.
+    if (!TypeEntries::was_null_seen(current_klass)) {
+      __ z_lg(tmp1, mdo_addr);
+      __ z_oill(tmp1, TypeEntries::null_seen);
+      __ z_stg(tmp1, mdo_addr);
+    }
+    if (do_update) {
+      __ z_bru(next);
+    }
+  } else {
+    __ asm_assert_ne("unexpect null obj", __LINE__);
+  }
+
+  __ bind(update);
+
+  if (do_update) {
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      __ load_klass(tmp1, tmp1);
+      metadata2reg(exact_klass->constant_encoding(), tmp2);
+      __ z_cgr(tmp1, tmp2);
+      __ asm_assert_eq("exact klass and actual klass differ", __LINE__);
+    }
+#endif
+
+    Label do_update;
+    // tmp2 holds the current MDO cell value for the rest of this function.
+    __ z_lg(tmp2, mdo_addr);
+
+    if (!no_conflict) {
+      if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+        // Get the observed klass into tmp1 (statically if possible).
+        if (exact_klass != NULL) {
+          metadata2reg(exact_klass->constant_encoding(), tmp1);
+        } else {
+          __ load_klass(tmp1, tmp1);
+        }
+
+        // Klass seen before: nothing to do (regardless of unknown bit).
+        __ z_lgr(Z_R0_scratch, tmp2);
+        assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction");
+        __ z_nill(Z_R0_scratch, TypeEntries::type_klass_mask & 0xFFFF);
+        __ compareU64_and_branch(Z_R0_scratch, tmp1, Assembler::bcondEqual, next);
+
+        // Already unknown: Nothing to do anymore.
+        __ z_tmll(tmp2, TypeEntries::type_unknown);
+        __ z_brc(Assembler::bcondAllOne, next);
+
+        if (TypeEntries::is_type_none(current_klass)) {
+          // Cell still empty: initialize it with this klass.
+          __ z_lgr(Z_R0_scratch, tmp2);
+          assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction");
+          __ z_nill(Z_R0_scratch, TypeEntries::type_mask & 0xFFFF);
+          __ compareU64_and_branch(Z_R0_scratch, (intptr_t)0, Assembler::bcondEqual, init_klass);
+        }
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+        // Already unknown: Nothing to do anymore.
+        __ z_tmll(tmp2, TypeEntries::type_unknown);
+        __ z_brc(Assembler::bcondAllOne, next);
+      }
+
+      // Different than before. Cannot keep accurate profile.
+      __ z_oill(tmp2, TypeEntries::type_unknown);
+      __ z_bru(do_update);
+    } else {
+      // There's a single possible klass at this profile point.
+      assert(exact_klass != NULL, "should be");
+      if (TypeEntries::is_type_none(current_klass)) {
+        metadata2reg(exact_klass->constant_encoding(), tmp1);
+        __ z_lgr(Z_R0_scratch, tmp2);
+        assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction");
+        __ z_nill(Z_R0_scratch, TypeEntries::type_klass_mask & 0xFFFF);
+        __ compareU64_and_branch(Z_R0_scratch, tmp1, Assembler::bcondEqual, next);
+#ifdef ASSERT
+        {
+          // The cell must be either this klass or still empty.
+          Label ok;
+          __ z_lgr(Z_R0_scratch, tmp2);
+          assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction");
+          __ z_nill(Z_R0_scratch, TypeEntries::type_mask & 0xFFFF);
+          __ compareU64_and_branch(Z_R0_scratch, (intptr_t)0, Assembler::bcondEqual, ok);
+          __ stop("unexpected profiling mismatch");
+          __ bind(ok);
+        }
+#endif
+
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+        // Already unknown: Nothing to do anymore.
+        __ z_tmll(tmp2, TypeEntries::type_unknown);
+        __ z_brc(Assembler::bcondAllOne, next);
+        __ z_oill(tmp2, TypeEntries::type_unknown);
+        __ z_bru(do_update);
+      }
+    }
+
+    __ bind(init_klass);
+    // Combine klass and null_seen bit (only used if (tmp & type_mask)==0).
+    __ z_ogr(tmp2, tmp1);
+
+    __ bind(do_update);
+    __ z_stg(tmp2, mdo_addr);
+
+    __ bind(next);
+  }
+}
+
+// Update a CRC32 checksum with a single byte (java.util.zip.CRC32 intrinsic).
+// The CRC is kept in inverted form, so it is complemented before and after
+// the table-driven update step.
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+  assert(op->crc()->is_single_cpu(), "crc must be register");
+  assert(op->val()->is_single_cpu(), "byte value must be register");
+  assert(op->result_opr()->is_single_cpu(), "result must be register");
+  Register crc = op->crc()->as_register();
+  Register val = op->val()->as_register();
+  Register res = op->result_opr()->as_register();
+
+  assert_different_registers(val, crc, res);
+
+  // res temporarily holds the CRC table address for the update step.
+  __ load_const_optimized(res, StubRoutines::crc_table_addr());
+  __ not_(crc, noreg, false); // ~crc
+  __ update_byte_crc32(crc, val, res);
+  __ not_(res, crc, false); // ~crc
+}
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP
+#define CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP
+
+ private:
+
+  // Record the type of the receiver in ReceiverTypeData.
+  void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data,
+                           Register recv, Register tmp1, Label* update_done);
+  // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
+  void setup_md_access(ciMethod* method, int bci,
+                       ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias);
+ public:
+  // Emit a call to C code, returning the pc of the call for oop map bookkeeping.
+  address emit_call_c(address a);
+
+  // Spill an argument for a runtime call into the reserved outgoing-arg area.
+  void store_parameter(Register r, int param_num);
+  void store_parameter(jint     c, int param_num);
+
+  // Assert (debug builds) that the frame's reserved argument area can hold
+  // 'bytes' of outgoing arguments beyond the fixed ABI part.
+  void check_reserved_argument_area(int bytes) {
+    assert(bytes + FrameMap::first_available_sp_in_frame <= frame_map()->reserved_argument_area_size(),
+           "reserved_argument_area too small");
+  }
+
+  // Upper bounds for code emitted into out-of-line stubs.
+  enum {
+    call_stub_size = 512, // See Compile::MAX_stubs_size and CompiledStaticCall::emit_to_interp_stub.
+    exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(128),
+    deopt_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(64)
+  };
+#endif // CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LIRGenerator_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1246 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArray.hpp"
+#include "ci/ciObjArrayKlass.hpp"
+#include "ci/ciTypeArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_s390.inline.hpp"
+
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->
+#else
+#define __ gen()->lir()->
+#endif
+
+// No dedicated byte registers on s390 (unlike x86), so a byte load is an
+// ordinary load.
+void LIRItem::load_byte_item() {
+  // Byte loads use same registers as other loads.
+  load_item();
+}
+
+// Keep the value as an inlined constant when the platform can encode it as an
+// immediate of the given bit width; otherwise force it into a register.
+void LIRItem::load_nonconstant(int bits) {
+  if (!_gen->can_inline_as_constant(value(), bits)) {
+    load_item();
+    return;
+  }
+  LIR_Opr opr = value()->operand();
+  if (!opr->is_constant()) {
+    // Materialize a constant operand from the value's type.
+    opr = LIR_OprFact::value_type(value()->type());
+  }
+  _result = opr;
+}
+
+// Load an int LIRItem into 'dst' widened to a long.
+// Constants are emitted directly (they load sign-extended on s390); register
+// values are forced into 'dst' and then sign-extended with an i2l convert.
+inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) {
+  LIR_Opr r = li.value()->operand();
+  if (r->is_constant()) {
+    // Constants get loaded with sign extend on this platform.
+    ll->move(li.result(), dst);
+  } else {
+    if (!r->is_register()) {
+      li.load_item_force(dst);
+    }
+    // Reinterpret dst's register as a long operand for the conversion result.
+    LIR_Opr dst_l = FrameMap::as_long_opr(dst->as_register());
+    ll->convert(Bytecodes::_i2l, li.result(), dst_l); // Convert.
+  }
+}
+
+//--------------------------------------------------------------
+//               LIRGenerator
+//--------------------------------------------------------------
+
+// Fixed-register operands used by the platform-independent LIRGenerator.
+// Division uses the Z register pair convention: R10/R11 (see DSGR/DSGFR
+// usage in do_ArithmeticOp_Int/Long below).
+LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::as_oop_opr(Z_EXC_OOP); }
+LIR_Opr LIRGenerator::exceptionPcOpr()  { return FrameMap::as_opr(Z_EXC_PC); }
+LIR_Opr LIRGenerator::divInOpr()        { return FrameMap::Z_R11_opr; }
+LIR_Opr LIRGenerator::divOutOpr()       { return FrameMap::Z_R11_opr; }
+LIR_Opr LIRGenerator::remOutOpr()       { return FrameMap::Z_R10_opr; }
+LIR_Opr LIRGenerator::ldivInOpr()       { return FrameMap::Z_R11_long_opr; }
+LIR_Opr LIRGenerator::ldivOutOpr()      { return FrameMap::Z_R11_long_opr; }
+LIR_Opr LIRGenerator::lremOutOpr()      { return FrameMap::Z_R10_long_opr; }
+LIR_Opr LIRGenerator::syncLockOpr()     { return new_register(T_INT); }
+LIR_Opr LIRGenerator::syncTempOpr()     { return FrameMap::Z_R13_opr; }
+LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }
+
+// Return the ABI result register operand for the given value type:
+// Z_R2 for integral/oop results, Z_F0 for floating-point results.
+LIR_Opr LIRGenerator::result_register_for (ValueType* type, bool callee) {
+  LIR_Opr opr;
+  switch (type->tag()) {
+    case intTag:    opr = FrameMap::Z_R2_opr;        break;
+    case objectTag: opr = FrameMap::Z_R2_oop_opr;    break;
+    case longTag:   opr = FrameMap::Z_R2_long_opr;   break;
+    case floatTag:  opr = FrameMap::Z_F0_opr;        break;
+    case doubleTag: opr = FrameMap::Z_F0_double_opr; break;
+
+    case addressTag:
+    default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
+  }
+
+  assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return opr;
+}
+
+// Any int register can hold a byte on s390; no special byte-register class.
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  return new_register(T_INT);
+}
+
+//--------- Loading items into registers. --------------------------------
+
+// z/Architecture cannot inline all constants: only integral values that fit a
+// signed 16-bit immediate and the null object can be stored directly.
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  ValueType* vt = v->type();
+  if (vt->as_IntConstant() != NULL) {
+    return Immediate::is_simm16(vt->as_IntConstant()->value());
+  }
+  if (vt->as_LongConstant() != NULL) {
+    return Immediate::is_simm16(vt->as_LongConstant()->value());
+  }
+  if (vt->as_ObjectConstant() != NULL) {
+    return vt->as_ObjectConstant()->value()->is_null_object();
+  }
+  return false;
+}
+
+// Can 'i' be used as an immediate of 'bits' width? Integral constants are
+// checked against the requested bit width; everything else falls back to the
+// general can_store_as_constant test.
+bool LIRGenerator::can_inline_as_constant(Value i, int bits) const {
+  if (i->type()->as_IntConstant() != NULL) {
+    return Assembler::is_simm(i->type()->as_IntConstant()->value(), bits);
+  } else if (i->type()->as_LongConstant() != NULL) {
+    return Assembler::is_simm(i->type()->as_LongConstant()->value(), bits);
+  } else {
+    return can_store_as_constant(i, as_BasicType(i->type()));
+  }
+}
+
+// An LIR constant can be inlined when it is an int/long fitting a signed
+// 20-bit immediate (the long-displacement format).
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
+  switch (c->type()) {
+    case T_INT:  return Immediate::is_simm20(c->as_jint());
+    case T_LONG: return Immediate::is_simm20(c->as_jlong());
+    default:     return false;
+  }
+}
+
+// Fresh 64-bit register for the safepoint poll address.
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return new_register(longType);
+}
+
+// Build an LIR_Address for base + (index << shift) + disp, materializing
+// parts into registers when they cannot be encoded in the addressing mode.
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+  if (index->is_constant()) {
+    // Fold the scaled constant index into the displacement.
+    intptr_t large_disp = ((intx)(index->as_constant_ptr()->as_jint()) << shift) + disp;
+    if (Displacement::is_validDisp(large_disp)) {
+      return new LIR_Address(base, large_disp, type);
+    }
+    // Index is illegal so replace it with the displacement loaded into a register.
+    index = new_pointer_register();
+    __ move(LIR_OprFact::intptrConst(large_disp), index);
+    return new LIR_Address(base, index, type);
+  } else {
+    if (shift > 0) {
+      // No scaled-index addressing mode: shift into a temp first.
+      LIR_Opr tmp = new_pointer_register();
+      __ shift_left(index, shift, tmp);
+      index = tmp;
+    }
+    return new LIR_Address(base, index, disp, type);
+  }
+}
+
+// Build the address of array element array_opr[index_opr] of the given type.
+// When a card mark is needed the full element address is precomputed into a
+// register so store and card mark share one exact address.
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type, bool needs_card_mark) {
+  int elem_size = type2aelembytes(type);
+  int shift = exact_log2(elem_size);
+  int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
+
+  LIR_Address* addr;
+  if (index_opr->is_constant()) {
+    // Fold constant index into the displacement.
+    addr = new LIR_Address(array_opr,
+                           offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type);
+  } else {
+    if (index_opr->type() == T_INT) {
+      // Widen the int index to 64 bits before address arithmetic.
+      LIR_Opr tmp = new_register(T_LONG);
+      __ convert(Bytecodes::_i2l, index_opr, tmp);
+      index_opr = tmp;
+    }
+    if (shift > 0) {
+      // Scale in place; index_opr is a freshly allocated temp here.
+      __ shift_left(index_opr, shift, index_opr);
+    }
+    addr = new LIR_Address(array_opr,
+                           index_opr,
+                           offset_in_bytes, type);
+  }
+  if (needs_card_mark) {
+    // This store will need a precise card mark, so go ahead and
+    // compute the full address instead of computing once for the
+    // store and again for the card mark.
+    LIR_Opr tmp = new_pointer_register();
+    __ leal(LIR_OprFact::address(addr), tmp);
+    return new LIR_Address(tmp, type);
+  } else {
+    return addr;
+  }
+}
+
+// Wrap the immediate 'x' in a constant LIR operand of the requested type.
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  switch (type) {
+    case T_LONG: return LIR_OprFact::longConst(x);
+    case T_INT:  return LIR_OprFact::intConst(x);
+    default:     ShouldNotReachHere();
+  }
+  return LIR_OprFact::illegalOpr; // Unreachable; keeps the compiler happy.
+}
+
+// Increment the counter at absolute address 'counter' by 'step'.
+// The address is materialized in a register first.
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr pointer = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), pointer);
+  LIR_Address* addr = new LIR_Address(pointer, type);
+  increment_counter(addr, step);
+}
+
+// In-memory increment: addr <- addr + step (emitted as a single LIR add
+// with the address operand as both source and destination).
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr);
+}
+
+// Compare the int at [base + disp] against constant c.
+// Loads through scratch Z_R1 because LIR cmp needs a register operand.
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr scratch = FrameMap::Z_R1_opr;
+  __ load(new LIR_Address(base, disp, T_INT), scratch, info);
+  __ cmp(condition, scratch, c);
+}
+
+// Compare register 'reg' against memory at [base + disp] (constant displacement).
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
+  __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info);
+}
+
+// Compare register 'reg' against memory at [base + disp] (register displacement).
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info) {
+  __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info);
+}
+
+// Strength-reduce left * c for c = 2^k +/- 1 into shift and add/sub:
+//   c = 2^k - 1:  (left << k) - left
+//   c = 2^k + 1:  (left << k) + left
+// Requires a valid tmp register to preserve the original left value.
+// Returns false (emitting nothing) when no reduction applies.
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
+  if (tmp->is_valid()) {
+    if (is_power_of_2(c + 1)) {
+      __ move(left, tmp);
+      __ shift_left(left, log2_intptr(c + 1), left);
+      __ sub(left, tmp, result);
+      return true;
+    } else if (is_power_of_2(c - 1)) {
+      __ move(left, tmp);
+      __ shift_left(left, log2_intptr(c - 1), left);
+      __ add(left, tmp, result);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Store 'item' into the outgoing argument area at SP + offset_from_sp.
+void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) {
+  BasicType type = item->type();
+  __ store(item, new LIR_Address(FrameMap::Z_SP_opr, in_bytes(offset_from_sp), type));
+}
+
+//----------------------------------------------------------------------
+//             visitor functions
+//----------------------------------------------------------------------
+
+// Array element store (aastore/iastore/...): range check, store check for
+// object arrays, GC barriers, and the store itself.
+void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
+  assert(x->is_pinned(),"");
+  bool needs_range_check = x->compute_needs_range_check();
+  bool use_length = x->length() != NULL;
+  bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT;
+  // Storing a non-null object (or profiling) requires a dynamic store check.
+  bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL ||
+                                         !get_jobject_constant(x->value())->is_null_object() ||
+                                         x->should_profile());
+
+  LIRItem array(x->array(), this);
+  LIRItem index(x->index(), this);
+  LIRItem value(x->value(), this);
+  LIRItem length(this);
+
+  array.load_item();
+  // 20-bit signed displacement is the addressing limit on this platform.
+  index.load_nonconstant(20);
+
+  if (use_length && needs_range_check) {
+    length.set_instruction(x->length());
+    length.load_item();
+  }
+  if (needs_store_check) {
+    value.load_item();
+  } else {
+    value.load_for_store(x->elt_type());
+  }
+
+  set_no_result(x);
+
+  // The CodeEmitInfo must be duplicated for each different
+  // LIR-instruction because spilling can occur anywhere between two
+  // instructions and so the debug information must be different.
+  CodeEmitInfo* range_check_info = state_for (x);
+  CodeEmitInfo* null_check_info = NULL;
+  if (x->needs_null_check()) {
+    null_check_info = new CodeEmitInfo(range_check_info);
+  }
+
+  // Emit array address setup early so it schedules better.
+  LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store);
+  if (value.result()->is_constant() && array_addr->index()->is_valid()) {
+    // Constants cannot be stored with index register on ZARCH_64 (see LIR_Assembler::const2mem()).
+    LIR_Opr tmp = new_pointer_register();
+    __ leal(LIR_OprFact::address(array_addr), tmp);
+    array_addr = new LIR_Address(tmp, x->elt_type());
+  }
+
+  if (GenerateRangeChecks && needs_range_check) {
+    if (use_length) {
+      __ cmp(lir_cond_belowEqual, length.result(), index.result());
+      __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result()));
+    } else {
+      array_range_check(array.result(), index.result(), null_check_info, range_check_info);
+      // Range_check also does the null check.
+      null_check_info = NULL;
+    }
+  }
+
+  if (GenerateArrayStoreCheck && needs_store_check) {
+    LIR_Opr tmp1 = new_register(objectType);
+    LIR_Opr tmp2 = new_register(objectType);
+    LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+
+    CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info);
+    __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci());
+  }
+
+  if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+    __ move(value.result(), array_addr, null_check_info);
+    // Precise card mark: array_addr is the exact element address.
+    post_barrier(LIR_OprFact::address(array_addr), value.result());
+  } else {
+    __ move(value.result(), array_addr, null_check_info);
+  }
+}
+
+// monitorenter: lock the object, recording the monitor stack slot in 'lock'.
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+
+  set_no_result(x);
+
+  // "lock" stores the address of the monitor stack slot, so this is not an oop.
+  LIR_Opr lock = new_register(T_INT);
+
+  CodeEmitInfo* info_for_exception = NULL;
+  if (x->needs_null_check()) {
+    info_for_exception = state_for (x);
+  }
+  // This CodeEmitInfo must not have the xhandlers because here the
+  // object is already locked (xhandlers expect object to be unlocked).
+  CodeEmitInfo* info = state_for (x, x->state(), true);
+  monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr,
+                x->monitor_no(), info_for_exception, info);
+}
+
+// monitorexit: unlock the object. The object need not be reloaded; the
+// monitor slot identified by monitor_no() carries the needed state.
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_pinned(),"");
+
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item();
+
+  LIR_Opr lock = new_register(T_INT);
+  LIR_Opr obj_temp = new_register(T_INT);
+  set_no_result(x);
+  monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no());
+}
+
+// _ineg, _lneg, _fneg, _dneg
+// Arithmetic negation for all numeric types; a single LIR negate suffices.
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+  LIRItem value(x->x(), this);
+  value.load_item();
+  LIR_Opr reg = rlock_result(x);
+  __ negate(value.result(), reg);
+}
+
+// for _fadd, _fmul, _fsub, _fdiv, _frem
+//     _dadd, _dmul, _dsub, _ddiv, _drem
+// Float/double add, mul, sub, div are emitted inline; frem/drem have no
+// hardware instruction and are routed to the shared runtime.
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+  assert(!left.is_stack(), "can't both be memory operands");
+  left.load_item();
+
+  // Right operand may stay in memory unless it is already a register/constant.
+  if (right.is_register() || right.is_constant()) {
+    right.load_item();
+  } else {
+    right.dont_load_item();
+  }
+
+  if ((x->op() == Bytecodes::_frem) || (x->op() == Bytecodes::_drem)) {
+    // Initialize to NULL so a (never-taken) fall-through past the aborting
+    // default cannot leave 'entry' undefined.
+    address entry = NULL;
+    switch (x->op()) {
+    case Bytecodes::_frem:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
+      break;
+    case Bytecodes::_drem:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL);
+    set_result(x, result);
+  } else {
+    LIR_Opr reg = rlock(x);
+    LIR_Opr tmp = LIR_OprFact::illegalOpr;
+    arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp(), tmp);
+    set_result(x, reg);
+  }
+}
+
+// for _ladd, _lmul, _lsub, _ldiv, _lrem
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+  if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
+    // Use shifts if divisor is a power of 2 otherwise use DSGR instruction.
+    // Instruction: DSGR R1, R2
+    // input : R1+1: dividend   (R1, R1+1 designate a register pair, R1 must be even)
+    //         R2:   divisor
+    //
+    // output: R1+1: quotient
+    //         R1:   remainder
+    //
+    // Register selection: R1:   Z_R10
+    //                     R1+1: Z_R11
+    //                     R2:   to be chosen by register allocator (linear scan)
+
+    // R1, and R1+1 will be destroyed.
+
+    LIRItem right(x->y(), this);
+    LIRItem left(x->x() , this);   // Visit left second, so that the is_register test is valid.
+
+    // Call state_for before load_item_force because state_for may
+    // force the evaluation of other instructions that are needed for
+    // correct debug info. Otherwise the live range of the fix
+    // register might be too long.
+    CodeEmitInfo* info = state_for (x);
+
+    LIR_Opr result = rlock_result(x);
+    LIR_Opr result_reg = result;
+    LIR_Opr tmp = LIR_OprFact::illegalOpr;
+    LIR_Opr divisor_opr = right.result();
+    if (divisor_opr->is_constant() && is_power_of_2(divisor_opr->as_jlong())) {
+      // Power-of-2 divisor: back end emits shifts; no fixed registers needed.
+      left.load_item();
+      right.dont_load_item();
+    } else {
+      left.load_item_force(ldivInOpr());
+      right.load_item();
+
+      // DSGR instruction needs register pair.
+      if (x->op() == Bytecodes::_ldiv) {
+        result_reg = ldivOutOpr();
+        tmp        = lremOutOpr();
+      } else {
+        result_reg = lremOutOpr();
+        tmp        = ldivOutOpr();
+      }
+    }
+
+    if (!ImplicitDiv0Checks) {
+      // Explicit zero check with a deopt/throw stub.
+      __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+      // Idiv/irem cannot trap (passing info would generate an assertion).
+      info = NULL;
+    }
+
+    if (x->op() == Bytecodes::_lrem) {
+      __ irem(left.result(), right.result(), result_reg, tmp, info);
+    } else if (x->op() == Bytecodes::_ldiv) {
+      __ idiv(left.result(), right.result(), result_reg, tmp, info);
+    } else {
+      ShouldNotReachHere();
+    }
+
+    if (result_reg != result) {
+      __ move(result_reg, result);
+    }
+  } else {
+    // ladd/lsub/lmul: right operand may be a small immediate.
+    LIRItem left(x->x(), this);
+    LIRItem right(x->y(), this);
+
+    left.load_item();
+    right.load_nonconstant(32);
+    rlock_result(x);
+    arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+  }
+}
+
+// for: _iadd, _imul, _isub, _idiv, _irem
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+  if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) {
+    // Use shifts if divisor is a power of 2 otherwise use DSGFR instruction.
+    // Instruction: DSGFR R1, R2
+    // input : R1+1: dividend   (R1, R1+1 designate a register pair, R1 must be even)
+    //         R2:   divisor
+    //
+    // output: R1+1: quotient
+    //         R1:   remainder
+    //
+    // Register selection: R1:   Z_R10
+    //                     R1+1: Z_R11
+    //                     R2:   To be chosen by register allocator (linear scan).
+
+    // R1, and R1+1 will be destroyed.
+
+    LIRItem right(x->y(), this);
+    LIRItem left(x->x() , this);   // Visit left second, so that the is_register test is valid.
+
+    // Call state_for before load_item_force because state_for may
+    // force the evaluation of other instructions that are needed for
+    // correct debug info. Otherwise the live range of the fix
+    // register might be too long.
+    CodeEmitInfo* info = state_for (x);
+
+    LIR_Opr result = rlock_result(x);
+    LIR_Opr result_reg = result;
+    LIR_Opr tmp = LIR_OprFact::illegalOpr;
+    LIR_Opr divisor_opr = right.result();
+    if (divisor_opr->is_constant() && is_power_of_2(divisor_opr->as_jint())) {
+      // Power-of-2 divisor: back end emits shifts; no fixed registers needed.
+      left.load_item();
+      right.dont_load_item();
+    } else {
+      left.load_item_force(divInOpr());
+      right.load_item();
+
+      // DSGFR instruction needs register pair.
+      if (x->op() == Bytecodes::_idiv) {
+        result_reg = divOutOpr();
+        tmp        = remOutOpr();
+      } else {
+        result_reg = remOutOpr();
+        tmp        = divOutOpr();
+      }
+    }
+
+    if (!ImplicitDiv0Checks) {
+      // Explicit zero check with a deopt/throw stub.
+      __ cmp(lir_cond_equal, right.result(), LIR_OprFact::intConst(0));
+      __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
+      // Idiv/irem cannot trap (passing info would generate an assertion).
+      info = NULL;
+    }
+
+    if (x->op() == Bytecodes::_irem) {
+      __ irem(left.result(), right.result(), result_reg, tmp, info);
+    } else if (x->op() == Bytecodes::_idiv) {
+      __ idiv(left.result(), right.result(), result_reg, tmp, info);
+    } else {
+      ShouldNotReachHere();
+    }
+
+    if (result_reg != result) {
+      __ move(result_reg, result);
+    }
+  } else {
+    LIRItem left(x->x(),  this);
+    LIRItem right(x->y(), this);
+    LIRItem* left_arg = &left;
+    LIRItem* right_arg = &right;
+    if (x->is_commutative() && left.is_stack() && right.is_register()) {
+      // Swap them if left is real stack (or cached) and right is real register (not cached).
+      left_arg = &right;
+      right_arg = &left;
+    }
+
+    left_arg->load_item();
+
+    // Do not need to load right, as we can handle stack and constants.
+    if (x->op() == Bytecodes::_imul) {
+      bool use_tmp = false;
+      if (right_arg->is_constant()) {
+        int iconst = right_arg->get_jint_constant();
+        // Multiply by 2^k +/- 1 is strength-reduced; that path needs a temp
+        // (see strength_reduce_multiply).
+        if (is_power_of_2(iconst - 1) || is_power_of_2(iconst + 1)) {
+          use_tmp = true;
+        }
+      }
+      right_arg->dont_load_item();
+      LIR_Opr tmp = LIR_OprFact::illegalOpr;
+      if (use_tmp) {
+        tmp = new_register(T_INT);
+      }
+      rlock_result(x);
+
+      arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), tmp);
+    } else {
+      right_arg->dont_load_item();
+      rlock_result(x);
+      LIR_Opr tmp = LIR_OprFact::illegalOpr;
+      arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), tmp);
+    }
+  }
+}
+
+// Dispatch an arithmetic op to the type-specific generator.
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  // If an operand with use count 1 is the left operand, then it is
+  // likely that no move for 2-operand-LIR-form is necessary.
+  if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+    x->swap_operands();
+  }
+
+  ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  switch (tag) {
+    case floatTag:
+    case doubleTag: do_ArithmeticOp_FPU(x);  return;
+    case longTag:   do_ArithmeticOp_Long(x); return;
+    case intTag:    do_ArithmeticOp_Int(x);  return;
+  }
+  ShouldNotReachHere();
+}
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+  // Unlike x86, s390 has no fixed-register requirement for the shift count;
+  // any register (or an inline constant) will do.
+  LIRItem value(x->x(), this);
+  LIRItem count(x->y(), this);
+
+  ValueTag elemType = x->type()->tag();
+  bool must_load_count = !count.is_constant();
+  if (must_load_count) {
+    count.load_item();
+  } else {
+    count.dont_load_item();
+  }
+  value.load_item();
+  LIR_Opr reg = rlock_result(x);
+
+  shift_op(x->op(), reg, value.result(), count.result(), LIR_OprFact::illegalOpr);
+}
+
+// _iand, _land, _ior, _lor, _ixor, _lxor
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+  // If an operand with use count 1 is the left operand, then it is
+  // likely that no move for 2-operand-LIR-form is necessary.
+  if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+    x->swap_operands();
+  }
+
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+  // Right operand may remain a constant if it fits 32 bits.
+  right.load_nonconstant(32);
+  LIR_Opr reg = rlock_result(x);
+
+  logic_op(x->op(), reg, left.result(), right.result());
+}
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg
+// Three-way compare producing -1/0/1 in an int register.
+// For float compares, the *cmpl variants treat NaN as less-than.
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  left.load_item();
+  right.load_item();
+  LIR_Opr reg = rlock_result(x);
+  if (x->x()->type()->is_float_kind()) {
+    Bytecodes::Code code = x->op();
+    __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+  } else if (x->x()->type()->tag() == longTag) {
+    __ lcmp2int(left.result(), right.result(), reg);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Unsafe.compareAndSwap* intrinsic: atomically replace the field at
+// obj+offset with 'val' if it equals 'cmp'; result is 1 on success, 0 on
+// failure. Object fields additionally get GC pre/post barriers.
+void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
+  assert(x->number_of_arguments() == 4, "wrong type");
+  LIRItem obj   (x->argument_at(0), this);  // object
+  LIRItem offset(x->argument_at(1), this);  // offset of field
+  LIRItem cmp   (x->argument_at(2), this);  // Value to compare with field.
+  LIRItem val   (x->argument_at(3), this);  // Replace field with val if matches cmp.
+
+  // Get address of field.
+  obj.load_item();
+  offset.load_nonconstant(20);
+  cmp.load_item();
+  val.load_item();
+
+  LIR_Opr addr = new_pointer_register();
+  LIR_Address* a;
+  if (offset.result()->is_constant()) {
+    assert(Immediate::is_simm20(offset.result()->as_jlong()), "should have been loaded into register");
+    a = new LIR_Address(obj.result(),
+                        offset.result()->as_jlong(),
+                        as_BasicType(type));
+  } else {
+    a = new LIR_Address(obj.result(),
+                        offset.result(),
+                        0,
+                        as_BasicType(type));
+  }
+  __ leal(LIR_OprFact::address(a), addr);
+
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+  }
+
+  LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
+  if (type == objectType) {
+    __ cas_obj(addr, cmp.result(), val.result(), new_register(T_OBJECT), new_register(T_OBJECT));
+  } else if (type == intType) {
+    __ cas_int(addr, cmp.result(), val.result(), ill, ill);
+  } else if (type == longType) {
+    __ cas_long(addr, cmp.result(), val.result(), ill, ill);
+  } else {
+    ShouldNotReachHere();
+  }
+  // Generate conditional move of boolean result.
+  LIR_Opr result = rlock_result(x);
+  __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0),
+           result, as_BasicType(type));
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Precise card mark since could either be object or array
+    post_barrier(addr, val.result());
+  }
+}
+
+
+// Math intrinsics: dabs/dsqrt are emitted inline (hardware instructions);
+// the transcendental functions call the shared runtime implementations.
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
+  switch (x->id()) {
+    case vmIntrinsics::_dabs:
+    case vmIntrinsics::_dsqrt: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+      LIRItem value(x->argument_at(0), this);
+      value.load_item();
+      LIR_Opr dst = rlock_result(x);
+
+      switch (x->id()) {
+      case vmIntrinsics::_dsqrt: {
+        __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+        break;
+      }
+      case vmIntrinsics::_dabs: {
+        __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
+        break;
+      }
+      }
+      break;
+    }
+    case vmIntrinsics::_dlog10: // fall through
+    case vmIntrinsics::_dlog: // fall through
+    case vmIntrinsics::_dsin: // fall through
+    case vmIntrinsics::_dtan: // fall through
+    case vmIntrinsics::_dcos: // fall through
+    case vmIntrinsics::_dexp: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+
+      address runtime_entry = NULL;
+      switch (x->id()) {
+      case vmIntrinsics::_dsin:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+        break;
+      case vmIntrinsics::_dcos:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+        break;
+      case vmIntrinsics::_dtan:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+        break;
+      case vmIntrinsics::_dlog:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+        break;
+      case vmIntrinsics::_dlog10:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+        break;
+      case vmIntrinsics::_dexp:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+
+      LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+    case vmIntrinsics::_dpow: {
+      assert(x->number_of_arguments() == 2, "wrong type");
+      address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+      LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+  }
+}
+
+// System.arraycopy intrinsic: all five operands are forced into the C
+// argument registers because the LIR arraycopy always ends in a call.
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
+  assert(x->number_of_arguments() == 5, "wrong type");
+
+  // Copy stubs possibly call C code, e.g. G1 barriers, so we need to reserve room
+  // for the C ABI (see frame::z_abi_160).
+  BasicTypeArray sig; // Empty signature is precise enough.
+  frame_map()->c_calling_convention(&sig);
+
+  // Make all state_for calls early since they can emit code.
+  CodeEmitInfo* info = state_for (x, x->state());
+
+  LIRItem src(x->argument_at(0), this);
+  LIRItem src_pos(x->argument_at(1), this);
+  LIRItem dst(x->argument_at(2), this);
+  LIRItem dst_pos(x->argument_at(3), this);
+  LIRItem length(x->argument_at(4), this);
+
+  // Operands for arraycopy must use fixed registers, otherwise
+  // LinearScan will fail allocation (because arraycopy always needs a
+  // call).
+
+  src.load_item_force     (FrameMap::as_oop_opr(Z_ARG1));
+  src_pos.load_item_force (FrameMap::as_opr(Z_ARG2));
+  dst.load_item_force     (FrameMap::as_oop_opr(Z_ARG3));
+  dst_pos.load_item_force (FrameMap::as_opr(Z_ARG4));
+  length.load_item_force  (FrameMap::as_opr(Z_ARG5));
+
+  LIR_Opr tmp =            FrameMap::as_opr(Z_R7);
+
+  set_no_result(x);
+
+  int flags;
+  ciArrayKlass* expected_type;
+  // Determine statically-known properties (element types, checks needed).
+  arraycopy_helper(x, &flags, &expected_type);
+
+  __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(),
+               length.result(), tmp, expected_type, flags, info); // does add_safepoint
+}
+
+// Primitive conversions: _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l,
+// _f2d, _d2i, _d2l, _d2f, and the narrowing _i2b, _i2c, _i2s.
+void LIRGenerator::do_Convert(Convert* x) {
+  // Load the input value and emit a single LIR convert into the result.
+  LIRItem input(x->value(), this);
+  input.load_item();
+  const LIR_Opr dst = rlock_result(x);
+  __ convert(x->op(), input.result(), dst);
+}
+
+void LIRGenerator::do_NewInstance(NewInstance* x) {
+  print_if_not_loaded(x);
+
+  // This instruction can be deoptimized in the slow path : use
+  // Z_R2 as result register.
+  const LIR_Opr reg = result_register_for (x->type());
+
+  // state_for may emit code; compute it before setting up the temps.
+  CodeEmitInfo* info = state_for (x, x->state());
+  // Fixed temp registers for the allocation fast path / slow-path stub.
+  LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr;
+  LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr;
+  LIR_Opr tmp3 = reg;
+  LIR_Opr tmp4 = LIR_OprFact::illegalOpr;
+  LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr;
+  new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, tmp4, klass_reg, info);
+  // Copy the fixed result register into a virtual register for the allocator.
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
+  // state_for may emit code; compute it first.
+  CodeEmitInfo* info = state_for (x, x->state());
+
+  LIRItem length(x->length(), this);
+  length.load_item();
+
+  // Allocation happens in the fixed result register; temps are fixed too.
+  LIR_Opr reg = result_register_for (x->type());
+  LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr;
+  LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr;
+  LIR_Opr tmp3 = reg;
+  LIR_Opr tmp4 = LIR_OprFact::illegalOpr;
+  LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr;
+  LIR_Opr len = length.result();
+  BasicType elem_type = x->elt_type();
+
+  // Element klass of a primitive array is always known: no patching needed.
+  __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
+
+  CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
+
+  // Copy the fixed result register into a virtual register for the allocator.
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* info = state_for (x, x->state());
+  // In case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction
+  // and therefore provide the state before the parameters have been consumed.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for (x, x->state_before());
+  }
+
+  LIRItem length(x->length(), this);
+  length.load_item();
+
+  // Allocation happens in the fixed result register; temps are fixed too.
+  const LIR_Opr reg = result_register_for (x->type());
+  LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr;
+  LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr;
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+  LIR_Opr tmp4 = LIR_OprFact::illegalOpr;
+  LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr;
+  LIR_Opr len = length.result();
+
+  CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
+  // The array klass is the obj-array klass formed over the element klass.
+  ciKlass* obj = ciObjArrayKlass::make(x->klass());
+  if (obj == ciEnv::unloaded_ciobjarrayklass()) {
+    BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+  }
+  klass2reg_with_patching(klass_reg, obj, patching_info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+
+  // Copy the fixed result register into a virtual register for the allocator.
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
+  Values* dims = x->dims();
+  int i = dims->length();
+  // Wrap each dimension value in a LIRItem; they are evaluated below.
+  LIRItemList* items = new LIRItemList(i, i, NULL);
+  while (i-- > 0) {
+    LIRItem* size = new LIRItem(dims->at(i), this);
+    items->at_put(i, size);
+  }
+
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for (x, x->state_before());
+
+    // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
+    // clone all handlers (NOTE: Usually this is handled transparently
+    // by the CodeEmitInfo cloning logic in CodeStub constructors but
+    // is done explicitly here because a stub isn't being used).
+    x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+  }
+  CodeEmitInfo* info = state_for (x, x->state());
+
+  // Materialize the dimensions as a jint varargs array on the stack.
+  i = dims->length();
+  while (--i >= 0) {
+    LIRItem* size = items->at(i);
+    size->load_nonconstant(32);
+    // FrameMap::_reserved_argument_area_size includes the dimensions varargs, because
+    // it's initialized to hir()->max_stack() when the FrameMap is created.
+    store_stack_parameter(size->result(), in_ByteSize(i*sizeof(jint) + FrameMap::first_available_sp_in_frame));
+  }
+
+  // Runtime call arguments go in fixed registers: klass, rank, &dims[0].
+  LIR_Opr klass_reg = FrameMap::Z_R3_metadata_opr;
+  klass2reg_with_patching(klass_reg, x->klass(), patching_info);
+
+  LIR_Opr rank = FrameMap::Z_R4_opr;
+  __ move(LIR_OprFact::intConst(x->rank()), rank);
+  LIR_Opr varargs = FrameMap::Z_R5_opr;
+  __ leal(LIR_OprFact::address(new LIR_Address(FrameMap::Z_SP_opr, FrameMap::first_available_sp_in_frame, T_INT)),
+          varargs);
+  LIR_OprList* args = new LIR_OprList(3);
+  args->append(klass_reg);
+  args->append(rank);
+  args->append(varargs);
+  LIR_Opr reg = result_register_for (x->type());
+  __ call_runtime(Runtime1::entry_for (Runtime1::new_multi_array_id),
+                  LIR_OprFact::illegalOpr,
+                  reg, args, info);
+
+  // Copy the fixed result register into a virtual register for the allocator.
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
+  // Nothing to do: no per-block entry code is emitted on this platform.
+}
+
+void LIRGenerator::do_CheckCast(CheckCast* x) {
+  LIRItem obj(x->obj(), this);
+
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) {
+    // Must do this before locking the destination register as an oop register,
+    // and before the obj is loaded (the latter is for deoptimization).
+    patching_info = state_for (x, x->state_before());
+  }
+  obj.load_item();
+
+  // info for exceptions
+  CodeEmitInfo* info_for_exception = state_for (x);
+
+  // Pick the stub that throws the appropriate exception on failure.
+  CodeStub* stub;
+  if (x->is_incompatible_class_change_check()) {
+    assert(patching_info == NULL, "can't patch this");
+    stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
+  } else {
+    stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+  }
+  LIR_Opr reg = rlock_result(x);
+  // Two object temps for the klass compare; a third one is not needed here.
+  LIR_Opr tmp1 = new_register(objectType);
+  LIR_Opr tmp2 = new_register(objectType);
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+  __ checkcast(reg, obj.result(), x->klass(),
+               tmp1, tmp2, tmp3,
+               x->direct_compare(), info_for_exception, patching_info, stub,
+               x->profiled_method(), x->profiled_bci());
+}
+
+
+void LIRGenerator::do_InstanceOf(InstanceOf* x) {
+  LIRItem obj(x->obj(), this);
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for (x, x->state_before());
+  }
+  // Ensure the result register is not the input register because the
+  // result is initialized before the patching safepoint.
+  obj.load_item();
+  LIR_Opr out_reg = rlock_result(x);
+  // Two object temps for the klass compare; a third one is not needed here.
+  LIR_Opr tmp1 = new_register(objectType);
+  LIR_Opr tmp2 = new_register(objectType);
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+  __ instanceof(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3,
+                x->direct_compare(), patching_info,
+                x->profiled_method(), x->profiled_bci());
+}
+
+
+void LIRGenerator::do_If (If* x) {
+  assert(x->number_of_sux() == 2, "inconsistency");
+  ValueTag tag = x->x()->type()->tag();
+  bool is_safepoint = x->is_safepoint();
+
+  If::Condition cond = x->cond();
+
+  LIRItem xitem(x->x(), this);
+  LIRItem yitem(x->y(), this);
+  LIRItem* xin = &xitem;
+  LIRItem* yin = &yitem;
+
+  if (tag == longTag) {
+    // For longs, only conditions "eql", "neq", "lss", "geq" are valid;
+    // mirror for other conditions.
+    if (cond == If::gtr || cond == If::leq) {
+      // Swap the operands and mirror the condition instead.
+      cond = Instruction::mirror(cond);
+      xin = &yitem;
+      yin = &xitem;
+    }
+    xin->set_destroys_register();
+  }
+  xin->load_item();
+  // TODO: don't load long constants != 0L
+  if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && (cond == If::eql || cond == If::neq)) {
+    // inline long zero
+    yin->dont_load_item();
+  } else if (tag == longTag || tag == floatTag || tag == doubleTag) {
+    // Longs cannot handle constants at right side.
+    yin->load_item();
+  } else {
+    yin->dont_load_item();
+  }
+
+  // Add safepoint before generating condition code so it can be recomputed.
+  if (x->is_safepoint()) {
+    // Increment backedge counter if needed.
+    increment_backedge_counter(state_for (x, x->state_before()), x->profiled_bci());
+    // Use safepoint_poll_register() instead of LIR_OprFact::illegalOpr.
+    __ safepoint(safepoint_poll_register(), state_for (x, x->state_before()));
+  }
+  set_no_result(x);
+
+  LIR_Opr left = xin->result();
+  LIR_Opr right = yin->result();
+  __ cmp(lir_cond(cond), left, right);
+  // Generate branch profiling. Profiling code doesn't kill flags.
+  profile_branch(x, cond);
+  move_to_phi(x->state());
+  if (x->x()->type()->is_float_kind()) {
+    // Float compares can be unordered: pass the unordered successor as well.
+    __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux());
+  } else {
+    __ branch(lir_cond(cond), right->type(), x->tsux());
+  }
+  assert(x->default_sux() == x->fsux(), "wrong destination above");
+  __ jump(x->default_sux());
+}
+
+// The current JavaThread is permanently kept in Z_thread.
+LIR_Opr LIRGenerator::getThreadPointer() {
+  return FrameMap::as_pointer_opr(Z_thread);
+}
+
+// Debug tracing: leaf call to Runtime1::trace_block_entry(block_id),
+// with the block id passed in Z_R2.
+void LIRGenerator::trace_block_entry(BlockBegin* block) {
+  __ move(LIR_OprFact::intConst(block->block_id()), FrameMap::Z_R2_opr);
+  LIR_OprList* args = new LIR_OprList(1);
+  args->append(FrameMap::Z_R2_opr);
+  address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry);
+  __ call_runtime_leaf(func, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, args);
+}
+
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
+                                        CodeEmitInfo* info) {
+  // A plain LIR store; no explicit fence is emitted here.
+  __ store(value, address, info);
+}
+
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
+                                       CodeEmitInfo* info) {
+  // A plain LIR load; no explicit fence is emitted here.
+  __ load(address, result, info);
+}
+
+
+// Unsafe put: store 'data' at src + offset. Oop stores get the GC write
+// barriers; 'is_volatile' is not used in this method.
+void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
+                                     BasicType type, bool is_volatile) {
+  LIR_Address* addr = new LIR_Address(src, offset, type);
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+  if (is_obj) {
+    // Do the pre-write barrier, if any.
+    pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+    __ move(data, addr);
+    assert(src->is_register(), "must be register");
+    // Seems to be a precise address.
+    post_barrier(LIR_OprFact::address(addr), data);
+  } else {
+    __ move(data, addr);
+  }
+}
+
+
+// Unsafe get: load from src + offset. 'is_volatile' is not used in this method.
+void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset,
+                                     BasicType type, bool is_volatile) {
+  LIR_Address* addr = new LIR_Address(src, offset, type);
+  __ load(addr, dst);
+}
+
+// Unsafe getAndAdd: only the atomic-add flavor on primitive types is
+// supported here (see assert below).
+void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {
+  BasicType type = x->basic_type();
+  assert (x->is_add() && type != T_ARRAY && type != T_OBJECT, "not supported");
+  LIRItem src(x->object(), this);
+  LIRItem off(x->offset(), this);
+  LIRItem value(x->value(), this);
+
+  src.load_item();
+  value.load_item();
+  // Offsets outside the signed 20-bit displacement range are forced into a register.
+  off.load_nonconstant(20);
+
+  LIR_Opr dst = rlock_result(x, type);
+  LIR_Opr data = value.result();
+  LIR_Opr offset = off.result();
+
+  // Constant offsets become part of the address displacement; others stay
+  // as an index register.
+  LIR_Address* addr;
+  if (offset->is_constant()) {
+    assert(Immediate::is_simm20(offset->as_jlong()), "should have been loaded into register");
+    addr = new LIR_Address(src.result(), offset->as_jlong(), type);
+  } else {
+    addr = new LIR_Address(src.result(), offset, type);
+  }
+
+  // Atomic fetch-and-add; no extra temp register is needed.
+  __ xadd(LIR_OprFact::address(addr), data, dst, LIR_OprFact::illegalOpr);
+}
+
+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
+  assert(UseCRC32Intrinsics, "or should not be here");
+  LIR_Opr result = rlock_result(x);
+
+  switch (x->id()) {
+    case vmIntrinsics::_updateCRC32: {
+      // Single-byte update: emitted inline, no runtime call.
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem val(x->argument_at(1), this);
+      // Registers destroyed by update_crc32.
+      crc.set_destroys_register();
+      val.set_destroys_register();
+      crc.load_item();
+      val.load_item();
+      __ update_crc32(crc.result(), val.result(), result);
+      break;
+    }
+    case vmIntrinsics::_updateBytesCRC32:
+    case vmIntrinsics::_updateByteBufferCRC32: {
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem len(x->argument_at(3), this);
+      buf.load_item();
+      off.load_nonconstant();
+
+      LIR_Opr index = off.result();
+      // The byte[] variant addresses past the array header; the ByteBuffer
+      // variant uses no base offset.
+      int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+      if (off.result()->is_constant()) {
+        // Fold a constant offset into the displacement.
+        index = LIR_OprFact::illegalOpr;
+        offset += off.result()->as_jint();
+      }
+      LIR_Opr base_op = buf.result();
+
+      if (index->is_valid()) {
+        // Address indices must be 64 bit: sign-extend the int offset.
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);
+        index = tmp;
+      }
+
+      LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE);
+
+      // C call: updateBytesCRC32(crc, buf_addr, len).
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for (x->type());
+
+      LIR_Opr arg1 = cc->at(0);
+      LIR_Opr arg2 = cc->at(1);
+      LIR_Opr arg3 = cc->at(2);
+
+      // CCallingConventionRequiresIntsAsLongs
+      crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
+      __ leal(LIR_OprFact::address(a), arg2);
+      load_int_as_long(gen()->lir(), len, arg3);
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args());
+      __ move(result_reg, result);
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
+}
+
+// NOTE(review): the three intrinsics below are unimplemented; they should be
+// unreachable as long as the corresponding intrinsic flags stay disabled on
+// this platform — confirm.
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  Unimplemented();
+}
+
+void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
+  fatal("FMA intrinsic is not implemented on this platform");
+}
+
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
+  fatal("vectorizedMismatch intrinsic is not implemented on this platform");
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LIR_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/register.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+
+
+// Map the operand's fpu register number to the physical float register.
+FloatRegister LIR_OprDesc::as_float_reg() const {
+  return FrameMap::nr2floatreg(fpu_regnr());
+}
+
+// A double occupies a single FP register on this platform; double_fpu()
+// below stores the same register number in both operand fields.
+FloatRegister LIR_OprDesc::as_double_reg() const {
+  return FrameMap::nr2floatreg(fpu_regnrHi());
+}
+
+// Reg2 unused.
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
+  assert(!as_FloatRegister(reg2)->is_valid(), "Not used on this platform");
+  // A double fits into one FP register, so reg1 is encoded in both the
+  // reg1 and reg2 fields of the operand.
+  return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
+                             (reg1 << LIR_OprDesc::reg2_shift) |
+                             LIR_OprDesc::double_type          |
+                             LIR_OprDesc::fpu_register         |
+                             LIR_OprDesc::double_size);
+}
+
+#ifndef PRODUCT
+// Debug-only sanity check: base must be a cpu register of pointer-capable
+// type; the index, if present, must be a 64-bit cpu register.
+void LIR_Address::verify() const {
+  assert(base()->is_cpu_register(), "wrong base operand");
+  assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand");
+  assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA,
+         "wrong type for addresses");
+}
+#endif // PRODUCT
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_LinearScan.hpp"
+#include "utilities/debug.hpp"
+
+void LinearScan::allocate_fpu_stack() {
+  // No FPU stack on ZARCH_64, so this phase must never be invoked.
+  ShouldNotCallThis();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_LINEARSCAN_S390_HPP
+#define CPU_S390_VM_C1_LINEARSCAN_S390_HPP
+
+// Returns true for register numbers the allocator processes: cpu registers
+// up to FrameMap::last_cpu_reg(), and everything numbered beyond the cpu
+// frame-map range. The asserts pin the FrameMap numbering this relies on.
+inline bool LinearScan::is_processed_reg_num(int reg_num) {
+  // unallocated: Z_thread, Z_fp, Z_SP, Z_R0_scratch, Z_R1_scratch, Z_R14
+  assert(FrameMap::Z_R14_opr->cpu_regnr() == 10, "wrong assumption below");
+  assert(FrameMap::Z_R0_opr->cpu_regnr()  == 11, "wrong assumption below");
+  assert(FrameMap::Z_R1_opr->cpu_regnr()  == 12, "wrong assumption below");
+  assert(FrameMap::Z_R8_opr->cpu_regnr()  == 13, "wrong assumption below");
+  assert(FrameMap::Z_R9_opr->cpu_regnr()  == 14, "wrong assumption below");
+  assert(FrameMap::Z_R15_opr->cpu_regnr() == 15, "wrong assumption below");
+  assert(reg_num >= 0, "invalid reg_num");
+  return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map;
+}
+
+inline int LinearScan::num_physical_regs(BasicType type) {
+  // IBM Z requires one cpu register for long,
+  // and one fpu register for double.
+  return 1;
+}
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {
+  // No type needs a register pair (see num_physical_regs above).
+  return false;
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) {
+  assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
+  return true; // No callee-saved registers on IBM Z.
+}
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+  // No special case behaviours.
+}
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
+  return false; // No special case behaviours.
+}
+
+#endif // CPU_S390_VM_C1_LINEARSCAN_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+
+// Compare the receiver's klass against the one cached in iCache; fall
+// through to the verified entry on a hit, tail-call the ic-miss runtime
+// stub on a miss. The receiver register must not be clobbered.
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
+  Label ic_miss, ic_hit;
+  verify_oop(receiver);
+  int klass_offset = oopDesc::klass_offset_in_bytes();
+
+  // A null receiver cannot match any klass: route it to the miss path
+  // explicitly whenever the klass load cannot serve as an implicit null check.
+  if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
+    if (VM_Version::has_CompareBranch()) {
+      z_cgij(receiver, 0, Assembler::bcondEqual, ic_miss);
+    } else {
+      z_ltgr(receiver, receiver);
+      z_bre(ic_miss);
+    }
+  }
+
+  compare_klass_ptr(iCache, klass_offset, receiver, false);
+  z_bre(ic_hit);
+
+  // If icache check fails, then jump to runtime routine.
+  // Note: RECEIVER must still contain the receiver!
+  bind(ic_miss); // Fix: label was branched to above but never bound.
+  load_const_optimized(Z_R1_scratch, AddressLiteral(SharedRuntime::get_ic_miss_stub()));
+  z_br(Z_R1_scratch);
+  align(CodeEntryAlignment);
+  bind(ic_hit);
+}
+
+void C1_MacroAssembler::explicit_null_check(Register base) {
+  ShouldNotCallThis(); // unused
+}
+
+// Emit the C1 method frame: bang the stack, save the return pc, push the frame.
+void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) {
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+  generate_stack_overflow_check(bang_size_in_bytes);
+  save_return_pc();
+  push_frame(frame_size_in_bytes); // TODO: Must we add z_abi_160?
+}
+
+void C1_MacroAssembler::unverified_entry(Register receiver, Register ic_klass) {
+  ShouldNotCallThis(); // unused
+}
+
+void C1_MacroAssembler::verified_entry() {
+  // Emit a trap at the verified entry when debugging C1-compiled code.
+  if (C1Breakpoint) z_illtrap(0xC1);
+}
+
+// Fast-path monitor enter.
+//   hdr:       temp, receives the object's mark word
+//   obj:       the object being locked
+//   disp_hdr:  address of the BasicObjectLock (displaced header slot) in the frame
+//   slow_case: taken when the fast path cannot acquire the lock
+void C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert_different_registers(hdr, obj, disp_hdr);
+  NearLabel done;
+
+  verify_oop(obj);
+
+  // Load object header.
+  z_lg(hdr, Address(obj, hdr_offset));
+
+  // Save object being locked into the BasicObjectLock...
+  z_stg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+
+  if (UseBiasedLocking) {
+    biased_locking_enter(obj, hdr, Z_R1_scratch, Z_R0_scratch, done, &slow_case);
+  }
+
+  // and mark it as unlocked.
+  z_oill(hdr, markOopDesc::unlocked_value);
+  // Save unlocked object header into the displaced header location on the stack.
+  z_stg(hdr, Address(disp_hdr, (intptr_t)0));
+  // Test if object header is still the same (i.e. unlocked), and if so, store the
+  // displaced header address in the object header. If it is not the same, get the
+  // object header instead.
+  z_csg(hdr, disp_hdr, hdr_offset, obj);
+  // If the object header was the same, we're done.
+  if (PrintBiasedLockingStatistics) {
+    Unimplemented();
+#if 0
+    cond_inc32(Assembler::equal,
+               ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr()));
+#endif
+  }
+  branch_optimized(Assembler::bcondEqual, done);
+  // If the object header was not the same, it is now in the hdr register.
+  // => Test if it is a stack pointer into the same stack (recursive locking), i.e.:
+  //
+  // 1) (hdr & markOopDesc::lock_mask_in_place) == 0
+  // 2) rsp <= hdr
+  // 3) hdr <= rsp + page_size
+  //
+  // These 3 tests can be done by evaluating the following expression:
+  //
+  // (hdr - Z_SP) & (~(page_size-1) | markOopDesc::lock_mask_in_place)
+  //
+  // assuming both the stack pointer and page_size have their least
+  // significant 2 bits cleared and page_size is a power of 2
+  z_sgr(hdr, Z_SP);
+
+  load_const_optimized(Z_R0_scratch, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+  z_ngr(hdr, Z_R0_scratch); // AND sets CC (result eq/ne 0).
+  // For recursive locking, the result is zero. => Save it in the displaced header
+  // location (NULL in the displaced hdr location indicates recursive locking).
+  z_stg(hdr, Address(disp_hdr, (intptr_t)0));
+  // Otherwise we don't care about the result and handle locking via runtime call.
+  branch_optimized(Assembler::bcondNotZero, slow_case);
+  // done
+  bind(done);
+}
+
+// Fast-path monitor exit, the counterpart of lock_object above.
+//   hdr:       temp, receives the displaced header
+//   obj:       receives the object from the BasicObjectLock
+//   disp_hdr:  address of the BasicObjectLock in the frame
+//   slow_case: taken when the fast path cannot release the lock
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
+  const int aligned_mask = BytesPerWord -1;
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert_different_registers(hdr, obj, disp_hdr);
+  NearLabel done;
+
+  if (UseBiasedLocking) {
+    // Load object.
+    z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+    biased_locking_exit(obj, hdr, done);
+  }
+
+  // Load displaced header.
+  z_ltg(hdr, Address(disp_hdr, (intptr_t)0));
+  // If the loaded hdr is NULL we had recursive locking, and we are done.
+  z_bre(done);
+  if (!UseBiasedLocking) {
+    // Load object.
+    z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+  }
+  verify_oop(obj);
+  // Test if object header is pointing to the displaced header, and if so, restore
+  // the displaced header in the object. If the object header is not pointing to
+  // the displaced header, get the object header instead.
+  z_csg(disp_hdr, hdr, hdr_offset, obj);
+  // If the object header was not pointing to the displaced header,
+  // we do unlocking via runtime call.
+  branch_optimized(Assembler::bcondNotEqual, slow_case);
+  // done
+  bind(done);
+}
+
+void C1_MacroAssembler::try_allocate(
+  Register obj,                        // result: Pointer to object after successful allocation.
+  Register var_size_in_bytes,          // Object size in bytes if unknown at compile time; invalid otherwise.
+  int      con_size_in_bytes,          // Object size in bytes if   known at compile time.
+  Register t1,                         // Temp register: Must be global register for incr_allocated_bytes.
+  Label&   slow_case                   // Continuation point if fast allocation fails.
+) {
+  if (UseTLAB) {
+    tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
+  } else {
+    // Allocation in shared Eden not implemented, because sapjvm allocation trace does not allow it.
+    // Unconditionally take the slow path instead.
+    z_brul(slow_case);
+  }
+}
+
+// Write mark word, klass (and array length, if any) of a freshly allocated
+// object. Rzero must contain zero; it is used to clear the klass gap when
+// compressed class pointers are enabled.
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register Rzero, Register t1) {
+  assert_different_registers(obj, klass, len, t1, Rzero);
+  // With biased locking, plain objects start with the klass' prototype
+  // header; arrays (len valid) always get the default prototype.
+  if (UseBiasedLocking && !len->is_valid()) {
+    assert_different_registers(obj, klass, len, t1);
+    z_lg(t1, Address(klass, Klass::prototype_header_offset()));
+  } else {
+    // This assumes that all prototype bits fit in an int32_t.
+    load_const_optimized(t1, (intx)markOopDesc::prototype());
+  }
+  z_stg(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+  if (len->is_valid()) {
+    // Length will be in the klass gap, if one exists.
+    z_st(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
+  } else if (UseCompressedClassPointers) {
+    store_klass_gap(Rzero, obj);  // Zero klass gap for compressed oops.
+  }
+  store_klass(klass, obj, t1);
+}
+
+// Zero-fill len_in_bytes bytes starting at objectFields using MVCLE.
+// Register constraints stem from MVCLE: objectFields/len_in_bytes must form
+// an even/odd register pair, and Rzero must be the odd half of the source pair.
+void C1_MacroAssembler::initialize_body(Register objectFields, Register len_in_bytes, Register Rzero) {
+  Label done;
+  assert_different_registers(objectFields, len_in_bytes, Rzero);
+
+  // Initialize object fields.
+  // See documentation for MVCLE instruction!!!
+  assert(objectFields->encoding()%2==0, "objectFields must be an even register");
+  assert(len_in_bytes->encoding() == (objectFields->encoding()+1), "objectFields and len_in_bytes must be a register pair");
+  assert(Rzero->encoding()%2==1, "Rzero must be an odd register");
+
+  // Use Rzero as src length, then mvcle will copy nothing
+  // and fill the object with the padding value 0.
+  move_long_ext(objectFields, as_Register(Rzero->encoding()-1), 0);
+  bind(done);
+}
+
+// Allocate and fully initialize an object of compile-time-known size.
+void C1_MacroAssembler::allocate_object(
+  Register obj,                        // Result: pointer to object after successful allocation.
+  Register t1,                         // temp register
+  Register t2,                         // temp register: Must be a global register for try_allocate.
+  int      hdr_size,                   // object header size in words
+  int      obj_size,                   // object size in words
+  Register klass,                      // object klass
+  Label&   slow_case                   // Continuation point if fast allocation fails.
+) {
+  assert_different_registers(obj, t1, t2, klass);
+
+  // Allocate space and initialize header.
+  try_allocate(obj, noreg, obj_size * wordSize, t1, slow_case);
+
+  // Note: hdr_size is not used here; initialize_object derives the header
+  // size from instanceOopDesc.
+  initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);
+}
+
+// Initialize the header of a freshly allocated instance object and zero its
+// fields. Only the compile-time-constant size path is implemented
+// (var_size_in_bytes must be noreg). t2 is used as the zero register.
+void C1_MacroAssembler::initialize_object(
+  Register obj,                        // result: Pointer to object after successful allocation.
+  Register klass,                      // object klass
+  Register var_size_in_bytes,          // Object size in bytes if unknown at compile time; invalid otherwise.
+  int      con_size_in_bytes,          // Object size in bytes if   known at compile time.
+  Register t1,                         // temp register
+  Register t2                          // temp register
+ ) {
+  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
+         "con_size_in_bytes is not multiple of alignment");
+  assert(var_size_in_bytes == noreg, "not implemented");
+  const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
+
+  const Register Rzero = t2;
+
+  z_xgr(Rzero, Rzero); // XOR register with itself: cheap way to produce 0.
+  initialize_header(obj, klass, noreg, Rzero, t1);
+
+  // Clear rest of allocated space.
+  const int threshold = 4 * BytesPerWord;
+  if (con_size_in_bytes <= threshold) {
+    // Use explicit null stores.
+    // code size = 6*n bytes (n = number of fields to clear)
+    for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
+      z_stg(Rzero, Address(obj, i));
+  } else {
+    // Code size generated by initialize_body() is 16.
+    // Z_R0/Z_R1 form the even/odd register pair required by initialize_body().
+    Register object_fields = Z_R0_scratch;
+    Register len_in_bytes  = Z_R1_scratch;
+    z_la(object_fields, hdr_size_in_bytes, obj);
+    load_const_optimized(len_in_bytes, con_size_in_bytes - hdr_size_in_bytes);
+    initialize_body(object_fields, len_in_bytes, Rzero);
+  }
+
+  // Dtrace support is unimplemented.
+  //  if (CURRENT_ENV->dtrace_alloc_probes()) {
+  //    assert(obj == rax, "must be");
+  //    call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id)));
+  //  }
+
+  verify_oop(obj);
+}
+
+// Fast-path allocation of an array: bounds-check the length, compute the
+// aligned allocation size, allocate, then initialize the header (including
+// the length field) and zero the body. Branches to slow_case on excessive
+// length or failed fast allocation.
+void C1_MacroAssembler::allocate_array(
+  Register obj,                        // result: Pointer to array after successful allocation.
+  Register len,                        // array length
+  Register t1,                         // temp register
+  Register t2,                         // temp register
+  int      hdr_size,                   // object header size in words
+  int      elt_size,                   // element size in bytes
+  Register klass,                      // object klass
+  Label&   slow_case                   // Continuation point if fast allocation fails.
+) {
+  assert_different_registers(obj, len, t1, t2, klass);
+
+  // Determine alignment mask.
+  assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
+
+  // Check for negative or excessive length.
+  // Unsigned 64-bit compare: a negative (sign-extended) len appears huge
+  // and is routed to slow_case as well.
+  compareU64_and_branch(len, (int32_t)max_array_allocation_length, bcondHigh, slow_case);
+
+  // Compute array size.
+  // Note: If 0 <= len <= max_length, len*elt_size + header + alignment is
+  // smaller or equal to the largest integer. Also, since top is always
+  // aligned, we can do the alignment here instead of at the end address
+  // computation.
+  const Register arr_size = t2;
+  switch (elt_size) {
+    case  1: lgr_if_needed(arr_size, len); break;
+    case  2: z_sllg(arr_size, len, 1); break; // Shift = multiply by element size.
+    case  4: z_sllg(arr_size, len, 2); break;
+    case  8: z_sllg(arr_size, len, 3); break;
+    default: ShouldNotReachHere();
+  }
+  add2reg(arr_size, hdr_size * wordSize + MinObjAlignmentInBytesMask); // Add space for header & alignment.
+  z_nill(arr_size, (~MinObjAlignmentInBytesMask) & 0xffff);            // Align array size.
+
+  // Variable-size allocation: arr_size register, constant size 0.
+  try_allocate(obj, arr_size, 0, t1, slow_case);
+
+  initialize_header(obj, klass, len, noreg, t1);
+
+  // Clear rest of allocated space.
+  Label done;
+  Register object_fields = t1;
+  Register Rzero = Z_R1_scratch;
+  z_aghi(arr_size, -(hdr_size * BytesPerWord)); // Body size; sets the condition code.
+  z_bre(done); // Jump if size of fields is zero.
+  z_la(object_fields, hdr_size * BytesPerWord, obj);
+  z_xgr(Rzero, Rzero);
+  initialize_body(object_fields, arr_size, Rzero);
+  bind(done);
+
+  // Dtrace support is unimplemented.
+  // if (CURRENT_ENV->dtrace_alloc_probes()) {
+  //   assert(obj == rax, "must be");
+  //   call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id)));
+  // }
+
+  verify_oop(obj);
+}
+
+
+#ifndef PRODUCT
+
+// Verify an oop stored on the stack at the given offset.
+// Not implemented on s390 (the commented code is the SPARC-style template).
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
+  Unimplemented();
+  // if (!VerifyOops) return;
+  // verify_oop_addr(Address(SP, stack_offset + STACK_BIAS));
+}
+
+// Verify that r contains a valid, non-NULL oop (only with +VerifyOops).
+// Stops the VM with a diagnostic message if r is NULL.
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
+  if (!VerifyOops) return;
+  NearLabel not_null;
+  compareU64_and_branch(r, (intptr_t)0, bcondNotEqual, not_null);
+  stop("non-null oop required");
+  bind(not_null);
+  verify_oop(r);
+}
+
+// Debug-only helper: overwrite all CPU registers (except the up-to-three
+// preserved ones, Z_SP and Z_thread) with the recognizable marker 0xc1dead,
+// so use of a stale value is noticed quickly. The marker is materialized
+// once and then propagated to the remaining registers with register-to-
+// register copies.
+void C1_MacroAssembler::invalidate_registers(Register preserve1,
+                                             Register preserve2,
+                                             Register preserve3) {
+  Register dead_value = noreg;
+  for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    if (r != preserve1 && r != preserve2 && r != preserve3 && r != Z_SP && r != Z_thread) {
+      if (dead_value == noreg) {
+        // First clobbered register: load the marker constant.
+        load_const_optimized(r, 0xc1dead);
+        dead_value = r;
+      } else {
+        // Subsequent registers: copy the marker from the first one.
+        z_lgr(r, dead_value);
+      }
+    }
+  }
+}
+
+#endif // !PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
+#define CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
+
+  // Platform-dependent C1 MacroAssembler initialization hook; no-op on s390.
+  void pd_init() { /* nothing to do */ }
+
+ public:
+  void try_allocate(
+    Register obj,                      // result: Pointer to object after successful allocation.
+    Register var_size_in_bytes,        // Object size in bytes if unknown at compile time; invalid otherwise.
+    int      con_size_in_bytes,        // Object size in bytes if   known at compile time.
+    Register t1,                       // temp register
+    Label&   slow_case                 // Continuation point if fast allocation fails.
+  );
+
+  void initialize_header(Register obj, Register klass, Register len, Register Rzero, Register t1);
+  void initialize_body(Register objectFields, Register len_in_bytes, Register Rzero);
+
+  // locking
+  // hdr     : Used to hold locked markOop to be CASed into obj, contents destroyed.
+  // obj     : Must point to the object to lock, contents preserved.
+  // disp_hdr: Must point to the displaced header location, contents preserved.
+  // Returns code offset at which to add null check debug information.
+  void lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case);
+
+  // unlocking
+  // hdr     : Used to hold original markOop to be CASed back into obj, contents destroyed.
+  // obj     : Must point to the object to lock, contents preserved.
+  // disp_hdr: Must point to the displaced header location, contents destroyed.
+  void unlock_object(Register hdr, Register obj, Register lock, Label& slow_case);
+
+  void initialize_object(
+    Register obj,                      // result: Pointer to object after successful allocation.
+    Register klass,                    // object klass
+    Register var_size_in_bytes,        // Object size in bytes if unknown at compile time; invalid otherwise.
+    int      con_size_in_bytes,        // Object size in bytes if   known at compile time.
+    Register t1,                       // temp register
+    Register t2                        // temp register
+  );
+
+  // Allocation of fixed-size objects.
+  // This can also be used to allocate fixed-size arrays, by setting
+  // hdr_size correctly and storing the array length afterwards.
+  void allocate_object(
+    Register obj,                      // result: Pointer to object after successful allocation.
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    int      hdr_size,                 // object header size in words
+    int      obj_size,                 // object size in words
+    Register klass,                    // object klass
+    Label&   slow_case                 // Continuation point if fast allocation fails.
+  );
+
+  enum {
+    max_array_allocation_length = 0x01000000 // Upper bound for fast-path array allocation; value inherited from the SPARC port (there it fits a single sethi immediate).
+  };
+
+  // Allocation of arrays.
+  void allocate_array(
+    Register obj,                      // result: Pointer to array after successful allocation.
+    Register len,                      // array length
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    int      hdr_size,                 // object header size in words
+    int      elt_size,                 // element size in bytes
+    Register klass,                    // object klass
+    Label&   slow_case                 // Continuation point if fast allocation fails.
+  );
+
+  // Invalidates registers in this window.
+  void invalidate_registers(Register preserve1 = noreg, Register preserve2 = noreg,
+                            Register preserve3 = noreg) PRODUCT_RETURN;
+
+  // Emit a single no-op instruction.
+  void nop() { z_nop(); }
+
+  // This platform only uses signal-based (implicit) null checks, so the Label
+  // is ignored; the check is delegated to MacroAssembler::null_check.
+  void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
+
+#endif // CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_Runtime1_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Defs.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "register_s390.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_s390.inline.hpp"
+#include "registerSaver_s390.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+// Implementation of StubAssembler
+
+// Perform a call from a C1 stub into the VM runtime at entry_point.
+// Arguments must already have been placed in Z_ARG2.. by the callers below;
+// Z_ARG1 is loaded with the current thread here. Sets up the last Java
+// frame, performs the call, forwards any pending exception, and fetches the
+// oop/metadata results from the thread if the result registers are valid.
+// Returns the code offset of the recorded PC — callers register it with
+// OopMapSet::add_gc_map.
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry_point, int number_of_arguments) {
+  set_num_rt_args(0); // Nothing on stack.
+  assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different");
+
+  // We cannot trust that code generated by the C++ compiler saves R14
+  // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
+  // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
+  // Therefore we load the PC into Z_R1_scratch and let set_last_Java_frame() save
+  // it into the frame anchor.
+  address pc = get_PC(Z_R1_scratch);
+  int call_offset = (int)(pc - addr_at(0)); // Offset of the recorded PC within this stub's code.
+  set_last_Java_frame(Z_SP, Z_R1_scratch);
+
+  // ARG1 must hold thread address.
+  z_lgr(Z_ARG1, Z_thread);
+
+  address return_pc = NULL;
+  align_call_far_patchable(this->pc());
+  return_pc = call_c_opt(entry_point);
+  assert(return_pc != NULL, "const section overflow");
+
+  reset_last_Java_frame();
+
+  // Check for pending exceptions.
+  {
+    load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));
+
+    // This used to conditionally jump to forward_exception however it is
+    // possible if we relocate that the branch will not reach. So we must jump
+    // around so we can always reach.
+
+    Label ok;
+    z_bre(ok); // Bcondequal is the same as bcondZero.
+
+    // exception pending => forward to exception handler
+
+    // Make sure that the vm_results are cleared.
+    if (oop_result1->is_valid()) {
+      clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
+    }
+    if (metadata_result->is_valid()) {
+      clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(jlong));
+    }
+    // no_frame_size means this stub runs without its own frame: pop the
+    // caller's stub frame before jumping to the forward-exception entry.
+    if (frame_size() == no_frame_size) {
+      // Pop the stub frame.
+      pop_frame();
+      restore_return_pc();
+      load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
+      z_br(Z_R1);
+    } else if (_stub_id == Runtime1::forward_exception_id) {
+      // forward_exception must not recurse into itself.
+      should_not_reach_here();
+    } else {
+      load_const_optimized(Z_R1, Runtime1::entry_for (Runtime1::forward_exception_id));
+      z_br(Z_R1);
+    }
+
+    bind(ok);
+  }
+
+  // Get oop results if there are any and reset the values in the thread.
+  if (oop_result1->is_valid()) {
+    get_vm_result(oop_result1);
+  }
+  if (metadata_result->is_valid()) {
+    get_vm_result_2(metadata_result);
+  }
+
+  return call_offset;
+}
+
+
+// call_RT with one runtime argument; the argument is moved to Z_ARG2
+// (Z_ARG1 is reserved for the current thread).
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg1);
+  return call_RT(oop_result1, metadata_result, entry, 1);
+}
+
+
+// call_RT with two runtime arguments, moved to Z_ARG2/Z_ARG3
+// (Z_ARG1 is reserved for the thread). The assert guards against arg2
+// having been clobbered by the first move.
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg1);
+  assert(arg2 != Z_ARG2, "smashed argument");
+  lgr_if_needed(Z_ARG3, arg2);
+  return call_RT(oop_result1, metadata_result, entry, 2);
+}
+
+
+// call_RT with three runtime arguments, moved to Z_ARG2..Z_ARG4
+// (Z_ARG1 is reserved for the thread). The asserts guard against an
+// incoming argument register having been clobbered by an earlier move.
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg1);
+  assert(arg2 != Z_ARG2, "smashed argument");
+  lgr_if_needed(Z_ARG3, arg2);
+  // arg3 must not live in Z_ARG2 either: it would already have been
+  // overwritten by the first move above.
+  assert(arg3 != Z_ARG2 && arg3 != Z_ARG3, "smashed argument");
+  lgr_if_needed(Z_ARG4, arg3);
+  return call_RT(oop_result1, metadata_result, entry, 3);
+}
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+#ifndef PRODUCT
+#undef  __
+#define __ (Verbose ? (sasm->block_comment(FILE_AND_LINE),sasm):sasm)->
+#endif // !PRODUCT
+
+#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
+#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
+
+// Compute the frame size for a stub that saves all registers, record it in
+// the StubAssembler, and return the OopMap describing the register save
+// area (without emitting any save code).
+static OopMap* generate_oop_map(StubAssembler* sasm) {
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_registers;
+  int frame_size_in_slots =
+    RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::generate_oop_map(sasm, reg_set);
+}
+
+// Emit code saving all live registers (optionally including the FPU
+// registers), record the resulting frame size in the StubAssembler, and
+// return the OopMap describing the save area. return_pc names the register
+// holding the PC to be stored as the frame's return address.
+static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true, Register return_pc = Z_R14) {
+  __ block_comment("save_live_registers");
+  RegisterSaver::RegisterSet reg_set =
+    save_fpu_registers ? RegisterSaver::all_registers : RegisterSaver::all_integer_registers;
+  int frame_size_in_slots =
+    RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::save_live_registers(sasm, reg_set, return_pc);
+}
+
+// Like save_live_registers(), but Z_R2 is excluded from the save set so that
+// a runtime-call result delivered in Z_R2 survives the matching
+// restore_live_registers_except_r2(). Only the save_fpu_registers == true
+// variant is implemented.
+static OopMap* save_live_registers_except_r2(StubAssembler* sasm, bool save_fpu_registers = true) {
+  if (!save_fpu_registers) {
+    __ unimplemented(FILE_AND_LINE);
+  }
+  __ block_comment("save_live_registers_except_r2");
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_registers_except_r2;
+  int frame_size_in_slots =
+      RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::save_live_registers(sasm, reg_set);
+}
+
+// Emit code saving only the volatile (caller-save) registers, record the
+// frame size in the StubAssembler, and return the OopMap for the save area.
+static OopMap* save_volatile_registers(StubAssembler* sasm, Register return_pc = Z_R14) {
+  __ block_comment("save_volatile_registers");
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_volatile_registers;
+  int frame_size_in_slots =
+    RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::save_live_registers(sasm, reg_set, return_pc);
+}
+
+// Restore the registers saved by save_live_registers(); the FPU flag must
+// match the one passed at save time so the same register set is used.
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  __ block_comment("restore_live_registers");
+  RegisterSaver::RegisterSet reg_set =
+    restore_fpu_registers ? RegisterSaver::all_registers : RegisterSaver::all_integer_registers;
+  RegisterSaver::restore_live_registers(sasm, reg_set);
+}
+
+// Restore the registers saved by save_live_registers_except_r2(); Z_R2 keeps
+// its current value (typically a runtime-call result). Only the
+// restore_fpu_registers == true variant is implemented.
+static void restore_live_registers_except_r2(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  if (!restore_fpu_registers) {
+    __ unimplemented(FILE_AND_LINE);
+  }
+  __ block_comment("restore_live_registers_except_r2");
+  RegisterSaver::restore_live_registers(sasm, RegisterSaver::all_registers_except_r2);
+}
+
+// Restore the registers saved by save_volatile_registers().
+static void restore_volatile_registers(StubAssembler* sasm) {
+  __ block_comment("restore_volatile_registers");
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_volatile_registers;
+  RegisterSaver::restore_live_registers(sasm, reg_set);
+}
+
+// Platform-dependent Runtime1 initialization; nothing required on s390.
+void Runtime1::initialize_pd() {
+  // Nothing to do.
+}
+
+// Generate a stub that enters the VM to throw an exception via 'target'.
+// The stub never returns normally: the runtime call unwinds to an exception
+// handler, so control past call_RT is unreachable.
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+  // Make a frame and preserve the caller's caller-save registers.
+  OopMap* oop_map = save_live_registers(sasm);
+  int call_offset;
+  if (!has_argument) {
+    call_offset = __ call_RT(noreg, noreg, target);
+  } else {
+    // Arguments are expected pre-loaded in Z_R1_scratch / Z_R0_scratch by
+    // the code that jumps to this stub.
+    call_offset = __ call_RT(noreg, noreg, target, Z_R1_scratch, Z_R0_scratch);
+  }
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  // The runtime call throws; we must not fall through.
+  __ should_not_reach_here();
+  return oop_maps;
+}
+
+// Unwind the current activation on an exception: look up the caller's
+// exception handler via the shared runtime (using the return address in
+// Z_EXC_PC) and jump to it with Z_EXC_OOP/Z_EXC_PC restored. Only a
+// temporary ABI frame is pushed around the leaf call; no stub frame exists.
+void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
+  // Incoming parameters: Z_EXC_OOP and Z_EXC_PC.
+  // Keep copies in callee-saved registers during runtime call.
+  const Register exception_oop_callee_saved = Z_R11;
+  const Register exception_pc_callee_saved = Z_R12;
+  // Other registers used in this stub.
+  const Register handler_addr = Z_R4;
+
+  // Debug builds: clobber everything except Z_EXC_OOP and Z_EXC_PC, which
+  // are the only registers expected to be valid at this point.
+  __ invalidate_registers(Z_EXC_OOP, Z_EXC_PC);
+
+  // Check that fields in JavaThread for exception oop and issuing pc are set.
+  __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_oop_offset()), Z_thread, "exception oop already set : " FILE_AND_LINE, 0);
+  __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_pc_offset()), Z_thread, "exception pc already set : " FILE_AND_LINE, 0);
+
+  // Save exception_oop and pc in callee-saved register to preserve it
+  // during runtime calls.
+  __ verify_not_null_oop(Z_EXC_OOP);
+  __ lgr_if_needed(exception_oop_callee_saved, Z_EXC_OOP);
+  __ lgr_if_needed(exception_pc_callee_saved, Z_EXC_PC);
+
+  __ push_frame_abi160(0); // Runtime code needs the z_abi_160.
+
+  // Search the exception handler address of the caller (using the return address).
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), Z_thread, Z_EXC_PC);
+  // Z_RET(Z_R2): exception handler address of the caller.
+
+  __ pop_frame();
+
+  // Debug builds: only the saved copies and the call result may be used now.
+  __ invalidate_registers(exception_oop_callee_saved, exception_pc_callee_saved, Z_RET);
+
+  // Move result of call into correct register.
+  __ lgr_if_needed(handler_addr, Z_RET);
+
+  // Restore exception oop and pc to Z_EXC_OOP and Z_EXC_PC (required convention of exception handler).
+  __ lgr_if_needed(Z_EXC_OOP, exception_oop_callee_saved);
+  __ lgr_if_needed(Z_EXC_PC, exception_pc_callee_saved);
+
+  // Verify that there is really a valid exception in Z_EXC_OOP.
+  __ verify_not_null_oop(Z_EXC_OOP);
+
+  __ z_br(handler_addr); // Jump to exception handler.
+}
+
+// Generate a stub that calls the runtime patching routine 'target'. On a
+// zero return value the patched instruction is re-executed (plain return);
+// on non-zero (nmethod deoptimized) control continues at the deoptimization
+// blob's reexecution entry.
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+  // Make a frame and preserve the caller's caller-save registers.
+  OopMap* oop_map = save_live_registers(sasm);
+
+  // Call the runtime patching routine, returns non-zero if nmethod got deopted.
+  int call_offset = __ call_RT(noreg, noreg, target);
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  // Re-execute the patched instruction or, if the nmethod was
+  // deoptimized, return to the deoptimization handler entry that will
+  // cause re-execution of the current bytecode.
+  DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+  assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+  // Set the condition code from the return value BEFORE restoring registers.
+  // NOTE(review): this relies on restore_live_registers() not changing the
+  // condition code — confirm for RegisterSaver::restore_live_registers.
+  __ z_ltr(Z_RET, Z_RET); // return value == 0
+
+  restore_live_registers(sasm);
+
+  __ z_bcr(Assembler::bcondZero, Z_R14); // Plain return: re-execute the patched instruction.
+
+  // Return to the deoptimization handler entry for unpacking and
+  // re-execution; if we simply returned then we'd deopt as if any call we
+  // patched had just returned.
+  AddressLiteral dest(deopt_blob->unpack_with_reexecution());
+  __ load_const_optimized(Z_R1_scratch, dest);
+  __ z_br(Z_R1_scratch);
+
+  return oop_maps;
+}
+
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+
+  // for better readability
+  const bool must_gc_arguments = true;
+  const bool dont_gc_arguments = false;
+
+  // Default value; overwritten for some optimized stubs that are
+  // called from methods that do not use the fpu.
+  bool save_fpu_registers = true;
+
+  // Stub code and info for the different stubs.
+  OopMapSet* oop_maps = NULL;
+  switch (id) {
+    case forward_exception_id:
+      {
+        oop_maps = generate_handle_exception(id, sasm);
+        // will not return
+      }
+      break;
+
+    case new_instance_id:
+    case fast_new_instance_id:
+    case fast_new_instance_init_check_id:
+      {
+        Register klass    = Z_R11; // Incoming
+        Register obj      = Z_R2;  // Result
+
+        if (id == new_instance_id) {
+          __ set_info("new_instance", dont_gc_arguments);
+        } else if (id == fast_new_instance_id) {
+          __ set_info("fast new_instance", dont_gc_arguments);
+        } else {
+          assert(id == fast_new_instance_init_check_id, "bad StubID");
+          __ set_info("fast new_instance init check", dont_gc_arguments);
+        }
+
+        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
+            UseTLAB && FastTLABRefill) {
+          // Sapjvm: must call RT to generate allocation events.
+        }
+
+        OopMap* map = save_live_registers_except_r2(sasm);
+        int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r2(sasm);
+
+        __ verify_oop(obj);
+        __ z_br(Z_R14);
+      }
+      break;
+
+    case counter_overflow_id:
+      {
+        // Arguments :
+        //   bci    : stack param 0
+        //   method : stack param 1
+        //
+        Register bci = Z_ARG2, method = Z_ARG3;
+        // frame size in bytes
+        OopMap* map = save_live_registers(sasm);
+        const int frame_size = sasm->frame_size() * VMRegImpl::slots_per_word * VMRegImpl::stack_slot_size;
+        __ z_lg(bci,    0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
+        __ z_lg(method, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm);
+        __ z_br(Z_R14);
+      }
+      break;
+    case new_type_array_id:
+    case new_object_array_id:
+      {
+        Register length   = Z_R13; // Incoming
+        Register klass    = Z_R11; // Incoming
+        Register obj      = Z_R2;  // Result
+
+        if (id == new_type_array_id) {
+          __ set_info("new_type_array", dont_gc_arguments);
+        } else {
+          __ set_info("new_object_array", dont_gc_arguments);
+        }
+
+#ifdef ASSERT
+        // Assert object type is really an array of the proper kind.
+        {
+          NearLabel ok;
+          Register t0 = obj;
+          __ mem2reg_opt(t0, Address(klass, Klass::layout_helper_offset()), false);
+          __ z_sra(t0, Klass::_lh_array_tag_shift);
+          int tag = ((id == new_type_array_id)
+                     ? Klass::_lh_array_tag_type_value
+                     : Klass::_lh_array_tag_obj_value);
+          __ compare32_and_branch(t0, tag, Assembler::bcondEqual, ok);
+          __ stop("assert(is an array klass)");
+          __ should_not_reach_here();
+          __ bind(ok);
+        }
+#endif // ASSERT
+
+        if (UseTLAB && FastTLABRefill) {
+          // sapjvm: must call RT to generate allocation events.
+        }
+
+        OopMap* map = save_live_registers_except_r2(sasm);
+        int call_offset;
+        if (id == new_type_array_id) {
+          call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
+        } else {
+          call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
+        }
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r2(sasm);
+
+        __ verify_oop(obj);
+        __ z_br(Z_R14);
+      }
+      break;
+
+    case new_multi_array_id:
+      { __ set_info("new_multi_array", dont_gc_arguments);
+        // Z_R3,: klass
+        // Z_R4,: rank
+        // Z_R5: address of 1st dimension
+        OopMap* map = save_live_registers(sasm);
+        int call_offset = __ call_RT(Z_R2, noreg, CAST_FROM_FN_PTR(address, new_multi_array), Z_R3, Z_R4, Z_R5);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r2(sasm);
+
+        // Z_R2,: new multi array
+        __ verify_oop(Z_R2);
+        __ z_br(Z_R14);
+      }
+      break;
+
+    case register_finalizer_id:
+      {
+        __ set_info("register_finalizer", dont_gc_arguments);
+
+        // Load the klass and check the has finalizer flag.
+        Register klass = Z_ARG2;
+        __ load_klass(klass, Z_ARG1);
+        __ testbit(Address(klass, Klass::access_flags_offset()), exact_log2(JVM_ACC_HAS_FINALIZER));
+        __ z_bcr(Assembler::bcondAllZero, Z_R14); // Return if bit is not set.
+
+        OopMap* oop_map = save_live_registers(sasm);
+        int call_offset = __ call_RT(noreg, noreg,
+                                     CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), Z_ARG1);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        // Now restore all the live registers.
+        restore_live_registers(sasm);
+
+        __ z_br(Z_R14);
+      }
+      break;
+
+    case throw_range_check_failed_id:
+      { __ set_info("range_check_failed", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
+      }
+      break;
+
+    case throw_index_exception_id:
+      { __ set_info("index_range_check_failed", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
+      }
+      break;
+    case throw_div0_exception_id:
+      { __ set_info("throw_div0_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
+      }
+      break;
+    case throw_null_pointer_exception_id:
+      { __ set_info("throw_null_pointer_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
+      }
+      break;
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      { __ set_info("handle_exception", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+    case handle_exception_from_callee_id:
+      { __ set_info("handle_exception_from_callee", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+    case unwind_exception_id:
+      { __ set_info("unwind_exception", dont_gc_arguments);
+        // Note: no stubframe since we are about to leave the current
+        // activation and we are calling a leaf VM function only.
+        generate_unwind_exception(sasm);
+      }
+      break;
+    case throw_array_store_exception_id:
+      { __ set_info("throw_array_store_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
+      }
+      break;
+    case throw_class_cast_exception_id:
+    { // Z_R1_scratch: object
+      __ set_info("throw_class_cast_exception", dont_gc_arguments);
+      oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
+    }
+    break;
+    case throw_incompatible_class_change_error_id:
+      { __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
+      }
+      break;
+    case slow_subtype_check_id:
+    {
+      // Arguments :
+      //   sub  : stack param 0
+      //   super: stack param 1
+      //   raddr: Z_R14, blown by call
+      //
+      // Result : condition code 0 for match (bcondEqual will be true),
+      //          condition code 2 for miss  (bcondNotEqual will be true)
+      NearLabel miss;
+      const Register Rsubklass   = Z_ARG2; // sub
+      const Register Rsuperklass = Z_ARG3; // super
+
+      // No args, but tmp registers that are killed.
+      const Register Rlength     = Z_ARG4; // cache array length
+      const Register Rarray_ptr  = Z_ARG5; // Current value from cache array.
+
+      if (UseCompressedOops) {
+        assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
+      }
+
+      const int frame_size = 4*BytesPerWord + frame::z_abi_160_size;
+      // Save return pc. This is not necessary, but could be helpful
+      // in the case of crashes.
+      __ save_return_pc();
+      __ push_frame(frame_size);
+      // Save registers before changing them.
+      int i = 0;
+      __ z_stg(Rsubklass,   (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_stg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_stg(Rlength,     (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_stg(Rarray_ptr,  (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check");
+
+      // Get sub and super from stack.
+      __ z_lg(Rsubklass,   0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
+      __ z_lg(Rsuperklass, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
+
+      __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass, Rarray_ptr, Rlength, NULL, &miss);
+
+      // Match falls through here.
+      i = 0;
+      __ z_lg(Rsubklass,   (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_lg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_lg(Rlength,     (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_lg(Rarray_ptr,  (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check");
+      __ pop_frame();
+      // Return pc is still in R_14.
+      __ clear_reg(Z_R0_scratch);         // Zero indicates a match. Set CC 0 (bcondEqual will be true)
+      __ z_br(Z_R14);
+
+      __ BIND(miss);
+      i = 0;
+      __ z_lg(Rsubklass,   (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_lg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_lg(Rlength,     (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      __ z_lg(Rarray_ptr,  (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+      assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check");
+      __ pop_frame();
+      // return pc is still in R_14
+      __ load_const_optimized(Z_R0_scratch, 1); // One indicates a miss.
+      __ z_ltgr(Z_R0_scratch, Z_R0_scratch);    // Set CC 2 (bcondNotEqual will be true).
+      __ z_br(Z_R14);
+    }
+    break;
+    case monitorenter_nofpu_id:
+    case monitorenter_id:
+      { // Z_R1_scratch : object
+        // Z_R13       : lock address (see LIRGenerator::syncTempOpr())
+        __ set_info("monitorenter", dont_gc_arguments);
+
+        int save_fpu_registers = (id == monitorenter_id);
+        // Make a frame and preserve the caller's caller-save registers.
+        OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), Z_R1_scratch, Z_R13);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+        restore_live_registers(sasm, save_fpu_registers);
+
+        __ z_br(Z_R14);
+      }
+      break;
+
+    case monitorexit_nofpu_id:
+    case monitorexit_id:
+      { // Z_R1_scratch : lock address
+        // Note: really a leaf routine but must setup last java sp
+        //   => Use call_RT for now (speed can be improved by
+        //      doing last java sp setup manually).
+        __ set_info("monitorexit", dont_gc_arguments);
+
+        int save_fpu_registers = (id == monitorexit_id);
+        // Make a frame and preserve the caller's caller-save registers.
+        OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), Z_R1_scratch);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+        restore_live_registers(sasm, save_fpu_registers);
+
+        __ z_br(Z_R14);
+      }
+      break;
+
+    case deoptimize_id:
+      { // Args: Z_R1_scratch: trap request
+        __ set_info("deoptimize", dont_gc_arguments);
+        Register trap_request = Z_R1_scratch;
+        OopMap* oop_map = save_live_registers(sasm);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), trap_request);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+        restore_live_registers(sasm);
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        AddressLiteral dest(deopt_blob->unpack_with_reexecution());
+        __ load_const_optimized(Z_R1_scratch, dest);
+        __ z_br(Z_R1_scratch);
+      }
+      break;
+
+    case access_field_patching_id:
+      { __ set_info("access_field_patching", dont_gc_arguments);
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
+      }
+      break;
+
+    case load_klass_patching_id:
+      { __ set_info("load_klass_patching", dont_gc_arguments);
+        // We should set up register map.
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
+      }
+      break;
+
+    case load_mirror_patching_id:
+      { __ set_info("load_mirror_patching", dont_gc_arguments);
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
+      }
+      break;
+
+    case load_appendix_patching_id:
+      { __ set_info("load_appendix_patching", dont_gc_arguments);
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
+      }
+      break;
+#if 0
+    case dtrace_object_alloc_id:
+      { // rax,: object
+        StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
+        // We can't gc here so skip the oopmap but make sure that all
+        // the live registers get saved.
+        save_live_registers(sasm, 1);
+
+        __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
+        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc)));
+        NOT_LP64(__ pop(rax));
+
+        restore_live_registers(sasm);
+      }
+      break;
+
+    case fpu2long_stub_id:
+      {
+        // rax, and rdx are destroyed, but should be free since the result is returned there
+        // preserve rsi,ecx
+        __ push(rsi);
+        __ push(rcx);
+        LP64_ONLY(__ push(rdx);)
+
+        // check for NaN
+        Label return0, do_return, return_min_jlong, do_convert;
+
+        Address value_high_word(rsp, wordSize + 4);
+        Address value_low_word(rsp, wordSize);
+        Address result_high_word(rsp, 3*wordSize + 4);
+        Address result_low_word(rsp, 3*wordSize);
+
+        __ subptr(rsp, 32);                    // more than enough on 32bit
+        __ fst_d(value_low_word);
+        __ movl(rax, value_high_word);
+        __ andl(rax, 0x7ff00000);
+        __ cmpl(rax, 0x7ff00000);
+        __ jcc(Assembler::notEqual, do_convert);
+        __ movl(rax, value_high_word);
+        __ andl(rax, 0xfffff);
+        __ orl(rax, value_low_word);
+        __ jcc(Assembler::notZero, return0);
+
+        __ bind(do_convert);
+        __ fnstcw(Address(rsp, 0));
+        __ movzwl(rax, Address(rsp, 0));
+        __ orl(rax, 0xc00);
+        __ movw(Address(rsp, 2), rax);
+        __ fldcw(Address(rsp, 2));
+        __ fwait();
+        __ fistp_d(result_low_word);
+        __ fldcw(Address(rsp, 0));
+        __ fwait();
+        // This gets the entire long in rax on 64bit
+        __ movptr(rax, result_low_word);
+        // testing of high bits
+        __ movl(rdx, result_high_word);
+        __ mov(rcx, rax);
+        // What the heck is the point of the next instruction???
+        __ xorl(rcx, 0x0);
+        __ movl(rsi, 0x80000000);
+        __ xorl(rsi, rdx);
+        __ orl(rcx, rsi);
+        __ jcc(Assembler::notEqual, do_return);
+        __ fldz();
+        __ fcomp_d(value_low_word);
+        __ fnstsw_ax();
+        __ testl(rax, 0x4100);  // ZF & CF == 0
+        __ jcc(Assembler::equal, return_min_jlong);
+        // return max_jlong
+        __ mov64(rax, CONST64(0x7fffffffffffffff));
+        __ jmp(do_return);
+
+        __ bind(return_min_jlong);
+        __ mov64(rax, UCONST64(0x8000000000000000));
+        __ jmp(do_return);
+
+        __ bind(return0);
+        __ fpop();
+        __ xorptr(rax, rax);
+
+        __ bind(do_return);
+        __ addptr(rsp, 32);
+        LP64_ONLY(__ pop(rdx);)
+        __ pop(rcx);
+        __ pop(rsi);
+        __ ret(0);
+      }
+      break;
+#endif // TODO
+
+#if INCLUDE_ALL_GCS
+    case g1_pre_barrier_slow_id:
+      { // Z_R1_scratch: previous value of memory
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ should_not_reach_here(FILE_AND_LINE);
+          break;
+        }
+
+        __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
+
+        Register pre_val = Z_R1_scratch;
+        Register tmp  = Z_R6; // Must be non-volatile because it is used to save pre_val.
+        Register tmp2 = Z_R7;
+
+        Label refill, restart;
+        int satb_q_index_byte_offset =
+          in_bytes(JavaThread::satb_mark_queue_offset() +
+                   SATBMarkQueue::byte_offset_of_index());
+        int satb_q_buf_byte_offset =
+          in_bytes(JavaThread::satb_mark_queue_offset() +
+                   SATBMarkQueue::byte_offset_of_buf());
+
+        // Save tmp registers (see assertion in G1PreBarrierStub::emit_code()).
+        __ z_stg(tmp,  0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+        __ z_stg(tmp2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+
+        __ bind(restart);
+        // Load the index into the SATB buffer. SATBMarkQueue::_index is a
+        // size_t so ld_ptr is appropriate.
+        __ z_ltg(tmp, satb_q_index_byte_offset, Z_R0, Z_thread);
+
+        // index == 0?
+        __ z_brz(refill);
+
+        __ z_lg(tmp2, satb_q_buf_byte_offset, Z_thread);
+        __ add2reg(tmp, -oopSize);
+
+        __ z_stg(pre_val, 0, tmp, tmp2); // [_buf + index] := <address_of_card>
+        __ z_stg(tmp, satb_q_index_byte_offset, Z_thread);
+
+        // Restore tmp registers (see assertion in G1PreBarrierStub::emit_code()).
+        __ z_lg(tmp,  0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+        __ z_lg(tmp2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+        __ z_br(Z_R14);
+
+        __ bind(refill);
+        save_volatile_registers(sasm);
+        __ z_lgr(tmp, pre_val); // save pre_val
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SATBMarkQueueSet::handle_zero_index_for_thread),
+                        Z_thread);
+        __ z_lgr(pre_val, tmp); // restore pre_val
+        restore_volatile_registers(sasm);
+        __ z_bru(restart);
+      }
+      break;
+
+    case g1_post_barrier_slow_id:
+      { // Z_R1_scratch: oop address, address of updated memory slot
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ should_not_reach_here(FILE_AND_LINE);
+          break;
+        }
+
+        __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
+
+        Register addr_oop  = Z_R1_scratch;
+        Register addr_card = Z_R1_scratch;
+        Register r1        = Z_R6; // Must be saved/restored.
+        Register r2        = Z_R7; // Must be saved/restored.
+        Register cardtable = r1;   // Must be non-volatile, because it is used to save addr_card.
+        jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
+
+        // Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
+        __ z_stg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+
+        Label not_already_dirty, restart, refill, young_card;
+
+        // Calculate address of card corresponding to the updated oop slot.
+        AddressLiteral rs(byte_map_base);
+        __ z_srlg(addr_card, addr_oop, CardTableModRefBS::card_shift);
+        addr_oop = noreg; // dead now
+        __ load_const_optimized(cardtable, rs); // cardtable := <card table base>
+        __ z_agr(addr_card, cardtable); // addr_card := addr_oop>>card_shift + cardtable
+
+        __ z_cli(0, addr_card, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+        __ z_bre(young_card);
+
+        __ z_sync(); // Required to support concurrent cleaning.
+
+        __ z_cli(0, addr_card, (int)CardTableModRefBS::dirty_card_val());
+        __ z_brne(not_already_dirty);
+
+        __ bind(young_card);
+        // We didn't take the branch, so we're already dirty: restore
+        // used registers and return.
+        __ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+        __ z_br(Z_R14);
+
+        // Not dirty.
+        __ bind(not_already_dirty);
+
+        // First, dirty it: [addr_card] := 0
+        __ z_mvi(0, addr_card, CardTableModRefBS::dirty_card_val());
+
+        Register idx = cardtable; // Must be non-volatile, because it is used to save addr_card.
+        Register buf = r2;
+        cardtable = noreg; // now dead
+
+        // Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
+        __ z_stg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+
+        ByteSize dirty_card_q_index_byte_offset =
+          JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_index();
+        ByteSize dirty_card_q_buf_byte_offset =
+          JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_buf();
+
+        __ bind(restart);
+
+        // Get the index into the update buffer. DirtyCardQueue::_index is
+        // a size_t so z_ltg is appropriate here.
+        __ z_ltg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
+
+        // index == 0?
+        __ z_brz(refill);
+
+        __ z_lg(buf, Address(Z_thread, dirty_card_q_buf_byte_offset));
+        __ add2reg(idx, -oopSize);
+
+        __ z_stg(addr_card, 0, idx, buf); // [_buf + index] := <address_of_card>
+        __ z_stg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
+        // Restore killed registers and return.
+        __ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+        __ z_lg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+        __ z_br(Z_R14);
+
+        __ bind(refill);
+        save_volatile_registers(sasm);
+        __ z_lgr(idx, addr_card); // Save addr_card, tmp3 must be non-volatile.
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, DirtyCardQueueSet::handle_zero_index_for_thread),
+                                         Z_thread);
+        __ z_lgr(addr_card, idx);
+        restore_volatile_registers(sasm); // Restore addr_card.
+        __ z_bru(restart);
+      }
+      break;
+#endif // INCLUDE_ALL_GCS
+    case predicate_failed_trap_id:
+      {
+        __ set_info("predicate_failed_trap", dont_gc_arguments);
+
+        OopMap* map = save_live_registers(sasm);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm);
+
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+        __ load_const_optimized(Z_R1_scratch, deopt_blob->unpack_with_reexecution());
+        __ z_br(Z_R1_scratch);
+      }
+      break;
+
+    default:
+      {
+        __ should_not_reach_here(FILE_AND_LINE, id);
+      }
+      break;
+  }
+  return oop_maps;
+}
+
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { // Common exception-dispatch code shared by several stub ids.
+  __ block_comment("generate_handle_exception");
+
+  // Incoming parameters: Z_EXC_OOP (exception oop), Z_EXC_PC (issuing pc).
+
+  // Save registers if required.
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* oop_map = NULL;
+  Register reg_fp = Z_R1_scratch;
+
+  switch (id) {
+    case forward_exception_id: {
+      // We're handling an exception in the context of a compiled frame.
+      // The registers have been saved in the standard places. Perform
+      // an exception lookup in the caller and dispatch to the handler
+      // if found. Otherwise unwind and dispatch to the callers
+      // exception handler.
+      oop_map = generate_oop_map(sasm);
+
+      // Load the pending exception oop into Z_EXC_OOP and clear the field.
+      __ z_lg(Z_EXC_OOP, Address(Z_thread, Thread::pending_exception_offset()));
+      __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), 8);
+
+      // Different stubs forward their exceptions; they should all have similar frame layouts
+      // (a) to find their return address (b) for a correct oop_map generated above.
+      assert(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers) ==
+             RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers_except_r2), "requirement");
+
+      // Load issuing PC (the return address for this stub).
+      const int frame_size_in_bytes = sasm->frame_size() * VMRegImpl::slots_per_word * VMRegImpl::stack_slot_size;
+      __ z_lg(Z_EXC_PC, Address(Z_SP, frame_size_in_bytes + _z_abi16(return_pc)));
+      DEBUG_ONLY(__ z_lay(reg_fp, Address(Z_SP, frame_size_in_bytes));)
+
+      // Make sure that the vm_results are cleared (may be unnecessary).
+      __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()),   sizeof(oop));
+      __ clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(Metadata*));
+      break;
+    }
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      // At this point all registers MAY be live.
+      DEBUG_ONLY(__ z_lgr(reg_fp, Z_SP);)
+      oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id, Z_EXC_PC);
+      break;
+    case handle_exception_from_callee_id: {
+      // At this point all registers except Z_EXC_OOP and Z_EXC_PC are dead.
+      DEBUG_ONLY(__ z_lgr(reg_fp, Z_SP);)
+      __ save_return_pc(Z_EXC_PC);
+      const int frame_size_in_bytes = __ push_frame_abi160(0);
+      oop_map = new OopMap(frame_size_in_bytes / VMRegImpl::stack_slot_size, 0);
+      sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+      break;
+    }
+    default:  ShouldNotReachHere();
+  }
+
+  // Verify that only Z_EXC_OOP and Z_EXC_PC are valid at this time.
+  __ invalidate_registers(Z_EXC_OOP, Z_EXC_PC, reg_fp);
+  // Verify that Z_EXC_OOP contains a valid exception.
+  __ verify_not_null_oop(Z_EXC_OOP);
+
+  // Check that fields in JavaThread for exception oop and issuing pc
+  // are empty before writing to them.
+  __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_oop_offset()), Z_thread, "exception oop already set : " FILE_AND_LINE, 0);
+  __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_pc_offset()), Z_thread, "exception pc already set : " FILE_AND_LINE, 0);
+
+  // Save exception oop and issuing pc into JavaThread.
+  // (Exception handler will load it from here.)
+  __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
+  __ z_stg(Z_EXC_PC, Address(Z_thread, JavaThread::exception_pc_offset()));
+
+#ifdef ASSERT
+  { NearLabel ok;
+    __ z_cg(Z_EXC_PC, Address(reg_fp, _z_abi16(return_pc)));
+    __ branch_optimized(Assembler::bcondEqual, ok);
+    __ stop("use throwing pc as return address (has bci & oop map)");
+    __ bind(ok);
+  }
+#endif
+
+  // Compute the exception handler.
+  // The exception oop and the throwing pc are read from the fields in JavaThread.
+  int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  // Z_RET(Z_R2): handler address
+  //   will be the deopt blob if nmethod was deoptimized while we looked up
+  //   handler regardless of whether handler existed in the nmethod.
+
+  // Only Z_R2 is valid at this time; all other registers have been destroyed by the runtime call.
+  __ invalidate_registers(Z_R2);
+
+  switch(id) {
+    case forward_exception_id:
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      // Restore the registers that were saved at the beginning.
+      __ z_lgr(Z_R1_scratch, Z_R2);   // Restoring live registers kills Z_R2.
+      restore_live_registers(sasm, id != handle_exception_nofpu_id);  // Pops as well the frame.
+      __ z_br(Z_R1_scratch);
+      break;
+    case handle_exception_from_callee_id: {
+      __ pop_frame();
+      __ z_br(Z_R2); // Jump to exception handler.
+    }
+    break;
+    default:  ShouldNotReachHere();
+  }
+
+  return oop_maps;
+}
+
+
+#undef __
+
+const char *Runtime1::pd_name_for_address(address entry) {
+  return "<unknown function>"; // No platform-specific runtime entry points are named on s390.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_globals_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_GLOBALS_S390_HPP
+#define CPU_S390_VM_C1_GLOBALS_S390_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the client compiler.
+// (see c1_globals.hpp)
+// Flags sorted according to sparc.
+
+#ifndef TIERED // These defaults apply only to a client-compiler-only (non-tiered) build.
+define_pd_global(bool,     BackgroundCompilation,        true);
+define_pd_global(bool,     CICompileOSR,                 true);
+define_pd_global(bool,     InlineIntrinsics,             true);
+define_pd_global(bool,     PreferInterpreterNativeStubs, false);
+define_pd_global(bool,     ProfileTraps,                 false);
+define_pd_global(bool,     UseOnStackReplacement,        true);
+define_pd_global(bool,     TieredCompilation,            false);
+define_pd_global(intx,     CompileThreshold,             1000);
+
+define_pd_global(intx,     OnStackReplacePercentage,     1400);
+define_pd_global(bool,     UseTLAB,                      true);
+define_pd_global(bool,     ProfileInterpreter,           false);
+define_pd_global(intx,     FreqInlineSize,               325);
+define_pd_global(bool,     ResizeTLAB,                   true);
+define_pd_global(uintx,    ReservedCodeCacheSize,        32*M); // uintx: matches the flag's declared type (cf. c2_globals_s390.hpp).
+define_pd_global(uintx,    NonProfiledCodeHeapSize,      13*M);
+define_pd_global(uintx,    ProfiledCodeHeapSize,         14*M);
+define_pd_global(uintx,    NonNMethodCodeHeapSize,       5*M);
+define_pd_global(uintx,    CodeCacheExpansionSize,       32*K);
+define_pd_global(uintx,    CodeCacheMinBlockLength,      1);
+define_pd_global(uintx,    CodeCacheMinimumUseSpace,     400*K);
+define_pd_global(size_t,   MetaspaceSize,                12*M);
+define_pd_global(bool,     NeverActAsServerClassMachine, true);
+define_pd_global(size_t,   NewSizeThreadIncrease,        16*K);
+define_pd_global(uint64_t, MaxRAM,                       1ULL*G);
+define_pd_global(uintx,    InitialCodeCacheSize,         160*K);
+#endif // !TIERED
+
+define_pd_global(bool,     UseTypeProfile,               false);
+define_pd_global(bool,     RoundFPResults,               false);
+
+define_pd_global(bool,     LIRFillDelaySlots,            false);
+define_pd_global(bool,     OptimizeSinglePrecision,      false);
+define_pd_global(bool,     CSEArrayLength,               true);
+define_pd_global(bool,     TwoOperandLIRForm,            true); // z/Architecture instructions are mostly two-operand.
+
+#endif // CPU_S390_VM_C1_GLOBALS_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c2_globals_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C2_GLOBALS_S390_HPP
+#define CPU_S390_VM_C2_GLOBALS_S390_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the server compiler.
+// (see c2_globals.hpp).
+// Sorted according to sparc.
+
+define_pd_global(bool, BackgroundCompilation,        true);
+define_pd_global(bool, CICompileOSR,                 true);
+define_pd_global(bool, InlineIntrinsics,             true);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 true);
+define_pd_global(bool, UseOnStackReplacement,        true);
+define_pd_global(bool, ProfileInterpreter,           true);
+define_pd_global(bool, TieredCompilation,            trueInTiered);
+define_pd_global(intx, CompileThreshold,             10000);
+
+define_pd_global(intx, OnStackReplacePercentage,     140);
+define_pd_global(intx, ConditionalMoveLimit,         4);
+define_pd_global(intx, FLOATPRESSURE,                15);
+define_pd_global(intx, FreqInlineSize,               175);
+// 10 prevents spill-split-recycle sanity check in JVM2008.xml.transform.
+define_pd_global(intx, INTPRESSURE,                  10); // Medium size register set, 6 special purpose regs, 3 SOE regs.
+define_pd_global(intx, InteriorEntryAlignment,       2);
+define_pd_global(size_t, NewSizeThreadIncrease,      ScaleForWordSize(4*K));
+define_pd_global(intx, RegisterCostAreaRatio,        12000);
+define_pd_global(bool, UseTLAB,                      true);
+define_pd_global(bool, ResizeTLAB,                   true);
+define_pd_global(intx, LoopUnrollLimit,              60);
+define_pd_global(intx, LoopPercentProfileLimit,      10);
+define_pd_global(bool, PostLoopMultiversioning,      false); // bool: matches the flag's boolean declaration; was mistyped intx.
+define_pd_global(intx, MinJumpTableSize,             18);
+
+// Peephole and CISC spilling both break the graph, and so makes the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole,                 false);
+define_pd_global(bool, UseCISCSpill,                 true);
+define_pd_global(bool, OptoBundling,                 false);
+define_pd_global(bool, OptoScheduling,               false);
+define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
+// On s390x, we can clear the array with a single instruction,
+// so don't idealize it.
+define_pd_global(bool, IdealizeClearArrayNode,       false);
+
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(uintx, InitialCodeCacheSize,        2048*K); // Integral multiple of CodeCacheExpansionSize
+define_pd_global(uintx, ReservedCodeCacheSize,       48*M);
+define_pd_global(uintx, NonProfiledCodeHeapSize,     21*M);
+define_pd_global(uintx, ProfiledCodeHeapSize,        22*M);
+define_pd_global(uintx, NonNMethodCodeHeapSize,      5*M);
+define_pd_global(uintx, CodeCacheExpansionSize,      64*K);
+
+// Ergonomics related flags
+define_pd_global(uint64_t, MaxRAM,                   128ULL*G);
+define_pd_global(uintx, CodeCacheMinBlockLength,     4);
+define_pd_global(uintx, CodeCacheMinimumUseSpace,    400*K);
+
+define_pd_global(bool,  TrapBasedRangeChecks,        false); // Not needed on z/Architecture.
+
+// Heap related flags
+define_pd_global(size_t, MetaspaceSize,              ScaleForWordSize(16*M));
+
+// Ergonomics related flags
+define_pd_global(bool, NeverActAsServerClassMachine, false);
+
+#endif // CPU_S390_VM_C2_GLOBALS_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c2_init_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+
+// Processor dependent initialization for z/Architecture.
+
+void Compile::pd_compiler2_init() {
+  guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "CodeEntryAlignment must be >= InteriorEntryAlignment"); // Empty message gave a useless diagnostic on failure.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/codeBuffer_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_CODEBUFFER_S390_HPP
+#define CPU_S390_VM_CODEBUFFER_S390_HPP
+
 private:
  // Platform hook invoked on CodeBuffer setup; no s390-specific state to
  // initialize, so this is a no-op.
  void pd_initialize() {}

 public:
  // Instruction-bundle flush hook; s390 has no instruction bundling,
  // so nothing to do here.
  void flush_bundle(bool start_new_bundle) {}

  // Hook for harvesting CPU-specific data from a CodeBuffer; unused on s390.
  // NOTE(review): this header appears to be textually included inside class
  // CodeBuffer (class head not visible in this fragment) — confirm at caller.
  void getCpuData(const CodeBuffer * const cb) {}
+
+#endif // CPU_S390_VM_CODEBUFFER_S390_HPP
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/compiledIC_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+#ifdef COMPILER2
+#include "opto/matcher.hpp"
+#endif
+
+// ----------------------------------------------------------------------------
+
+#undef  __
+#define __ _masm.
+
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/* = NULL*/) {
+#ifdef COMPILER2
+  // Stub is fixed up when the corresponding call is converted from calling
+  // compiled code to calling interpreted code.
+  if (mark == NULL) {
+    // Get the mark within main instrs section which is set to the address of the call.
+    mark = cbuf.insts_mark();
+  }
+  assert(mark != NULL, "mark must not be NULL");
+
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a stub.
+  MacroAssembler _masm(&cbuf);
+
+  address stub = __ start_a_stub(Compile::MAX_stubs_size);
+  if (stub == NULL) {
+    return NULL;  // CodeBuffer::expand failed.
+  }
+  __ relocate(static_stub_Relocation::spec(mark));
+
+  AddressLiteral meta = __ allocate_metadata_address(NULL);
+  bool success = __ load_const_from_toc(as_Register(Matcher::inline_cache_reg_encode()), meta);
+
+  __ set_inst_mark();
+  AddressLiteral a((address)-1);
+  success = success && __ load_const_from_toc(Z_R1, a);
+  if (!success) {
+    return NULL;  // CodeCache is full.
+  }
+
+  __ z_br(Z_R1);
+  __ end_a_stub(); // Update current stubs pointer and restore insts_end.
+  return stub;
+#else
+  ShouldNotReachHere();
+#endif
+}
+
+#undef __
+
+int CompiledStaticCall::to_interp_stub_size() {
+  return 2 * MacroAssembler::load_const_from_toc_size() +
+         2; // branch
+}
+
+// Relocation entries for call stub, compiled java to interpreter.
+int CompiledStaticCall::reloc_to_interp_stub() {
+  return 5; // 4 in emit_java_to_interp + 1 in Java_Static_Call
+}
+
// Switch this static call site to dispatch through its to-interpreter stub:
// write the callee Method* into the stub's IC slot, point the stub's jump
// at 'entry', and finally patch the call site itself MT-safely to branch
// to the stub. Statement order matters for MT-safety: the stub is fully
// populated before the call site is redirected to it.
void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
  address stub = find_stub();
  guarantee(stub != NULL, "stub not found");

  if (TraceICs) {
    ResourceMark rm;
    tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
                  p2i(instruction_address()),
                  callee->name_and_sig_as_C_string());
  }

  // Creation also verifies the object.
  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub());
  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());

  // A generated LambdaForm might be deleted from the LambdaForm
  // cache in MethodTypeForm. If a JIT-compiled LambdaForm method
  // becomes not entrant and the cache access returns null, the new
  // resolve will lead to a new generated LambdaForm.

  // The stub must either still be clean (data == 0 and destination ==
  // (address)-1, as written by emit_to_interp_stub()/set_stub_to_clean())
  // or already hold exactly the values we are about to store; anything
  // else indicates a racing modification by another thread.
  assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee() || callee->is_compiled_lambda_form(),
         "a) MT-unsafe modification of inline cache");
  assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry,
         "b) MT-unsafe modification of inline cache");

  // Update stub.
  method_holder->set_data((intptr_t)callee());
  jump->set_jump_destination(entry);

  // Update jump to call.
  set_destination_mt_safe(stub);
}
+
// Reset the to-interpreter stub to its pristine state: zero the Method*
// slot and restore the (address)-1 "clean" jump destination — exactly the
// sentinels that set_to_interpreted() asserts on. Must only run under
// CompiledIC_lock or at a safepoint (asserted below), since the stub is
// shared mutable state of the call site.
void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
  assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
  // Reset stub.
  address stub = static_stub->addr();
  assert(stub != NULL, "stub not found");
  // Creation also verifies the object.
  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub());
  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
  method_holder->set_data(0);
  jump->set_jump_destination((address)-1);
}
+
+//-----------------------------------------------------------------------------
+
+#ifndef PRODUCT
+
// Debug-only consistency check of the call site, its stub, and its state.
void CompiledStaticCall::verify() {
  // Verify the call instruction itself.
  NativeCall::verify();
  if (os::is_MP()) {
    // On multiprocessors additionally check the call's alignment.
    verify_alignment();
  }

  // Verify stub.
  address stub = find_stub();
  assert(stub != NULL, "no stub found for static call");
  // Creation also verifies the object. The two locals are intentionally
  // unused afterwards: the nativeXXX_at() constructions perform the
  // verification as a side effect.
  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub());
  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());

  // Verify state.
  assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
}
+
+#endif // !PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/copy_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1134 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by LS
+
+#ifndef CPU_S390_VM_COPY_S390_HPP
+#define CPU_S390_VM_COPY_S390_HPP
+
+// Inline functions for memory copy and fill.
+
+// HeapWordSize (the size of class HeapWord) is 8 Bytes (the size of a
+// pointer variable), since we always run the _LP64 model. As a consequence,
+// HeapWord* memory ranges are always assumed to be doubleword-aligned,
+// having a size which is an integer multiple of HeapWordSize.
+//
+// Dealing only with doubleword-aligned doubleword units has important
+// positive performance and data access consequences. Many of the move
+// instructions perform particularly well under these circumstances.
+// Data access is "doubleword-concurrent", except for MVC and XC.
+// Furthermore, data access can be forced to be sequential (MVCL and MVCLE)
+// by use of the special padding byte 0xb1, where required. For copying,
+// we use padding byte 0xb0 to prevent the D-cache from being polluted.
+//
+// On z/Architecture, gcc optimizes memcpy into a series of MVC instructions.
+// This is optimal, even if just one HeapWord is copied. However, MVC
+// copying is not atomic, i.e. not "doubleword concurrent" by definition.
+//
+// If the -mmvcle compiler option is specified, memcpy translates into
+// code such that the entire memory range is copied or preset with just
+// one MVCLE instruction.
+//
+// *to = *from is transformed into a MVC instruction already with -O1.
+// Thus, for atomic copy operations, (inline) assembler code is required
+// to guarantee atomic data accesses.
+//
+// For large (len >= MVCLEThreshold) chunks of memory, we exploit
+// special H/W support of z/Architecture:
+// 1) copy short piece of memory to page-align address(es)
+// 2) copy largest part (all contained full pages) of memory using mvcle instruction.
+//    z/Architecture processors have special H/W support for page-aligned storage
+//    where len is an int multiple of page size. In that case, up to 4 cache lines are
+//    processed in parallel and L1 cache is not polluted.
+// 3) copy the remaining piece of memory.
+//
+//  Measurement classifications:
+//  very rare - <=     10.000 calls AND <=     1.000 usec elapsed
+//       rare - <=    100.000 calls AND <=    10.000 usec elapsed
+//       some - <=  1.000.000 calls AND <=   100.000 usec elapsed
+//       freq - <= 10.000.000 calls AND <= 1.000.000 usec elapsed
+//  very freq - >  10.000.000 calls OR  >  1.000.000 usec elapsed
+
+#undef USE_INLINE_ASM
+
+static void copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
+  if (from > to) {
+    while (count-- > 0) {
+      // Copy forwards
+      *to++ = *from++;
+    }
+  } else {
+    from += count - 1;
+    to   += count - 1;
+    while (count-- > 0) {
+      // Copy backwards
+      *to-- = *from--;
+    }
+  }
+}
+
+static void copy_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
+  if (from > to) {
+    while (count-- > 0) {
+      // Copy forwards
+      *to++ = *from++;
+    }
+  } else {
+    from += count - 1;
+    to   += count - 1;
+    while (count-- > 0) {
+      // Copy backwards
+      *to-- = *from--;
+    }
+  }
+}
+
// True iff a plain forward (ascending-address) copy of 'byte_count' bytes
// from 'from' to 'to' would overwrite source bytes before reading them,
// i.e. the destination starts inside the source range.
static bool has_destructive_overlap(char* from, char* to, size_t byte_count) {
  if (from >= to) {
    // Destination at or below source: forward copy never clobbers unread input.
    return false;
  }
  return (to - from) < (ptrdiff_t)byte_count;
}
+
+#ifdef USE_INLINE_ASM
+
+  //--------------------------------------------------------------
+  // Atomic copying. Atomicity is given by the minimum of source
+  // and target alignment. Refer to mail comm with Tim Slegel/IBM.
+  // Only usable for disjoint source and target.
+  //--------------------------------------------------------------
+  #define MOVE8_ATOMIC_4(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    unsigned long fromaddr;                                      \
+    asm(                                                         \
+      "LG      %[toaddr],%[to]     \n\t" /* address of to area   */ \
+      "LG      %[fromaddr],%[from] \n\t" /* address of from area */ \
+      "MVC     0(32,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \
+      : [to]       "+Q"  (_to)          /* outputs   */          \
+      , [from]     "+Q"  (_from)                                 \
+      , [toaddr]   "=a"  (toaddr)                                \
+      , [fromaddr] "=a"  (fromaddr)                              \
+      :                                                          \
+      : "cc"                            /* clobbered */          \
+    );                                                           \
+  }
+  #define MOVE8_ATOMIC_3(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    unsigned long fromaddr;                                      \
+    asm(                                                         \
+      "LG      %[toaddr],%[to]     \n\t" /* address of to area   */ \
+      "LG      %[fromaddr],%[from] \n\t" /* address of from area */ \
+      "MVC     0(24,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \
+      : [to]       "+Q"  (_to)          /* outputs   */          \
+      , [from]     "+Q"  (_from)                                 \
+      , [toaddr]   "=a"  (toaddr)                                \
+      , [fromaddr] "=a"  (fromaddr)                              \
+      :                                                          \
+      : "cc"                            /* clobbered */          \
+    );                                                           \
+  }
+  #define MOVE8_ATOMIC_2(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    unsigned long fromaddr;                                      \
+    asm(                                                         \
+      "LG      %[toaddr],%[to]     \n\t" /* address of to area   */ \
+      "LG      %[fromaddr],%[from] \n\t" /* address of from area */ \
+      "MVC     0(16,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \
+      : [to]       "+Q"  (_to)          /* outputs   */          \
+      , [from]     "+Q"  (_from)                                 \
+      , [toaddr]   "=a"  (toaddr)                                \
+      , [fromaddr] "=a"  (fromaddr)                              \
+      :                                                          \
+      : "cc"                            /* clobbered */          \
+    );                                                           \
+  }
+  #define MOVE8_ATOMIC_1(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    unsigned long fromaddr;                                      \
+    asm(                                                         \
+      "LG      %[toaddr],%[to]     \n\t" /* address of to area   */ \
+      "LG      %[fromaddr],%[from] \n\t" /* address of from area */ \
+      "MVC     0(8,%[toaddr]),0(%[fromaddr]) \n\t"  /* move data */ \
+      : [to]       "+Q"  (_to)          /* outputs   */          \
+      , [from]     "+Q"  (_from)                                 \
+      , [toaddr]   "=a"  (toaddr)                                \
+      , [fromaddr] "=a"  (fromaddr)                              \
+      :                                                          \
+      : "cc"                            /* clobbered */          \
+    );                                                           \
+  }
+
+  //--------------------------------------------------------------
+  // Atomic copying of 8-byte entities.
+  // Conjoint/disjoint property does not matter. Entities are first
+  // loaded and then stored.
+  // _to and _from must be 8-byte aligned.
+  //--------------------------------------------------------------
+  #define COPY8_ATOMIC_4(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    asm(                                                         \
+      "LG      3,%[from]        \n\t" /* address of from area */ \
+      "LG      %[toaddr],%[to]  \n\t" /* address of to area   */ \
+      "LMG     0,3,0(3)         \n\t" /* load data            */ \
+      "STMG    0,3,0(%[toaddr]) \n\t" /* store data           */ \
+      : [to]     "+Q"  (_to)          /* outputs   */            \
+      , [from]   "+Q"  (_from)        /* outputs   */            \
+      , [toaddr] "=a"  (toaddr)       /* inputs    */            \
+      :                                                          \
+      : "cc",  "r0", "r1", "r2", "r3" /* clobbered */            \
+    );                                                           \
+  }
+  #define COPY8_ATOMIC_3(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    asm(                                                         \
+      "LG      2,%[from]        \n\t" /* address of from area */ \
+      "LG      %[toaddr],%[to]  \n\t" /* address of to area   */ \
+      "LMG     0,2,0(2)         \n\t" /* load data            */ \
+      "STMG    0,2,0(%[toaddr]) \n\t" /* store data           */ \
+      : [to]     "+Q"  (_to)          /* outputs   */            \
+      , [from]   "+Q"  (_from)        /* outputs   */            \
+      , [toaddr] "=a"  (toaddr)       /* inputs    */            \
+      :                                                          \
+      : "cc",  "r0", "r1", "r2"       /* clobbered */            \
+    );                                                           \
+  }
+  #define COPY8_ATOMIC_2(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    asm(                                                         \
+      "LG      1,%[from]        \n\t" /* address of from area */ \
+      "LG      %[toaddr],%[to]  \n\t" /* address of to area   */ \
+      "LMG     0,1,0(1)         \n\t" /* load data            */ \
+      "STMG    0,1,0(%[toaddr]) \n\t" /* store data           */ \
+      : [to]     "+Q"  (_to)          /* outputs   */            \
+      , [from]   "+Q"  (_from)        /* outputs   */            \
+      , [toaddr] "=a"  (toaddr)       /* inputs    */            \
+      :                                                          \
+      : "cc",  "r0", "r1"             /* clobbered */            \
+    );                                                           \
+  }
+  #define COPY8_ATOMIC_1(_to,_from) {                            \
+    unsigned long addr;                                          \
+    asm(                                                         \
+      "LG      %[addr],%[from]  \n\t" /* address of from area */ \
+      "LG      0,0(0,%[addr])   \n\t" /* load data            */ \
+      "LG      %[addr],%[to]    \n\t" /* address of to area   */ \
+      "STG     0,0(0,%[addr])   \n\t" /* store data           */ \
+      : [to]     "+Q"  (_to)          /* outputs   */            \
+      , [from]   "+Q"  (_from)        /* outputs   */            \
+      , [addr]   "=a"  (addr)         /* inputs    */            \
+      :                                                          \
+      : "cc",  "r0"                   /* clobbered */            \
+    );                                                           \
+  }
+
+  //--------------------------------------------------------------
+  // Atomic copying of 4-byte entities.
+  // Exactly 4 (four) entities are copied.
+  // Conjoint/disjoint property does not matter. Entities are first
+  // loaded and then stored.
+  // _to and _from must be 4-byte aligned.
+  //--------------------------------------------------------------
+  #define COPY4_ATOMIC_4(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    asm(                                                         \
+      "LG      3,%[from]        \n\t" /* address of from area */ \
+      "LG      %[toaddr],%[to]  \n\t" /* address of to area   */ \
+      "LM      0,3,0(3)         \n\t" /* load data            */ \
+      "STM     0,3,0(%[toaddr]) \n\t" /* store data           */ \
+      : [to]     "+Q"  (_to)          /* outputs   */            \
+      , [from]   "+Q"  (_from)        /* outputs   */            \
+      , [toaddr] "=a"  (toaddr)       /* inputs    */            \
+      :                                                          \
+      : "cc",  "r0", "r1", "r2", "r3" /* clobbered */            \
+    );                                                           \
+  }
+  #define COPY4_ATOMIC_3(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    asm(                                                         \
+      "LG      2,%[from]        \n\t" /* address of from area */ \
+      "LG      %[toaddr],%[to]  \n\t" /* address of to area   */ \
+      "LM      0,2,0(2)         \n\t" /* load data            */ \
+      "STM     0,2,0(%[toaddr]) \n\t" /* store data           */ \
+      : [to]     "+Q"  (_to)          /* outputs   */            \
+      , [from]   "+Q"  (_from)        /* outputs   */            \
+      , [toaddr] "=a"  (toaddr)       /* inputs    */            \
+      :                                                          \
+      : "cc",  "r0", "r1", "r2"       /* clobbered */            \
+    );                                                           \
+  }
+  #define COPY4_ATOMIC_2(_to,_from) {                            \
+    unsigned long toaddr;                                        \
+    asm(                                                         \
+      "LG      1,%[from]        \n\t" /* address of from area */ \
+      "LG      %[toaddr],%[to]  \n\t" /* address of to area   */ \
+      "LM      0,1,0(1)         \n\t" /* load data            */ \
+      "STM     0,1,0(%[toaddr]) \n\t" /* store data           */ \
+      : [to]     "+Q"  (_to)          /* outputs   */            \
+      , [from]   "+Q"  (_from)        /* outputs   */            \
+      , [toaddr] "=a"  (toaddr)       /* inputs    */            \
+      :                                                          \
+      : "cc",  "r0", "r1"             /* clobbered */            \
+    );                                                           \
+  }
+  #define COPY4_ATOMIC_1(_to,_from) {                            \
+    unsigned long addr;                                          \
+    asm(                                                         \
+      "LG      %[addr],%[from]  \n\t" /* address of from area */ \
+      "L       0,0(0,%[addr])   \n\t" /* load data            */ \
+      "LG      %[addr],%[to]    \n\t" /* address of to area   */ \
+      "ST      0,0(0,%[addr])   \n\t" /* store data           */ \
+      : [to]     "+Q"  (_to)          /* outputs   */            \
+      , [from]   "+Q"  (_from)        /* outputs   */            \
+      , [addr]   "=a"  (addr)         /* inputs    */            \
+      :                                                          \
+      : "cc",  "r0"                   /* clobbered */            \
+    );                                                           \
+  }
+
+#if 0  // Waiting for gcc to support EXRL.
+  #define MVC_MEMCOPY(_to,_from,_len)                                \
+    if (VM_Version::has_ExecuteExtensions()) {                       \
+      asm("\t"                                                       \
+      "    LAY     1,-1(0,%[len])      \n\t" /* decr for MVC  */     \
+      "    EXRL    1,1f                \n\t" /* execute MVC instr */ \
+      "    BRC     15,2f               \n\t" /* skip template */     \
+      "1:  MVC     0(%[len],%[to]),0(%[from]) \n\t"                  \
+      "2:  BCR     0,0                 \n\t"                         \
+      : [to]   "+Q"  (_to)             /* outputs   */               \
+      , [from] "+Q"  (_from)           /* outputs   */               \
+      : [len]  "r"   (_len)            /* inputs    */               \
+      : "cc",  "r1"                    /* clobbered */               \
+      );                                                             \
+    } else {                                                         \
+      asm("\t"                                                       \
+      "    LARL    2,3f                \n\t"                         \
+      "    LAY     1,-1(0,%[len])      \n\t" /* decr for MVC  */     \
+      "    EX      1,0(2)              \n\t" /* execute MVC instr */ \
+      "    BRC     15,4f               \n\t" /* skip template */     \
+      "3:  MVC     0(%[len],%[to]),0(%[from])  \n\t"                 \
+      "4:  BCR     0,0                 \n\t"                         \
+      : [to]   "+Q"  (_to)             /* outputs   */               \
+      , [from] "+Q"  (_from)           /* outputs   */               \
+      : [len]  "r"   (_len)            /* inputs    */               \
+      : "cc",  "r1", "r2"              /* clobbered */               \
+      );                                                             \
+    }
+#else
+  #define MVC_MEMCOPY(_to,_from,_len)                                \
+  { unsigned long toaddr;   unsigned long tolen;                     \
+    unsigned long fromaddr; unsigned long target;                    \
+      asm("\t"                                                       \
+      "    LTGR    %[tolen],%[len]     \n\t" /* decr for MVC  */     \
+      "    BRC     8,2f                \n\t" /* do nothing for l=0*/ \
+      "    AGHI    %[tolen],-1         \n\t"                         \
+      "    LG      %[toaddr],%[to]     \n\t"                         \
+      "    LG      %[fromaddr],%[from] \n\t"                         \
+      "    LARL    %[target],1f        \n\t" /* addr of MVC instr */ \
+      "    EX      %[tolen],0(%[target])         \n\t" /* execute MVC instr */ \
+      "    BRC     15,2f                         \n\t" /* skip template */     \
+      "1:  MVC     0(1,%[toaddr]),0(%[fromaddr]) \n\t"                         \
+      "2:  BCR     0,0                 \n\t" /* nop a branch target*/\
+      : [to]       "+Q"  (_to)         /* outputs   */               \
+      , [from]     "+Q"  (_from)                                     \
+      , [tolen]    "=a"  (tolen)                                     \
+      , [toaddr]   "=a"  (toaddr)                                    \
+      , [fromaddr] "=a"  (fromaddr)                                  \
+      , [target]   "=a"  (target)                                    \
+      : [len]       "r"  (_len)        /* inputs    */               \
+      : "cc"                           /* clobbered */               \
+      );                                                             \
+  }
+#endif
+
+  #if 0  // code snippet to be used for debugging
+      /* ASSERT code BEGIN */                                                \
+      "    LARL    %[len],5f       \n\t"                                     \
+      "    LARL    %[mta],4f       \n\t"                                     \
+      "    SLGR    %[len],%[mta]   \n\t"                                     \
+      "    CGHI    %[len],16       \n\t"                                     \
+      "    BRC     7,9f            \n\t"      /* block size !=  16 */        \
+                                                                             \
+      "    LARL    %[len],1f       \n\t"                                     \
+      "    SLGR    %[len],%[mta]   \n\t"                                     \
+      "    CGHI    %[len],256      \n\t"                                     \
+      "    BRC     7,9f            \n\t"      /* list len   != 256 */        \
+                                                                             \
+      "    LGR     0,0             \n\t"      /* artificial SIGILL */        \
+      "9:  BRC     7,-2            \n\t"                                     \
+      "    LARL    %[mta],1f       \n\t"      /* restore MVC table begin */  \
+      /* ASSERT code END   */
+  #endif
+
+  // Optimized copying for data less than 4k
+  // - no destructive overlap
+  // - 0 <= _n_bytes <= 4096
+  // This macro needs to be gcc-compiled with -march=z990. Otherwise, the
+  // LAY instruction is not available.
+  #define MVC_MULTI(_to,_from,_n_bytes)                                      \
+  { unsigned long toaddr;                                                    \
+    unsigned long fromaddr;                                                  \
+    unsigned long movetable;                                                 \
+    unsigned long len;                                                       \
+      asm("\t"                                                               \
+      "    LTGFR   %[len],%[nby]   \n\t"                                     \
+      "    LG      %[ta],%[to]     \n\t"      /* address of to area   */     \
+      "    BRC     8,1f            \n\t"      /* nothing to copy   */        \
+                                                                             \
+      "    NILL    %[nby],255      \n\t"      /* # bytes mod 256      */     \
+      "    LG      %[fa],%[from]   \n\t"      /* address of from area */     \
+      "    BRC     8,3f            \n\t"      /* no rest, skip copying */    \
+                                                                             \
+      "    LARL    %[mta],2f       \n\t"      /* MVC template addr */        \
+      "    AHI     %[nby],-1       \n\t"      /* adjust for EX MVC  */       \
+                                                                             \
+      "    EX      %[nby],0(%[mta]) \n\t"     /* only rightmost */           \
+                                              /* 8 bits of nby used */       \
+      /* Since nby is <= 4096 on entry to this code, we do need */           \
+      /* no zero extension before using it in addr calc.        */           \
+      "    LA      %[fa],1(%[nby],%[fa]) \n\t"/* adjust from addr */         \
+      "    LA      %[ta],1(%[nby],%[ta]) \n\t"/* adjust to   addr */         \
+                                                                             \
+      "3:  SRAG    %[nby],%[len],8 \n\t"      /* # cache lines     */        \
+      "    LARL    %[mta],1f       \n\t"      /* MVC table begin   */        \
+      "    BRC     8,1f            \n\t"      /* nothing to copy   */        \
+                                                                             \
+      /* Insert ASSERT code here if required. */                             \
+                                                                             \
+                                                                             \
+      "    LNGFR   %[nby],%[nby]   \n\t"      /* negative offset into     */ \
+      "    SLLG    %[nby],%[nby],4 \n\t"      /* MVC table 16-byte blocks */ \
+      "    BC      15,0(%[nby],%[mta]) \n\t"  /* branch to block #ncl  */    \
+                                                                             \
+      "2:  MVC     0(1,%[ta]),0(%[fa]) \n\t"  /* MVC template */             \
+                                                                             \
+      "4:  MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 4096 == l        */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "5:  MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 3840 <= l < 4096 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 3584 <= l < 3840 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 3328 <= l < 3584 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 3072 <= l < 3328 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 2816 <= l < 3072 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 2560 <= l < 2816 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 2304 <= l < 2560 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 2048 <= l < 2304 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 1792 <= l < 2048 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 1536 <= l < 1792 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 1280 <= l < 1536 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /* 1024 <= l < 1280 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /*  768 <= l < 1024 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /*  512 <= l <  768 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "    MVC     0(256,%[ta]),0(%[fa])   \n\t" /*  256 <= l <  512 */      \
+      "    LAY     %[ta],256(0,%[ta])      \n\t"                             \
+      "    LA      %[fa],256(0,%[fa])      \n\t"                             \
+      "1:  BCR     0,0                     \n\t" /* nop as branch target */  \
+      : [to]       "+Q"  (_to)          /* outputs   */          \
+      , [from]     "+Q"  (_from)                                 \
+      , [ta]       "=a"  (toaddr)                                \
+      , [fa]       "=a"  (fromaddr)                              \
+      , [mta]      "=a"  (movetable)                             \
+      , [nby]      "+a"  (_n_bytes)                              \
+      , [len]      "=a"  (len)                                   \
+      :                                                          \
+      : "cc"                            /* clobbered */          \
+    );                                                           \
+  }
+
+  #define MVCLE_MEMCOPY(_to,_from,_len)                           \
+    asm(                                                          \
+      "    LG      0,%[to]     \n\t"   /* address of to area   */ \
+      "    LG      2,%[from]   \n\t"   /* address of from area */ \
+      "    LGR     1,%[len]    \n\t"   /* len of to area       */ \
+      "    LGR     3,%[len]    \n\t"   /* len of from area     */ \
+      "1:  MVCLE   0,2,176     \n\t"   /* copy storage, bypass cache (0xb0) */ \
+      "    BRC     1,1b        \n\t"   /* retry if interrupted */ \
+      : [to]   "+Q"  (_to)             /* outputs   */            \
+      , [from] "+Q"  (_from)           /* outputs   */            \
+      : [len]  "r"   (_len)            /* inputs    */            \
+      : "cc",  "r0", "r1", "r2", "r3"  /* clobbered */            \
+    );
+
+  #define MVCLE_MEMINIT(_to,_val,_len)                            \
+    asm(                                                          \
+      "    LG      0,%[to]       \n\t" /* address of to area   */ \
+      "    LGR     1,%[len]      \n\t" /* len of to area       */ \
+      "    XGR     3,3           \n\t" /* from area len = 0    */ \
+      "1:  MVCLE   0,2,0(%[val]) \n\t" /* init storage         */ \
+      "    BRC     1,1b          \n\t" /* retry if interrupted */ \
+      : [to]   "+Q"  (_to)             /* outputs   */            \
+      : [len]  "r"   (_len)            /* inputs    */            \
+      , [val]  "r"   (_val)            /* inputs    */            \
+      : "cc",  "r0", "r1", "r3"        /* clobbered */            \
+    );
+  #define MVCLE_MEMZERO(_to,_len)                                 \
+    asm(                                                          \
+      "    LG      0,%[to]       \n\t" /* address of to area   */ \
+      "    LGR     1,%[len]      \n\t" /* len of to area       */ \
+      "    XGR     3,3           \n\t" /* from area len = 0    */ \
+      "1:  MVCLE   0,2,0         \n\t" /* clear storage        */ \
+      "    BRC     1,1b          \n\t" /* retry if interrupted */ \
+      : [to]   "+Q"  (_to)             /* outputs   */            \
+      : [len]  "r"   (_len)            /* inputs    */            \
+      : "cc",  "r0", "r1", "r3"        /* clobbered */            \
+    );
+
+  // Clear a stretch of memory, 0 <= _len <= 256.
+  // There is no alignment prereq.
+  // There is no test for len out of range specified above.
+  #define XC_MEMZERO_256(_to,_len)                                 \
+{ unsigned long toaddr;   unsigned long tolen;                     \
+  unsigned long target;                                            \
+    asm("\t"                                                       \
+    "    LTGR    %[tolen],%[len]     \n\t" /* decr for MVC  */     \
+    "    BRC     8,2f                \n\t" /* do nothing for l=0*/ \
+    "    AGHI    %[tolen],-1         \n\t" /* adjust for EX XC  */ \
+    "    LARL    %[target],1f        \n\t" /* addr of XC instr  */ \
+    "    LG      %[toaddr],%[to]     \n\t" /* addr of data area */ \
+    "    EX      %[tolen],0(%[target])       \n\t" /* execute MVC instr */ \
+    "    BRC     15,2f                       \n\t" /* skip template */     \
+    "1:  XC      0(1,%[toaddr]),0(%[toaddr]) \n\t"                         \
+    "2:  BCR     0,0                 \n\t" /* nop a branch target*/\
+    : [to]       "+Q"  (_to)         /* outputs   */               \
+    , [tolen]    "=a"  (tolen)                                     \
+    , [toaddr]   "=a"  (toaddr)                                    \
+    , [target]   "=a"  (target)                                    \
+    : [len]       "r"  (_len)        /* inputs    */               \
+    : "cc"                           /* clobbered */               \
+    );                                                             \
+}
+
+  // Clear a stretch of memory, 256 < _len.
+  // XC_MEMZERO_256 may be used to clear shorter areas.
+  //
+  // The code
+  // - first zeroes a few bytes to align on a HeapWord.
+  //   This step is currently inactive because all calls seem
+  //   to have their data aligned on HeapWord boundaries.
+  // - then zeroes a few HeapWords to align on a cache line.
+  // - then zeroes entire cache lines in a loop.
+  // - then zeroes the remaining (partial) cache line.
+#if 1
+  #define XC_MEMZERO_ANY(_to,_len)                                    \
+{ unsigned long toaddr;   unsigned long tolen;                        \
+  unsigned long len8;     unsigned long len256;                       \
+  unsigned long target;   unsigned long lenx;                         \
+    asm("\t"                                                          \
+    "    LTGR    %[tolen],%[len]      \n\t" /*                   */   \
+    "    BRC     8,2f                 \n\t" /* do nothing for l=0*/   \
+    "    LG      %[toaddr],%[to]      \n\t" /* addr of data area */   \
+    "    LARL    %[target],1f         \n\t" /* addr of XC instr  */   \
+    " "                                                               \
+    "    LCGR    %[len256],%[toaddr]  \n\t" /* cache line alignment */\
+    "    NILL    %[len256],0xff       \n\t"                           \
+    "    BRC     8,4f                 \n\t" /* already aligned     */ \
+    "    NILH    %[len256],0x00       \n\t" /* zero extend         */ \
+    "    LLGFR   %[len256],%[len256]  \n\t"                           \
+    "    LAY     %[lenx],-1(,%[len256]) \n\t"                         \
+    "    EX      %[lenx],0(%[target]) \n\t" /* execute MVC instr   */ \
+    "    LA      %[toaddr],0(%[len256],%[toaddr]) \n\t"               \
+    "    SGR     %[tolen],%[len256]   \n\t" /* adjust len          */ \
+    " "                                                               \
+    "4:  SRAG    %[lenx],%[tolen],8   \n\t" /* # cache lines       */ \
+    "    BRC     8,6f                 \n\t" /* no full cache lines */ \
+    "5:  XC      0(256,%[toaddr]),0(%[toaddr]) \n\t"                  \
+    "    LA      %[toaddr],256(,%[toaddr]) \n\t"                      \
+    "    BRCTG   %[lenx],5b           \n\t" /* iterate             */ \
+    " "                                                               \
+    "6:  NILL    %[tolen],0xff        \n\t" /* leftover bytes      */ \
+    "    BRC     8,2f                 \n\t" /* done if none        */ \
+    "    LAY     %[lenx],-1(,%[tolen]) \n\t"                          \
+    "    EX      %[lenx],0(%[target]) \n\t" /* execute MVC instr   */ \
+    "    BRC     15,2f                \n\t" /* skip template       */ \
+    " "                                                               \
+    "1:  XC      0(1,%[toaddr]),0(%[toaddr]) \n\t"                    \
+    "2:  BCR     0,0                  \n\t" /* nop a branch target */ \
+    : [to]       "+Q"  (_to)         /* outputs   */               \
+    , [lenx]     "=a"  (lenx)                                      \
+    , [len256]   "=a"  (len256)                                    \
+    , [tolen]    "=a"  (tolen)                                     \
+    , [toaddr]   "=a"  (toaddr)                                    \
+    , [target]   "=a"  (target)                                    \
+    : [len]       "r"  (_len)        /* inputs    */               \
+    : "cc"                           /* clobbered */               \
+    );                                                             \
+}
+#else
+  #define XC_MEMZERO_ANY(_to,_len)                                    \
+{ unsigned long toaddr;   unsigned long tolen;                        \
+  unsigned long len8;     unsigned long len256;                       \
+  unsigned long target;   unsigned long lenx;                         \
+    asm("\t"                                                          \
+    "    LTGR    %[tolen],%[len]      \n\t" /*                   */   \
+    "    BRC     8,2f                 \n\t" /* do nothing for l=0*/   \
+    "    LG      %[toaddr],%[to]      \n\t" /* addr of data area */   \
+    "    LARL    %[target],1f         \n\t" /* addr of XC instr  */   \
+    " "                                                               \
+    "    LCGR    %[len8],%[toaddr]    \n\t" /* HeapWord alignment  */ \
+    "    NILL    %[len8],0x07         \n\t"                           \
+    "    BRC     8,3f                 \n\t" /* already aligned     */ \
+    "    NILH    %[len8],0x00         \n\t" /* zero extend         */ \
+    "    LLGFR   %[len8],%[len8]      \n\t"                           \
+    "    LAY     %[lenx],-1(,%[len8]) \n\t"                           \
+    "    EX      %[lenx],0(%[target]) \n\t" /* execute MVC instr */   \
+    "    LA      %[toaddr],0(%[len8],%[toaddr]) \n\t"                 \
+    "    SGR     %[tolen],%[len8]     \n\t" /* adjust len          */ \
+    " "                                                               \
+    "3:  LCGR    %[len256],%[toaddr]  \n\t" /* cache line alignment */\
+    "    NILL    %[len256],0xff       \n\t"                           \
+    "    BRC     8,4f                 \n\t" /* already aligned     */ \
+    "    NILH    %[len256],0x00       \n\t" /* zero extend         */ \
+    "    LLGFR   %[len256],%[len256]  \n\t"                           \
+    "    LAY     %[lenx],-1(,%[len256]) \n\t"                         \
+    "    EX      %[lenx],0(%[target]) \n\t" /* execute MVC instr   */ \
+    "    LA      %[toaddr],0(%[len256],%[toaddr]) \n\t"               \
+    "    SGR     %[tolen],%[len256]   \n\t" /* adjust len          */ \
+    " "                                                               \
+    "4:  SRAG    %[lenx],%[tolen],8   \n\t" /* # cache lines       */ \
+    "    BRC     8,6f                 \n\t" /* no full cache lines */ \
+    "5:  XC      0(256,%[toaddr]),0(%[toaddr]) \n\t"                  \
+    "    LA      %[toaddr],256(,%[toaddr]) \n\t"                      \
+    "    BRCTG   %[lenx],5b           \n\t" /* iterate             */ \
+    " "                                                               \
+    "6:  NILL    %[tolen],0xff        \n\t" /* leftover bytes      */ \
+    "    BRC     8,2f                 \n\t" /* done if none        */ \
+    "    LAY     %[lenx],-1(,%[tolen]) \n\t"                          \
+    "    EX      %[lenx],0(%[target]) \n\t" /* execute MVC instr   */ \
+    "    BRC     15,2f                \n\t" /* skip template       */ \
+    " "                                                               \
+    "1:  XC      0(1,%[toaddr]),0(%[toaddr]) \n\t"                    \
+    "2:  BCR     0,0                  \n\t" /* nop a branch target */ \
+    : [to]       "+Q"  (_to)         /* outputs   */               \
+    , [lenx]     "=a"  (lenx)                                      \
+    , [len8]     "=a"  (len8)                                      \
+    , [len256]   "=a"  (len256)                                    \
+    , [tolen]    "=a"  (tolen)                                     \
+    , [toaddr]   "=a"  (toaddr)                                    \
+    , [target]   "=a"  (target)                                    \
+    : [len]       "r"  (_len)        /* inputs    */               \
+    : "cc"                           /* clobbered */               \
+    );                                                             \
+}
+#endif
+#endif // USE_INLINE_ASM
+
+//*************************************//
+//   D I S J O I N T   C O P Y I N G   //
+//*************************************//
+
+static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  // JVM2008: very frequent, some tests frequent.
+
+  // Copy HeapWord (=DW) aligned storage. Use MVCLE in inline-asm code.
+  // MVCLE guarantees DW concurrent (i.e. atomic) accesses if both the addresses of the operands
+  // are DW aligned and the length is an integer multiple of a DW. Should always be true here.
+  //
+  // No special exploit needed. H/W discovers suitable situations itself.
+  //
+  // For large chunks of memory, exploit special H/W support of z/Architecture:
+  // 1) copy short piece of memory to page-align address(es)
+  // 2) copy largest part (all contained full pages) of memory using mvcle instruction.
+  //    z/Architecture processors have special H/W support for page-aligned storage
+  //    where len is an int multiple of page size. In that case, up to 4 cache lines are
+  //    processed in parallel and L1 cache is not polluted.
+  // 3) copy the remaining piece of memory.
+  //
+#ifdef USE_INLINE_ASM
+  jbyte* to_bytes   = (jbyte*)to;
+  jbyte* from_bytes = (jbyte*)from;
+  size_t len_bytes  = count*HeapWordSize;
+
+  // Optimized copying for data less than 4k
+  switch (count) {
+    case 0: return;
+    case 1: MOVE8_ATOMIC_1(to,from)
+            return;
+    case 2: MOVE8_ATOMIC_2(to,from)
+            return;
+//  case 3: MOVE8_ATOMIC_3(to,from)
+//          return;
+//  case 4: MOVE8_ATOMIC_4(to,from)
+//          return;
+    default:
+      if (len_bytes <= 4096) {
+        MVC_MULTI(to,from,len_bytes)
+        return;
+      }
+      // else
+      MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+      return;
+  }
+#else
+  // Fallback code.
+  switch (count) {
+    case 0:
+      return;
+
+    case 1:
+      *to = *from;
+      return;
+
+    case 2:
+      *to++ = *from++;
+      *to = *from;
+      return;
+
+    case 3:
+      *to++ = *from++;
+      *to++ = *from++;
+      *to = *from;
+      return;
+
+    case 4:
+      *to++ = *from++;
+      *to++ = *from++;
+      *to++ = *from++;
+      *to = *from;
+      return;
+
+    default:
+      while (count-- > 0)
+        *(to++) = *(from++);
+      return;
+  }
+#endif
+}
+
+static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
+  // JVM2008: < 4k calls.
+  assert(((((size_t)from) & 0x07L) | (((size_t)to) & 0x07L)) == 0, "No atomic copy w/o aligned data");
+  pd_aligned_disjoint_words(from, to, count); // Rare calls -> just delegate.
+}
+
+static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  // JVM2008: very rare.
+  pd_aligned_disjoint_words(from, to, count); // Rare calls -> just delegate.
+}
+
+
+//*************************************//
+//   C O N J O I N T   C O P Y I N G   //
+//*************************************//
+
+static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  // JVM2008: between some and lower end of frequent.
+
+#ifdef USE_INLINE_ASM
+  size_t  count_in = count;
+  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
+    switch (count_in) {
+      case 4: COPY8_ATOMIC_4(to,from)
+              return;
+      case 3: COPY8_ATOMIC_3(to,from)
+              return;
+      case 2: COPY8_ATOMIC_2(to,from)
+              return;
+      case 1: COPY8_ATOMIC_1(to,from)
+              return;
+      case 0: return;
+      default:
+        from += count_in;
+        to   += count_in;
+        while (count_in-- > 0)
+          *(--to) = *(--from); // Copy backwards, areas overlap destructively.
+        return;
+    }
+  }
+  // else
+  jbyte* to_bytes   = (jbyte*)to;
+  jbyte* from_bytes = (jbyte*)from;
+  size_t len_bytes  = count_in*BytesPerLong;
+  MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+  return;
+#else
+  // Fallback code.
+  if (has_destructive_overlap((char*)from, (char*)to, count*BytesPerLong)) {
+    HeapWord t1, t2, t3;
+    switch (count) {
+      case 0:
+        return;
+
+      case 1:
+        *to = *from;
+        return;
+
+      case 2:
+        t1 = *(from+1);
+        *to = *from;
+        *(to+1) = t1;
+        return;
+
+      case 3:
+        t1 = *(from+1);
+        t2 = *(from+2);
+        *to = *from;
+        *(to+1) = t1;
+        *(to+2) = t2;
+        return;
+
+      case 4:
+        t1 = *(from+1);
+        t2 = *(from+2);
+        t3 = *(from+3);
+        *to = *from;
+        *(to+1) = t1;
+        *(to+2) = t2;
+        *(to+3) = t3;
+        return;
+
+      default:
+        from += count;
+        to   += count;
+        while (count-- > 0)
+          *(--to) = *(--from); // Copy backwards, areas overlap destructively.
+        return;
+    }
+  }
+  // else
+  // Just delegate. HeapWords are optimally aligned anyway.
+  pd_aligned_disjoint_words(from, to, count);
+#endif
+}
+
+static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+
+  // Just delegate. HeapWords are optimally aligned anyway.
+  pd_aligned_conjoint_words(from, to, count);
+}
+
+static void pd_conjoint_bytes(void* from, void* to, size_t count) {
+
+#ifdef USE_INLINE_ASM
+  size_t count_in = count;
+  if (has_destructive_overlap((char*)from, (char*)to, count_in))
+    (void)memmove(to, from, count_in);
+  else {
+    jbyte*  to_bytes   = (jbyte*)to;
+    jbyte*  from_bytes = (jbyte*)from;
+    size_t  len_bytes  = count_in;
+    MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+  }
+#else
+  if (has_destructive_overlap((char*)from, (char*)to, count))
+    (void)memmove(to, from, count);
+  else
+    (void)memcpy(to, from, count);
+#endif
+}
+
+//**************************************************//
+//   C O N J O I N T  A T O M I C   C O P Y I N G   //
+//**************************************************//
+
+static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
+  // Call arraycopy stubs to do the job.
+  pd_conjoint_bytes(from, to, count); // bytes are always accessed atomically.
+}
+
+static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
+
+#ifdef USE_INLINE_ASM
+  size_t count_in = count;
+  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerShort)) {
+    // Use optimizations from shared code where no z-specific optimization exists.
+    copy_conjoint_jshorts_atomic(from, to, count);
+  } else {
+    jbyte* to_bytes   = (jbyte*)to;
+    jbyte* from_bytes = (jbyte*)from;
+    size_t len_bytes  = count_in*BytesPerShort;
+    MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+  }
+#else
+  // Use optimizations from shared code where no z-specific optimization exists.
+  copy_conjoint_jshorts_atomic(from, to, count);
+#endif
+}
+
+static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
+
+#ifdef USE_INLINE_ASM
+  size_t count_in = count;
+  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerInt)) {
+    switch (count_in) {
+      case 4: COPY4_ATOMIC_4(to,from)
+              return;
+      case 3: COPY4_ATOMIC_3(to,from)
+              return;
+      case 2: COPY4_ATOMIC_2(to,from)
+              return;
+      case 1: COPY4_ATOMIC_1(to,from)
+              return;
+      case 0: return;
+      default:
+        // Use optimizations from shared code where no z-specific optimization exists.
+        copy_conjoint_jints_atomic(from, to, count_in);
+        return;
+    }
+  }
+  // else
+  jbyte* to_bytes   = (jbyte*)to;
+  jbyte* from_bytes = (jbyte*)from;
+  size_t len_bytes  = count_in*BytesPerInt;
+  MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+#else
+  // Use optimizations from shared code where no z-specific optimization exists.
+  copy_conjoint_jints_atomic(from, to, count);
+#endif
+}
+
+static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
+
+#ifdef USE_INLINE_ASM
+  size_t count_in = count;
+  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
+    switch (count_in) {
+      case 4: COPY8_ATOMIC_4(to,from) return;
+      case 3: COPY8_ATOMIC_3(to,from) return;
+      case 2: COPY8_ATOMIC_2(to,from) return;
+      case 1: COPY8_ATOMIC_1(to,from) return;
+      case 0: return;
+      default:
+        from += count_in;
+        to   += count_in;
+        while (count_in-- > 0) { *(--to) = *(--from); } // Copy backwards, areas overlap destructively.
+        return;
+    }
+  }
+  // else {
+  jbyte* to_bytes   = (jbyte*)to;
+  jbyte* from_bytes = (jbyte*)from;
+  size_t len_bytes  = count_in*BytesPerLong;
+  MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+#else
+  size_t count_in = count;
+  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
+    if (count_in < 8) {
+      from += count_in;
+      to   += count_in;
+      while (count_in-- > 0)
+         *(--to) = *(--from); // Copy backwards, areas overlap destructively.
+      return;
+    }
+    // else {
+    from += count_in-1;
+    to   += count_in-1;
+    if (count_in&0x01) {
+      *(to--) = *(from--);
+      count_in--;
+    }
+    for (; count_in>0; count_in-=2) {
+      *to     = *from;
+      *(to-1) = *(from-1);
+      to     -= 2;
+      from   -= 2;
+    }
+  }
+  else
+    pd_aligned_disjoint_words((HeapWord*)from, (HeapWord*)to, count_in); // rare calls -> just delegate.
+#endif
+}
+
+static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
+
+#ifdef USE_INLINE_ASM
+  size_t count_in = count;
+  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerOop)) {
+    switch (count_in) {
+      case 4: COPY8_ATOMIC_4(to,from) return;
+      case 3: COPY8_ATOMIC_3(to,from) return;
+      case 2: COPY8_ATOMIC_2(to,from) return;
+      case 1: COPY8_ATOMIC_1(to,from) return;
+      case 0: return;
+      default:
+        from += count_in;
+        to   += count_in;
+        while (count_in-- > 0) { *(--to) = *(--from); } // Copy backwards, areas overlap destructively.
+        return;
+    }
+  }
+  // else
+  jbyte* to_bytes   = (jbyte*)to;
+  jbyte* from_bytes = (jbyte*)from;
+  size_t len_bytes  = count_in*BytesPerOop;
+  MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+#else
+  size_t count_in = count;
+  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerOop)) {
+    from += count_in;
+    to   += count_in;
+    while (count_in-- > 0) *(--to) = *(--from); // Copy backwards, areas overlap destructively.
+    return;
+  }
+  // else
+  pd_aligned_disjoint_words((HeapWord*)from, (HeapWord*)to, count_in); // rare calls -> just delegate.
+  return;
+#endif
+}
+
+static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_bytes_atomic(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count);
+}
+
+static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
+}
+
+static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
+}
+
+static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_oops_atomic((oop*)from, (oop*)to, count);
+}
+
+//**********************************************//
+//  M E M O R Y   I N I T I A L I S A T I O N   //
+//**********************************************//
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  // JVM2008: very rare, only in some tests.
+#ifdef USE_INLINE_ASM
+  // Initialize storage to a given value. Use memset instead of copy loop.
+  // For large chunks of memory, exploit special H/W support of z/Architecture:
+  // 1) init short piece of memory to page-align address
+  // 2) init largest part (all contained full pages) of memory using mvcle instruction.
+  //    z/Architecture processors have special H/W support for page-aligned storage
+  //    where len is an int multiple of page size. In that case, up to 4 cache lines are
+  //    processed in parallel and L1 cache is not polluted.
+  // 3) init the remaining piece of memory.
+  // Atomicity cannot really be an issue since gcc implements the loop body with XC anyway.
+  // If atomicity is a problem, we have to prevent gcc optimization. Best workaround: inline asm.
+
+  jbyte*  to_bytes  = (jbyte*)to;
+  size_t  len_bytes = count;
+
+  MVCLE_MEMINIT(to_bytes, value, len_bytes)
+
+#else
+  // Memset does the best job possible: loop over 256-byte MVCs, with
+  // the last MVC EXecuted. With the -mmvcle option, initialization
+  // is done using MVCLE -> slight advantage for large areas.
+  (void)memset(to, value, count);
+#endif
+}
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  // Occurs in dbg builds only. Usually memory poisoning with BAADBABE, DEADBEEF, etc.
+  // JVM2008: < 4k calls.
+  if (value == 0) {
+    pd_zero_to_words(tohw, count);
+    return;
+  }
+  if (value == ~(juint)(0)) {
+    pd_fill_to_bytes(tohw, count*HeapWordSize, (jubyte)(~(juint)(0)));
+    return;
+  }
+  julong* to = (julong*) tohw;
+  julong  v  = ((julong) value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  // JVM2008: very frequent, but virtually all calls are with value == 0.
+  pd_fill_to_words(tohw, count, value);
+}
+
+//**********************************//
+//  M E M O R Y   C L E A R I N G   //
+//**********************************//
+
+// Delegate to pd_zero_to_bytes. It also works HeapWord-atomic.
+// Distinguish between simple and large zero_to_words.
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_zero_to_bytes(tohw, count*HeapWordSize);
+}
+
+// Delegate to pd_zero_to_bytes. It also works HeapWord-atomic.
+static void pd_zero_to_words_large(HeapWord* tohw, size_t count) {
+  // JVM2008: generally frequent, some tests show very frequent calls.
+  pd_zero_to_bytes(tohw, count*HeapWordSize);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  // JVM2008: some calls (generally), some tests frequent
+#ifdef USE_INLINE_ASM
+  // Even zero_to_bytes() requires HeapWord-atomic, or, at least, sequential
+  // zeroing of the memory. MVCLE is not fit for that job:
+  //   "As observed by other CPUs and by the channel subsystem,
+  //    that portion of the first operand which is filled
+  //    with the padding byte is not necessarily stored into in
+  //    a left-to-right direction and may appear to be stored
+  //    into more than once."
+  // Therefore, implementation was changed to use (multiple) XC instructions.
+
+  const long line_size = 256;
+  jbyte* to_bytes  = (jbyte*)to;
+  size_t len_bytes = count;
+
+  if (len_bytes <= line_size) {
+    XC_MEMZERO_256(to_bytes, len_bytes);
+  } else {
+    XC_MEMZERO_ANY(to_bytes, len_bytes);
+  }
+
+#else
+  // Memset does the best job possible: loop over 256-byte MVCs, with
+  // the last MVC EXecuted. With the -mmvcle option, initialization
+  // is done using MVCLE -> slight advantage for large areas.
+  (void)memset(to, 0, count);
+#endif
+}
+
+#endif // CPU_S390_VM_COPY_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/debug_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/codeCache.hpp"
+#include "code/nmethod.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/init.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+
+// Platform-dependent debug helper. Intentionally a no-op on s390.
+void pd_ps(frame f) {}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/depChecker_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_DEPCHECKER_S390_HPP
+#define CPU_S390_VM_DEPCHECKER_S390_HPP
+
+// Nothing to do on z/Architecture
+
+#endif // CPU_S390_VM_DEPCHECKER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/disassembler_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_DISASSEMBLER_S390_HPP
+#define CPU_S390_VM_DISASSEMBLER_S390_HPP
+
+  // Alignment (in bytes) the disassembler may assume for instruction
+  // starts; 1 means no alignment restriction is imposed.
+  static int pd_instruction_alignment() {
+    return 1;
+  }
+
+  // CPU option string handed to the disassembler to select the
+  // z/Architecture instruction set.
+  static const char* pd_cpu_opts() {
+    return "zarch";
+  }
+
+#endif // CPU_S390_VM_DISASSEMBLER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/frame_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/markOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_s390.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+
+// Major contributions by Aha, AS.
+
+#ifdef ASSERT
+// Debug-only sanity hook; no location checking is needed on s390.
+void RegisterMap::check_location_valid() {
+}
+#endif // ASSERT
+
+
+// Profiling/safepoint support
+
+// Minimal sanity check: the frame is deemed safe if both its sp and fp
+// are non-NULL and lie within this thread's stack bounds.
+bool frame::safe_for_sender(JavaThread *thread) {
+  address stack_top    = thread->stack_base();
+  address stack_bottom = stack_top - thread->stack_size();
+  address the_sp = (address)sp();
+  address the_fp = (address)fp();
+  bool sp_ok = (the_sp != NULL) && (the_sp <= stack_top) && (the_sp >= stack_bottom);
+  bool fp_ok = (the_fp != NULL) && (the_fp <= stack_top) && (the_fp >= stack_bottom);
+  return sp_ok && fp_ok;
+}
+
+// A frame is interpreted iff its pc lies within the interpreter's code range.
+bool frame::is_interpreted_frame() const {
+  return Interpreter::contains(pc());
+}
+
+// sender_sp
+
+// Sender SP of an interpreted frame; on s390 this is the generic sender_sp().
+intptr_t* frame::interpreter_frame_sender_sp() const {
+  return sender_sp();
+}
+
+// Compute the sender of an entry frame (a Java frame called from C):
+// all C frames of the chunk are skipped and the last Java frame recorded
+// in the entry frame's JavaFrameAnchor is returned.
+frame frame::sender_for_entry_frame(RegisterMap *map) const {
+  assert(map != NULL, "map must be set");
+  // Java frame called from C. Skip all C frames and return top C
+  // frame of that chunk as the sender.
+  JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
+
+  assert(!entry_frame_is_first(), "next Java sp must be non zero");
+  assert(jfa->last_Java_sp() > _sp, "must be above this frame on stack");
+
+  map->clear();
+
+  assert(map->include_argument_oops(), "should be set by clear");
+
+  if (jfa->last_Java_pc() != NULL) {
+    frame fr(jfa->last_Java_sp(), jfa->last_Java_pc());
+    return fr;
+  }
+  // Last_java_pc is not set if we come here from compiled code.
+  frame fr(jfa->last_Java_sp());
+  return fr;
+}
+
+// Compute the sender of an interpreted frame. The caller's sender_sp kept
+// in the ijava_state is handed on as the sender's unextended_sp.
+frame frame::sender_for_interpreter_frame(RegisterMap *map) const {
+  // Pass callers sender_sp as unextended_sp.
+  return frame(sender_sp(), sender_pc(), (intptr_t*)(ijava_state()->sender_sp));
+}
+
+// Compute the sender of a compiled frame and, if requested via the map,
+// update the register map from the code blob's oop maps.
+frame frame::sender_for_compiled_frame(RegisterMap *map) const {
+  assert(map != NULL, "map must be set");
+  // Frame owned by compiler.
+
+  address pc = *compiled_sender_pc_addr(_cb);
+  frame caller(compiled_sender_sp(_cb), pc);
+
+  // Now adjust the map.
+
+  // Get the rest.
+  if (map->update_map()) {
+    // Tell GC to use argument oopmaps for some runtime stubs that need it.
+    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+    if (_cb->oop_maps() != NULL) {
+      OopMapSet::update_register_map(this, map);
+    }
+  }
+
+  return caller;
+}
+
+// Sender SP of a compiled frame; the code blob argument is unused on s390.
+intptr_t* frame::compiled_sender_sp(CodeBlob* cb) const {
+  return sender_sp();
+}
+
+// Address of the sender pc of a compiled frame; cb is unused on s390.
+address* frame::compiled_sender_pc_addr(CodeBlob* cb) const {
+  return sender_pc_addr();
+}
+
+// Generic sender computation: dispatch on the frame type
+// (entry / interpreted / compiled / native wrapper).
+frame frame::sender(RegisterMap* map) const {
+  // Default is we don't have to follow them. The sender_for_xxx will
+  // update it accordingly.
+  map->set_include_argument_oops(false);
+
+  if (is_entry_frame()) {
+    return sender_for_entry_frame(map);
+  }
+  if (is_interpreted_frame()) {
+    return sender_for_interpreter_frame(map);
+  }
+  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+  if (_cb != NULL) {
+    return sender_for_compiled_frame(map);
+  }
+  // Must be native-compiled frame, i.e. the marshaling code for native
+  // methods that exists in the core system.
+  return frame(sender_sp(), sender_pc());
+}
+
+// Patch this frame's return pc (stored in its own ABI section) to 'pc',
+// re-resolve the code blob, and keep _pc/_deopt_state consistent if the
+// frame had already been deoptimized.
+void frame::patch_pc(Thread* thread, address pc) {
+  if (TracePcPatching) {
+    tty->print_cr("patch_pc at address  " PTR_FORMAT " [" PTR_FORMAT " -> " PTR_FORMAT "] ",
+                  p2i(&((address*) _sp)[-1]), p2i(((address*) _sp)[-1]), p2i(pc));
+  }
+  own_abi()->return_pc = (uint64_t)pc;
+  _cb = CodeCache::find_blob(pc);
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    assert(original_pc == _pc, "expected original to be stored before patching");
+    _deopt_state = is_deoptimized;
+    // Leave _pc as is.
+  } else {
+    _deopt_state = not_deoptimized;
+    _pc = pc;
+  }
+}
+
+// Validity check for an interpreted frame. No additional checks are
+// implemented on s390; always returns true.
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+  // Is there anything to do?
+  assert(is_interpreted_frame(), "Not an interpreted frame");
+  return true;
+}
+
+// Extract the method result of an interpreted frame and return its type.
+// For native methods the result was saved in the interpreter state
+// (oop_tmp / lresult / fresult); otherwise it is read from the top of the
+// expression stack. Oop results go to *oop_result, primitives to
+// *value_result.
+// Fix: the native-path T_BYTE case stored into the jvalue's 'z' (jboolean)
+// field instead of its 'b' (jbyte) field (cf. the non-native path below).
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  Method* method = interpreter_frame_method();
+  BasicType type = method->result_type();
+
+  if (method->is_native()) {
+    address lresult = (address)&(ijava_state()->lresult);
+    address fresult = (address)&(ijava_state()->fresult);
+
+    switch (type) {
+      case T_OBJECT:
+      case T_ARRAY: {
+        *oop_result = (oop) (void*) ijava_state()->oop_tmp;
+        break;
+      }
+      // We use std/stfd to store the values.
+      case T_BOOLEAN : value_result->z = (jboolean) *(unsigned long*)lresult; break;
+      case T_INT     : value_result->i = (jint)     *(long*)lresult;          break;
+      case T_CHAR    : value_result->c = (jchar)    *(unsigned long*)lresult; break;
+      case T_SHORT   : value_result->s = (jshort)   *(long*)lresult;          break;
+      case T_BYTE    : value_result->b = (jbyte)    *(long*)lresult;          break;
+      case T_LONG    : value_result->j = (jlong)    *(long*)lresult;          break;
+      case T_FLOAT   : value_result->f = (jfloat)   *(float*)fresult;        break;
+      case T_DOUBLE  : value_result->d = (jdouble)  *(double*)fresult;        break;
+      case T_VOID    : break; // Nothing to do.
+      default        : ShouldNotReachHere();
+    }
+  } else {
+    intptr_t* tos_addr = interpreter_frame_tos_address();
+    switch (type) {
+      case T_OBJECT:
+      case T_ARRAY: {
+       oop obj = *(oop*)tos_addr;
+       assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+       *oop_result = obj;
+       break;
+      }
+      case T_BOOLEAN : value_result->z = (jboolean) *(jint*)tos_addr; break;
+      case T_BYTE    : value_result->b = (jbyte) *(jint*)tos_addr; break;
+      case T_CHAR    : value_result->c = (jchar) *(jint*)tos_addr; break;
+      case T_SHORT   : value_result->s = (jshort) *(jint*)tos_addr; break;
+      case T_INT     : value_result->i = *(jint*)tos_addr; break;
+      case T_LONG    : value_result->j = *(jlong*)tos_addr; break;
+      case T_FLOAT   : value_result->f = *(jfloat*)tos_addr; break;
+      case T_DOUBLE  : value_result->d = *(jdouble*)tos_addr; break;
+      case T_VOID    : break; // Nothing to do.
+      default        : ShouldNotReachHere();
+    }
+  }
+
+  return type;
+}
+
+
+// Dump all frames starting at a given C stack-pointer.
+// Use max_frames to limit the number of traced frames (0 = no limit).
+//   flags bit 0: also print outgoing arguments of interpreter frames,
+//   flags bit 1: also print interpreter state pointers.
+// Fix: guard the print_outgoing_arguments branch against a NULL method
+// (printed as "?" above) before dereferencing it.
+void frame::back_trace(outputStream* st, intptr_t* start_sp, intptr_t* top_pc, unsigned long flags, int max_frames) {
+
+  static char buf[ 150 ];
+
+  bool print_outgoing_arguments = flags & 0x1;
+  bool print_istate_pointers    = flags & 0x2;
+  int num = 0;
+
+  intptr_t* current_sp = (intptr_t*) start_sp;
+  int last_num_jargs = 0;
+  int frame_type = 0;
+  int last_frame_type = 0;
+
+  while (current_sp) {
+    // The ABI back chain (callers_sp) is the first slot of the frame.
+    intptr_t* current_fp = (intptr_t*) *current_sp;
+    address   current_pc = (num == 0)
+                           ? (address) top_pc
+                           : (address) *((intptr_t*)(((address) current_sp) + _z_abi(return_pc)));
+
+    if ((intptr_t*) current_fp != 0 && (intptr_t*) current_fp <= current_sp) {
+      st->print_cr("ERROR: corrupt stack");
+      return;
+    }
+
+    st->print("#%-3d ", num);
+    const char* type_name = "    ";
+    const char* function_name = NULL;
+
+    // Detect current frame's frame_type, default to 'C frame'.
+    frame_type = 0;
+
+    CodeBlob* blob = NULL;
+
+    if (Interpreter::contains(current_pc)) {
+      frame_type = 1;
+    } else if (StubRoutines::contains(current_pc)) {
+      if (StubRoutines::returns_to_call_stub(current_pc)) {
+        frame_type = 2;
+      } else {
+        frame_type = 4;
+        type_name = "stu";
+        StubCodeDesc* desc = StubCodeDesc::desc_for (current_pc);
+        if (desc) {
+          function_name = desc->name();
+        } else {
+          function_name = "unknown stub";
+        }
+      }
+    } else if (CodeCache::contains(current_pc)) {
+      blob = CodeCache::find_blob_unsafe(current_pc);
+      if (blob) {
+        if (blob->is_nmethod()) {
+          frame_type = 3;
+        } else if (blob->is_deoptimization_stub()) {
+          frame_type = 4;
+          type_name = "deo";
+          function_name = "deoptimization blob";
+        } else if (blob->is_uncommon_trap_stub()) {
+          frame_type = 4;
+          type_name = "uct";
+          function_name = "uncommon trap blob";
+        } else if (blob->is_exception_stub()) {
+          frame_type = 4;
+          type_name = "exc";
+          function_name = "exception blob";
+        } else if (blob->is_safepoint_stub()) {
+          frame_type = 4;
+          type_name = "saf";
+          function_name = "safepoint blob";
+        } else if (blob->is_runtime_stub()) {
+          frame_type = 4;
+          type_name = "run";
+          function_name = ((RuntimeStub *)blob)->name();
+        } else if (blob->is_method_handles_adapter_blob()) {
+          frame_type = 4;
+          type_name = "mha";
+          function_name = "method handles adapter blob";
+        } else {
+          frame_type = 4;
+          type_name = "blo";
+          function_name = "unknown code blob";
+        }
+      } else {
+        frame_type = 4;
+        type_name = "blo";
+        function_name = "unknown code blob";
+      }
+    }
+
+    st->print("sp=" PTR_FORMAT " ", p2i(current_sp));
+
+    if (frame_type == 0) {
+      // For C frames the return pc is found in the gpr14 save slot.
+      current_pc = (address) *((intptr_t*)(((address) current_sp) + _z_abi(gpr14)));
+    }
+
+    st->print("pc=" PTR_FORMAT " ", p2i(current_pc));
+    st->print(" ");
+
+    switch (frame_type) {
+      case 0: // C frame:
+        {
+          st->print("    ");
+          if (current_pc == 0) {
+            st->print("? ");
+          } else {
+             // name
+            int func_offset;
+            char demangled_name[256];
+            int demangled_name_len = 256;
+            if (os::dll_address_to_function_name(current_pc, demangled_name, demangled_name_len, &func_offset)) {
+              demangled_name[demangled_name_len-1] = '\0';
+              st->print(func_offset == -1 ? "%s " : "%s+0x%x", demangled_name, func_offset);
+            } else {
+              st->print("? ");
+            }
+          }
+        }
+        break;
+
+      case 1: // interpreter frame:
+        {
+          st->print(" i  ");
+
+          if (last_frame_type != 1) last_num_jargs = 8;
+
+          // name
+          Method* method = *(Method**)((address)current_fp + _z_ijava_state_neg(method));
+          if (method) {
+            if (method->is_synchronized()) st->print("synchronized ");
+            if (method->is_static()) st->print("static ");
+            if (method->is_native()) st->print("native ");
+            method->name_and_sig_as_C_string(buf, sizeof(buf));
+            st->print("%s ", buf);
+          }
+          else
+            st->print("? ");
+
+          intptr_t* tos = (intptr_t*) *(intptr_t*)((address)current_fp + _z_ijava_state_neg(esp));
+          if (print_istate_pointers) {
+            st->cr();
+            st->print("     ");
+            st->print("ts=" PTR_FORMAT " ", p2i(tos));
+          }
+
+          // Dump some Java stack slots.
+          // Do not dereference method when it is NULL (printed as "?" above).
+          if (print_outgoing_arguments && method != NULL) {
+            if (method->is_native()) {
+#ifdef ASSERT
+              intptr_t* cargs = (intptr_t*) (((address)current_sp) + _z_abi(carg_1));
+              for (int i = 0; i < last_num_jargs; i++) {
+                // Cargs is not prepushed.
+                st->cr();
+                st->print("        ");
+                st->print(PTR_FORMAT, *(cargs));
+                cargs++;
+              }
+#endif /* ASSERT */
+            }
+            else {
+              if (tos) {
+                for (int i = 0; i < last_num_jargs; i++) {
+                  // tos+0 is prepushed, ignore.
+                  tos++;
+                  if (tos >= (intptr_t *)((address)current_fp + _z_ijava_state_neg(monitors)))
+                    break;
+                  st->cr();
+                  st->print("        ");
+                  st->print(PTR_FORMAT " %+.3e %+.3le", *(tos), *(float*)(tos), *(double*)(tos));
+                }
+              }
+            }
+            last_num_jargs = method->size_of_parameters();
+          }
+        }
+        break;
+
+      case 2: // entry frame:
+        {
+          st->print("v2i ");
+
+          // name
+          st->print("call stub");
+        }
+        break;
+
+      case 3: // compiled frame:
+        {
+          st->print(" c  ");
+
+          // name
+          Method* method = ((nmethod *)blob)->method();
+          if (method) {
+            method->name_and_sig_as_C_string(buf, sizeof(buf));
+            st->print("%s ", buf);
+          }
+          else
+            st->print("? ");
+        }
+        break;
+
+      case 4: // named frames
+        {
+          st->print("%s ", type_name);
+
+          // name
+          if (function_name)
+            st->print("%s", function_name);
+        }
+        break;
+
+      default:
+        break;
+    }
+
+    st->cr();
+    st->flush();
+
+    current_sp = current_fp;
+    last_frame_type = frame_type;
+    num++;
+    // Check for maximum # of frames, and stop when reached.
+    if (max_frames > 0 && --max_frames == 0)
+      break;
+  }
+
+}
+
+// Convenience function for calls from the debugger.
+
+// Back trace with all flags off and no frame limit.
+extern "C" void bt(intptr_t* start_sp,intptr_t* top_pc) {
+  frame::back_trace(tty,start_sp, top_pc, 0);
+}
+
+// Back trace with all tracing flag bits set and no frame limit.
+extern "C" void bt_full(intptr_t* start_sp,intptr_t* top_pc) {
+  frame::back_trace(tty,start_sp, top_pc, (unsigned long)(long)-1);
+}
+
+
+// Function for tracing a limited number of frames.
+// Use this one if you only need to see the "top of stack" frames.
+// Back trace limited to max_frames frames, flags off.
+extern "C" void bt_max(intptr_t *start_sp, intptr_t *top_pc, int max_frames) {
+  frame::back_trace(tty, start_sp, top_pc, 0, max_frames);
+}
+
+#if !defined(PRODUCT)
+
+// Describe one z_ijava_state slot for FrameValues output.
+#define DESCRIBE_ADDRESS(name) \
+  values.describe(frame_no, (intptr_t*)&ijava_state()->name, #name);
+
+// Describe the platform-specific (z_ijava_state) slots of an interpreted
+// frame for debug frame dumping.
+void frame::describe_pd(FrameValues& values, int frame_no) {
+  if (is_interpreted_frame()) {
+    // Describe z_ijava_state elements.
+    DESCRIBE_ADDRESS(method);
+    DESCRIBE_ADDRESS(locals);
+    DESCRIBE_ADDRESS(monitors);
+    DESCRIBE_ADDRESS(cpoolCache);
+    DESCRIBE_ADDRESS(bcp);
+    DESCRIBE_ADDRESS(mdx);
+    DESCRIBE_ADDRESS(esp);
+    DESCRIBE_ADDRESS(sender_sp);
+    DESCRIBE_ADDRESS(top_frame_sp);
+    DESCRIBE_ADDRESS(oop_tmp);
+    DESCRIBE_ADDRESS(lresult);
+    DESCRIBE_ADDRESS(fresult);
+  }
+}
+
+#endif // !PRODUCT
+
+// Value the deoptimization machinery uses to reset the saved FP;
+// on s390 this is simply the frame pointer.
+intptr_t *frame::initial_deoptimization_info() {
+  // Used to reset the saved FP.
+  return fp();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/frame_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,552 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by ML, AHa.
+
+#ifndef CPU_S390_VM_FRAME_S390_HPP
+#define CPU_S390_VM_FRAME_S390_HPP
+
+#include "runtime/synchronizer.hpp"
+
+  //  C frame layout on ZARCH_64.
+  //
+  //  In this figure the stack grows upwards, while memory grows
+  //  downwards. See "Linux for zSeries: ELF Application Binary Interface Supplement",
+  //  IBM Corp. (LINUX-1107-01)
+  //
+  //  Square brackets denote stack regions possibly larger
+  //  than a single 64 bit slot.
+  //
+  //  STACK:
+  //    0       [C_FRAME]               <-- SP after prolog (mod 8 = 0)
+  //            [C_FRAME]               <-- SP before prolog
+  //            ...
+  //            [C_FRAME]
+  //
+  //  C_FRAME:
+  //    0       [ABI_160]
+  //
+  //  ABI_160:
+  //    0       [ABI_16]
+  //    16      CARG_1: spill slot for outgoing arg 1. used by next callee.
+  //    24      CARG_2: spill slot for outgoing arg 2. used by next callee.
+  //    32      CARG_3: spill slot for outgoing arg 3. used by next callee.
+  //    40      CARG_4: spill slot for outgoing arg 4. used by next callee.
+  //    48      GPR_6:  spill slot for GPR_6. used by next callee.
+  //    ...     ...
+  //    120     GPR_15:  spill slot for GPR_15. used by next callee.
+  //    128     CFARG_1: spill slot for outgoing fp arg 1. used by next callee.
+  //    136     CFARG_2: spill slot for outgoing fp arg 2. used by next callee.
+  //    144     CFARG_3: spill slot for outgoing fp arg 3. used by next callee.
+  //    152     CFARG_4: spill slot for outgoing fp arg 4. used by next callee.
+  //    160     [REMAINING CARGS]
+  //
+  //  ABI_16:
+  //    0       callers_sp
+  //    8       return_pc
+
+ public:
+
+  // C frame layout
+
+  // Basic C-frame constants (see the ABI figure above).
+  typedef enum {
+     // stack alignment
+     alignment_in_bytes = 8,
+     // log_2(8*8 bits) = 6.
+     log_2_of_alignment_in_bits = 6
+  } frame_constants;
+
+  // Minimal C frame header (ABI_16 in the figure above): back chain to the
+  // caller's frame plus the return address.
+  struct z_abi_16 {
+    uint64_t callers_sp;
+    uint64_t return_pc;
+  };
+
+  enum {
+    z_abi_16_size = sizeof(z_abi_16)
+  };
+
+  // Byte offset of a z_abi_16 component.
+  #define _z_abi16(_component) \
+          (offset_of(frame::z_abi_16, _component))
+
+  // ABI_160:
+
+  // REMARK: This structure should reflect the "minimal" ABI frame
+  // layout, but it doesn't. There is an extra field at the end of the
+  // structure that marks the area where arguments are passed, when
+  // the argument registers "overflow". Thus, sizeof(z_abi_160)
+  // doesn't yield the expected (and desired) result. Therefore, as
+  // long as we do not provide extra infrastructure, one should use
+  // either z_abi_160_size, or _z_abi(remaining_cargs) instead of
+  // sizeof(...).
+  // Full ABI frame header (ABI_160 in the figure above). See the REMARK
+  // above: remaining_cargs only marks the overflow-argument area, so use
+  // z_abi_160_size or _z_abi(remaining_cargs) rather than sizeof.
+  struct z_abi_160 {
+    uint64_t callers_sp;
+    uint64_t return_pc;
+    uint64_t carg_1;
+    uint64_t carg_2;
+    uint64_t carg_3;
+    uint64_t carg_4;
+    uint64_t gpr6;
+    uint64_t gpr7;
+    uint64_t gpr8;
+    uint64_t gpr9;
+    uint64_t gpr10;
+    uint64_t gpr11;
+    uint64_t gpr12;
+    uint64_t gpr13;
+    uint64_t gpr14;
+    uint64_t gpr15;
+    uint64_t cfarg_1;
+    uint64_t cfarg_2;
+    uint64_t cfarg_3;
+    uint64_t cfarg_4;
+    uint64_t remaining_cargs;
+  };
+
+  enum {
+    z_abi_160_size = 160
+  };
+
+  // Byte offset of a z_abi_160 component.
+  #define _z_abi(_component) \
+          (offset_of(frame::z_abi_160, _component))
+
+  struct z_abi_160_spill : z_abi_160 {
+   // Additional spill slots. Use as 'offset_of(z_abi_160_spill, spill[n])'.
+    uint64_t spill[0];
+    // Aligned to frame::alignment_in_bytes.
+  };
+
+
+  // non-volatile GPRs:
+
+  // Spill slots for the non-volatile GPRs r6..r13.
+  struct z_spill_nonvolatiles {
+    uint64_t r6;
+    uint64_t r7;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+  };
+
+  enum {
+    z_spill_nonvolatiles_size = sizeof(z_spill_nonvolatiles)
+  };
+
+  // Negative byte offset of a spill slot, relative to the area's end.
+  #define _z_spill_nonvolatiles_neg(_component) \
+          (-frame::z_spill_nonvolatiles_size + offset_of(frame::z_spill_nonvolatiles, _component))
+
+  // Frame layout for the Java template interpreter on z/Architecture.
+  //
+  // In these figures the stack grows upwards, while memory grows
+  // downwards. Square brackets denote regions possibly larger than
+  // single 64 bit slots.
+  //
+  // STACK (no JNI, no compiled code, no library calls, template interpreter is active):
+  //
+  //   0       [TOP_IJAVA_FRAME]
+  //           [PARENT_IJAVA_FRAME]
+  //           [PARENT_IJAVA_FRAME]
+  //           ...
+  //           [PARENT_IJAVA_FRAME]
+  //           [ENTRY_FRAME]
+  //           [C_FRAME]
+  //           ...
+  //           [C_FRAME]
+  //
+  // TOP_IJAVA_FRAME:
+  //
+  //   0       [TOP_IJAVA_FRAME_ABI]
+  //   16      [operand stack]
+  //           [monitors]      (optional)
+  //           [IJAVA_STATE]
+  //           note: Own locals are located in the caller frame.
+  //
+  // PARENT_IJAVA_FRAME:
+  //
+  //   0       [PARENT_IJAVA_FRAME_ABI]
+  //           [callee's locals w/o arguments]
+  //           [outgoing arguments]
+  //           [used part of operand stack w/o arguments]
+  //           [monitors]      (optional)
+  //           [IJAVA_STATE]
+  //
+  // ENTRY_FRAME:
+  //
+  //   0       [PARENT_IJAVA_FRAME_ABI]
+  //           [callee's locals w/o arguments]
+  //           [outgoing arguments]
+  //           [ENTRY_FRAME_LOCALS]
+  //
+  // TOP_IJAVA_FRAME_ABI:
+  //
+  //   0       [ABI_160]
+  //
+  //
+  // PARENT_IJAVA_FRAME_ABI:
+  //
+  //   0       [ABI_16]
+  //
+  // IJAVA_STATE:
+  //
+  //   0       method
+  //   8       locals
+  //           monitors               : monitor block top (i.e. lowest address)
+  //           cpoolCache
+  //           bcp
+  //           mdx
+  //           esp                    : Points to first slot above operands.
+  //           sender_sp              : See comment in z_ijava_state.
+  //           top_frame_sp           : Own SP before modification by i2c adapter.
+  //           oop_tmp
+  //           lresult
+  //           fresult
+  //
+  // EXAMPLE:
+  // ---------
+  //
+  // 3 monitors, 5 operand stack slots max. / 3 allocated
+  //
+  //    F0      callers_sp               <- Z_SP (callers_sp == Z_fp (own fp))
+  //            return_pc
+  //            [rest of ABI_160]
+  //           /slot 4: free
+  //    oper. | slot 3: free             <- Z_esp points to first free slot
+  //    stack | slot 2: ref val v2                caches IJAVA_STATE.esp
+  //          | slot 1: unused
+  //           \slot 0: long val v1
+  //           /slot 5                   <- IJAVA_STATE.monitors  = monitor block top
+  //          | slot 4
+  //  monitors| slot 3
+  //          | slot 2
+  //          | slot 1
+  //           \slot 0
+  //            [IJAVA_STATE]            <- monitor block bot (points to first byte in IJAVA_STATE)
+  //    F1      [PARENT_IJAVA_FRAME_ABI] <- Z_fp (== *Z_SP, points to slot just below IJAVA_STATE)
+  //            [F0's locals]            <- Z_locals, locals[i] := *(Z_locals - i*BytesPerWord)
+  //            [F1's operand stack]
+  //            [F1's monitors]      (optional)
+  //            [IJAVA_STATE]
+
+ public:
+
+  // PARENT_IJAVA_FRAME_ABI
+
+  // Parent interpreter frames only need the minimal ABI header (ABI_16).
+  struct z_parent_ijava_frame_abi : z_abi_16 {
+  };
+
+  enum {
+    z_parent_ijava_frame_abi_size = sizeof(z_parent_ijava_frame_abi)
+  };
+
+  #define _z_parent_ijava_frame_abi(_component) \
+          (offset_of(frame::z_parent_ijava_frame_abi, _component))
+
+  // TOP_IJAVA_FRAME_ABI
+
+  // The top interpreter frame carries a full ABI_160 area for its callees.
+  struct z_top_ijava_frame_abi : z_abi_160 {
+  };
+
+  enum {
+    z_top_ijava_frame_abi_size = sizeof(z_top_ijava_frame_abi)
+  };
+
+  #define _z_top_ijava_frame_abi(_component) \
+          (offset_of(frame::z_top_ijava_frame_abi, _component))
+
+  // IJAVA_STATE
+
+  // Interpreter state of an interpreter frame, addressed with negative
+  // offsets via _z_ijava_state_neg (see the IJAVA_STATE figure above).
+  struct z_ijava_state{
+    DEBUG_ONLY(uint64_t magic;) // wrong magic -> wrong state!
+    uint64_t method;
+    uint64_t mirror;
+    uint64_t locals;       // Z_locals
+    uint64_t monitors;     // Monitor block top (lowest address), see figure above.
+    uint64_t cpoolCache;
+    uint64_t bcp;          // Z_bcp
+    uint64_t mdx;
+    uint64_t esp;          // Z_esp
+    // Caller's original SP before modification by c2i adapter (if caller is compiled)
+    // and before top -> parent frame conversion by the interpreter entry.
+    // Note: for i2i calls a correct sender_sp is required, too, because there
+    // we cannot use the caller's top_frame_sp as sp when removing the callee
+    // frame (caller could be compiled or entry frame). Therefore the sender_sp
+    // has to be the interpreted caller's sp as TOP_IJAVA_FRAME. See also
+    // AbstractInterpreter::layout_activation() used by deoptimization.
+    uint64_t sender_sp;
+    // Own SP before modification by i2c adapter and top-2-parent-resize
+    // by interpreted callee.
+    uint64_t top_frame_sp;
+    // Slots only needed for native calls. Maybe better to move elsewhere.
+    uint64_t oop_tmp;
+    uint64_t lresult;
+    uint64_t fresult;
+  };
+
+  enum  {
+    z_ijava_state_size = sizeof(z_ijava_state)
+  };
+
+#ifdef ASSERT
+  enum  {
+    z_istate_magic_number = 0x900d // ~= good magic
+  };
+#endif
+
+// Negative byte offset of a z_ijava_state component, relative to the
+// state's end (the state sits just below the frame's ABI section).
+#define _z_ijava_state_neg(_component) \
+         (int) (-frame::z_ijava_state_size + offset_of(frame::z_ijava_state, _component))
+
+  // ENTRY_FRAME
+
+  // Locals of an ENTRY_FRAME (Java called from C via the call stub),
+  // addressed with negative offsets via _z_entry_frame_locals_neg.
+  struct z_entry_frame_locals {
+    uint64_t call_wrapper_address;
+    uint64_t result_address;
+    uint64_t result_type;
+    uint64_t arguments_tos_address;
+    // Callee saved registers are spilled to caller frame.
+    // Caller must have z_abi_160.
+  };
+
+  enum {
+    z_entry_frame_locals_size = sizeof(z_entry_frame_locals)
+  };
+
+  // Negative byte offset of an entry-frame local, relative to the area's end.
+  #define _z_entry_frame_locals_neg(_component) \
+          (int) (-frame::z_entry_frame_locals_size + offset_of(frame::z_entry_frame_locals, _component))
+
+  //  Frame layout for JIT generated methods
+  //
+  //  In these figures the stack grows upwards, while memory grows
+  //  downwards. Square brackets denote regions possibly larger than single
+  //  64 bit slots.
+  //
+  //  STACK (interpreted Java calls JIT generated Java):
+  //
+  //          [JIT_FRAME]                                <-- SP (mod 16 = 0)
+  //          [TOP_IJAVA_FRAME]
+  //         ...
+  //
+  //
+  //  JIT_FRAME (is a C frame according to z/Architecture ABI):
+  //
+  //          [out_preserve]
+  //          [out_args]
+  //          [spills]
+  //          [monitor] (optional)
+  //       ...
+  //          [monitor] (optional)
+  //          [in_preserve] added / removed by prolog / epilog
+
+ public:
+
+   // Minimal ABI header of a JIT (compiled Java) frame: caller's SP chain slot,
+   // return pc, and two scratch slots.
+   struct z_top_jit_abi_32 {
+     uint64_t callers_sp;
+     uint64_t return_pc;
+     uint64_t toc;
+     uint64_t tmp;
+   };
+
+  // Positive byte offset of a component within the JIT ABI header.
+  #define _z_top_jit_abi(_component) \
+          (offset_of(frame::z_top_jit_abi_32, _component))
+
+  // One monitor slot in a JIT frame.
+  struct jit_monitor {
+        uint64_t monitor[1];
+  };
+
+  struct jit_in_preserve {
+    // Used to provide a z/Architecture ABI on top of a jit frame.
+    // nothing to add here!
+  };
+
+  struct jit_out_preserve : z_top_jit_abi_32 {
+    // Nothing to add here!
+  };
+
+  enum {
+    z_jit_out_preserve_size = sizeof(jit_out_preserve)
+  };
+
+  // Sizes expressed in 4-byte (stack slot) units, as expected by the compilers.
+  typedef enum {
+     jit_monitor_size_in_4_byte_units = sizeof(jit_monitor) / 4,
+
+     // Stack alignment requirement. Log_2 of alignment size in bits.
+     // log_2(16*8 bits) = 7.
+     jit_log_2_of_stack_alignment_in_bits = 7,
+
+     jit_out_preserve_size_in_4_byte_units = sizeof(jit_out_preserve) / 4,
+
+     jit_in_preserve_size_in_4_byte_units = sizeof(jit_in_preserve) / 4
+  } jit_frame_constants;
+
+
+  // C2I adapter frames:
+  //
+  // STACK (interpreted called from compiled, on entry to frame manager):
+  //
+  //       [TOP_C2I_FRAME]
+  //       [JIT_FRAME]
+  //       ...
+  //
+  //
+  // STACK (interpreted called from compiled, after interpreter has been pushed):
+  //
+  //       [TOP_IJAVA_FRAME]
+  //       [PARENT_C2I_FRAME]
+  //       [JIT_FRAME]
+  //       ...
+  //
+  //
+  // TOP_C2I_FRAME:
+  //
+  //       [TOP_IJAVA_FRAME_ABI]
+  //       [outgoing Java arguments]
+  //       alignment (optional)
+  //
+  //
+  // PARENT_C2I_FRAME:
+  //
+  //       [PARENT_IJAVA_FRAME_ABI]
+  //       alignment (optional)
+  //       [callee's locals w/o arguments]
+  //       [outgoing Java arguments]
+  //       alignment (optional)
+
+ private:
+
+  //  STACK:
+  //            ...
+  //            [THIS_FRAME]             <-- this._sp (stack pointer for this frame)
+  //            [CALLER_FRAME]           <-- this.fp() (_sp of caller's frame)
+  //            ...
+  //
+
+  // NOTE: Stack pointer is now held in the base class, so remove it from here.
+
+  // Frame pointer for this frame.
+  intptr_t* _fp;
+
+  // Needed by deoptimization.
+  intptr_t* _unextended_sp;
+
+ public:
+
+  // Interface for all frames:
+
+  // Accessors
+
+  inline intptr_t* fp() const { return _fp; }
+
+ private:
+
+  inline void find_codeblob_and_set_pc_and_deopt_state(address pc);
+
+ // Constructors
+
+ public:
+  frame(intptr_t* sp);
+  // To be used, if sp was not extended to match callee's calling convention.
+  frame(intptr_t* sp, address pc);
+  frame(intptr_t* sp, address pc, intptr_t* unextended_sp);
+
+  // Access frame via stack pointer.
+  inline intptr_t* sp_addr_at(int index) const  { return &sp()[index]; }
+  inline intptr_t  sp_at(     int index) const  { return *sp_addr_at(index); }
+
+  // Access ABIs.
+  inline z_abi_16*  own_abi()     const { return (z_abi_16*) sp(); }
+  inline z_abi_160* callers_abi() const { return (z_abi_160*) fp(); }
+
+ private:
+
+  intptr_t* compiled_sender_sp(CodeBlob* cb) const;
+  address*  compiled_sender_pc_addr(CodeBlob* cb) const;
+
+  address* sender_pc_addr(void) const;
+
+ public:
+
+  // Additional interface for interpreter frames:
+  static int interpreter_frame_interpreterstate_size_in_bytes();
+  static int interpreter_frame_monitor_size_in_bytes();
+
+ private:
+
+  // template interpreter state
+  inline z_ijava_state* ijava_state() const;
+
+  // Where z_ijava_state.monitors is saved.
+  inline BasicObjectLock**  interpreter_frame_monitors_addr() const;
+  // Where z_ijava_state.esp is saved.
+  inline intptr_t** interpreter_frame_esp_addr() const;
+
+ public:
+  inline intptr_t* interpreter_frame_top_frame_sp();
+  inline void interpreter_frame_set_tos_address(intptr_t* x);
+  inline void interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp);
+  inline void interpreter_frame_set_sender_sp(intptr_t* sender_sp);
+#ifdef ASSERT
+  inline void interpreter_frame_set_magic();
+#endif
+
+  // monitors:
+
+  // Next two functions read and write z_ijava_state.monitors.
+ private:
+  inline BasicObjectLock* interpreter_frame_monitors() const;
+  inline void interpreter_frame_set_monitors(BasicObjectLock* monitors);
+
+ public:
+
+  // Additional interface for entry frames:
+  // The entry-frame locals block sits immediately below the frame pointer.
+  inline z_entry_frame_locals* entry_frame_locals() const {
+    return (z_entry_frame_locals*) (((address) fp()) - z_entry_frame_locals_size);
+  }
+
+ public:
+
+  // Get caller pc from stack slot of gpr14.
+  address native_sender_pc() const;
+  // Get caller pc from stack slot of gpr10.
+  address callstub_sender_pc() const;
+
+  // Dump all frames starting at a given C stack pointer.
+  // max_frames: Limit number of traced frames.
+  //             <= 0 --> full trace
+  //             > 0  --> trace the #max_frames topmost frames
+  static void back_trace(outputStream* st, intptr_t* start_sp, intptr_t* top_pc,
+                         unsigned long flags, int max_frames = 0);
+
+  enum {
+    // This enum value specifies the offset from the pc remembered by
+    // call instructions to the location where control returns to
+    // after a normal return. Most architectures remember the return
+    // location directly, i.e. the offset is zero. This is the case
+    // for z/Architecture, too.
+    //
+    // Normal return address is the instruction following the branch.
+    pc_return_offset =  0,
+  };
+
+#endif // CPU_S390_VM_FRAME_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/frame_s390.inline.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_FRAME_S390_INLINE_HPP
+#define CPU_S390_VM_FRAME_S390_INLINE_HPP
+
+#include "code/codeCache.hpp"
+#include "code/vmreg.inline.hpp"
+
+// Inline functions for z/Architecture frames:
+
+// Shared constructor helper: initializes _cb, _pc, _fp and _deopt_state from
+// the given continuation pc. _sp must already be set by the caller.
+// NOTE(review): some call-site comments claim this also adjusts _unextended_sp,
+// but this body does not touch it -- confirm the intended contract.
+inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) {
+  assert(pc != NULL, "precondition: must have PC");
+
+  _cb = CodeCache::find_blob(pc);
+  _pc = pc;   // Must be set for get_deopt_original_pc().
+
+  // This frame's FP is the caller's SP, read from our own ABI save area.
+  _fp = (intptr_t *) own_abi()->callers_sp;
+
+  // If this nmethod frame was patched for deoptimization, report the original
+  // pc instead of the deopt handler's pc.
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+
+  assert(((uint64_t)_sp & 0x7) == 0, "SP must be 8-byte aligned");
+}
+
+// Constructors
+
+// Default constructor: all fields null/unknown; represents an invalid frame.
+inline frame::frame() : _sp(NULL), _unextended_sp(NULL), _fp(NULL), _cb(NULL), _pc(NULL), _deopt_state(unknown) {}
+
+// Construct from sp only; the continuation pc is read from the frame's own ABI.
+inline frame::frame(intptr_t* sp) : _sp(sp), _unextended_sp(sp) {
+  find_codeblob_and_set_pc_and_deopt_state((address)own_abi()->return_pc);
+}
+
+// Construct from sp and an explicit pc. Sets _fp from the ABI.
+// NOTE(review): "adjusts _unextended_sp" below does not match the helper's body.
+inline frame::frame(intptr_t* sp, address pc) : _sp(sp), _unextended_sp(sp) {
+  find_codeblob_and_set_pc_and_deopt_state(pc); // Also sets _fp and adjusts _unextended_sp.
+}
+
+// To be used when sp was resized (e.g. by an adapter): pass the original sp
+// as unextended_sp for deoptimization.
+inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp), _unextended_sp(unextended_sp) {
+  find_codeblob_and_set_pc_and_deopt_state(pc); // Also sets _fp and adjusts _unextended_sp.
+}
+
+// Generic constructor. Used by pns() in debug.cpp only
+#ifndef PRODUCT
+inline frame::frame(void* sp, void* pc, void* unextended_sp) :
+  _sp((intptr_t*)sp), _unextended_sp((intptr_t*)unextended_sp), _cb(NULL), _pc(NULL) {
+  find_codeblob_and_set_pc_and_deopt_state((address)pc); // Also sets _fp and adjusts _unextended_sp.
+}
+#endif
+
+// template interpreter state
+// The interpreter state block is located immediately below the frame pointer.
+// In debug builds its magic slot is verified on every access.
+inline frame::z_ijava_state* frame::ijava_state() const {
+  z_ijava_state* state = (z_ijava_state*) ((uintptr_t)fp() - z_ijava_state_size);
+  assert(state->magic == (intptr_t) frame::z_istate_magic_number,
+         "wrong z_ijava_state in interpreter frame (no magic found)");
+  return state;
+}
+
+// Address of the slot holding the frame's monitor list pointer.
+inline BasicObjectLock** frame::interpreter_frame_monitors_addr() const {
+  return (BasicObjectLock**) &(ijava_state()->monitors);
+}
+
+// The next two functions read and write z_ijava_state.monitors.
+inline BasicObjectLock* frame::interpreter_frame_monitors() const {
+  return *interpreter_frame_monitors_addr();
+}
+inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) {
+  *interpreter_frame_monitors_addr() = monitors;
+}
+
+// Accessors
+
+// Return unique id for this frame. The id must have a value where we
+// can distinguish identity and younger/older relationship. NULL
+// represents an invalid (incomparable) frame.
+inline intptr_t* frame::id(void) const {
+  // Use _fp. _sp or _unextended_sp wouldn't be correct due to resizing.
+  return _fp;
+}
+
+// Return true if this frame is younger (more recent activation) than
+// the frame represented by id.
+inline bool frame::is_younger(intptr_t* id) const {
+  assert(this->id() != NULL && id != NULL, "NULL frame id");
+  // Stack grows towards smaller addresses on z/Architecture.
+  return this->id() < id;
+}
+
+// Return true if this frame is older (less recent activation) than
+// the frame represented by id.
+inline bool frame::is_older(intptr_t* id) const {
+  assert(this->id() != NULL && id != NULL, "NULL frame id");
+  // Stack grows towards smaller addresses on z/Architecture.
+  return this->id() > id;
+}
+
+// Frame size in words (pointer difference of two intptr_t*).
+inline int frame::frame_size(RegisterMap* map) const {
+  // Stack grows towards smaller addresses on z/Linux: sender is at a higher address.
+  return sender_sp() - sp();
+}
+
+// Ignore c2i adapter frames.
+inline intptr_t* frame::unextended_sp() const {
+  return _unextended_sp;
+}
+
+// Return pc of the caller, read from the caller's ABI save area.
+inline address frame::sender_pc() const {
+  return (address) callers_abi()->return_pc;
+}
+
+// Get caller pc, if caller is native, from stack slot of gpr14.
+inline address frame::native_sender_pc() const {
+  return (address) callers_abi()->gpr14;
+}
+
+// Get caller pc from stack slot of gpr10 (used by the call stub).
+inline address frame::callstub_sender_pc() const {
+  return (address) callers_abi()->gpr10;
+}
+
+// Address of the caller's return-pc slot (for patching, e.g. deopt).
+inline address* frame::sender_pc_addr() const {
+  return (address*) &(callers_abi()->return_pc);
+}
+
+// The sender's SP is this frame's FP (start of the caller's ABI area).
+inline intptr_t* frame::sender_sp() const {
+  return (intptr_t*) callers_abi();
+}
+
+// Follow the callers_sp chain one step further up the stack.
+inline intptr_t* frame::link() const {
+  return (intptr_t*) callers_abi()->callers_sp;
+}
+
+// Address of the slot holding the locals pointer (Z_locals).
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  return (intptr_t**) &(ijava_state()->locals);
+}
+
+// Address of the slot holding the bytecode pointer (Z_bcp).
+inline intptr_t* frame::interpreter_frame_bcp_addr() const {
+  return (intptr_t*) &(ijava_state()->bcp);
+}
+
+// Address of the slot holding the method data pointer.
+inline intptr_t* frame::interpreter_frame_mdp_addr() const {
+  return (intptr_t*) &(ijava_state()->mdx);
+}
+
+// Bottom(base) of the expression stack (highest address):
+// one slot below the lowest monitor.
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  return (intptr_t*)interpreter_frame_monitor_end() - 1;
+}
+
+// The expression stack grows towards smaller addresses.
+inline jint frame::interpreter_frame_expression_stack_direction() {
+  return -1;
+}
+
+// Expression stack slot at the given offset from tos.
+inline intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
+  return &interpreter_frame_tos_address()[offset];
+}
+
+
+// monitor elements
+
+// End is lower in memory than begin, and beginning element is oldest element.
+// Also begin is one past last monitor.
+
+// Own SP before modification by i2c adapter and top-2-parent resize
+// (see z_ijava_state.top_frame_sp).
+inline intptr_t* frame::interpreter_frame_top_frame_sp() {
+  return (intptr_t*)ijava_state()->top_frame_sp;
+}
+
+inline void frame::interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp) {
+  ijava_state()->top_frame_sp = (intptr_t) top_frame_sp;
+}
+
+inline void frame::interpreter_frame_set_sender_sp(intptr_t* sender_sp) {
+  ijava_state()->sender_sp = (intptr_t) sender_sp;
+}
+
+#ifdef ASSERT
+// Stamp the debug-only magic value used by ijava_state() verification.
+inline void frame::interpreter_frame_set_magic() {
+  ijava_state()->magic = (intptr_t) frame::z_istate_magic_number;
+}
+#endif
+
+// Where z_ijava_state.esp is saved.
+inline intptr_t** frame::interpreter_frame_esp_addr() const {
+  return (intptr_t**) &(ijava_state()->esp);
+}
+
+// top of expression stack (lowest address); esp points one slot below tos.
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  return *interpreter_frame_esp_addr() + 1;
+}
+
+inline void frame::interpreter_frame_set_tos_address(intptr_t* x) {
+  *interpreter_frame_esp_addr() = x - 1;
+}
+
+// Stack slot needed for native calls and GC (z_ijava_state.oop_tmp).
+inline oop * frame::interpreter_frame_temp_oop_addr() const {
+  return (oop *) ((address) _fp + _z_ijava_state_neg(oop_tmp));
+}
+
+// In keeping with Intel side: end is lower in memory than begin.
+// Beginning element is oldest element. Also begin is one past last monitor.
+inline BasicObjectLock * frame::interpreter_frame_monitor_begin() const {
+  return (BasicObjectLock*)ijava_state();
+}
+
+inline BasicObjectLock * frame::interpreter_frame_monitor_end() const {
+  return interpreter_frame_monitors();
+}
+
+inline void frame::interpreter_frame_set_monitor_end(BasicObjectLock* monitors) {
+  interpreter_frame_set_monitors((BasicObjectLock *)monitors);
+}
+
+inline int frame::interpreter_frame_monitor_size() {
+  // Number of stack slots for a monitor, rounded up to long alignment.
+  return round_to(BasicObjectLock::size() /* number of stack slots */,
+                  WordsPerLong /* Number of stack slots for a Java long. */);
+}
+
+inline int frame::interpreter_frame_monitor_size_in_bytes() {
+  // Number of bytes for a monitor.
+  return frame::interpreter_frame_monitor_size() * wordSize;
+}
+
+// Byte size of the template interpreter state block.
+inline int frame::interpreter_frame_interpreterstate_size_in_bytes() {
+  return z_ijava_state_size;
+}
+
+// Address of the slot holding the Method* of the executing method.
+inline Method** frame::interpreter_frame_method_addr() const {
+  return (Method**)&(ijava_state()->method);
+}
+
+// Address of the slot holding the method's class mirror (keeps class alive).
+inline oop* frame::interpreter_frame_mirror_addr() const {
+  return (oop*)&(ijava_state()->mirror);
+}
+
+// Constant pool cache
+
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
+  return (ConstantPoolCache**)&(ijava_state()->cpoolCache);
+}
+
+// entry frames
+
+inline intptr_t* frame::entry_frame_argument_at(int offset) const {
+  // Since an entry frame always calls the interpreter first,
+  // the parameters are on the stack and relative to known register in the
+  // entry frame.
+  intptr_t* tos = (intptr_t*) entry_frame_locals()->arguments_tos_address;
+  return &tos[offset + 1]; // prepushed tos
+}
+
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
+  return (JavaCallWrapper**) &entry_frame_locals()->call_wrapper_address;
+}
+
+// Read/write the saved Java result oop from the register map.
+inline oop frame::saved_oop_result(RegisterMap* map) const {
+  return *((oop*) map->location(Z_R2->as_VMReg()));  // R2 is return register.
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+  *((oop*) map->location(Z_R2->as_VMReg())) = obj;  // R2 is return register.
+}
+
+// No distinct real fp on s390: the frame pointer is authoritative.
+inline intptr_t* frame::real_fp() const {
+  return fp();
+}
+
+#endif // CPU_S390_VM_FRAME_S390_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/globalDefinitions_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP
+#define CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP
+
+// The template interpreter is the only supported interpreter on s390.
+#ifdef CC_INTERP
+#error "CC_INTERP is not supported on z/Architecture."
+#endif
+
+// Convenience macro that produces a string literal with the filename
+// and linenumber of the location where the macro was used.
+#ifndef FILE_AND_LINE
+#define FILE_AND_LINE __FILE__ ":" XSTR(__LINE__)
+#endif
+
+// Enable use of short (relative) branch forms where they reach.
+#define ShortenBranches true
+
+// z/Architecture ABI requires 16-byte (quadword) stack alignment.
+const int StackAlignmentInBytes = 16;
+
+// The platform provides a native 8-byte compare-and-exchange.
+#define SUPPORTS_NATIVE_CX8
+
+// Indicates whether the C calling conventions require that
+// 32-bit integer argument values are extended to 64 bits.
+// This is the case on z/Architecture.
+const bool CCallingConventionRequiresIntsAsLongs = true;
+
+// Contended Locking reorder and cache line bucket.
+// This setting should be kept compatible with vm_version_s390.cpp.
+// The expected size in bytes of a cache line, used to pad data structures.
+#define DEFAULT_CACHE_LINE_SIZE 256
+
+#endif // CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/globals_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_GLOBALS_S390_HPP
+#define CPU_S390_VM_GLOBALS_S390_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+// Sorted according to sparc.
+
+// z/Architecture remembers branch targets, so don't share vtables.
+define_pd_global(bool,  ShareVtableStubs,            false);
+define_pd_global(bool,  NeedsDeoptSuspend,           false); // Only register window machines need this.
+
+define_pd_global(bool,  ImplicitNullChecks,          true);  // Generate code for implicit null checks.
+define_pd_global(bool,  TrapBasedNullChecks,         true);
+define_pd_global(bool,  UncommonNullCast,            true);  // Uncommon-trap NULLs passed to check cast.
+
+define_pd_global(uintx, CodeCacheSegmentSize,        256);
+// This shall be at least 32 for proper branch target alignment.
+// Ideally, this is 256 (cache line size). This keeps code and data
+// on separate lines. But we reduced it to 64 since 256 increased
+// code size significantly by padding nops between IVC and second UEP.
+define_pd_global(intx,  CodeEntryAlignment,          64);
+define_pd_global(intx,  OptoLoopAlignment,           2);
+define_pd_global(intx,  InlineFrequencyCount,        100);
+define_pd_global(intx,  InlineSmallCode,             2000);
+
+// Stack guard zone sizes (in pages).
+#define DEFAULT_STACK_YELLOW_PAGES   (2)
+#define DEFAULT_STACK_RED_PAGES      (1)
+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the
+// stack. To pass stack overflow tests we need 20 shadow pages.
+#define DEFAULT_STACK_SHADOW_PAGES   (20 DEBUG_ONLY(+2))
+#define DEFAULT_STACK_RESERVED_PAGES (0)
+
+#define MIN_STACK_YELLOW_PAGES     DEFAULT_STACK_YELLOW_PAGES
+#define MIN_STACK_RED_PAGES        DEFAULT_STACK_RED_PAGES
+#define MIN_STACK_SHADOW_PAGES     DEFAULT_STACK_SHADOW_PAGES
+#define MIN_STACK_RESERVED_PAGES   (0)
+
+define_pd_global(intx,  StackYellowPages,            DEFAULT_STACK_YELLOW_PAGES);
+define_pd_global(intx,  StackRedPages,               DEFAULT_STACK_RED_PAGES);
+define_pd_global(intx,  StackShadowPages,            DEFAULT_STACK_SHADOW_PAGES);
+define_pd_global(intx,  StackReservedPages,          DEFAULT_STACK_RESERVED_PAGES);
+
+define_pd_global(bool, RewriteBytecodes,     true);
+define_pd_global(bool, RewriteFrequentPairs, true);
+
+define_pd_global(bool, UseMembar,            false);
+
+define_pd_global(bool, PreserveFramePointer, false);
+
+// GC Ergo Flags
+define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // Default max size of CMS young gen, per GC worker thread.
+
+define_pd_global(uintx, TypeProfileLevel, 111);
+
+define_pd_global(bool, CompactStrings, true);
+
+// 8146801 (Short Array Allocation): No performance work done here yet.
+define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong);
+
+// Platform-specific flags for s390. (No comments may be added inside the
+// macro body; continuation lines must end in a backslash.)
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint, writeable) \
+                                                                              \
+  /* Reoptimize code-sequences of calls at runtime, e.g. replace an */        \
+  /* indirect call by a direct call.                                */        \
+  product(bool, ReoptimizeCallSequences, true,                                \
+          "Reoptimize code-sequences of calls at runtime.")                   \
+                                                                              \
+  product(bool, UseCountLeadingZerosInstruction, true,                        \
+          "Use count leading zeros instruction.")                             \
+                                                                              \
+  product(bool, UseByteReverseInstruction, true,                              \
+          "Use byte reverse instruction.")                                    \
+                                                                              \
+  product(bool, ExpandLoadingBaseDecode, true, "Expand the assembler "        \
+          "instruction required to load the base from DecodeN nodes during "  \
+          "matching.")                                                        \
+  product(bool, ExpandLoadingBaseDecode_NN, true, "Expand the assembler "     \
+          "instruction required to load the base from DecodeN_NN nodes "      \
+          "during matching.")                                                 \
+  product(bool, ExpandLoadingBaseEncode, true, "Expand the assembler "        \
+          "instruction required to load the base from EncodeP nodes during "  \
+          "matching.")                                                        \
+  product(bool, ExpandLoadingBaseEncode_NN, true, "Expand the assembler "     \
+          "instruction required to load the base from EncodeP_NN nodes "      \
+          "during matching.")                                                 \
+                                                                              \
+  /* Seems to pay off with 2 pages already. */                                \
+  product(size_t, MVCLEThreshold, +2*(4*K),                                   \
+          "Threshold above which page-aligned MVCLE copy/init is used.")      \
+                                                                              \
+  product(bool, PreferLAoverADD, false,                                       \
+          "Use LA/LAY instructions over ADD instructions (z/Architecture).")  \
+                                                                              \
+  develop(bool, ZapEmptyStackFields, false, "Write 0x0101... to empty stack"  \
+          " fields. Use this to ease stack debugging.")                       \
+                                                                              \
+  product(bool, TraceTraps, false, "Trace all traps the signal handler"       \
+          "handles.")
+
+#endif // CPU_S390_VM_GLOBALS_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/icBuffer_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/oop.inline.hpp"
+
+#define __ masm.
+
+// Size of an IC stub: a load_const (sets the IC register) plus a pc-relative branch.
+int InlineCacheBuffer::ic_stub_code_size() {
+  return MacroAssembler::load_const_size() + Assembler::z_brul_size();
+}
+
+// Emit an IC stub at code_begin: load the cached value into the inline cache
+// register, then tail-call entry_point.
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_oop, address entry_point) {
+  ResourceMark rm;
+  CodeBuffer code(code_begin, ic_stub_code_size());
+  MacroAssembler masm(&code);
+  // Note: even though the code contains an embedded oop, we do not need reloc info
+  // because
+  // (1) the oop is old (i.e., doesn't matter for scavenges)
+  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear.
+
+  // Load the oop,
+  __ load_const(Z_method, (address) cached_oop); // inline cache reg = Z_method
+  // and do a tail-call (pc-relative).
+  __ z_brul((address) entry_point);
+  __ flush();
+}
+
+// Decode the branch target (entry point) from a previously emitted IC stub.
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);   // Creation also verifies the object.
+  return MacroAssembler::get_target_addr_pcrel(move->next_instruction_address());
+}
+
+// Decode the cached value embedded in a previously emitted IC stub.
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);   // Creation also verifies the object.
+  return (void*)move->data();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/icache_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "runtime/icache.hpp"
+
+// interface (see ICache::flush_icache_stub_t):
+//   address   addr   (Z_R2, ignored)
+//   int       lines  (Z_R3, ignored)
+//   int       magic  (Z_R4)
+//
+//   returns: int (Z_R2)
+//
+//   Note: z/Architecture doesn't need explicit flushing, so this is implemented as a nop.
+
+// Call C function (which just does nothing): z/Architecture has coherent
+// instruction caches, so no explicit flushing is required. The stub only
+// returns the magic value, as the generic ICache protocol expects.
+int z_flush_icache(address start, int lines, int magic) { return magic; }
+
+// Install the no-op flush stub and "flush" it once, as required by the
+// generic ICache initialization protocol.
+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
+  *flush_icache_stub = (ICache::flush_icache_stub_t)z_flush_icache;
+
+  // First call to flush itself.
+  ICache::invalidate_range((address)(*flush_icache_stub), 0);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/icache_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_ICACHE_S390_HPP
+#define CPU_S390_VM_ICACHE_S390_HPP
+
+// Interface for updating the instruction cache.  Whenever the VM modifies
+// code, part of the processor instruction cache potentially has to be flushed.
+// On z/Architecture the caches are coherent, so the flush stub is a no-op.
+class ICache : public AbstractICache {
+ public:
+  enum {
+    stub_size      = 0,   // Size of the icache flush stub in bytes.
+    line_size      = 2,   // There is no explicit flushing on z/Architecture.
+                          // This value is ignored by the flush stub (a nop !).
+    log2_line_size = 1    // log2(line_size); must stay consistent with line_size.
+  };
+
+  // Use default implementation.
+};
+
+#endif // CPU_S390_VM_ICACHE_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/interp_masm_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,2127 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by AHa, AS, JL, ML.
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interp_masm_s390.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.inline.hpp"
+
+// Implementation of InterpreterMacroAssembler.
+// This file specializes the assembler with interpreter-specific macros.
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str)
+#define BIND(label)        bind(label);
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
+#endif
+
+// Unconditionally branch to 'entry'.
+// 'Rscratch' is only asserted upon here (it must not be Z_R0);
+// branch_optimized() does not use it in this path.
+void InterpreterMacroAssembler::jump_to_entry(address entry, Register Rscratch) {
+  assert(entry != NULL, "Entry must have been generated by now");
+  assert(Rscratch != Z_R0, "Can't use R0 for addressing");
+  branch_optimized(Assembler::bcondAlways, entry);
+}
+
+// Reset the expression stack pointer: Z_esp is set to one stack element below
+// the monitor area, discarding everything currently on the operand stack.
+void InterpreterMacroAssembler::empty_expression_stack(void) {
+  get_monitors(Z_R1_scratch);
+  add2reg(Z_esp, -Interpreter::stackElementSize, Z_R1_scratch);
+}
+
+// Dispatch code executed in the prolog of a bytecode which does not do its
+// own dispatch.
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int bcp_incr) {
+  // On z/Architecture we are short on registers, therefore we do not preload the
+  // dispatch address of the next bytecode.
+}
+
+// Dispatch code executed in the epilog of a bytecode which does not do its
+// own dispatch.
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
+  dispatch_next(state, step);
+}
+
+// Load the next bytecode (found at Z_bcp + bcp_incr), advance Z_bcp past the
+// current instruction, and jump to the handler for the new bytecode.
+void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) {
+  z_llgc(Z_bytecode, bcp_incr, Z_R0, Z_bcp);  // Load next bytecode (zero-extended).
+  add2reg(Z_bcp, bcp_incr);                   // Advance bcp. Add2reg produces optimal code.
+  dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+// Common dispatch code, used by dispatch_next() and dispatch_only().
+// Expects the zero-extended bytecode in Z_bytecode; looks up and branches to
+// the corresponding handler entry in 'table'.
+// Clobbers Z_tmp_1; Z_bytecode is scaled (shifted) in place.
+
+void InterpreterMacroAssembler::dispatch_base(TosState state, address* table) {
+  verify_FPU(1, state);
+
+#ifdef ASSERT
+  address reentry = NULL;
+  { Label OK;
+    // Check if the frame pointer in Z_fp is correct.
+    z_cg(Z_fp, 0, Z_SP);
+    z_bre(OK);
+    reentry = stop_chain_static(reentry, "invalid frame pointer Z_fp: " FILE_AND_LINE);
+    bind(OK);
+  }
+  { Label OK;
+    // Check if the locals pointer in Z_locals is correct.
+    z_cg(Z_locals, _z_ijava_state_neg(locals), Z_fp);
+    z_bre(OK);
+    reentry = stop_chain_static(reentry, "invalid locals pointer Z_locals: " FILE_AND_LINE);
+    bind(OK);
+  }
+#endif
+
+  // TODO: Maybe implement +VerifyActivationFrameSize here.
+  // verify_thread(); // Too slow. We will just verify on method entry & exit.
+  verify_oop(Z_tos, state);
+#ifdef FAST_DISPATCH
+  if (table == Interpreter::dispatch_table(state)) {
+    // Use IdispatchTables.
+    add(Lbyte_code, Interpreter::distance_from_dispatch_table(state), Lbyte_code);
+                                                        // Add offset to correct dispatch table.
+    sll(Lbyte_code, LogBytesPerWord, Lbyte_code);       // Multiply by wordSize.
+    ld_ptr(IdispatchTables, Lbyte_code, G3_scratch);    // Get entry addr.
+  } else
+#endif
+  {
+    // Dispatch table to use.
+    load_absolute_address(Z_tmp_1, (address) table);  // Z_tmp_1 = table;
+
+    // 0 <= Z_bytecode < 256 => Use a 32 bit shift, because it is shorter than sllg.
+    // Z_bytecode must have been loaded zero-extended for this approach to be correct.
+    z_sll(Z_bytecode, LogBytesPerWord, Z_R0);   // Multiply by wordSize.
+    z_lg(Z_tmp_1, 0, Z_bytecode, Z_tmp_1);      // Get entry addr.
+  }
+  z_br(Z_tmp_1);
+}
+
+// Dispatch the bytecode already in Z_bytecode via the active dispatch table.
+void InterpreterMacroAssembler::dispatch_only(TosState state) {
+  dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+// Dispatch the bytecode already in Z_bytecode via Interpreter::normal_table().
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state));
+}
+
+// Reload the current bytecode from *Z_bcp and dispatch it through 'table'.
+void InterpreterMacroAssembler::dispatch_via(TosState state, address *table) {
+  // Load current bytecode.
+  z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t)0));
+  dispatch_base(state, table);
+}
+
+// The following call_VM*_base() methods overload and mask the respective
+// declarations/definitions in class MacroAssembler. They are meant as a "detour"
+// to perform additional, template interpreter specific tasks before actually
+// calling their MacroAssembler counterparts.
+
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point) {
+  bool allow_relocation = true; // Generally valid variant. Assume code is relocated.
+  // interpreter specific
+  // Note: No need to save/restore bcp (Z_R13) pointer since these are callee
+  // saved registers and no blocking/ GC can happen in leaf calls.
+
+  // super call
+  MacroAssembler::call_VM_leaf_base(entry_point, allow_relocation);
+}
+
+// Variant with explicit relocation control; otherwise identical to the above.
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
+  // interpreter specific
+  // Note: No need to save/restore bcp (Z_R13) pointer since these are callee
+  // saved registers and no blocking/ GC can happen in leaf calls.
+
+  // super call
+  MacroAssembler::call_VM_leaf_base(entry_point, allow_relocation);
+}
+
+// Interpreter-specific call_VM: spill bcp and esp into the frame before the
+// call (the VM call may block / trigger GC) and reload bcp afterwards.
+void InterpreterMacroAssembler::call_VM_base(Register oop_result, Register last_java_sp,
+                                             address entry_point, bool check_exceptions) {
+  bool allow_relocation = true; // Generally valid variant. Assume code is relocated.
+  // interpreter specific
+
+  save_bcp();
+  save_esp();
+  // super call
+  MacroAssembler::call_VM_base(oop_result, last_java_sp,
+                               entry_point, allow_relocation, check_exceptions);
+  restore_bcp();
+}
+
+// Variant with explicit relocation control; otherwise identical to the above.
+void InterpreterMacroAssembler::call_VM_base(Register oop_result, Register last_java_sp,
+                                             address entry_point, bool allow_relocation,
+                                             bool check_exceptions) {
+  // interpreter specific
+
+  save_bcp();
+  save_esp();
+  // super call
+  MacroAssembler::call_VM_base(oop_result, last_java_sp,
+                               entry_point, allow_relocation, check_exceptions);
+  restore_bcp();
+}
+
+// If a JVMTI pop-frame request is pending (and not already being processed),
+// jump to the remove_activation_preserving_args entry; otherwise fall through.
+// Note: 'scratch_reg' is not used on s390.
+void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg) {
+  if (JvmtiExport::can_pop_frame()) {
+    BLOCK_COMMENT("check_and_handle_popframe {");
+    Label L;
+    // Initiate popframe handling only if it is not already being
+    // processed. If the flag has the popframe_processing bit set, it
+    // means that this code is called *during* popframe handling - we
+    // don't want to reenter.
+    // TODO: Check if all four state combinations could be visible.
+    // If (processing and !pending) is an invisible/impossible state,
+    // there is optimization potential by testing both bits at once.
+    // Then, All_Zeroes and All_Ones means skip, Mixed means doit.
+    testbit(Address(Z_thread, JavaThread::popframe_condition_offset()),
+            exact_log2(JavaThread::popframe_pending_bit));
+    z_bfalse(L);
+    testbit(Address(Z_thread, JavaThread::popframe_condition_offset()),
+            exact_log2(JavaThread::popframe_processing_bit));
+    z_btrue(L);
+
+    // Call Interpreter::remove_activation_preserving_args_entry() to get the
+    // address of the same-named entrypoint in the generated interpreter code.
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
+    // The above call should (as its only effect) return the contents of the field
+    // _remove_activation_preserving_args_entry in Z_RET.
+    // We just jump there to have the work done.
+    z_br(Z_RET);
+    // There is no way for control to fall thru here.
+
+    bind(L);
+    BLOCK_COMMENT("} check_and_handle_popframe");
+  }
+}
+
+
+// Load the JVMTI early-return value from the thread's JvmtiThreadState into
+// the TOS register matching 'state' (Z_tos or Z_ftos), then clear the saved
+// value/oop fields and invalidate the saved tos state.
+// Kills Z_R1_scratch (holds the JvmtiThreadState pointer) and Z_R0_scratch.
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
+  Register RjvmtiState = Z_R1_scratch;
+  int      tos_off     = in_bytes(JvmtiThreadState::earlyret_tos_offset());
+  int      oop_off     = in_bytes(JvmtiThreadState::earlyret_oop_offset());
+  int      val_off     = in_bytes(JvmtiThreadState::earlyret_value_offset());
+  int      state_off   = in_bytes(JavaThread::jvmti_thread_state_offset());
+
+  z_lg(RjvmtiState, state_off, Z_thread);
+
+  switch (state) {
+    // atos: the oop lives in a separate field; clear it right away so the
+    // thread state does not keep the object alive.
+    case atos: z_lg(Z_tos, oop_off, RjvmtiState);
+      store_const(Address(RjvmtiState, oop_off), 0L, 8, 8, Z_R0_scratch);
+                                                    break;
+    case ltos: z_lg(Z_tos, val_off, RjvmtiState);   break;
+    case btos: // fall through
+    case ztos: // fall through
+    case ctos: // fall through
+    case stos: // fall through
+    case itos: z_llgf(Z_tos, val_off, RjvmtiState); break;
+    case ftos: z_le(Z_ftos, val_off, RjvmtiState);  break;
+    case dtos: z_ld(Z_ftos, val_off, RjvmtiState);  break;
+    case vtos:   /* nothing to do */                break;
+    default  : ShouldNotReachHere();
+  }
+
+  // Clean up tos value in the jvmti thread state.
+  store_const(Address(RjvmtiState, val_off),   0L, 8, 8, Z_R0_scratch);
+  // Set tos state field to illegal value.
+  store_const(Address(RjvmtiState, tos_off), ilgl, 4, 1, Z_R0_scratch);
+}
+
+// If a JVMTI early-return is pending for this thread, jump to the
+// remove_activation_early entry; otherwise fall through.
+// Note: 'scratch_reg' is not used on s390.
+void InterpreterMacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
+  if (JvmtiExport::can_force_early_return()) {
+    BLOCK_COMMENT("check_and_handle_earlyret {");
+    Label L;
+    // Arg regs are saved, because we are just behind the call in call_VM_base.
+    Register jvmti_thread_state = Z_ARG2;
+    Register tmp                = Z_ARG3;
+    load_and_test_long(jvmti_thread_state, Address(Z_thread, JavaThread::jvmti_thread_state_offset()));
+    z_bre(L); // if (thread->jvmti_thread_state() == NULL) exit;
+
+    // Initiate earlyret handling only if it is not already being processed.
+    // If the flag has the earlyret_processing bit set, it means that this code
+    // is called *during* earlyret handling - we don't want to reenter.
+
+    assert((JvmtiThreadState::earlyret_pending != 0) && (JvmtiThreadState::earlyret_inactive == 0),
+          "must fix this check, when changing the values of the earlyret enum");
+    assert(JvmtiThreadState::earlyret_pending == 1, "must fix this check, when changing the values of the earlyret enum");
+
+    load_and_test_int(tmp, Address(jvmti_thread_state, JvmtiThreadState::earlyret_state_offset()));
+    z_brz(L); // if (thread->jvmti_thread_state()->_earlyret_state != JvmtiThreadState::earlyret_pending) exit;
+
+    // Call Interpreter::remove_activation_early_entry() to get the address of the
+    // same-named entrypoint in the generated interpreter code.
+    assert(sizeof(TosState) == 4, "unexpected size");
+    z_l(Z_ARG1, Address(jvmti_thread_state, JvmtiThreadState::earlyret_tos_offset()));
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), Z_ARG1);
+    // The above call should (as its only effect) return the contents of the field
+    // _remove_activation_early_entry in Z_RET.
+    // We just jump there to have the work done.
+    z_br(Z_RET);
+    // There is no way for control to fall thru here.
+
+    bind(L);
+    BLOCK_COMMENT("} check_and_handle_earlyret");
+  }
+}
+
+// Two-argument leaf call directly via MacroAssembler (bypassing the
+// interpreter-specific call_VM_leaf_base detour above).
+// Moves arg_1/arg_2 into Z_ARG1/Z_ARG2 only if they are not already there.
+void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
+  lgr_if_needed(Z_ARG1, arg_1);
+  assert(arg_2 != Z_ARG1, "smashed argument");
+  lgr_if_needed(Z_ARG2, arg_2);
+  MacroAssembler::call_VM_leaf_base(entry_point, true);
+}
+
+// Load the constant-pool-cache index operand located at Z_bcp + bcp_offset
+// into 'index' (zero-extended). For invokedynamic (u4-sized index) the
+// bytecode stream stores ~index, which is inverted back to a plain index here.
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size) {
+  Address param(Z_bcp, bcp_offset);
+
+  BLOCK_COMMENT("get_cache_index_at_bcp {");
+  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  if (index_size == sizeof(u2)) {
+    load_sized_value(index, param, 2, false /*is_signed*/);
+  } else if (index_size == sizeof(u4)) {
+
+    load_sized_value(index, param, 4, false /*is_signed*/);
+
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
+    assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
+    not_(index);  // Convert to plain index.
+  } else if (index_size == sizeof(u1)) {
+    z_llgc(index, param);
+  } else {
+    ShouldNotReachHere();
+  }
+  BLOCK_COMMENT("}");
+}
+
+
+// Load the ConstantPoolCache pointer (from the interpreter frame) into 'cache'
+// and the cache-entry byte offset for the operand at bcp_offset into 'cpe_offset'.
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register cpe_offset,
+                                                           int bcp_offset, size_t index_size) {
+  BLOCK_COMMENT("get_cache_and_index_at_bcp {");
+  assert_different_registers(cache, cpe_offset);
+  get_cache_index_at_bcp(cpe_offset, bcp_offset, index_size);
+  z_lg(cache, Address(Z_fp, _z_ijava_state_neg(cpoolCache)));
+  // Convert from field index to ConstantPoolCache offset in bytes.
+  z_sllg(cpe_offset, cpe_offset, exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord));
+  BLOCK_COMMENT("}");
+}
+
+// Like get_cache_and_index_at_bcp(), but additionally loads the resolved
+// bytecode for 'byte_no' from the cache entry's indices field into 'bytecode'.
+// Kills Z_R0_scratch.
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+                                                                        Register cpe_offset,
+                                                                        Register bytecode,
+                                                                        int byte_no,
+                                                                        int bcp_offset,
+                                                                        size_t index_size) {
+  BLOCK_COMMENT("get_cache_and_index_and_bytecode_at_bcp {");
+  get_cache_and_index_at_bcp(cache, cpe_offset, bcp_offset, index_size);
+
+  // We want to load (from CP cache) the bytecode that corresponds to the passed-in byte_no.
+  // It is located at (cache + cpe_offset + base_offset + indices_offset + (8-1) (last byte in DW) - (byte_no+1).
+  // Instead of loading, shifting and masking a DW, we just load that one byte of interest with z_llgc (unsigned).
+  const int base_ix_off = in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset());
+  const int off_in_DW   = (8-1) - (1+byte_no);
+  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
+  assert(ConstantPoolCacheEntry::bytecode_1_mask == 0xff, "");
+  load_sized_value(bytecode, Address(cache, cpe_offset, base_ix_off+off_in_DW), 1, false /*is_signed*/);
+
+  BLOCK_COMMENT("}");
+}
+
+// Load object from cpool->resolved_references(index).
+// Kills 'index' (it is scaled in place to a byte offset).
+void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
+  assert_different_registers(result, index);
+  get_constant_pool(result);
+
+  // Convert
+  //  - from field index to resolved_references() index and
+  //  - from word index to byte offset.
+  // Since this is a java object, it is potentially compressed.
+  Register tmp = index;  // reuse
+  z_sllg(index, index, LogBytesPerHeapOop); // Offset into resolved references array.
+  // Load pointer for resolved_references[] objArray.
+  z_lg(result, ConstantPool::resolved_references_offset_in_bytes(), result);
+  // JNIHandles::resolve(result)
+  z_lg(result, 0, result); // Load resolved references array itself.
+#ifdef ASSERT
+  // Bounds check: the scaled index must be below the scaled array length.
+  NearLabel index_ok;
+  z_lgf(Z_R0, Address(result, arrayOopDesc::length_offset_in_bytes()));
+  z_sllg(Z_R0, Z_R0, LogBytesPerHeapOop);
+  compare64_and_branch(tmp, Z_R0, Assembler::bcondLow, index_ok);
+  stop("resolved reference index out of bounds", 0x09256);
+  bind(index_ok);
+#endif
+  z_agr(result, index);    // Address of indexed array element.
+  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
+}
+
+// Compute a direct pointer (in 'cache') to the ConstantPoolCacheEntry for the
+// cp-cache operand at bcp_offset. Kills 'tmp' (holds the entry byte offset).
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
+                                                               Register tmp,
+                                                               int bcp_offset,
+                                                               size_t index_size) {
+  BLOCK_COMMENT("get_cache_entry_pointer_at_bcp {");
+    get_cache_and_index_at_bcp(cache, tmp, bcp_offset, index_size);
+    add2reg_with_index(cache, in_bytes(ConstantPoolCache::base_offset()), tmp, cache);
+  BLOCK_COMMENT("}");
+}
+
+// Generate a subtype check: branch to ok_is_subtype if sub_klass is
+// a subtype of super_klass. Blows registers Rsuper_klass, Rsub_klass, tmp1, tmp2.
+// Falls through (after recording the failure in the profile) if the check fails.
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
+                                                  Register Rsuper_klass,
+                                                  Register Rtmp1,
+                                                  Register Rtmp2,
+                                                  Label &ok_is_subtype) {
+  // Profile the not-null value's klass.
+  profile_typecheck(Rtmp1, Rsub_klass, Rtmp2);
+
+  // Do the check.
+  check_klass_subtype(Rsub_klass, Rsuper_klass, Rtmp1, Rtmp2, ok_is_subtype);
+
+  // Profile the failure of the check.
+  profile_typecheck_failed(Rtmp1, Rtmp2);
+}
+
+// Pop topmost element from stack. It just disappears.
+// Useful if consumed previously by access via stackTop().
+// 'len' is in stack elements, not bytes.
+void InterpreterMacroAssembler::popx(int len) {
+  add2reg(Z_esp, len*Interpreter::stackElementSize);
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+// Get Address object of stack top. No checks. No pop.
+// Purpose: - Provide address of stack operand to exploit reg-mem operations.
+//          - Avoid RISC-like mem2reg - reg-reg-op sequence.
+Address InterpreterMacroAssembler::stackTop() {
+  return Address(Z_esp, Interpreter::expr_offset_in_bytes(0));
+}
+
+// Pop a 32-bit int from the expression stack (one slot).
+void InterpreterMacroAssembler::pop_i(Register r) {
+  z_l(r, Interpreter::expr_offset_in_bytes(0), Z_esp);
+  add2reg(Z_esp, Interpreter::stackElementSize);
+  assert_different_registers(r, Z_R1_scratch);
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+// Pop an oop/pointer from the expression stack (one slot).
+void InterpreterMacroAssembler::pop_ptr(Register r) {
+  z_lg(r, Interpreter::expr_offset_in_bytes(0), Z_esp);
+  add2reg(Z_esp, Interpreter::stackElementSize);
+  assert_different_registers(r, Z_R1_scratch);
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+// Pop a 64-bit long from the expression stack (occupies two slots).
+void InterpreterMacroAssembler::pop_l(Register r) {
+  z_lg(r, Interpreter::expr_offset_in_bytes(0), Z_esp);
+  add2reg(Z_esp, 2*Interpreter::stackElementSize);
+  assert_different_registers(r, Z_R1_scratch);
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+// Pop a float from the expression stack (one slot).
+void InterpreterMacroAssembler::pop_f(FloatRegister f) {
+  mem2freg_opt(f, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)), false);
+  add2reg(Z_esp, Interpreter::stackElementSize);
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+// Pop a double from the expression stack (occupies two slots).
+void InterpreterMacroAssembler::pop_d(FloatRegister f) {
+  mem2freg_opt(f, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)), true);
+  add2reg(Z_esp, 2*Interpreter::stackElementSize);
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+// Push a 32-bit int onto the expression stack (one slot).
+// Note: the stack grows downward; Z_esp points at the free slot.
+void InterpreterMacroAssembler::push_i(Register r) {
+  assert_different_registers(r, Z_R1_scratch);
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+  z_st(r, Address(Z_esp));
+  add2reg(Z_esp, -Interpreter::stackElementSize);
+}
+
+// Push an oop/pointer onto the expression stack (one slot).
+// NOTE(review): unlike the other push_* variants, no verify_esp here — confirm intentional.
+void InterpreterMacroAssembler::push_ptr(Register r) {
+  z_stg(r, Address(Z_esp));
+  add2reg(Z_esp, -Interpreter::stackElementSize);
+}
+
+// Push a 64-bit long (two slots); the upper slot is cleared.
+void InterpreterMacroAssembler::push_l(Register r) {
+  assert_different_registers(r, Z_R1_scratch);
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+  int offset = -Interpreter::stackElementSize;
+  z_stg(r, Address(Z_esp, offset));
+  clear_mem(Address(Z_esp), Interpreter::stackElementSize);
+  add2reg(Z_esp, 2 * offset);
+}
+
+// Push a float onto the expression stack (one slot).
+void InterpreterMacroAssembler::push_f(FloatRegister f) {
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+  freg2mem_opt(f, Address(Z_esp), false);
+  add2reg(Z_esp, -Interpreter::stackElementSize);
+}
+
+// Push a double onto the expression stack (two slots).
+void InterpreterMacroAssembler::push_d(FloatRegister d) {
+  debug_only(verify_esp(Z_esp, Z_R1_scratch));
+  int offset = -Interpreter::stackElementSize;
+  freg2mem_opt(d, Address(Z_esp, offset));
+  add2reg(Z_esp, 2 * offset);
+}
+
+// Push the TOS value (Z_tos or Z_ftos, selected by 'state') onto the
+// expression stack.
+void InterpreterMacroAssembler::push(TosState state) {
+  verify_oop(Z_tos, state);
+  switch (state) {
+    case atos: push_ptr();           break;
+    case btos: push_i();             break;
+    case ztos:
+    case ctos:
+    case stos: push_i();             break;
+    case itos: push_i();             break;
+    case ltos: push_l();             break;
+    case ftos: push_f();             break;
+    case dtos: push_d();             break;
+    case vtos: /* nothing to do */   break;
+    default  : ShouldNotReachHere();
+  }
+}
+
+// Pop the expression stack top into the TOS register matching 'state'.
+void InterpreterMacroAssembler::pop(TosState state) {
+  switch (state) {
+    case atos: pop_ptr(Z_tos);       break;
+    case btos: pop_i(Z_tos);         break;
+    case ztos:
+    case ctos:
+    case stos: pop_i(Z_tos);         break;
+    case itos: pop_i(Z_tos);         break;
+    case ltos: pop_l(Z_tos);         break;
+    case ftos: pop_f(Z_ftos);        break;
+    case dtos: pop_d(Z_ftos);        break;
+    case vtos: /* nothing to do */   break;
+    default  : ShouldNotReachHere();
+  }
+  verify_oop(Z_tos, state);
+}
+
+// Helpers for swap and dup: read/write stack element 'n' (0 = top) in place,
+// without moving Z_esp.
+void InterpreterMacroAssembler::load_ptr(int n, Register val) {
+  z_lg(val, Address(Z_esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+void InterpreterMacroAssembler::store_ptr(int n, Register val) {
+  z_stg(val, Address(Z_esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+// Set up the interpreter calling convention before jumping to a callee:
+// sender sp in Z_R10, top_frame_sp recorded in the frame, bcp/esp spilled,
+// and the callee Method* in Z_method.
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted(Register method) {
+  // Satisfy interpreter calling convention (see generate_normal_entry()).
+  z_lgr(Z_R10, Z_SP); // Set sender sp (aka initial caller sp, aka unextended sp).
+  // Record top_frame_sp, because the callee might modify it, if it's compiled.
+  z_stg(Z_SP, _z_ijava_state_neg(top_frame_sp), Z_fp);
+  save_bcp();
+  save_esp();
+  z_lgr(Z_method, method); // Set Z_method (kills Z_fp!).
+}
+
+// Jump to from_interpreted entry of a call unless single stepping is possible
+// in this thread in which case we must call the i2i entry.
+// Kills Z_R0_scratch and Z_R1_scratch; 'temp' is asserted different but unused here.
+void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
+  assert_different_registers(method, Z_R10 /*used for initial_caller_sp*/, temp);
+  prepare_to_jump_from_interpreted(method);
+
+  if (JvmtiExport::can_post_interpreter_events()) {
+    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+    // compiled code in threads for which the event is enabled. Check here for
+    // interp_only_mode if these events CAN be enabled.
+    z_lg(Z_R1_scratch, Address(method, Method::from_interpreted_offset()));
+    MacroAssembler::load_and_test_int(Z_R0_scratch, Address(Z_thread, JavaThread::interp_only_mode_offset()));
+    z_bcr(bcondEqual, Z_R1_scratch); // Run compiled code if zero.
+    // Run interpreted.
+    z_lg(Z_R1_scratch, Address(method, Method::interpreter_entry_offset()));
+    z_br(Z_R1_scratch);
+  } else {
+    // Run compiled code.
+    z_lg(Z_R1_scratch, Address(method, Method::from_interpreted_offset()));
+    z_br(Z_R1_scratch);
+  }
+}
+
+#ifdef ASSERT
+// Debug-only sanity check that Resp (usually Z_esp) points into the legal
+// expression-stack area of the current frame: below the monitor area and not
+// below the last word of TOP_IJAVA_FRAME_ABI. Kills 'Rtemp'.
+void InterpreterMacroAssembler::verify_esp(Register Resp, Register Rtemp) {
+  // About to read or write Resp[0].
+  // Make sure it is not in the monitors or the TOP_IJAVA_FRAME_ABI.
+  address reentry = NULL;
+
+  {
+    // Check if the frame pointer in Z_fp is correct.
+    NearLabel OK;
+    z_cg(Z_fp, 0, Z_SP);
+    z_bre(OK);
+    reentry = stop_chain_static(reentry, "invalid frame pointer Z_fp");
+    bind(OK);
+  }
+  {
+    // Resp must not point into or below the operand stack,
+    // i.e. IJAVA_STATE.monitors > Resp.
+    NearLabel OK;
+    Register Rmonitors = Rtemp;
+    z_lg(Rmonitors, _z_ijava_state_neg(monitors), Z_fp);
+    compareU64_and_branch(Rmonitors, Resp, bcondHigh, OK);
+    reentry = stop_chain_static(reentry, "too many pops: Z_esp points into monitor area");
+    bind(OK);
+  }
+  {
+    // Resp may point to the last word of TOP_IJAVA_FRAME_ABI, but not below
+    // i.e. !(Z_SP + frame::z_top_ijava_frame_abi_size - Interpreter::stackElementSize > Resp).
+    NearLabel OK;
+    Register Rabi_bottom = Rtemp;
+    add2reg(Rabi_bottom, frame::z_top_ijava_frame_abi_size - Interpreter::stackElementSize, Z_SP);
+    compareU64_and_branch(Rabi_bottom, Resp, bcondNotHigh, OK);
+    reentry = stop_chain_static(reentry, "too many pushes: Z_esp points into TOP_IJAVA_FRAME_ABI");
+    bind(OK);
+  }
+}
+
+// Debug-only check of the ijava_state magic word in the current frame.
+// Kills 'tmp'.
+void InterpreterMacroAssembler::asm_assert_ijava_state_magic(Register tmp) {
+  Label magic_ok;
+  load_const_optimized(tmp, frame::z_istate_magic_number);
+  z_cg(tmp, Address(Z_fp, _z_ijava_state_neg(magic)));
+  z_bre(magic_ok);
+  stop_static("error: wrong magic number in ijava_state access");
+  bind(magic_ok);
+}
+#endif // ASSERT
+
+// Spill Z_bcp into the frame's ijava_state slot. In non-product builds the
+// magic word is checked (using Z_bcp as scratch) and Z_bcp reloaded afterwards.
+void InterpreterMacroAssembler::save_bcp() {
+  z_stg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp)));
+  asm_assert_ijava_state_magic(Z_bcp);
+  NOT_PRODUCT(z_lg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp))));
+}
+
+// Reload Z_bcp from the frame's ijava_state slot.
+void InterpreterMacroAssembler::restore_bcp() {
+  asm_assert_ijava_state_magic(Z_bcp);
+  z_lg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp)));
+}
+
+// Spill Z_esp into the frame's ijava_state slot.
+void InterpreterMacroAssembler::save_esp() {
+  z_stg(Z_esp, Address(Z_fp, _z_ijava_state_neg(esp)));
+}
+
+// Reload Z_esp from the frame's ijava_state slot
+// (Z_esp doubles as magic-check scratch before the reload).
+void InterpreterMacroAssembler::restore_esp() {
+  asm_assert_ijava_state_magic(Z_esp);
+  z_lg(Z_esp, Address(Z_fp, _z_ijava_state_neg(esp)));
+}
+
+// Load the monitor area pointer from the frame into 'reg'
+// ('reg' doubles as magic-check scratch before the load).
+void InterpreterMacroAssembler::get_monitors(Register reg) {
+  asm_assert_ijava_state_magic(reg);
+  mem2reg_opt(reg, Address(Z_fp, _z_ijava_state_neg(monitors)));
+}
+
+// Store 'reg' as the frame's monitor area pointer.
+void InterpreterMacroAssembler::save_monitors(Register reg) {
+  reg2mem_opt(reg, Address(Z_fp, _z_ijava_state_neg(monitors)));
+}
+
+// Load the method data pointer (mdx slot) from the frame.
+void InterpreterMacroAssembler::get_mdp(Register mdp) {
+  z_lg(mdp, _z_ijava_state_neg(mdx), Z_fp);
+}
+
+// Store the method data pointer into the frame's mdx slot.
+void InterpreterMacroAssembler::save_mdp(Register mdp) {
+  z_stg(mdp, _z_ijava_state_neg(mdx), Z_fp);
+}
+
+// Values that are only read (besides initialization).
+
+// Reload Z_locals from the frame ('Z_locals' doubles as magic-check scratch).
+void InterpreterMacroAssembler::restore_locals() {
+  asm_assert_ijava_state_magic(Z_locals);
+  z_lg(Z_locals, Address(Z_fp, _z_ijava_state_neg(locals)));
+}
+
+// Load the current Method* from the frame into 'reg'
+// ('reg' doubles as magic-check scratch before the load).
+void InterpreterMacroAssembler::get_method(Register reg) {
+  asm_assert_ijava_state_magic(reg);
+  z_lg(reg, Address(Z_fp, _z_ijava_state_neg(method)));
+}
+
+// Load the 2-byte operand at Z_bcp + bcp_offset into Rdst, sign- or
+// zero-extended to 64 bits according to 'is_signed'.
+void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register Rdst, int bcp_offset,
+                                                          signedOrNot is_signed) {
+  // Rdst is an 8-byte return value!!!
+
+  // Unaligned loads incur only a small penalty on z/Architecture. The penalty
+  // is a few (2..3) ticks, even when the load crosses a cache line
+  // boundary. In case of a cache miss, the stall could, of course, be
+  // much longer.
+
+  switch (is_signed) {
+    case Signed:
+      z_lgh(Rdst, bcp_offset, Z_R0, Z_bcp);
+     break;
+   case Unsigned:
+     z_llgh(Rdst, bcp_offset, Z_R0, Z_bcp);
+     break;
+   default:
+     ShouldNotReachHere();
+  }
+}
+
+
+// Load the 4-byte operand at Z_bcp + bcp_offset into Rdst (sign-extended to
+// 64 bits), optionally setting the condition code on the loaded value.
+void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register Rdst, int bcp_offset,
+                                                          setCCOrNot set_cc) {
+  // Rdst is an 8-byte return value!!!
+
+  // Unaligned loads incur only a small penalty on z/Architecture. The penalty
+  // is a few (2..3) ticks, even when the load crosses a cache line
+  // boundary. In case of a cache miss, the stall could, of course, be
+  // much longer.
+
+  // Both variants implement a sign-extending int2long load.
+  if (set_cc == set_CC) {
+    load_and_test_int2long(Rdst, Address(Z_bcp, (intptr_t)bcp_offset));
+  } else {
+    mem2reg_signed_opt(    Rdst, Address(Z_bcp, (intptr_t)bcp_offset));
+  }
+}
+
+// Load the current method's ConstantPool* into Rdst
+// (method -> ConstMethod -> constants).
+void InterpreterMacroAssembler::get_constant_pool(Register Rdst) {
+  get_method(Rdst);
+  mem2reg_opt(Rdst, Address(Rdst, Method::const_offset()));
+  mem2reg_opt(Rdst, Address(Rdst, ConstMethod::constants_offset()));
+}
+
+// Load the ConstantPool* into Rcpool and its tags array into Rtags.
+void InterpreterMacroAssembler::get_cpool_and_tags(Register Rcpool, Register Rtags) {
+  get_constant_pool(Rcpool);
+  mem2reg_opt(Rtags, Address(Rcpool, ConstantPool::tags_offset_in_bytes()));
+}
+
+// Unlock if synchronized method.
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+//
+// If there are locked Java monitors
+//   If throw_monitor_exception
+//     throws IllegalMonitorStateException
+//   Else if install_monitor_exception
+//     installs IllegalMonitorStateException
+//   Else
+//     no error processing
+// Unlock the receiver if this is a synchronized method, then verify that
+// all other monitors in the frame's monitor block have been unlocked,
+// applying the throw/install policy described above on violations.
+// Tos/result is preserved across the checks via push(state)/pop(state).
+// Kills Z_ARG1, Z_ARG2, Z_ARG3 and Z_R0_scratch.
+void InterpreterMacroAssembler::unlock_if_synchronized_method(TosState state,
+                                                              bool throw_monitor_exception,
+                                                              bool install_monitor_exception) {
+  NearLabel unlocked, unlock, no_unlock;
+
+  {
+    Register R_method = Z_ARG2;
+    Register R_do_not_unlock_if_synchronized = Z_ARG3;
+
+    // Get the value of _do_not_unlock_if_synchronized from the thread.
+    const Address do_not_unlock_if_synchronized(Z_thread,
+                                                JavaThread::do_not_unlock_if_synchronized_offset());
+    load_sized_value(R_do_not_unlock_if_synchronized, do_not_unlock_if_synchronized, 1, false /*unsigned*/);
+    z_mvi(do_not_unlock_if_synchronized, false); // Reset the flag.
+
+    // Check if synchronized method.
+    get_method(R_method);
+    verify_oop(Z_tos, state);
+    push(state); // Save tos/result.
+    testbit(method2_(R_method, access_flags), JVM_ACC_SYNCHRONIZED_BIT);
+    z_bfalse(unlocked); // Not synchronized: nothing to unlock here.
+
+    // Don't unlock anything if the _do_not_unlock_if_synchronized flag
+    // is set.
+    compareU64_and_branch(R_do_not_unlock_if_synchronized, (intptr_t)0L, bcondNotEqual, no_unlock);
+  }
+
+  // unlock monitor
+
+  // BasicObjectLock will be first in list, since this is a
+  // synchronized method. However, need to check that the object has
+  // not been unlocked by an explicit monitorexit bytecode.
+  const Address monitor(Z_fp, -(frame::z_ijava_state_size + (int) sizeof(BasicObjectLock)));
+  // We use Z_ARG2 so that if we go slow path it will be the correct
+  // register for unlock_object to pass to VM directly.
+  load_address(Z_ARG2, monitor); // Address of first monitor.
+  z_lg(Z_ARG3, Address(Z_ARG2, BasicObjectLock::obj_offset_in_bytes()));
+  // Non-null obj entry => monitor is still locked, go unlock it.
+  compareU64_and_branch(Z_ARG3, (intptr_t)0L, bcondNotEqual, unlock);
+
+  if (throw_monitor_exception) {
+    // Entry already unlocked need to throw an exception.
+    MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
+    should_not_reach_here();
+  } else {
+    // Monitor already unlocked during a stack unroll.
+    // If requested, install an illegal_monitor_state_exception.
+    // Continue with stack unrolling.
+    if (install_monitor_exception) {
+      MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
+    }
+   z_bru(unlocked);
+  }
+
+  bind(unlock);
+
+  unlock_object(Z_ARG2);
+
+  bind(unlocked);
+
+  // I0, I1: Might contain return value
+
+  // Check that all monitors are unlocked.
+  {
+    NearLabel loop, exception, entry, restart;
+    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+    // We use Z_ARG2 so that if we go slow path it will be the correct
+    // register for unlock_object to pass to VM directly.
+    Register R_current_monitor = Z_ARG2;
+    Register R_monitor_block_bot = Z_ARG1;
+    const Address monitor_block_top(Z_fp, _z_ijava_state_neg(monitors));
+    const Address monitor_block_bot(Z_fp, -frame::z_ijava_state_size);
+
+    bind(restart);
+    // Starting with top-most entry.
+    z_lg(R_current_monitor, monitor_block_top);
+    // Points to word before bottom of monitor block.
+    load_address(R_monitor_block_bot, monitor_block_bot);
+    z_bru(entry);
+
+    // Entry already locked, need to throw exception.
+    bind(exception);
+
+    if (throw_monitor_exception) {
+      // Throw exception.
+      MacroAssembler::call_VM(noreg,
+                              CAST_FROM_FN_PTR(address, InterpreterRuntime::
+                                               throw_illegal_monitor_state_exception));
+      should_not_reach_here();
+    } else {
+      // Stack unrolling. Unlock object and install illegal_monitor_exception.
+      // Unlock does not block, so don't have to worry about the frame.
+      // We don't have to preserve c_rarg1 since we are going to throw an exception.
+      unlock_object(R_current_monitor);
+      if (install_monitor_exception) {
+        call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                        InterpreterRuntime::
+                                        new_illegal_monitor_state_exception));
+      }
+      // Rescan the whole monitor block from the top after the VM call.
+      z_bru(restart);
+    }
+
+    bind(loop);
+    // Check if current entry is used.
+    load_and_test_long(Z_R0_scratch, Address(R_current_monitor, BasicObjectLock::obj_offset_in_bytes()));
+    z_brne(exception); // Entry still locked: handle per policy above.
+
+    add2reg(R_current_monitor, entry_size); // Otherwise advance to next entry.
+    bind(entry);
+    compareU64_and_branch(R_current_monitor, R_monitor_block_bot, bcondNotEqual, loop);
+  }
+
+  bind(no_unlock);
+  pop(state); // Restore tos/result saved above.
+  verify_oop(Z_tos, state);
+}
+
+// remove activation
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+//   If throw_monitor_exception
+//     throws IllegalMonitorStateException
+//   Else if install_monitor_exception
+//     installs IllegalMonitorStateException
+//   Else
+//     no error processing
+//
+// return_pc   - presumably receives the return pc when the frame is
+//               popped (see pop_interpreter_frame) — confirm in caller.
+// notify_jvmti - if true, a JVMTI method-exit notification is emitted.
+void InterpreterMacroAssembler::remove_activation(TosState state,
+                                                  Register return_pc,
+                                                  bool throw_monitor_exception,
+                                                  bool install_monitor_exception,
+                                                  bool notify_jvmti) {
+
+  unlock_if_synchronized_method(state, throw_monitor_exception, install_monitor_exception);
+
+  // Save result (push state before jvmti call and pop it afterwards) and notify jvmti.
+  notify_method_exit(false, state, notify_jvmti ? NotifyJVMTI : SkipNotifyJVMTI);
+
+  verify_oop(Z_tos, state);
+  verify_thread();
+
+  // Tear down the interpreter frame; Z_ARG2 and Z_ARG3 serve as temps.
+  pop_interpreter_frame(return_pc, Z_ARG2, Z_ARG3);
+}
+
+// lock object
+//
+// Registers alive
+//   monitor - Address of the BasicObjectLock to be used for locking,
+//             which must be initialized with the object to lock.
+//   object  - The object (oop) to be locked; used as base register for
+//             accessing the object's mark word.
+void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
+
+  // With -XX:+UseHeavyMonitors, always go to the runtime slow path.
+  if (UseHeavyMonitors) {
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            monitor, /*check_for_exceptions=*/false);
+    return;
+  }
+
+  // template code:
+  //
+  // markOop displaced_header = obj->mark().set_unlocked();
+  // monitor->lock()->set_displaced_header(displaced_header);
+  // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+  //   // We stored the monitor address into the object's mark word.
+  // } else if (THREAD->is_lock_owned((address)displaced_header))
+  //   // Simple recursive case.
+  //   monitor->lock()->set_displaced_header(NULL);
+  // } else {
+  //   // Slow path.
+  //   InterpreterRuntime::monitorenter(THREAD, monitor);
+  // }
+
+  // Note: displaced_header and current_header deliberately alias the same
+  // register (asserted below): CSG leaves the current mark word in the
+  // first-operand register when the compare fails.
+  const Register displaced_header = Z_ARG5;
+  const Register object_mark_addr = Z_ARG4;
+  const Register current_header   = Z_ARG5;
+
+  NearLabel done;
+  NearLabel slow_case;
+
+  // markOop displaced_header = obj->mark().set_unlocked();
+
+  // Load markOop from object into displaced_header.
+  z_lg(displaced_header, oopDesc::mark_offset_in_bytes(), object);
+
+  if (UseBiasedLocking) {
+    biased_locking_enter(object, displaced_header, Z_R1, Z_R0, done, &slow_case);
+  }
+
+  // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
+  z_oill(displaced_header, markOopDesc::unlocked_value);
+
+  // monitor->lock()->set_displaced_header(displaced_header);
+
+  // Initialize the box (Must happen before we update the object mark!).
+  z_stg(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
+                          BasicLock::displaced_header_offset_in_bytes(), monitor);
+
+  // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+
+  // Store stack address of the BasicObjectLock (this is monitor) into object.
+  add2reg(object_mark_addr, oopDesc::mark_offset_in_bytes(), object);
+
+  z_csg(displaced_header, monitor, 0, object_mark_addr);
+  assert(current_header==displaced_header, "must be same register"); // Identified two registers from z/Architecture.
+
+  z_bre(done); // CAS succeeded: we own the lock.
+
+  // } else if (THREAD->is_lock_owned((address)displaced_header))
+  //   // Simple recursive case.
+  //   monitor->lock()->set_displaced_header(NULL);
+
+  // We did not see an unlocked object so try the fast recursive case.
+
+  // Check if owner is self by comparing the value in the markOop of object
+  // (current_header) with the stack pointer.
+  z_sgr(current_header, Z_SP);
+
+  assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+
+  // The prior sequence "LGR, NGR, LTGR" can be done better
+  // (Z_R1 is temp and not used after here).
+  load_const_optimized(Z_R0, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+  z_ngr(Z_R0, current_header); // AND sets CC (result eq/ne 0)
+
+  // If condition is true we are done and hence we can store 0 in the displaced
+  // header indicating it is a recursive lock and be done.
+  z_brne(slow_case); // Not owned by this thread's stack: slow path.
+  z_release();  // Membar unnecessary on zarch AND because the above csg does a sync before and after.
+  z_stg(Z_R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
+                      BasicLock::displaced_header_offset_in_bytes(), monitor);
+  z_bru(done);
+
+  // } else {
+  //   // Slow path.
+  //   InterpreterRuntime::monitorenter(THREAD, monitor);
+
+  // None of the above fast optimizations worked so we have to get into the
+  // slow case of monitor enter.
+  bind(slow_case);
+
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+          monitor, /*check_for_exceptions=*/false);
+
+  // }
+
+  bind(done);
+}
+
+// Unlocks an object. Used in monitorexit bytecode and remove_activation.
+//
+// Registers alive
+//   monitor - address of the BasicObjectLock to be used for locking,
+//             which must be initialized with the object to lock.
+//   object  - the locked object, or noreg, in which case it is loaded
+//             from the monitor's obj entry below.
+//
+// Throw IllegalMonitorStateException if object is not locked by current thread.
+void InterpreterMacroAssembler::unlock_object(Register monitor, Register object) {
+
+  // With -XX:+UseHeavyMonitors, always go to the runtime slow path.
+  if (UseHeavyMonitors) {
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            monitor, /*check_for_exceptions=*/ true);
+    return;
+  }
+
+// else {
+  // template code:
+  //
+  // if ((displaced_header = monitor->displaced_header()) == NULL) {
+  //   // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
+  //   monitor->set_obj(NULL);
+  // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+  //   // We swapped the unlocked mark in displaced_header into the object's mark word.
+  //   monitor->set_obj(NULL);
+  // } else {
+  //   // Slow path.
+  //   InterpreterRuntime::monitorexit(THREAD, monitor);
+  // }
+
+  const Register displaced_header = Z_ARG4;
+  const Register current_header   = Z_R1;
+  Address obj_entry(monitor, BasicObjectLock::obj_offset_in_bytes());
+  Label done;
+
+  if (object == noreg) {
+    // In the template interpreter, we must assure that the object
+    // entry in the monitor is cleared on all paths. Thus we move
+    // loading up to here, and clear the entry afterwards.
+    object = Z_ARG3; // Use Z_ARG3 if caller didn't pass object.
+    z_lg(object, obj_entry);
+  }
+
+  assert_different_registers(monitor, object, displaced_header, current_header);
+
+  // if ((displaced_header = monitor->displaced_header()) == NULL) {
+  //   // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
+  //   monitor->set_obj(NULL);
+
+  // Mark the monitor slot free; restored below only on the slow path.
+  clear_mem(obj_entry, sizeof(oop));
+
+  if (UseBiasedLocking) {
+    // The object address from the monitor is in object.
+    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+    biased_locking_exit(object, displaced_header, done);
+  }
+
+  // Test first if we are in the fast recursive case.
+  MacroAssembler::load_and_test_long(displaced_header,
+                                     Address(monitor, BasicObjectLock::lock_offset_in_bytes() +
+                                                      BasicLock::displaced_header_offset_in_bytes()));
+  z_bre(done); // displaced_header == 0 -> goto done
+
+  // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+  //   // We swapped the unlocked mark in displaced_header into the object's mark word.
+  //   monitor->set_obj(NULL);
+
+  // If we still have a lightweight lock, unlock the object and be done.
+
+  // The markword is expected to be at offset 0.
+  assert(oopDesc::mark_offset_in_bytes() == 0, "unlock_object: review code below");
+
+  // We have the displaced header in displaced_header. If the lock is still
+  // lightweight, it will contain the monitor address and we'll store the
+  // displaced header back into the object's mark word.
+  z_lgr(current_header, monitor);
+  z_csg(current_header, displaced_header, 0, object);
+  z_bre(done); // CAS succeeded: mark word restored, done.
+
+  // } else {
+  //   // Slow path.
+  //   InterpreterRuntime::monitorexit(THREAD, monitor);
+
+  // The lock has been converted into a heavy lock and hence
+  // we need to get into the slow case.
+  z_stg(object, obj_entry);   // Restore object entry, has been cleared above.
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+          monitor,  /*check_for_exceptions=*/false);
+
+  // }
+
+  bind(done);
+}
+
+// Load the method data pointer from the frame's mdx slot into mdp.
+// Branches to zero_continue if it is zero (no MDO set up yet).
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  load_and_test_long(mdp, Address(Z_fp, _z_ijava_state_neg(mdx)));
+  z_brz(zero_continue);
+}
+
+// Set the method data pointer for the current bcp.
+// Computes mdp = MDO + data_offset + mdi (mdi obtained from the runtime
+// via bcp_to_di) and stores it into the frame. If the method has no MDO,
+// a zero mdp is stored.
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  Label    set_mdp;
+  Register mdp    = Z_ARG4;
+  Register method = Z_ARG5;
+
+  get_method(method);
+  // Test MDO to avoid the call if it is NULL.
+  load_and_test_long(mdp, method2_(method, method_data));
+  z_brz(set_mdp); // mdp is zero here and stored as-is.
+
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), method, Z_bcp);
+  // Z_RET: mdi
+  // Mdo is guaranteed to be non-zero here, we checked for it before the call.
+  assert(method->is_nonvolatile(), "choose nonvolatile reg or reload from frame");
+  z_lg(mdp, method2_(method, method_data)); // Must reload, mdp is volatile reg.
+  add2reg_with_index(mdp, in_bytes(MethodData::data_offset()), Z_RET, mdp);
+
+  bind(set_mdp);
+  save_mdp(mdp);
+}
+
+// Debug-only sanity check: the saved mdp must be consistent with the
+// current bcp. Reconstructs the expected bcp from the mdp's DataLayout
+// bci and the method's code base, and calls the runtime verifier on
+// mismatch.
+void InterpreterMacroAssembler::verify_method_data_pointer() {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+#ifdef ASSERT
+  NearLabel verify_continue;
+  Register bcp_expected = Z_ARG3;
+  Register mdp    = Z_ARG4;
+  Register method = Z_ARG5;
+
+  test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue
+  get_method(method);
+
+  // If the mdp is valid, it will point to a DataLayout header which is
+  // consistent with the bcp. The converse is highly probable also.
+  load_sized_value(bcp_expected, Address(mdp, DataLayout::bci_offset()), 2, false /*unsigned*/);
+  z_ag(bcp_expected, Address(method, Method::const_offset()));
+  load_address(bcp_expected, Address(bcp_expected, ConstMethod::codes_offset()));
+  compareU64_and_branch(bcp_expected, Z_bcp, bcondEqual, verify_continue);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, Z_bcp, mdp);
+  bind(verify_continue);
+#endif // ASSERT
+}
+
+// Store value into the 64-bit MDO cell at mdp_in + constant.
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int constant, Register value) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  z_stg(value, constant, mdp_in);
+}
+
+// Add DataLayout::counter_increment to (or, if decrement is true,
+// subtract it from) the 64-bit profile counter at mdp_in + constant.
+// 64-bit counters are not expected to overflow in practice; saturation
+// to 32-bit values is performed when the counts are read.
+// Kills: tmp.
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      int constant,
+                                                      Register tmp,
+                                                      bool decrement) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  assert_different_registers(mdp_in, tmp);
+  // Counter address within the current MDO data entry.
+  Address data(mdp_in, constant);
+  const int delta = decrement ? -DataLayout::counter_increment : DataLayout::counter_increment;
+  add2mem_64(data, delta, tmp);
+}
+
+// OR flag_byte_constant into the DataLayout flags byte of the current
+// MDO entry.
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
+                                                int flag_byte_constant) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  // Set the flag.
+  z_oi(Address(mdp_in, DataLayout::flags_offset()), flag_byte_constant);
+}
+
+// Compare value with the MDO cell at mdp_in + offset and branch to
+// not_equal_continue if they differ. With test_value_out == noreg the
+// comparison is done directly against memory; otherwise test_value_out
+// receives the cell's contents so the caller can reuse it.
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
+                                                 int offset,
+                                                 Register value,
+                                                 Register test_value_out,
+                                                 Label& not_equal_continue) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  if (test_value_out == noreg) {
+    z_cg(value, Address(mdp_in, offset));
+    z_brne(not_equal_continue);
+  } else {
+    // Put the test value into a register, so caller can use it:
+    z_lg(test_value_out, Address(mdp_in, offset));
+    compareU64_and_branch(test_value_out, value, bcondNotEqual, not_equal_continue);
+  }
+}
+
+// Advance mdp by the displacement stored at mdp_in + offset_of_disp
+// (convenience overload without an index register).
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp) {
+  update_mdp_by_offset(mdp_in, noreg, offset_of_disp);
+}
+
+// Advance mdp by the displacement stored at mdp_in + dataidx +
+// offset_of_disp and write the new mdp back into the frame.
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     Register dataidx,
+                                                     int offset_of_disp) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  Address disp_address(mdp_in, dataidx, offset_of_disp);
+  Assembler::z_ag(mdp_in, disp_address);
+  save_mdp(mdp_in);
+}
+
+// Advance mdp by a compile-time constant and write the new mdp back
+// into the frame.
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  add2reg(mdp_in, constant);
+  save_mdp(mdp_in);
+}
+
+// Let the runtime update the mdp for a ret bytecode whose return_bci did
+// not match any recorded RetData row. return_bci must survive the VM
+// call, hence the nonvolatile-register assert.
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  assert(return_bci->is_nonvolatile(), "choose nonvolatile reg or save/restore");
+  call_VM(noreg,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
+          return_bci);
+}
+
+// Profile a taken branch: bump the JumpData taken count (the new count is
+// returned in bumped_count) and advance mdp by the recorded displacement
+// to the branch target's profile data.
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    // Otherwise, assign to mdp.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are taking a branch. Increment the taken count.
+    // We inline increment_mdp_data_at to return bumped_count in a register
+    //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()));
+    Address data(mdp, JumpData::taken_offset());
+    z_lg(bumped_count, data);
+    // 64-bit overflow is very unlikely. Saturation to 32-bit values is
+    // performed when reading the counts.
+    add2reg(bumped_count, DataLayout::counter_increment);
+    z_stg(bumped_count, data); // Store back out
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
+    bind(profile_continue);
+  }
+}
+
+// Profile an untaken branch: bump the BranchData not-taken count and
+// advance mdp past the branch data to the next bytecode's profile data.
+// Kills Z_R1_scratch.
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are taking a branch. Increment the not taken count.
+    increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Z_R1_scratch);
+
+    // The method data pointer needs to be updated to correspond to
+    // the next bytecode.
+    update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
+    bind(profile_continue);
+  }
+}
+
+// Profile a non-virtual call: bump the CounterData count and advance mdp
+// past the counter data.
+// Kills: Z_R1_scratch.
+void InterpreterMacroAssembler::profile_call(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call. Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
+    bind(profile_continue);
+  }
+}
+
+// Profile a final (non-dispatched) call: bump the count and advance mdp
+// past the full virtual-call data entry.
+void InterpreterMacroAssembler::profile_final_call(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call. Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+
+// Profile a virtual call: record the receiver's klass in the type rows
+// (via record_klass_in_profile) and advance mdp past the virtual-call
+// data. A null receiver (only possible when receiver_can_be_null) is
+// counted in the plain call counter instead.
+// reg2 is a scratch register for the row matching.
+void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
+                                                     Register mdp,
+                                                     Register reg2,
+                                                     bool receiver_can_be_null) {
+  if (ProfileInterpreter) {
+    NearLabel profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    NearLabel skip_receiver_profile;
+    if (receiver_can_be_null) {
+      NearLabel not_null;
+      compareU64_and_branch(receiver, (intptr_t)0L, bcondNotEqual, not_null);
+      // We are making a call. Increment the count for null receiver.
+      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+      z_bru(skip_receiver_profile);
+      bind(not_null);
+    }
+
+    // Record the receiver type.
+    record_klass_in_profile(receiver, mdp, reg2, true);
+    bind(skip_receiver_profile);
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+
+// This routine creates a state machine for updating the multi-row
+// type profile at a virtual call site (or other type-sensitive bytecode).
+// The machine visits each row (of receiver/count) until the receiver type
+// is found, or until it runs out of rows. At the same time, it remembers
+// the location of the first empty row. (An empty row records null for its
+// receiver, and can be allocated for a newly-observed receiver type.)
+// Because there are two degrees of freedom in the state, a simple linear
+// search will not work; it must be a decision tree. Hence this helper
+// function is recursive, to generate the required tree structured code.
+// It's the interpreter, so we are trading off code space for speed.
+// See below for example code.
+void InterpreterMacroAssembler::record_klass_in_profile_helper(
+                                        Register receiver, Register mdp,
+                                        Register reg2, int start_row,
+                                        Label& done, bool is_virtual_call) {
+  if (TypeProfileWidth == 0) {
+    // No receiver rows at all: just count the call if it is virtual.
+    if (is_virtual_call) {
+      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+    }
+    return;
+  }
+
+  int last_row = VirtualCallData::row_limit() - 1;
+  assert(start_row <= last_row, "must be work left to do");
+  // Test this row for both the receiver and for null.
+  // Take any of three different outcomes:
+  //   1. found receiver => increment count and goto done
+  //   2. found null => keep looking for case 1, maybe allocate this cell
+  //   3. found something else => keep looking for cases 1 and 2
+  // Case 3 is handled by a recursive call.
+  for (int row = start_row; row <= last_row; row++) {
+    NearLabel next_test;
+    bool test_for_null_also = (row == start_row);
+
+    // See if the receiver is receiver[n].
+    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
+    test_mdp_data_at(mdp, recvr_offset, receiver,
+                     (test_for_null_also ? reg2 : noreg),
+                     next_test);
+    // (Reg2 now contains the receiver from the CallData.)
+
+    // The receiver is receiver[n]. Increment count[n].
+    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+    increment_mdp_data_at(mdp, count_offset);
+    z_bru(done);
+    bind(next_test);
+
+    if (test_for_null_also) {
+      Label found_null;
+      // Failed the equality check on receiver[n]... Test for null.
+      z_ltgr(reg2, reg2);
+      if (start_row == last_row) {
+        // The only thing left to do is handle the null case.
+        if (is_virtual_call) {
+          z_brz(found_null);
+          // Receiver did not match any saved receiver and there is no empty row for it.
+          // Increment total counter to indicate polymorphic case.
+          increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+          z_bru(done);
+          bind(found_null);
+        } else {
+          z_brnz(done);
+        }
+        break;
+      }
+      // Since null is rare, make it be the branch-taken case.
+      z_brz(found_null);
+
+      // Put all the "Case 3" tests here.
+      record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call);
+
+      // Found a null. Keep searching for a matching receiver,
+      // but remember that this is an empty (unused) slot.
+      bind(found_null);
+    }
+  }
+
+  // In the fall-through case, we found no matching receiver, but we
+  // observed the receiver[start_row] is NULL.
+
+  // Fill in the receiver field and increment the count.
+  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
+  set_mdp_data_at(mdp, recvr_offset, receiver);
+  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+  load_const_optimized(reg2, DataLayout::counter_increment);
+  set_mdp_data_at(mdp, count_offset, reg2);
+  if (start_row > 0) {
+    z_bru(done); // Only the outermost invocation falls through to done.
+  }
+}
+
+// Example state machine code for three profile rows:
+//   // main copy of decision tree, rooted at row[0]
+//   if (row[0].rec == rec) { row[0].incr(); goto done; }
+//   if (row[0].rec != NULL) {
+//     // inner copy of decision tree, rooted at row[1]
+//     if (row[1].rec == rec) { row[1].incr(); goto done; }
+//     if (row[1].rec != NULL) {
+//       // degenerate decision tree, rooted at row[2]
+//       if (row[2].rec == rec) { row[2].incr(); goto done; }
+//       if (row[2].rec != NULL) { count.incr(); goto done; } // overflow
+//       row[2].init(rec); goto done;
+//     } else {
+//       // remember row[1] is empty
+//       if (row[2].rec == rec) { row[2].incr(); goto done; }
+//       row[1].init(rec); goto done;
+//     }
+//   } else {
+//     // remember row[0] is empty
+//     if (row[1].rec == rec) { row[1].incr(); goto done; }
+//     if (row[2].rec == rec) { row[2].incr(); goto done; }
+//     row[0].init(rec); goto done;
+//   }
+//   done:
+
+// Record receiver's klass in the type-profile rows at mdp; entry point
+// for the recursive decision-tree generator above.
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
+                                                        Register mdp, Register reg2,
+                                                        bool is_virtual_call) {
+  assert(ProfileInterpreter, "must be profiling");
+  Label done;
+
+  record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call);
+
+  bind (done);
+}
+
+// Profile a ret bytecode: bump the total ret count, then try to match
+// return_bci against the recorded RetData bci rows. On a hit, bump that
+// row's count and advance mdp by the row's displacement; otherwise let
+// the runtime update the mdp.
+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) {
+  if (ProfileInterpreter) {
+    NearLabel profile_continue;
+    uint row;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Update the total ret count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    for (row = 0; row < RetData::row_limit(); row++) {
+      NearLabel next_test;
+
+      // See if return_bci is equal to bci[n]:
+      test_mdp_data_at(mdp,
+                       in_bytes(RetData::bci_offset(row)),
+                       return_bci, noreg,
+                       next_test);
+
+      // Return_bci is equal to bci[n]. Increment the count.
+      increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)));
+
+      // The method data pointer needs to be updated to reflect the new target.
+      update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row)));
+      z_bru(profile_continue);
+      bind(next_test);
+    }
+
+    // No row matched: fall back to the runtime.
+    update_mdp_for_ret(return_bci);
+
+    bind(profile_continue);
+  }
+}
+
+// Record that a null was seen at a type-sensitive bytecode (set the
+// null_seen flag) and advance mdp past the entry; the entry is larger
+// when cast profiling (TypeProfileCasts) is on.
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
+    // The method data pointer needs to be updated.
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+    }
+    update_mdp_by_constant(mdp, mdp_delta);
+
+    bind(profile_continue);
+  }
+}
+
+// Undo the count bump made by profile_typecheck when the checkcast
+// actually failed. Note that mdp has already been advanced past the
+// entry, so the counter is addressed with a negative offset.
+// Kills: tmp.
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp, Register tmp) {
+  if (ProfileInterpreter && TypeProfileCasts) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    int count_offset = in_bytes(CounterData::count_offset());
+    // Back up the address, since we have already bumped the mdp.
+    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
+
+    // *Decrement* the counter. We expect to see zero or small negatives.
+    increment_mdp_data_at(mdp, count_offset, tmp, true);
+
+    bind (profile_continue);
+  }
+}
+
+// Profile a checkcast/instanceof: with TypeProfileCasts, record the
+// object's klass in the type rows; in any case advance mdp past the
+// entry. reg2 is a scratch register for the row matching.
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // The method data pointer needs to be updated.
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+
+      // Record the object type.
+      record_klass_in_profile(klass, mdp, reg2, false);
+    }
+    update_mdp_by_constant(mdp, mdp_delta);
+
+    bind(profile_continue);
+  }
+}
+
+// Profile the default case of a table/lookup switch: bump the default
+// count and advance mdp by the default-case displacement.
+void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Update the default case count.
+    increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset()));
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset()));
+
+    bind(profile_continue);
+  }
+}
+
+// Profile a non-default case of a table/lookup switch: bump the per-case
+// count for row `index` and advance mdp by that case's displacement.
+// Kills: index, scratch1, scratch2.
+void InterpreterMacroAssembler::profile_switch_case(Register index,
+                                                    Register mdp,
+                                                    Register scratch1,
+                                                    Register scratch2) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+    assert_different_registers(index, mdp, scratch1, scratch2);
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Build the base (index * per_case_size_in_bytes()) +
+    // case_array_offset_in_bytes().
+    z_sllg(index, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));
+    add2reg(index, in_bytes(MultiBranchData::case_array_offset()));
+
+    // Add the calculated base to the mdp -> address of the case' data.
+    Address case_data_addr(mdp, index);
+    Register case_data = scratch1;
+    load_address(case_data, case_data_addr);
+
+    // Update the case count.
+    increment_mdp_data_at(case_data,
+                          in_bytes(MultiBranchData::relative_count_offset()),
+                          scratch2);
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_offset(mdp,
+                         index,
+                         in_bytes(MultiBranchData::relative_displacement_offset()));
+
+    bind(profile_continue);
+  }
+}
+
+// Merge the type of obj into the TypeEntries cell at mdo_addr:
+//   - null obj          -> set the null_seen bit;
+//   - first klass seen  -> install the klass in the cell;
+//   - same klass again  -> leave the cell unchanged;
+//   - different klass   -> degrade the cell to type_unknown.
+// If cmp_done is true, the caller has already set the condition code
+// from comparing obj against null, so the compare is skipped.
+// kills: R0, R1, flags, loads klass from obj (if not null)
+void InterpreterMacroAssembler::profile_obj_type(Register obj, Address mdo_addr, Register klass, bool cmp_done) {
+  NearLabel null_seen, init_klass, do_nothing, do_update;
+
+  // Klass = obj is allowed.
+  const Register tmp = Z_R1;
+  assert_different_registers(obj, mdo_addr.base(), tmp, Z_R0);
+  assert_different_registers(klass, mdo_addr.base(), tmp, Z_R0);
+
+  z_lg(tmp, mdo_addr); // Current cell contents.
+  if (cmp_done) {
+    z_brz(null_seen);
+  } else {
+    compareU64_and_branch(obj, (intptr_t)0, Assembler::bcondEqual, null_seen);
+  }
+
+  verify_oop(obj);
+  load_klass(klass, obj);
+
+  // Klass seen before, nothing to do (regardless of unknown bit).
+  z_lgr(Z_R0, tmp);
+  assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction");
+  z_nill(Z_R0, TypeEntries::type_klass_mask & 0xFFFF); // Mask off the flag bits.
+  compareU64_and_branch(Z_R0, klass, Assembler::bcondEqual, do_nothing);
+
+  // Already unknown. Nothing to do anymore.
+  z_tmll(tmp, TypeEntries::type_unknown);
+  z_brc(Assembler::bcondAllOne, do_nothing);
+
+  z_lgr(Z_R0, tmp);
+  assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction");
+  z_nill(Z_R0, TypeEntries::type_mask & 0xFFFF);
+  compareU64_and_branch(Z_R0, (intptr_t)0, Assembler::bcondEqual, init_klass);
+
+  // Different than before. Cannot keep accurate profile.
+  z_oill(tmp, TypeEntries::type_unknown);
+  z_bru(do_update);
+
+  bind(init_klass);
+  // Combine klass and null_seen bit (only used if (tmp & type_mask)==0).
+  z_ogr(tmp, klass);
+  z_bru(do_update);
+
+  bind(null_seen);
+  // Set null_seen if obj is 0.
+  z_oill(tmp, TypeEntries::null_seen);
+  // fallthru: z_bru(do_update);
+
+  bind(do_update);
+  z_stg(tmp, mdo_addr); // Write the merged cell back.
+
+  bind(do_nothing);
+}
+
+// Profile the types of the arguments passed at the current call site and
+// advance the profiling state past the CallTypeData/VirtualCallTypeData.
+//   mdp        - method data pointer; clobbered, and the updated value is
+//                stored back into the frame's mdx slot.
+//   callee     - Method* of the callee (used for its parameter count).
+//   tmp        - scratch register.
+//   is_virtual - true for virtual call sites (VirtualCallData layout).
+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  assert_different_registers(mdp, callee, tmp);
+
+  if (MethodData::profile_arguments() || MethodData::profile_return()) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
+
+    // Bail out unless the data record is a (virtual) call type data record.
+    z_cliy(in_bytes(DataLayout::tag_offset()) - off_to_start, mdp,
+           is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag);
+    z_brne(profile_continue);
+
+    if (MethodData::profile_arguments()) {
+      NearLabel done;
+      int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
+      add2reg(mdp, off_to_args);
+
+      for (int i = 0; i < TypeProfileArgsLimit; i++) {
+        if (i > 0 || MethodData::profile_return()) {
+          // If return value type is profiled we may have no argument to profile.
+          z_lg(tmp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, mdp);
+          add2reg(tmp, -i*TypeStackSlotEntries::per_arg_count());
+          compare64_and_branch(tmp, TypeStackSlotEntries::per_arg_count(), Assembler::bcondLow, done);
+        }
+        // Number of parameters n of the callee.
+        z_lg(tmp, Address(callee, Method::const_offset()));
+        z_lgh(tmp, Address(tmp, ConstMethod::size_of_parameters_offset()));
+        // Stack offset o (zero based) from the start of the argument
+        // list. For n arguments translates into offset n - o - 1 from
+        // the end of the argument list. But there is an extra slot at
+        // the top of the stack. So the offset is n - o from Lesp.
+        z_sg(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args));
+        z_sllg(tmp, tmp, Interpreter::logStackElementSize);
+        Address stack_slot_addr(tmp, Z_esp);
+        // Load the argument and record its type in the MDO.
+        z_ltg(tmp, stack_slot_addr);
+
+        Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
+        profile_obj_type(tmp, mdo_arg_addr, tmp, /*ltg did compare to 0*/ true);
+
+        // Advance mdp past this argument's entry.
+        int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
+        add2reg(mdp, to_add);
+        off_to_args += to_add;
+      }
+
+      if (MethodData::profile_return()) {
+        // Remaining cell count after all TypeProfileArgsLimit arguments.
+        z_lg(tmp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, mdp);
+        add2reg(tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
+      }
+
+      bind(done);
+
+      if (MethodData::profile_return()) {
+        // We're right after the type profile for the last
+        // argument. Tmp is the number of cells left in the
+        // CallTypeData/VirtualCallTypeData to reach its end. Non null
+        // if there's a return to profile.
+        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+        z_sllg(tmp, tmp, exact_log2(DataLayout::cell_size));
+        z_agr(mdp, tmp);
+      }
+      // Persist the advanced mdp in the interpreter frame's mdx slot.
+      z_stg(mdp, _z_ijava_state_neg(mdx), Z_fp);
+    } else {
+      assert(MethodData::profile_return(), "either profile call args or call ret");
+      update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
+    }
+
+    // Mdp points right after the end of the
+    // CallTypeData/VirtualCallTypeData, right after the cells for the
+    // return value type if there's one.
+    bind(profile_continue);
+  }
+}
+
+// Profile the type of the value returned to the current call site.
+//   mdp - method data pointer (points right after the ProfileData to update).
+//   ret - register holding the returned object.
+//   tmp - scratch register.
+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
+  assert_different_registers(mdp, ret, tmp);
+  if (ProfileInterpreter && MethodData::profile_return()) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    if (MethodData::profile_return_jsr292_only()) {
+      // If we don't profile all invoke bytecodes we must make sure
+      // it's a bytecode we indeed profile. We can't go back to the
+      // beginning of the ProfileData we intend to update to check its
+      // type because we're right after it and we don't know its
+      // length.
+      NearLabel do_profile;
+      Address bc(Z_bcp);
+      z_lb(tmp, bc);
+      compare32_and_branch(tmp, Bytecodes::_invokedynamic, Assembler::bcondEqual, do_profile);
+      compare32_and_branch(tmp, Bytecodes::_invokehandle, Assembler::bcondEqual, do_profile);
+      get_method(tmp);
+      // Supplement to 8139891: _intrinsic_id exceeded 1-byte size limit.
+      if (Method::intrinsic_id_size_in_bytes() == 1) {
+        z_cli(Method::intrinsic_id_offset_in_bytes(), tmp, vmIntrinsics::_compiledLambdaForm);
+      } else {
+        assert(Method::intrinsic_id_size_in_bytes() == 2, "size error: check Method::_intrinsic_id");
+        z_lh(tmp, Method::intrinsic_id_offset_in_bytes(), Z_R0, tmp);
+        z_chi(tmp, vmIntrinsics::_compiledLambdaForm);
+      }
+      z_brne(profile_continue);
+
+      bind(do_profile);
+    }
+
+    // The return type entry lives right before the current mdp position.
+    Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
+    profile_obj_type(ret, mdo_ret_addr, tmp);
+
+    bind(profile_continue);
+  }
+}
+
+// Profile the types of a method's parameters, collected from the last
+// parameter down to the first.
+//   mdp        - method data pointer (clobbered).
+//   tmp1, tmp2 - scratch registers.
+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) {
+  if (ProfileInterpreter && MethodData::profile_parameters()) {
+    Label profile_continue, done;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Load the offset of the area within the MDO used for
+    // parameters. If it's negative we're not profiling any parameters.
+    Address parm_di_addr(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()));
+    load_and_test_int2long(tmp1, parm_di_addr);
+    z_brl(profile_continue);
+
+    // Compute a pointer to the area for parameters from the offset
+    // and move the pointer to the slot for the last
+    // parameters. Collect profiling from last parameter down.
+    // mdo start + parameters offset + array length - 1
+
+    // Pointer to the parameter area in the MDO.
+    z_agr(mdp, tmp1);
+
+    // Offset of the current profile entry to update.
+    const Register entry_offset = tmp1;
+    // entry_offset = array len in number of cells.
+    z_lg(entry_offset, Address(mdp, ArrayData::array_len_offset()));
+    // entry_offset (number of cells) = array len - size of 1 entry
+    add2reg(entry_offset, -TypeStackSlotEntries::per_arg_count());
+    // entry_offset in bytes
+    z_sllg(entry_offset, entry_offset, exact_log2(DataLayout::cell_size));
+
+    Label loop;
+    bind(loop);
+
+    // Addresses of this entry's stack-slot and type cells.
+    Address arg_off(mdp, entry_offset, ParametersTypeData::stack_slot_offset(0));
+    Address arg_type(mdp, entry_offset, ParametersTypeData::type_offset(0));
+
+    // Load offset on the stack from the slot for this parameter.
+    z_lg(tmp2, arg_off);
+    z_sllg(tmp2, tmp2, Interpreter::logStackElementSize);
+    z_lcgr(tmp2); // Negate.
+
+    // Profile the parameter.
+    z_ltg(tmp2, Address(Z_locals, tmp2));
+    profile_obj_type(tmp2, arg_type, tmp2, /*ltg did compare to 0*/ true);
+
+    // Go to next parameter.
+    z_aghi(entry_offset, -TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size);
+    z_brnl(loop);
+
+    bind(profile_continue);
+  }
+}
+
+// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
+//   counter_addr - memory location of the (32-bit) counter.
+//   increment    - value to add to the counter.
+//   mask         - memory location of the mask applied before the test.
+//   scratch      - receives the incremented counter value; on entry holds the
+//                  current counter value if preloaded is true.
+//   preloaded    - true if scratch was already loaded with *counter_addr.
+//   cond, where  - branch on cond to *where after masking; where may be NULL,
+//                  in which case only the counter update is emitted.
+void InterpreterMacroAssembler::increment_mask_and_jump(Address          counter_addr,
+                                                        int              increment,
+                                                        Address          mask,
+                                                        Register         scratch,
+                                                        bool             preloaded,
+                                                        branch_condition cond,
+                                                        Label           *where) {
+  assert_different_registers(counter_addr.base(), scratch);
+  if (preloaded) {
+    // Counter value already in scratch: just add and store back.
+    add2reg(scratch, increment);
+    reg2mem_opt(scratch, counter_addr, false);
+  } else {
+    if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(increment) && counter_addr.is_RSYform()) {
+      // Increment directly in memory, then reload the updated value.
+      z_alsi(counter_addr.disp20(), counter_addr.base(), increment);
+      mem2reg_signed_opt(scratch, counter_addr);
+    } else {
+      // Load-modify-store sequence.
+      mem2reg_signed_opt(scratch, counter_addr);
+      add2reg(scratch, increment);
+      reg2mem_opt(scratch, counter_addr, false);
+    }
+  }
+  // Apply the mask and branch on the resulting condition code.
+  z_n(scratch, mask);
+  if (where) { z_brc(cond, *where); }
+}
+
+// Get MethodCounters object for given method. Lazily allocated if necessary.
+//   method    - Ptr to Method object.
+//   Rcounters - Ptr to MethodCounters object associated with Method object.
+//   skip      - Exit point if MethodCounters object can't be created (OOM condition).
+void InterpreterMacroAssembler::get_method_counters(Register Rmethod,
+                                                    Register Rcounters,
+                                                    Label& skip) {
+  assert_different_registers(Rmethod, Rcounters);
+
+  BLOCK_COMMENT("get MethodCounters object {");
+
+  // Fast path: counters already allocated.
+  Label has_counters;
+  load_and_test_long(Rcounters, Address(Rmethod, Method::method_counters_offset()));
+  z_brnz(has_counters);
+
+  // Slow path: let the runtime allocate the MethodCounters object.
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), Rmethod, false);
+  z_ltgr(Rcounters, Z_RET); // Runtime call returns MethodCounters object.
+  z_brz(skip); // No MethodCounters, out of memory.
+
+  bind(has_counters);
+
+  BLOCK_COMMENT("} get MethodCounters object");
+}
+
+// Increment invocation counter in MethodCounters object.
+// Return (invocation_counter+backedge_counter) as "result" in RctrSum.
+// Counter values are all unsigned.
+//   Rcounters - Ptr to the MethodCounters object.
+//   RctrSum   - Receives the combined counter value.
+void InterpreterMacroAssembler::increment_invocation_counter(Register Rcounters, Register RctrSum) {
+  assert(UseCompiler || LogTouchedMethods, "incrementing must be useful");
+  assert_different_registers(Rcounters, RctrSum);
+
+  int increment          = InvocationCounter::count_increment;
+  int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset());
+  int be_counter_offset  = in_bytes(MethodCounters::backedge_counter_offset()   + InvocationCounter::counter_offset());
+
+  BLOCK_COMMENT("Increment invocation counter {");
+
+  if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(increment)) {
+    // Increment the invocation counter in place,
+    // then add the incremented value to the backedge counter.
+    z_l(RctrSum, be_counter_offset, Rcounters);
+    z_alsi(inv_counter_offset, Rcounters, increment);     // Atomic increment @no extra cost!
+    z_nilf(RctrSum, InvocationCounter::count_mask_value); // Mask off state bits.
+    z_al(RctrSum, inv_counter_offset, Z_R0, Rcounters);
+  } else {
+    // This path is optimized for low register consumption
+    // at the cost of somewhat higher operand delays.
+    // It does not need an extra temp register.
+
+    // Update the invocation counter.
+    z_l(RctrSum, inv_counter_offset, Rcounters);
+    if (RctrSum == Z_R0) {
+      z_ahi(RctrSum, increment); // add2reg() cannot encode Z_R0 as a base register.
+    } else {
+      add2reg(RctrSum, increment);
+    }
+    z_st(RctrSum, inv_counter_offset, Rcounters);
+
+    // Mask off the state bits.
+    z_nilf(RctrSum, InvocationCounter::count_mask_value);
+
+    // Add the backedge counter to the updated invocation counter to
+    // form the result.
+    z_al(RctrSum, be_counter_offset, Z_R0, Rcounters);
+  }
+
+  BLOCK_COMMENT("} Increment invocation counter");
+
+  // Note that this macro must leave backedge_count + invocation_count in RctrSum!
+}
+
+
+// Increment backedge counter in MethodCounters object.
+// Return (invocation_counter+backedge_counter) as "result" in RctrSum.
+// Counter values are all unsigned.
+//   Rcounters - Ptr to the MethodCounters object.
+//   RctrSum   - Receives the combined counter value.
+void InterpreterMacroAssembler::increment_backedge_counter(Register Rcounters, Register RctrSum) {
+  assert(UseCompiler, "incrementing must be useful");
+  assert_different_registers(Rcounters, RctrSum);
+
+  int increment          = InvocationCounter::count_increment;
+  int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset());
+  int be_counter_offset  = in_bytes(MethodCounters::backedge_counter_offset()   + InvocationCounter::counter_offset());
+
+  BLOCK_COMMENT("Increment backedge counter {");
+
+  if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(increment)) {
+    // Increment the backedge counter in place,
+    // then add the incremented value to the invocation counter.
+    z_l(RctrSum, inv_counter_offset, Rcounters);
+    z_alsi(be_counter_offset, Rcounters, increment);      // Atomic increment @no extra cost!
+    z_nilf(RctrSum, InvocationCounter::count_mask_value); // Mask off state bits.
+    z_al(RctrSum, be_counter_offset, Z_R0, Rcounters);
+  } else {
+    // This path is optimized for low register consumption
+    // at the cost of somewhat higher operand delays.
+    // It does not need an extra temp register.
+
+    // Update the backedge counter.
+    z_l(RctrSum, be_counter_offset, Rcounters);
+    if (RctrSum == Z_R0) {
+      z_ahi(RctrSum, increment); // add2reg() cannot encode Z_R0 as a base register.
+    } else {
+      add2reg(RctrSum, increment);
+    }
+    z_st(RctrSum, be_counter_offset, Rcounters);
+
+    // Mask off the state bits.
+    z_nilf(RctrSum, InvocationCounter::count_mask_value);
+
+    // Add the invocation counter to the updated backedge counter to
+    // form the result.
+    z_al(RctrSum, inv_counter_offset, Z_R0, Rcounters);
+  }
+
+  BLOCK_COMMENT("} Increment backedge counter");
+
+  // Note that this macro must leave backedge_count + invocation_count in RctrSum!
+}
+
+// Add an InterpMonitorElem to stack (see frame_s390.hpp).
+// Grows the frame by one monitor slot; if the expression stack is not empty,
+// its contents are copied down to make room for the new monitor.
+//   stack_is_empty - true if the expression stack is known to be empty.
+//   Rtemp1..Rtemp3 - scratch registers.
+void InterpreterMacroAssembler::add_monitor_to_stack(bool     stack_is_empty,
+                                                     Register Rtemp1,
+                                                     Register Rtemp2,
+                                                     Register Rtemp3) {
+
+  const Register Rcurr_slot = Rtemp1;
+  const Register Rlimit     = Rtemp2;
+  const jint delta = -frame::interpreter_frame_monitor_size() * wordSize;
+
+  assert((delta & LongAlignmentMask) == 0,
+         "sizeof BasicObjectLock must be even number of doublewords");
+  assert(2 * wordSize == -delta, "this works only as long as delta == -2*wordSize");
+  assert(Rcurr_slot != Z_R0, "Register must be usable as base register");
+  assert_different_registers(Rlimit, Rcurr_slot, Rtemp3);
+
+  get_monitors(Rlimit);
+
+  // Adjust stack pointer for additional monitor entry.
+  resize_frame(RegisterOrConstant((intptr_t) delta), Z_fp, false);
+
+  if (!stack_is_empty) {
+    // Must copy stack contents down.
+    NearLabel next, done;
+
+    // Rcurr_slot := addr(Tos), Z_esp is pointing below it!
+    add2reg(Rcurr_slot, wordSize, Z_esp);
+
+    // Nothing to do, if already at monitor area.
+    compareU64_and_branch(Rcurr_slot, Rlimit, bcondNotLow, done);
+
+    bind(next);
+
+    // Move one stack slot.
+    mem2reg_opt(Rtemp3, Address(Rcurr_slot));
+    reg2mem_opt(Rtemp3, Address(Rcurr_slot, delta));
+    add2reg(Rcurr_slot, wordSize);
+    compareU64_and_branch(Rcurr_slot, Rlimit, bcondLow, next); // Are we done?
+
+    bind(done);
+    // Done copying stack.
+  }
+
+  // Adjust expression stack and monitor pointers.
+  add2reg(Z_esp, delta);
+  add2reg(Rlimit, delta);
+  save_monitors(Rlimit);
+}
+
+// Load the int local at slot `index` into dst.
+// Note: Index holds the offset in bytes afterwards (i.e. it is converted in
+// place from a slot number to a byte offset and thus clobbered).
+// You can use this to store a new value (with Llocals as the base).
+void InterpreterMacroAssembler::access_local_int(Register index, Register dst) {
+  z_sllg(index, index, LogBytesPerWord);
+  mem2reg_opt(dst, Address(Z_locals, index), false);
+}
+
+// Verify reg as an oop, but only when the TOS state says it holds one (atos).
+void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) {
+  if (state != atos) { return; }  // Non-object TOS states carry no oop.
+  MacroAssembler::verify_oop(reg);
+}
+
+// Inline assembly for:
+//
+// if (thread is in interp_only_mode) {
+//   InterpreterRuntime::post_method_entry();
+// }
+
+void InterpreterMacroAssembler::notify_method_entry() {
+
+  // JVMTI
+  // Whenever JVMTI puts a thread in interp_only_mode, method
+  // entry/exit events are sent for that thread to track stack
+  // depth. If it is possible to enter interp_only_mode we add
+  // the code to check if the event should be sent.
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label jvmti_post_done;
+    MacroAssembler::load_and_test_int(Z_R0, Address(Z_thread, JavaThread::interp_only_mode_offset()));
+    z_bre(jvmti_post_done); // Not in interp_only_mode: nothing to post.
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry), /*check_exceptions=*/false);
+    bind(jvmti_post_done);
+  }
+}
+
+// Inline assembly for:
+//
+// if (thread is in interp_only_mode) {
+//   if (!native_method) save result
+//   InterpreterRuntime::post_method_exit();
+//   if (!native_method) restore result
+// }
+// if (DTraceMethodProbes) {
+//   SharedRuntime::dtrace_method_exit(thread, method);
+// }
+//
+// For native methods their result is stored in z_ijava_state.lresult
+// and z_ijava_state.fresult before coming here.
+// Java methods have their result stored in the expression stack.
+//
+// Notice the dependency to frame::interpreter_frame_result().
+void InterpreterMacroAssembler::notify_method_exit(bool native_method,
+                                                   TosState state,
+                                                   NotifyMethodExitMode mode) {
+  // JVMTI
+  // Whenever JVMTI puts a thread in interp_only_mode, method
+  // entry/exit events are sent for that thread to track stack
+  // depth. If it is possible to enter interp_only_mode we add
+  // the code to check if the event should be sent.
+  if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+    Label jvmti_post_done;
+    MacroAssembler::load_and_test_int(Z_R0, Address(Z_thread, JavaThread::interp_only_mode_offset()));
+    z_bre(jvmti_post_done); // Not in interp_only_mode: nothing to post.
+    if (!native_method) push(state); // see frame::interpreter_frame_result()
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), /*check_exceptions=*/false);
+    if (!native_method) pop(state);
+    bind(jvmti_post_done);
+  }
+
+#if 0
+  // Dtrace currently not supported on z/Architecture.
+  {
+    SkipIfEqual skip(this, &DTraceMethodProbes, false);
+    push(state);
+    get_method(c_rarg1);
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+                 r15_thread, c_rarg1);
+    pop(state);
+  }
+#endif
+}
+
+// Emit a branch to Lskip taken when the thread is in interp_only_mode
+// (JVMTI). Emits nothing if JVMTI interpreter events are impossible.
+//   Lskip    - branch target for the interp_only_mode case.
+//   Rscratch - scratch register, receives the interp_only_mode flag.
+void InterpreterMacroAssembler::skip_if_jvmti_mode(Label &Lskip, Register Rscratch) {
+  if (JvmtiExport::can_post_interpreter_events()) {
+    load_and_test_int(Rscratch, Address(Z_thread, JavaThread::interp_only_mode_offset()));
+    z_brnz(Lskip);
+  }
+}
+
+// Pop the topmost TOP_IJAVA_FRAME and set it's sender_sp as new Z_SP.
+// The return pc is loaded into the register return_pc.
+//
+// Registers updated:
+//     return_pc  - The return pc of the calling frame.
+//     tmp1, tmp2 - scratch
+void InterpreterMacroAssembler::pop_interpreter_frame(Register return_pc, Register tmp1, Register tmp2) {
+  // F0  Z_SP -> caller_sp (F1's)
+  //             ...
+  //             sender_sp (F1's)
+  //             ...
+  // F1  Z_fp -> caller_sp (F2's)
+  //             return_pc (Continuation after return from F0.)
+  //             ...
+  // F2          caller_sp
+
+  // Remove F0's activation. Restoring Z_SP to sender_sp reverts modifications
+  // (a) by a c2i adapter and (b) by generate_fixed_frame().
+  // In case (a) the new top frame F1 is an unextended compiled frame.
+  // In case (b) F1 is converted from PARENT_IJAVA_FRAME to TOP_IJAVA_FRAME.
+
+  // Case (b) seems to be redundant when returning to a interpreted caller,
+  // because then the caller's top_frame_sp is installed as sp (see
+  // TemplateInterpreterGenerator::generate_return_entry_for ()). But
+  // pop_interpreter_frame() is also used in exception handling and there the
+  // frame type of the caller is unknown, therefore top_frame_sp cannot be used,
+  // so it is important that sender_sp is the caller's sp as TOP_IJAVA_FRAME.
+
+  Register R_f1_sender_sp = tmp1;
+  Register R_f2_sp = tmp2;
+
+  // First, check for the interpreter frame's magic.
+  asm_assert_ijava_state_magic(R_f2_sp/*tmp*/);
+  z_lg(R_f2_sp, _z_parent_ijava_frame_abi(callers_sp), Z_fp);
+  z_lg(R_f1_sender_sp, _z_ijava_state_neg(sender_sp), Z_fp);
+  if (return_pc->is_valid())
+    z_lg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_fp);
+  // Pop F0 by resizing to R_f1_sender_sp and using R_f2_sp as fp.
+  resize_frame_absolute(R_f1_sender_sp, R_f2_sp, false/*load fp*/);
+
+#ifdef ASSERT
+  // The return_pc in the new top frame is dead... at least that's my
+  // current understanding; to assert this I overwrite it.
+  load_const_optimized(Z_ARG3, 0xb00b1);
+  z_stg(Z_ARG3, _z_parent_ijava_frame_abi(return_pc), Z_SP);
+#endif
+}
+
+// Verify FPU state for the given TOS state and stack depth.
+// Not implemented on this platform: running with -XX:+VerifyFPU
+// stops at unimplemented().
+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) {
+  if (VerifyFPU) {
+    unimplemented("verifyFPU"); // Fixed misspelled diagnostic text ("verfiyFPU").
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/interp_masm_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP
+#define CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "interpreter/invocationCounter.hpp"
+
+// This file specializes the assember with interpreter-specific macros.
+
+class InterpreterMacroAssembler: public MacroAssembler {
+
+ protected:
+  // Interpreter specific version of call_VM_base().
+  virtual void call_VM_leaf_base(address entry_point);
+  virtual void call_VM_leaf_base(address entry_point, bool allow_relocation);
+
+  virtual void call_VM_base(Register oop_result,
+                            Register last_java_sp,
+                            address  entry_point,
+                            bool check_exceptions);
+  virtual void call_VM_base(Register oop_result,
+                            Register last_java_sp,
+                            address  entry_point,
+                            bool allow_relocation,
+                            bool check_exceptions);
+
+  virtual void check_and_handle_popframe(Register java_thread);
+  virtual void check_and_handle_earlyret(Register java_thread);
+
+  // Base routine for all dispatches.
+  void dispatch_base(TosState state, address* table);
+
+ public:
+  InterpreterMacroAssembler(CodeBuffer* c)
+    : MacroAssembler(c) {}
+
+  void jump_to_entry(address entry, Register Rscratch);
+
+  virtual void load_earlyret_value(TosState state);
+
+  static const Address l_tmp;
+  static const Address d_tmp;
+
+  // Handy address generation macros.
+#define thread_(field_name) Address(Z_thread, JavaThread::field_name ## _offset())
+#define method_(field_name) Address(Z_method, Method::field_name ## _offset())
+#define method2_(Rmethod, field_name) Address(Rmethod, Method::field_name ## _offset())
+
+  // Helper routine for frame allocation/deallocation.
+  // Compute the delta by which the caller's SP has to
+  // be adjusted to accomodate for the non-argument locals.
+  void compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta);
+
+  // dispatch routines
+  void dispatch_prolog(TosState state, int step = 0);
+  void dispatch_epilog(TosState state, int step = 0);
+  void dispatch_only(TosState state);
+  // Dispatch normal table via Z_bytecode (assume Z_bytecode is loaded already).
+  void dispatch_only_normal(TosState state);
+  void dispatch_normal(TosState state);
+  void dispatch_next(TosState state, int step = 0);
+  void dispatch_next_noverify_oop(TosState state, int step = 0);
+  void dispatch_via(TosState state, address* table);
+
+  // Jump to an invoked target.
+  void prepare_to_jump_from_interpreted(Register method);
+  void jump_from_interpreted(Register method, Register temp);
+
+  // Removes the current activation (incl. unlocking of monitors).
+  // Additionally this code is used for earlyReturn in which case we
+  // want to skip throwing an exception and installing an exception.
+  void remove_activation(TosState state,
+                         Register return_pc,
+                         bool throw_monitor_exception = true,
+                         bool install_monitor_exception = true,
+                         bool notify_jvmti = true);
+
+ public:
+  // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls.
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+  void super_call_VM(Register thread_cache, Register oop_result, Register last_java_sp,
+                     address entry_point, Register arg_1, Register arg_2, bool check_exception = true);
+
+  // Generate a subtype check: branch to ok_is_subtype if sub_klass is
+  // a subtype of super_klass. Blows registers tmp1, tmp2 and tmp3.
+  void gen_subtype_check(Register sub_klass, Register super_klass, Register tmp1, Register tmp2, Label &ok_is_subtype);
+
+  void get_cache_and_index_at_bcp(Register cache, Register cpe_offset, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register cpe_offset, Register bytecode,
+                                               int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2));
+  void load_resolved_reference_at_index(Register result, Register index);
+
+  // Pop topmost element from stack. It just disappears. Useful if
+  // consumed previously by access via stackTop().
+  void popx(int len);
+  void pop_i()   { popx(1); }
+  void pop_ptr() { popx(1); }
+  void pop_l()   { popx(2); }
+  void pop_f()   { popx(1); }
+  void pop_d()   { popx(2); }
+  // Get Address object of stack top. No checks. No pop.
+  // Purpose: provide address of stack operand to exploit reg-mem operations.
+  // Avoid RISC-like mem2reg - reg-reg-op sequence.
+  Address stackTop();
+
+  // Helpers for expression stack.
+  void pop_i(     Register r);
+  void pop_ptr(   Register r);
+  void pop_l(     Register r);
+  void pop_f(FloatRegister f);
+  void pop_d(FloatRegister f);
+
+  void push_i(     Register r = Z_tos);
+  void push_ptr(   Register r = Z_tos);
+  void push_l(     Register r = Z_tos);
+  void push_f(FloatRegister f = Z_ftos);
+  void push_d(FloatRegister f = Z_ftos);
+
+  // Helpers for swap and dup.
+  void load_ptr(int n, Register val);
+  void store_ptr(int n, Register val);
+
+  void pop (TosState state);           // transition vtos -> state
+  void push(TosState state);           // transition state -> vtos
+  void empty_expression_stack(void);
+
+#ifdef ASSERT
+  void verify_sp(Register Rsp, Register Rtemp);
+  void verify_esp(Register Resp, Register Rtemp); // Verify that Resp points to a word in the operand stack.
+#endif // ASSERT
+
+ public:
+  void if_cmp(Condition cc, bool ptr_compare);
+
+  // Accessors to the template interpreter state.
+
+  void asm_assert_ijava_state_magic(Register tmp) PRODUCT_RETURN;
+
+  void save_bcp();
+
+  void restore_bcp();
+
+  void save_esp();
+
+  void restore_esp();
+
+  void get_monitors(Register reg);
+
+  void save_monitors(Register reg);
+
+  void get_mdp(Register mdp);
+
+  void save_mdp(Register mdp);
+
+  // Values that are only read (besides initialization).
+  void restore_locals();
+
+  void get_method(Register reg);
+
+  // Load values from bytecode stream:
+
+  enum signedOrNot { Signed, Unsigned };
+  enum setCCOrNot  { set_CC,  dont_set_CC };
+
+  void get_2_byte_integer_at_bcp(Register    Rdst,
+                                 int         bcp_offset,
+                                 signedOrNot is_signed  );
+
+  void get_4_byte_integer_at_bcp(Register   Rdst,
+                                 int        bcp_offset,
+                                 setCCOrNot should_set_CC = dont_set_CC);
+
+  // common code
+
+  void field_offset_at(int n, Register tmp, Register dest, Register base);
+  int  field_offset_at(Register object, address bcp, int offset);
+  void fast_iaaccess(int n, address bcp);
+  void fast_iaputfield(address bcp, bool do_store_check);
+
+  void index_check(Register array, Register index, int index_shift, Register tmp, Register res);
+  void index_check_without_pop(Register array, Register index, int index_shift, Register tmp, Register res);
+
+  void get_constant_pool(Register Rdst);
+  void get_constant_pool_cache(Register Rdst);
+  void get_cpool_and_tags(Register Rcpool, Register Rtags);
+  void is_a(Label& L);
+
+
+  // --------------------------------------------------
+
+  void unlock_if_synchronized_method(TosState state, bool throw_monitor_exception = true, bool install_monitor_exception = true);
+
+  void add_monitor_to_stack(bool stack_is_empty,
+                            Register Rtemp,
+                            Register Rtemp2,
+                            Register Rtemp3);
+
+  void access_local_int(Register index, Register dst);
+  void access_local_ptr(Register index, Register dst);
+  void access_local_long(Register index, Register dst);
+  void access_local_float(Register index, FloatRegister dst);
+  void access_local_double(Register index, FloatRegister dst);
+#ifdef ASSERT
+  void check_for_regarea_stomp(Register Rindex, int offset, Register Rlimit, Register Rscratch, Register Rscratch1);
+#endif // ASSERT
+  void store_local_int(Register index, Register src);
+  void store_local_ptr(Register index, Register src);
+  void store_local_long(Register index, Register src);
+  void store_local_float(Register index, FloatRegister src);
+  void store_local_double(Register index, FloatRegister src);
+
+
+  Address first_local_in_stack();
+  static int top_most_monitor_byte_offset(); // Offset in bytes to top of monitor block.
+  Address top_most_monitor();
+  void compute_stack_base(Register Rdest);
+
+  enum LoadOrStore { load, store };
+  void static_iload_or_store(int which_local, LoadOrStore direction, Register Rtmp);
+  void static_aload_or_store(int which_local, LoadOrStore direction, Register Rtmp);
+  void static_dload_or_store(int which_local, LoadOrStore direction);
+
+  void static_iinc(          int which_local, jint increment, Register Rtmp, Register Rtmp2);
+
+  void get_method_counters(Register Rmethod, Register Rcounters, Label& skip);
+  void increment_invocation_counter(Register Rcounters, Register RctrSum);
+  void increment_backedge_counter(Register Rcounters, Register RctrSum);
+  void test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp);
+
+  void record_static_call_in_profile(Register Rentry, Register Rtmp);
+  void record_receiver_call_in_profile(Register Rklass, Register Rentry, Register Rtmp);
+
+  // Object locking
+  void lock_object  (Register lock_reg, Register obj_reg);
+  void unlock_object(Register lock_reg, Register obj_reg=noreg);
+
+  // Interpreter profiling operations
+  void set_method_data_pointer_for_bcp();
+  void test_method_data_pointer(Register mdp, Label& zero_continue);
+  void verify_method_data_pointer();
+
+  void set_mdp_data_at(Register mdp_in, int constant, Register value);
+  void increment_mdp_data_at(Register mdp_in, int constant,
+                             Register tmp = Z_R1_scratch, bool decrement = false);
+  void increment_mask_and_jump(Address counter_addr,
+                               int increment, Address mask,
+                               Register scratch, bool preloaded,
+                               branch_condition cond, Label* where);
+  void set_mdp_flag_at(Register mdp_in, int flag_constant);
+  void test_mdp_data_at(Register mdp_in, int offset, Register value,
+                        Register test_value_out,
+                        Label& not_equal_continue);
+
+  void record_klass_in_profile(Register receiver, Register mdp,
+                               Register reg2, bool is_virtual_call);
+  void record_klass_in_profile_helper(Register receiver, Register mdp,
+                                      Register reg2, int start_row,
+                                      Label& done, bool is_virtual_call);
+
+  void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
+  void update_mdp_by_offset(Register mdp_in, Register dataidx, int offset_of_disp);
+  void update_mdp_by_constant(Register mdp_in, int constant);
+  void update_mdp_for_ret(Register return_bci);
+
+  void profile_taken_branch(Register mdp, Register bumped_count);
+  void profile_not_taken_branch(Register mdp);
+  void profile_call(Register mdp);
+  void profile_final_call(Register mdp);
+  void profile_virtual_call(Register receiver, Register mdp,
+                            Register scratch2,
+                            bool receiver_can_be_null = false);
+  void profile_ret(Register return_bci, Register mdp);
+  void profile_null_seen(Register mdp);
+  void profile_typecheck(Register mdp, Register klass, Register scratch);
+  void profile_typecheck_failed(Register mdp, Register tmp);
+  void profile_switch_default(Register mdp);
+  void profile_switch_case(Register index_in_scratch, Register mdp,
+                           Register scratch1, Register scratch2);
+
+  void profile_obj_type(Register obj, Address mdo_addr, Register klass, bool cmp_done = false);
+  void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual);
+  void profile_return_type(Register mdp, Register ret, Register tmp);
+  void profile_parameters_type(Register mdp, Register tmp1, Register tmp2);
+
+  // Debugging
+  void verify_oop(Register reg, TosState state = atos);    // Only if +VerifyOops && state == atos.
+  void verify_oop_or_return_address(Register reg, Register rtmp); // for astore
+  void verify_FPU(int stack_depth, TosState state = ftos);
+
+  // JVMTI helpers
+  void skip_if_jvmti_mode(Label &Lskip, Register Rscratch = Z_R0);
+
+  // support for JVMTI/Dtrace
+  typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
+  void notify_method_entry();
+  void notify_method_exit(bool native_method, TosState state, NotifyMethodExitMode mode);
+
+  // Pop the topmost TOP_IJAVA_FRAME and set its sender_sp as the new Z_SP.
+  // The return pc is loaded into the Register return_pc.
+  void pop_interpreter_frame(Register return_pc, Register tmp1, Register tmp2);
+};
+
+#endif // CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/interpreterRT_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/signature.hpp"
+
+// Access macros for Java and C arguments.
+// First Java argument is at index-1.
+#define locals_j_arg_at(index) Address(Z_R1/*locals*/, in_ByteSize((-(index)*wordSize)))
+
+#define __ _masm->
+
+// Compute the C-stack offset (relative to Z_SP) of integer argument number
+// arg_nr. arg_nr and fp_arg_nr are both 1-based counts over all args seen so
+// far, so the derived integer-argument index starts at 0.
+static int sp_c_int_arg_offset(int arg_nr, int fp_arg_nr) {
+  const int int_arg_nr = arg_nr - fp_arg_nr;
+
+  if (int_arg_nr < 5) {
+    // Passed in a register: slot within the register argument save area.
+    return int_arg_nr * wordSize + _z_abi(carg_1);
+  }
+  // Passed on the stack: skip the register-passed int args plus any fp args
+  // that overflowed to the stack area as well.
+  const int stack_slot = (int_arg_nr - 5) + (fp_arg_nr > 4 ? fp_arg_nr - 4 : 0);
+  return stack_slot * wordSize + _z_abi(remaining_cargs);
+}
+
+// Compute the C-stack offset (relative to Z_SP) of floating-point argument
+// number fp_arg_nr. arg_nr and fp_arg_nr are both 1-based counts, so the
+// derived integer-argument index starts at 0.
+static int sp_c_fp_arg_offset(int arg_nr, int fp_arg_nr) {
+  const int int_arg_nr = arg_nr - fp_arg_nr;
+
+  if (fp_arg_nr < 5) {
+    // Passed in a floating-point register: slot within the fp save area.
+    return (fp_arg_nr - 1) * wordSize + _z_abi(cfarg_1);
+  }
+  // Passed on the stack: skip the register-passed fp args plus any int args
+  // that overflowed to the stack area as well.
+  const int stack_slot = (fp_arg_nr - 5) + (int_arg_nr > 4 ? int_arg_nr - 4 : 0);
+  return stack_slot * wordSize + _z_abi(remaining_cargs);
+}
+
+// Implementation of SignatureHandlerGenerator
+
+// Move one int argument from the Java locals into the C calling convention:
+// the first 5 integer args (index 0..4) go in registers starting at Z_ARG1,
+// later ones are written to the caller's stack argument area.
+// NOTE(review): `as_Register(int_arg_nr) + Z_ARG1->encoding()` relies on
+// pointer arithmetic on Register; pass_float does the addition inside
+// as_FloatRegister(...) instead - confirm both forms select the same register.
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
+  int int_arg_nr = jni_offset() - _fp_arg_nr;
+  Register r = (int_arg_nr < 5 /*max_int_register_arguments*/) ?
+                 as_Register(int_arg_nr) + Z_ARG1->encoding() : Z_R0;
+
+  // lgf: load 32-bit value with sign extension to 64 bits.
+  __ z_lgf(r, locals_j_arg_at(offset()));
+  // DEBUG_ONLY(true ||): debug builds always store the arg to its stack
+  // slot as well; product builds store only when it is a stack argument.
+  if (DEBUG_ONLY(true ||) int_arg_nr >= 5) {
+    __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP);
+  }
+}
+
+// Move one long argument from the Java locals into the C calling convention.
+// Register selection matches pass_int (first 5 integer args in registers).
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
+  int int_arg_nr = jni_offset() - _fp_arg_nr;
+  Register r = (int_arg_nr < 5 /*max_int_register_arguments*/) ?
+                 as_Register(int_arg_nr) + Z_ARG1->encoding() : Z_R0;
+
+  __ z_lg(r, locals_j_arg_at(offset() + 1)); // Long resides in upper slot.
+  // Debug builds always mirror the arg to the stack slot (DEBUG_ONLY(true ||)).
+  if (DEBUG_ONLY(true ||) int_arg_nr >= 5) {
+    __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP);
+  }
+}
+
+// Move one float argument into the C calling convention: the first 4 fp args
+// go in fp registers starting at Z_FARG1 (encodings stride by 2), later ones
+// go to the stack argument area.
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
+  FloatRegister fp_reg = (_fp_arg_nr < 4/*max_fp_register_arguments*/) ?
+                           as_FloatRegister((_fp_arg_nr * 2) + Z_FARG1->encoding()) : Z_F1;
+  _fp_arg_nr++;
+  __ z_ley(fp_reg, locals_j_arg_at(offset()));
+  // Debug builds always mirror the arg to the stack slot (DEBUG_ONLY(true ||)).
+  if (DEBUG_ONLY(true ||) _fp_arg_nr > 4) {
+    // +4: the 4-byte float is placed in the second half of the 8-byte stack
+    // slot (right-justified, big-endian - TODO confirm against the z/ABI).
+    __ z_ste(fp_reg, sp_c_fp_arg_offset(jni_offset(), _fp_arg_nr) + 4, Z_SP);
+  }
+}
+
+// Move one double argument into the C calling convention; register selection
+// matches pass_float. The value is read from the upper of its two local slots.
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
+  FloatRegister fp_reg = (_fp_arg_nr < 4/*max_fp_register_arguments*/) ?
+                           as_FloatRegister((_fp_arg_nr*2) + Z_FARG1->encoding()) : Z_F1;
+  _fp_arg_nr++;
+  __ z_ldy(fp_reg, locals_j_arg_at(offset()+1));
+  // Debug builds always mirror the arg to the stack slot (DEBUG_ONLY(true ||)).
+  if (DEBUG_ONLY(true ||) _fp_arg_nr > 4) {
+    __ z_std(fp_reg, sp_c_fp_arg_offset(jni_offset(), _fp_arg_nr), Z_SP);
+  }
+}
+
+// Pass an object argument as a JNI handle: either the address of the local
+// slot holding the oop, or NULL if the local itself is NULL.
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
+  int int_arg_nr = jni_offset() - _fp_arg_nr;
+  Register  r = (int_arg_nr < 5 /*max_int_register_arguments*/) ?
+                  as_Register(int_arg_nr) + Z_ARG1->encoding() : Z_R0;
+
+  // The handle for a receiver will never be null.
+  bool do_NULL_check = offset() != 0 || is_static();
+
+  Label do_null;
+  if (do_NULL_check) {
+    // Preset the handle to NULL, then test the local; if it is NULL, skip
+    // the address computation and pass the NULL handle.
+    __ clear_reg(r, true, false);
+    __ load_and_test_long(Z_R0, locals_j_arg_at(offset()));
+    __ z_bre(do_null);
+  }
+  // Non-null: the handle is the address of the local slot itself.
+  __ add2reg(r, -offset() * wordSize, Z_R1 /* locals */);
+  __ bind(do_null);
+  // Debug builds always mirror the arg to the stack slot (DEBUG_ONLY(true ||)).
+  if (DEBUG_ONLY(true ||) int_arg_nr >= 5) {
+    __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP);
+  }
+}
+
+
+// Emit the complete signature handler: shuffle all Java locals into the C
+// calling convention (via the pass_* callbacks driven by iterate()), load the
+// address of the result handler into Z_RET and return to the caller.
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
+  __ z_lgr(Z_R1, Z_ARG1); // Z_R1 is used in locals_j_arg_at(index) macro.
+
+  // Generate code to handle arguments.
+  iterate(fingerprint);
+  __ load_const_optimized(Z_RET, AbstractInterpreter::result_handler(method()->result_type()));
+  __ z_br(Z_R14);
+  __ flush();
+}
+
+#undef  __
+
+// Implementation of SignatureHandlerLibrary
+
+// No platform-specific post-processing of installed handlers is needed.
+void SignatureHandlerLibrary::pd_set_handler(address handler) {}
+
+// Return the base address of the method's signature Symbol (native methods only).
+IRT_ENTRY(address, InterpreterRuntime::get_signature(JavaThread* thread, Method* method))
+  methodHandle m(thread, method);
+  assert(m->is_native(), "sanity check");
+  Symbol *s = m->signature();
+  return (address) s->base();
+IRT_END
+
+// Return the shared result handler for the method's result type (native methods only).
+IRT_ENTRY(address, InterpreterRuntime::get_result_handler(JavaThread* thread, Method* method))
+  methodHandle m(thread, method);
+  assert(m->is_native(), "sanity check");
+  return AbstractInterpreter::result_handler(m->result_type());
+IRT_END
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/interpreterRT_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_INTERPRETERRT_S390_HPP
+#define CPU_S390_VM_INTERPRETERRT_S390_HPP
+
+#include "memory/allocation.hpp"
+
+// NOTE(review): the declarations below are presumably spliced into class
+// InterpreterRuntime by the including header - confirm against
+// interpreter/interpreterRuntime.hpp.
+static int binary_search(int key, LookupswitchPair* array, int n);
+
+static address iload (JavaThread* thread);
+static address aload (JavaThread* thread);
+static address istore(JavaThread* thread);
+static address astore(JavaThread* thread);
+static address iinc  (JavaThread* thread);
+
+// native method calls
+
+// Generates the machine code that moves a native method's arguments from
+// the Java locals into the C calling convention (see interpreterRT_s390.cpp).
+class SignatureHandlerGenerator: public NativeSignatureIterator {
+ private:
+  MacroAssembler* _masm;
+  // Number of floating-point arguments (floats and doubles) seen so far.
+  int _fp_arg_nr;
+
+  // Callbacks invoked by NativeSignatureIterator::iterate(), one per
+  // argument kind.
+  void pass_int();
+  void pass_long();
+  void pass_double();
+  void pass_float();
+  void pass_object();
+
+ public:
+  // creation
+  SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
+    _masm = new MacroAssembler(buffer);
+    _fp_arg_nr = 0;
+  }
+
+  // code generation
+  void generate(uint64_t fingerprint);
+};
+
+static address get_result_handler(JavaThread* thread, Method* method);
+
+static address get_signature(JavaThread* thread, Method* method);
+
+#endif // CPU_S390_VM_INTERPRETERRT_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/javaFrameAnchor_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP
+#define CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP
+
+ public:
+
+  // Each arch must define reset, save, restore.
+  // These are used by objects that only care about:
+  //  1 - initializing a new state (thread creation, javaCalls)
+  //  2 - saving a current state (javaCalls)
+  //  3 - restoring an old state (javaCalls).
+
+  // Reset to the "no last Java frame" state (thread creation, javaCalls).
+  inline void clear(void) {
+    // Clearing _last_Java_sp must be first.
+    OrderAccess::release();
+    _last_Java_sp = NULL;
+    // Fence?
+    OrderAccess::fence();
+
+    _last_Java_pc = NULL;
+  }
+
+  // Publish a new last Java frame: the pc is written first, then the sp with
+  // release ordering, so an observer that sees a non-NULL sp also sees the pc.
+  inline void set(intptr_t* sp, address pc) {
+    _last_Java_pc = pc;
+
+    OrderAccess::release();
+    _last_Java_sp = sp;
+  }
+
+  // Copy another anchor's state into this one while keeping the transition
+  // state observable-consistent (sp invalidated first, published last).
+  void copy(JavaFrameAnchor* src) {
+    // In order to make sure the transition state is valid for "this"
+    // we must clear _last_Java_sp before copying the rest of the new data.
+    // Hack Alert: Temporary bugfix for 4717480/4721647
+    // To act like previous version (pd_cache_state) don't NULL _last_Java_sp
+    // unless the value is changing.
+    //
+    if (_last_Java_sp != src->_last_Java_sp) {
+      OrderAccess::release();
+      _last_Java_sp = NULL;
+      OrderAccess::fence();
+    }
+    _last_Java_pc = src->_last_Java_pc;
+    // Must be last so profiler will always see valid frame if has_last_frame() is true.
+
+    OrderAccess::release();
+    _last_Java_sp = src->_last_Java_sp;
+  }
+
+  // We don't have to flush registers, so the stack is always walkable.
+  inline bool walkable(void) { return true; }
+  inline void make_walkable(JavaThread* thread) { }
+
+ public:
+
+  // We don't have a frame pointer.
+  intptr_t* last_Java_fp(void)        { return NULL; }
+
+  intptr_t* last_Java_sp() const      { return _last_Java_sp; }
+  // Publish with release ordering so prior stores to the frame are visible
+  // before the sp becomes observable.
+  void set_last_Java_sp(intptr_t* sp) { OrderAccess::release(); _last_Java_sp = sp; }
+
+  address last_Java_pc(void)          { return _last_Java_pc; }
+
+#endif // CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/jniFastGetField_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+
+// TSO ensures that loads are blocking and ordered with respect
+// to earlier loads, so we don't need LoadLoad membars.
+
+#define __ masm->
+
+#define BUFFER_SIZE 30*sizeof(jint)
+
+// Fast JNI field accessors are disabled on this platform for every type;
+// (address)-1 is presumably interpreted by the shared code as "no fast
+// accessor available, use the slow path" - confirm in jniFastGetField.hpp.
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
+  // Don't use fast jni accessors.
+  return (address) -1;
+}
+
+address JNI_FastGetField::generate_fast_get_boolean_field() {
+  return generate_fast_get_int_field0(T_BOOLEAN);
+}
+
+address JNI_FastGetField::generate_fast_get_byte_field() {
+  return generate_fast_get_int_field0(T_BYTE);
+}
+
+address JNI_FastGetField::generate_fast_get_char_field() {
+  return generate_fast_get_int_field0(T_CHAR);
+}
+
+address JNI_FastGetField::generate_fast_get_short_field() {
+  return generate_fast_get_int_field0(T_SHORT);
+}
+
+address JNI_FastGetField::generate_fast_get_int_field() {
+  return generate_fast_get_int_field0(T_INT);
+}
+
+address JNI_FastGetField::generate_fast_get_long_field() {
+  // Don't use fast jni accessors.
+  return (address) -1;
+}
+
+address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
+  // Don't use fast jni accessors.
+  return (address) -1;
+}
+
+address JNI_FastGetField::generate_fast_get_float_field() {
+  return generate_fast_get_float_field0(T_FLOAT);
+}
+
+address JNI_FastGetField::generate_fast_get_double_field() {
+  return generate_fast_get_float_field0(T_DOUBLE);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/jniTypes_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_JNITYPES_S390_HPP
+#define CPU_S390_VM_JNITYPES_S390_HPP
+
+// This file holds platform-dependent routines used to write primitive
+// jni types to the array of arguments passed into JavaCalls::call.
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+#include "prims/jni.h"
+
+class JNITypes : AllStatic {
+  // These functions write a java primitive type (in native format) to
+  // a java stack slot array to be passed as an argument to
+  // JavaCalls:calls. I.e., they are functionally 'push' operations
+  // if they have a 'pos' formal parameter. Note that jlongs and
+  // jdoubles are written _in reverse_ of the order in which they
+  // appear in the interpreter stack. This is because call stubs (see
+  // stubGenerator_s390.cpp) reverse the argument list constructed by
+  // JavaCallArguments (see javaCalls.hpp).
+
+ public:
+  // 'pos' counts JavaCallArgument slots: one-slot types advance it by 1,
+  // longs and doubles occupy two slots and advance it by 2 (value in the
+  // second slot).
+
+  // Ints are stored in native format in one JavaCallArgument slot at *to.
+  static inline void put_int(jint  from, intptr_t *to) {
+    *(jint*) to = from;
+  }
+
+  static inline void put_int(jint  from, intptr_t *to, int& pos) {
+    *(jint*) (to + pos++) = from;
+  }
+
+  static inline void put_int(jint *from, intptr_t *to, int& pos) {
+    *(jint*) (to + pos++) = *from;
+  }
+
+  // Longs are stored in native format in one JavaCallArgument slot at *(to+1).
+  static inline void put_long(jlong  from, intptr_t *to) {
+    *(jlong*) (to + 1) = from;
+  }
+
+  static inline void put_long(jlong  from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
+
+  // Oops are stored in native format in one JavaCallArgument slot at *to.
+  static inline void put_obj(oop  from, intptr_t *to) {
+    *(oop*) to = from;
+  }
+
+  static inline void put_obj(oop  from, intptr_t *to, int& pos) {
+    *(oop*) (to + pos++) = from;
+  }
+
+  static inline void put_obj(oop *from, intptr_t *to, int& pos) {
+    *(oop*) (to + pos++) = *from;
+  }
+
+  // Floats are stored in native format in one JavaCallArgument slot at *to.
+  static inline void put_float(jfloat  from, intptr_t *to) {
+    *(jfloat*) to = from;
+  }
+
+  static inline void put_float(jfloat  from, intptr_t *to, int& pos) {
+    *(jfloat*) (to + pos++) = from;
+  }
+
+  static inline void put_float(jfloat *from, intptr_t *to, int& pos) {
+    *(jfloat*) (to + pos++) = *from;
+  }
+
+  // Doubles are stored in native word format in one JavaCallArgument
+  // slot at *(to+1).
+  static inline void put_double(jdouble  from, intptr_t *to) {
+    *(jdouble*) (to + 1) = from;
+  }
+
+  static inline void put_double(jdouble  from, intptr_t *to, int& pos) {
+    *(jdouble*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
+    *(jdouble*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
+
+  // The get_xxx routines, on the other hand, actually _do_ fetch
+  // java primitive types from the interpreter stack.
+  // No need to worry about alignment on z/Architecture.
+  static inline jint get_int(intptr_t *from) {
+    return *(jint*) from;
+  }
+
+  static inline jlong get_long(intptr_t *from) {
+    return *(jlong*) (from + 1);
+  }
+
+  static inline oop get_obj(intptr_t *from) {
+    return *(oop*) from;
+  }
+
+  static inline jfloat get_float(intptr_t *from) {
+    return *(jfloat*) from;
+  }
+
+  static inline jdouble get_double(intptr_t *from) {
+    return *(jdouble*) (from + 1);
+  }
+};
+
+#endif // CPU_S390_VM_JNITYPES_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/jni_s390.h	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef _JAVASOFT_JNI_MD_H_
+#define _JAVASOFT_JNI_MD_H_
+
+// Export/import decorations: use ELF default visibility on gcc >= 4 so JNI
+// entry points stay visible even when built with -fvisibility=hidden.
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+  #define JNIEXPORT     __attribute__((visibility("default")))
+  #define JNIIMPORT     __attribute__((visibility("default")))
+#else
+  #define JNIEXPORT
+  #define JNIIMPORT
+#endif
+
+// No special calling convention on this platform.
+#define JNICALL
+
+typedef int jint;
+
+// 64-bit (LP64) platform: a plain long is 64 bits wide.
+typedef long int jlong;
+
+typedef signed char jbyte;
+
+#endif // _JAVASOFT_JNI_MD_H_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/jvmciCodeInstaller_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// JVMCI (JEP 243):
+//   So far, the JVMCI is not supported/implemented on SAP platforms.
+//   This file just serves as a placeholder to be filled in
+//   should the JVMCI ever be implemented.
+#if INCLUDE_JVMCI
+
+#include "jvmci/jvmciCodeInstaller.hpp"
+#include "jvmci/jvmciRuntime.hpp"
+#include "jvmci/jvmciCompilerToVM.hpp"
+#include "jvmci/jvmciJavaClasses.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_s390.inline.hpp"
+
+// Unreachable stubs - JVMCI code installation is not implemented on s390
+// (see the placeholder note at the top of this file).
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+  Unimplemented();
+  return 0;
+}
+
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+  Unimplemented();
+}
+
+// Unreachable stub - JVMCI code installation is not implemented on s390.
+// (The original patch defined this member twice, which is a C++ redefinition
+// error whenever INCLUDE_JVMCI is set; the duplicate is removed.)
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
+  Unimplemented();
+}
+
+// Unreachable stubs - JVMCI relocation support is not implemented on s390.
+void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+  Unimplemented();
+}
+
+// Convert JVMCI register indices (as used in oop maps) to HotSpot registers.
+// Placeholder: always returns NULL until JVMCI is implemented.
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+  return NULL;
+}
+
+// Placeholder: no register classification until JVMCI is implemented.
+bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
+  return false;
+}
+
+#endif // INCLUDE_JVMCI
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,6691 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/codeBuffer.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
+#include "oops/klass.inline.hpp"
+#include "opto/compile.hpp"
+#include "opto/intrinsicnode.hpp"
+#include "opto/matcher.hpp"
+#include "prims/methodHandles.hpp"
+#include "registerSaver_s390.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/events.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif
+
+#include <ucontext.h>
+
+#define BLOCK_COMMENT(str) block_comment(str)
+#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
+
+// Copy the low 32 bits of rs into rd; a self-move emits no instruction.
+void MacroAssembler::lr_if_needed(Register rd, Register rs) {
+  if (rd == rs) { return; }
+  z_lr(rd, rs);
+}
+
+// Copy all 64 bits of rs into rd; a self-move emits no instruction.
+void MacroAssembler::lgr_if_needed(Register rd, Register rs) {
+  if (rd == rs) { return; }
+  z_lgr(rd, rs);
+}
+
+// Zero-extend the low 32 bits of rs into the 64-bit rd; a self-"move"
+// emits no instruction (note: in that case no zero-extension happens either).
+void MacroAssembler::llgfr_if_needed(Register rd, Register rs) {
+  if (rd == rs) { return; }
+  z_llgfr(rd, rs);
+}
+
+// Copy float register rs into rd; a self-move emits no instruction.
+void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) {
+  if (rd == rs) { return; }
+  z_ldr(rd, rs);
+}
+
+// Move integer register if destination and source are different.
+// It is assumed that shorter-than-int types are already
+// appropriately sign-extended.
+//
+// Maps a (src_type -> dst_type) conversion onto the single best
+// move/extension instruction. Identical types degenerate to a plain
+// 64-bit move (elided when dst == src). Unsupported combinations are
+// rejected by asserts. Float types must go through move_freg_if_needed.
+void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src,
+                                        BasicType src_type) {
+  assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types");
+  assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types");
+
+  if (dst_type == src_type) {
+    lgr_if_needed(dst, src); // Just move all 64 bits.
+    return;
+  }
+
+  switch (dst_type) {
+    // Do not support these types for now.
+    //  case T_BOOLEAN:
+    case T_BYTE:  // signed byte
+      switch (src_type) {
+        case T_INT:
+          z_lgbr(dst, src); // Sign-extend the low-order byte to 64 bits.
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+      return;
+
+    case T_CHAR:
+    case T_SHORT:
+      switch (src_type) {
+        case T_INT:
+          if (dst_type == T_CHAR) {
+            z_llghr(dst, src); // char is unsigned: zero-extend the halfword.
+          } else {
+            z_lghr(dst, src);  // short is signed: sign-extend the halfword.
+          }
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+      return;
+
+    case T_INT:
+      switch (src_type) {
+        case T_BOOLEAN:
+        case T_BYTE:
+        case T_CHAR:
+        case T_SHORT:
+        case T_INT:
+        case T_LONG:
+        case T_OBJECT:
+        case T_ARRAY:
+        case T_VOID:
+        case T_ADDRESS:
+          // Sub-int sources are assumed pre-extended (see header comment),
+          // so a plain 32-bit move suffices.
+          lr_if_needed(dst, src);
+          // llgfr_if_needed(dst, src);  // zero-extend (in case we need to find a bug).
+          return;
+
+        default:
+          assert(false, "non-integer src type");
+          return;
+      }
+    case T_LONG:
+      switch (src_type) {
+        case T_BOOLEAN:
+        case T_BYTE:
+        case T_CHAR:
+        case T_SHORT:
+        case T_INT:
+          z_lgfr(dst, src); // sign extension
+          return;
+
+        case T_LONG:
+        case T_OBJECT:
+        case T_ARRAY:
+        case T_VOID:
+        case T_ADDRESS:
+          lgr_if_needed(dst, src);
+          return;
+
+        default:
+          assert(false, "non-integer src type");
+          return;
+      }
+      return;
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_VOID:
+    case T_ADDRESS:
+      switch (src_type) {
+        // These types don't make sense to be converted to pointers:
+        //      case T_BOOLEAN:
+        //      case T_BYTE:
+        //      case T_CHAR:
+        //      case T_SHORT:
+
+        case T_INT:
+          z_llgfr(dst, src); // zero extension
+          return;
+
+        case T_LONG:
+        case T_OBJECT:
+        case T_ARRAY:
+        case T_VOID:
+        case T_ADDRESS:
+          lgr_if_needed(dst, src);
+          return;
+
+        default:
+          assert(false, "non-integer src type");
+          return;
+      }
+      return;
+    default:
+      assert(false, "non-integer dst type");
+      return;
+  }
+}
+
+// Move or convert between float registers.
+// Equal formats collapse to a plain register move (elided when dst == src);
+// differing formats emit the corresponding float<->double conversion.
+void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type,
+                                         FloatRegister src, BasicType src_type) {
+  assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types");
+  assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types");
+
+  if (src_type == dst_type) {
+    ldr_if_needed(dst, src); // Just move all 64 bits.
+    return;
+  }
+  if (dst_type == T_FLOAT) {
+    assert(src_type == T_DOUBLE, "invalid float type combination");
+    z_ledbr(dst, src); // double -> float.
+  } else if (dst_type == T_DOUBLE) {
+    assert(src_type == T_FLOAT, "invalid float type combination");
+    z_ldebr(dst, src); // float -> double.
+  } else {
+    assert(false, "non-float dst type"); // Unreachable given the asserts above.
+  }
+}
+
+// Optimized emitter for reg to mem operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// Data register (reg) cannot be used as work register.
+//
+// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
+// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
+//
+// Strategy, in order of preference:
+//   1. disp fits 12 bits unsigned  -> classic instruction.
+//   2. disp fits 20 bits signed    -> modern (long-displacement) instruction.
+//   3. otherwise fold disp into a register first; which register may be
+//      touched depends on the scratch register passed in. Paths that still
+//      emit the modern instruction will assert on the out-of-range disp.
+void MacroAssembler::freg2mem_opt(FloatRegister reg,
+                                  int64_t       disp,
+                                  Register      index,
+                                  Register      base,
+                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+                                  Register      scratch) {
+  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
+  if (Displacement::is_shortDisp(disp)) {
+    (this->*classic)(reg, disp, index, base);
+  } else {
+    if (Displacement::is_validDisp(disp)) {
+      (this->*modern)(reg, disp, index, base);
+    } else {
+      if (scratch != Z_R0 && scratch != Z_R1) {
+        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
+      } else {
+        if (scratch != Z_R0) {   // scratch == Z_R1
+          if ((scratch == index) || (index == base)) {
+            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
+          } else {
+            add2reg(scratch, disp, base);             // scratch = base + disp.
+            (this->*classic)(reg, 0, index, scratch);
+            if (base == scratch) {
+              add2reg(base, -disp);  // Restore base.
+            }
+          }
+        } else {   // scratch == Z_R0
+          // NOTE(review): if base == Z_R0 as well, the save/restore below
+          // goes through the same register and clobbers base — presumably
+          // callers never combine base == Z_R0 with an out-of-range disp;
+          // confirm against call sites.
+          z_lgr(scratch, base);
+          add2reg(base, disp);
+          (this->*classic)(reg, 0, index, base);
+          z_lgr(base, scratch);      // Restore base.
+        }
+      }
+    }
+  }
+}
+
+// Store a float (z_ste[y]) or double (z_std[y]) register to the memory
+// location described by a, using the long-displacement form when needed.
+void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) {
+  const int64_t  disp  = a.disp20();
+  const Register index = a.indexOrR0();
+  const Register base  = a.baseOrR0();
+  if (is_double) {
+    freg2mem_opt(reg, disp, index, base, MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std));
+  } else {
+    freg2mem_opt(reg, disp, index, base, MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste));
+  }
+}
+
+// Optimized emitter for mem to reg operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// data register (reg) cannot be used as work register.
+//
+// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
+// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
+//
+// Same displacement strategy as freg2mem_opt: classic for 12-bit unsigned,
+// modern for 20-bit signed, otherwise fold disp into a register via scratch
+// (paths that cannot do so assert on the out-of-range disp).
+void MacroAssembler::mem2freg_opt(FloatRegister reg,
+                                  int64_t       disp,
+                                  Register      index,
+                                  Register      base,
+                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+                                  Register      scratch) {
+  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
+  if (Displacement::is_shortDisp(disp)) {
+    (this->*classic)(reg, disp, index, base);
+  } else {
+    if (Displacement::is_validDisp(disp)) {
+      (this->*modern)(reg, disp, index, base);
+    } else {
+      if (scratch != Z_R0 && scratch != Z_R1) {
+        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
+      } else {
+        if (scratch != Z_R0) {   // scratch == Z_R1
+          if ((scratch == index) || (index == base)) {
+            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
+          } else {
+            add2reg(scratch, disp, base);             // scratch = base + disp.
+            (this->*classic)(reg, 0, index, scratch);
+            if (base == scratch) {
+              add2reg(base, -disp);  // Restore base.
+            }
+          }
+        } else {   // scratch == Z_R0
+          // NOTE(review): base == Z_R0 would make the save/restore below
+          // self-clobbering — presumably excluded by callers; confirm.
+          z_lgr(scratch, base);
+          add2reg(base, disp);
+          (this->*classic)(reg, 0, index, base);
+          z_lgr(base, scratch);      // Restore base.
+        }
+      }
+    }
+  }
+}
+
+// Load a float (z_le[y]) or double (z_ld[y]) register from the memory
+// location described by a, using the long-displacement form when needed.
+void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
+  const int64_t  disp  = a.disp20();
+  const Register index = a.indexOrR0();
+  const Register base  = a.baseOrR0();
+  if (is_double) {
+    mem2freg_opt(reg, disp, index, base, MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
+  } else {
+    mem2freg_opt(reg, disp, index, base, MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
+  }
+}
+
+// Optimized emitter for reg to mem operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// Data register (reg) cannot be used as work register.
+//
+// Don't rely on register locking, instead pass a scratch register
+// (Z_R0 by default)
+// CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs!
+//
+// Same displacement strategy as freg2mem_opt. Unlike the float variants,
+// the data register may alias scratch/base here, so the scratch == Z_R0
+// path carries an extra aliasing guard.
+void MacroAssembler::reg2mem_opt(Register reg,
+                                 int64_t  disp,
+                                 Register index,
+                                 Register base,
+                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
+                                 Register scratch) {
+  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
+  if (Displacement::is_shortDisp(disp)) {
+    (this->*classic)(reg, disp, index, base);
+  } else {
+    if (Displacement::is_validDisp(disp)) {
+      (this->*modern)(reg, disp, index, base);
+    } else {
+      if (scratch != Z_R0 && scratch != Z_R1) {
+        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
+      } else {
+        if (scratch != Z_R0) {   // scratch == Z_R1
+          if ((scratch == index) || (index == base)) {
+            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
+          } else {
+            add2reg(scratch, disp, base);             // scratch = base + disp.
+            (this->*classic)(reg, 0, index, scratch);
+            if (base == scratch) {
+              add2reg(base, -disp);  // Restore base.
+            }
+          }
+        } else {   // scratch == Z_R0
+          // Save/restore of base via scratch only works when reg, base and
+          // scratch are pairwise distinct; otherwise fall back to modern.
+          if ((scratch == reg) || (scratch == base) || (reg == base)) {
+            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
+          } else {
+            z_lgr(scratch, base);
+            add2reg(base, disp);
+            (this->*classic)(reg, 0, index, base);
+            z_lgr(base, scratch);    // Restore base.
+          }
+        }
+      }
+    }
+  }
+}
+
+// Store a 32-bit (z_st[y]) or 64-bit (z_stg) register value to the memory
+// location described by a. Returns the code offset of the emitted store
+// (taken before emission) for later patching/bookkeeping.
+int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
+  const int store_offset = offset();
+  const int64_t  disp  = a.disp20();
+  const Register index = a.indexOrR0();
+  const Register base  = a.baseOrR0();
+  if (is_double) {
+    reg2mem_opt(reg, disp, index, base, MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
+  } else {
+    reg2mem_opt(reg, disp, index, base, MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
+  }
+  return store_offset;
+}
+
+// Optimized emitter for mem to reg operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// Data register (reg) will be used as work register where possible.
+//
+// Because reg is overwritten by the load anyway, out-of-range displacements
+// are handled by folding address components into reg itself; only the
+// reg == Z_R0 case (Z_R0 unusable as address base) falls back to
+// temporarily modifying and restoring base.
+void MacroAssembler::mem2reg_opt(Register reg,
+                                 int64_t  disp,
+                                 Register index,
+                                 Register base,
+                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
+  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
+  if (Displacement::is_shortDisp(disp)) {
+    (this->*classic)(reg, disp, index, base);
+  } else {
+    if (Displacement::is_validDisp(disp)) {
+      (this->*modern)(reg, disp, index, base);
+    } else {
+      if ((reg == index) && (reg == base)) {
+        z_sllg(reg, reg, 1);                 // reg := 2*reg, the combined index+base contribution.
+        add2reg(reg, disp);
+        (this->*classic)(reg, 0, noreg, reg);
+      } else if ((reg == index) && (reg != Z_R0)) {
+        add2reg(reg, disp);                  // Fold disp into the index.
+        (this->*classic)(reg, 0, reg, base);
+      } else if (reg == base) {
+        add2reg(reg, disp);                  // Fold disp into the base.
+        (this->*classic)(reg, 0, index, reg);
+      } else if (reg != Z_R0) {
+        add2reg(reg, disp, base);            // reg := base + disp, use as new base.
+        (this->*classic)(reg, 0, index, reg);
+      } else { // reg == Z_R0 && reg != base here
+        add2reg(base, disp);                 // Z_R0 cannot serve as base: adjust base ...
+        (this->*classic)(reg, 0, index, base);
+        add2reg(base, -disp);                // ... and undo the adjustment afterwards.
+      }
+    }
+  }
+}
+
+// Load a 32-bit (z_l[y], sign state per instruction) or 64-bit (z_lg)
+// value from the memory location described by a.
+void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
+  if (is_double) {
+    z_lg(reg, a); // z_lg accepts the Address form directly.
+    return;
+  }
+  mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
+}
+
+// Load a 32-bit memory operand and sign-extend it into the 64-bit reg (LGF).
+void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
+  mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf));
+}
+
+// r &= mask.
+// wide == false: mask must be a simm32; only the low 32 bits are anded (NILF).
+// wide == true:  full 64-bit and; mask is materialized in tmp first, so tmp
+//                must differ from r.
+void MacroAssembler::and_imm(Register r, long mask,
+                             Register tmp /* = Z_R0 */,
+                             bool wide    /* = false */) {
+  assert(wide || Immediate::is_simm32(mask), "mask value too large");
+
+  if (wide) {
+    assert(r != tmp, " need a different temporary register !");
+    load_const_optimized(tmp, mask);
+    z_ngr(r, tmp);
+  } else {
+    z_nilf(r, mask); // Single and-immediate covers the 32-bit case.
+  }
+}
+
+// Calculate the 1's complement.
+// r1 := ~r2 (or r1 := ~r1 when r2 is noreg or equal to r1).
+// Note: The condition code is neither preserved nor correctly set by this code!!!
+// Note: (wide == false) does not protect the high order half of the target register
+//       from alteration. It only serves as optimization hint for 32-bit results.
+void MacroAssembler::not_(Register r1, Register r2, bool wide) {
+
+  if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place.
+    z_xilf(r1, -1);                  // Flip low 32 bits.
+    if (wide) {
+      z_xihf(r1, -1);                // Flip high 32 bits as well.
+    }
+  } else { // Distinct src and dst registers.
+    if (VM_Version::has_DistinctOpnds()) {
+      load_const_optimized(r1, -1);
+      z_xgrk(r1, r2, r1);            // r1 = r2 ^ -1, in one 64-bit op.
+    } else {
+      if (wide) {
+        z_lgr(r1, r2);
+        z_xilf(r1, -1);
+        z_xihf(r1, -1);
+      } else {
+        z_lr(r1, r2);
+        z_xilf(r1, -1);
+      }
+    }
+  }
+}
+
+// Build a 64-bit mask with 1s exactly in bit positions [lBitPos, rBitPos],
+// using z/Architecture bit numbering (bit 0 is the most significant bit).
+unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) {
+  assert(lBitPos >=  0,      "zero is  leftmost bit position");
+  assert(rBitPos <= 63,      "63   is rightmost bit position");
+  assert(lBitPos <= rBitPos, "inverted selection interval");
+  const unsigned long ones_from_l = ~0UL >> lBitPos;        // 1s in positions lBitPos..63.
+  const unsigned long ones_to_r   = ~0UL << (63 - rBitPos); // 1s in positions 0..rBitPos.
+  return ones_from_l & ones_to_r;                           // Intersection: [lBitPos, rBitPos].
+}
+
+// Helper function for the "Rotate_then_<logicalOP>" emitters.
+// Rotate src, then mask register contents such that only bits in range survive.
+// For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
+// For oneBits == true,  all bits not in range are set to 1. Useful for preserving all bits outside range.
+// The caller must ensure that the selected range only contains bits with defined value.
+// nRotate: left rotate amount; negative values rotate right. Where the rotate
+// can be replaced by a plain shift (no wrap-around into the selected range),
+// the shift is emitted instead and the halfwords it is known to zero are
+// tracked so the corresponding mask instructions can be skipped.
+void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
+                                      int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
+  assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
+  bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
+  bool srl4rll = (nRotate <  0) && (-nRotate <= lBitPos);     // Substitute SRL(G) for RLL(G).
+  //  Pre-determine which parts of dst will be zero after shift/rotate.
+  //  (ll/lh/hl/hh = low-low .. high-high 16-bit quarters; lf/hf = 32-bit halves.)
+  bool llZero  =  sll4rll && (nRotate >= 16);
+  bool lhZero  = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
+  bool lfZero  = llZero && lhZero;
+  bool hlZero  = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
+  bool hhZero  =                                 (srl4rll && (nRotate <= -16));
+  bool hfZero  = hlZero && hhZero;
+
+  // rotate then mask src operand.
+  // if oneBits == true,  all bits outside selected range are 1s.
+  // if oneBits == false, all bits outside selected range are 0s.
+  if (src32bit) {   // There might be garbage in the upper 32 bits which will get masked away.
+    if (dst32bit) {
+      z_rll(dst, src, nRotate);   // Copy and rotate, upper half of reg remains undisturbed.
+    } else {
+      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
+      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
+      else              { z_rllg(dst, src,  nRotate); }
+    }
+  } else {
+    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
+    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
+    else              { z_rllg(dst, src,  nRotate); }
+  }
+
+  unsigned long  range_mask    = create_mask(lBitPos, rBitPos);
+  unsigned int   range_mask_h  = (unsigned int)(range_mask >> 32);
+  unsigned int   range_mask_l  = (unsigned int)range_mask;
+  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
+  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
+  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
+  unsigned short range_mask_ll = (unsigned short)range_mask;
+  // Works for z9 and newer H/W.
+  if (oneBits) {
+    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
+    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
+  } else {
+    // All bits outside range become 0s
+    if (((~range_mask_l) != 0) &&              !lfZero) {
+      z_nilf(dst, range_mask_l);
+    }
+    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
+      z_nihf(dst, range_mask_h);
+    }
+  }
+}
+
+// Rotate src, then insert selected range from rotated src into dst.
+// Clear dst before, if requested.
+void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
+                                        int nRotate, bool clear_dst) {
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
+  z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
+}
+
+// Rotate src, then and selected range from rotated src into dst.
+// Set condition code only if so requested. Otherwise it is unpredictable.
+// See performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
+                                     int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
+  // NOTE(review): despite the emitter's name, this emits RXSBG
+  // (rotate-then-XOR-selected-bits), not RNSBG (rotate-then-AND). Looks like
+  // a copy-paste from rotate_then_xor — confirm whether z_rnsbg was intended
+  // before relying on this emitter for an AND operation.
+  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
+}
+
+// Rotate src, then or selected range from rotated src into dst.
+// Set condition code only if so requested. Otherwise it is unpredictable.
+// See performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_or(Register dst, Register src,  int  lBitPos,  int  rBitPos,
+                                    int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
+  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then or selected.
+}
+
+// Rotate src, then xor selected range from rotated src into dst.
+// Set condition code only if so requested. Otherwise it is unpredictable.
+// See performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_xor(Register dst, Register src,  int  lBitPos,  int  rBitPos,
+                                     int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
+  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
+}
+
+// 64-bit add: r1 += inc, where inc is either a register or a constant.
+void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
+  if (inc.is_constant()) {
+    add2reg(r1, inc.as_constant()); // Let add2reg pick the best encoding.
+  } else {
+    z_agr(r1, inc.as_register());
+  }
+}
+// Helper function to multiply the 64bit contents of a register by a 16bit constant.
+// The optimization tries to avoid the mghi instruction, since it uses the FPU for
+// calculation and is thus rather slow.
+//
+// There is no handling for special cases, e.g. cval==0 or cval==1.
+//
+// Strategy: if |cval| is a power of two, a single shift suffices; if it is
+// the sum of two powers of two, two shifts plus an add; otherwise fall back
+// to mghi. A negated cval is handled by negating the result (or the mghi
+// operand) at the end.
+//
+// Returns len of generated code block.
+unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
+  int block_start = offset();
+
+  bool sign_flip = cval < 0;
+  cval = sign_flip ? -cval : cval;
+
+  BLOCK_COMMENT("Reg64*Con16 {");
+
+  int bit1 = cval & -cval;       // Isolate the lowest set bit of cval.
+  if (bit1 == cval) {            // cval is a power of two.
+    z_sllg(rval, rval, exact_log2(bit1));
+    if (sign_flip) { z_lcgr(rval, rval); }
+  } else {
+    int bit2 = (cval-bit1) & -(cval-bit1);  // Second-lowest set bit.
+    if ((bit1+bit2) == cval) {   // cval is the sum of exactly two powers of two.
+      z_sllg(work, rval, exact_log2(bit1));
+      z_sllg(rval, rval, exact_log2(bit2));
+      z_agr(rval, work);
+      if (sign_flip) { z_lcgr(rval, rval); }
+    } else {
+      if (sign_flip) { z_mghi(rval, -cval); }
+      else           { z_mghi(rval,  cval); }
+    }
+  }
+  BLOCK_COMMENT("} Reg64*Con16");
+
+  int block_end = offset();
+  return block_end - block_start;
+}
+
+// Generic operation r1 := r2 + imm.
+//
+// Should produce the best code for each supported CPU version.
+// r2 == noreg yields r1 := r1 + imm
+// imm == 0 emits either no instruction or r1 := r2 !
+// NOTES: 1) Don't use this function where fixed sized
+//           instruction sequences are required!!!
+//        2) Don't use this function if condition code
+//           setting is required!
+//        3) Despite being declared as int64_t, the parameter imm
+//           must be a simm_32 value (= signed 32-bit integer).
+void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
+  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");
+
+  if (r2 == noreg) { r2 = r1; }
+
+  // Handle special case imm == 0.
+  if (imm == 0) {
+    lgr_if_needed(r1, r2);
+    // Nothing else to do.
+    return;
+  }
+
+  if (!PreferLAoverADD || (r2 == Z_R0)) {
+    // Z_R0 cannot serve as base for LA/LAY, so true add instructions are used.
+    bool distinctOpnds = VM_Version::has_DistinctOpnds();
+
+    // Can we encode imm in 16 bits signed?
+    if (Immediate::is_simm16(imm)) {
+      if (r1 == r2) {
+        z_aghi(r1, imm);
+        return;
+      }
+      if (distinctOpnds) {
+        z_aghik(r1, r2, imm);    // Three-operand add avoids the copy.
+        return;
+      }
+      z_lgr(r1, r2);
+      z_aghi(r1, imm);
+      return;
+    }
+  } else {
+    // Can we encode imm in 12 bits unsigned?
+    if (Displacement::is_shortDisp(imm)) {
+      z_la(r1, imm, r2);
+      return;
+    }
+    // Can we encode imm in 20 bits signed?
+    if (Displacement::is_validDisp(imm)) {
+      // Always use LAY instruction, so we don't need the tmp register.
+      z_lay(r1, imm, r2);
+      return;
+    }
+
+  }
+
+  // Can handle it (all possible values) with long immediates.
+  lgr_if_needed(r1, r2);
+  z_agfi(r1, imm);
+}
+
+// Generic operation r := b + x + d
+//
+// Addition of several operands with address generation semantics - sort of:
+//  - no restriction on the registers. Any register will do for any operand.
+//  - x == noreg: operand will be disregarded.
+//  - b == noreg: will use (contents of) result reg as operand (r := r + d).
+//  - x == Z_R0:  just disregard
+//  - b == Z_R0:  use as operand. This is not address generation semantics!!!
+//
+// The same restrictions as on add2reg() are valid!!!
+void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
+  assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");
+
+  if (x == noreg) { x = Z_R0; }
+  if (b == noreg) { b = r; }
+
+  // Handle special case x == R0.
+  if (x == Z_R0) {
+    // Can simply add the immediate value to the base register.
+    add2reg(r, d, b);
+    return;
+  }
+
+  if (!PreferLAoverADD || (b == Z_R0)) {
+    bool distinctOpnds = VM_Version::has_DistinctOpnds();
+    // Handle special case d == 0.
+    if (d == 0) {
+      if (b == x)        { z_sllg(r, b, 1); return; } // r := b + b == 2*b: shift left by 1.
+      if (r == x)        { z_agr(r, b);     return; }
+      if (r == b)        { z_agr(r, x);     return; }
+      if (distinctOpnds) { z_agrk(r, x, b); return; }
+      z_lgr(r, b);
+      z_agr(r, x);
+    } else {
+      // First compute r := b + x (same aliasing cases as above), ...
+      if (x == b)             { z_sllg(r, x, 1); }
+      else if (r == x)        { z_agr(r, b); }
+      else if (r == b)        { z_agr(r, x); }
+      else if (distinctOpnds) { z_agrk(r, x, b); }
+      else {
+        z_lgr(r, b);
+        z_agr(r, x);
+      }
+      add2reg(r, d);          // ... then fold in the displacement.
+    }
+  } else {
+    // Can we encode imm in 12 bits unsigned?
+    if (Displacement::is_shortDisp(d)) {
+      z_la(r, d, x, b);
+      return;
+    }
+    // Can we encode imm in 20 bits signed?
+    if (Displacement::is_validDisp(d)) {
+      z_lay(r, d, x, b);
+      return;
+    }
+    z_la(r, 0, x, b);  // r := b + x via address generation, ...
+    add2reg(r, d);     // ... then add the large displacement.
+  }
+}
+
+// Generic emitter (32bit) for direct memory increment: *(jint*)a += imm.
+// For optimal code, do not specify Z_R0 as temp register.
+void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
+  const bool can_add_in_memory =
+    VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm);
+  if (can_add_in_memory) {
+    z_asi(a, imm);   // Single add-immediate-to-storage instruction.
+    return;
+  }
+  // Fall back to load / add / store through tmp.
+  z_lgf(tmp, a);
+  add2reg(tmp, imm);
+  z_st(tmp, a);
+}
+
+// Generic emitter (64bit) for direct memory increment: *(jlong*)a += imm.
+// For optimal code, do not specify Z_R0 as temp register.
+void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) {
+  const bool can_add_in_memory =
+    VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm);
+  if (can_add_in_memory) {
+    z_agsi(a, imm);  // Single add-immediate-to-storage instruction.
+    return;
+  }
+  // Fall back to load / add / store through tmp.
+  z_lg(tmp, a);
+  add2reg(tmp, imm);
+  z_stg(tmp, a);
+}
+
+// Load a value of size_in_bytes (8, 4, 2 or 1) from src into dst, extending
+// it to 64 bits: sign-extended when is_signed, zero-extended otherwise
+// (is_signed is ignored for the full 8-byte load).
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
+  if (size_in_bytes == 8) {
+    z_lg(dst, src);
+  } else if (size_in_bytes == 4) {
+    if (is_signed) { z_lgf(dst, src); } else { z_llgf(dst, src); }
+  } else if (size_in_bytes == 2) {
+    if (is_signed) { z_lgh(dst, src); } else { z_llgh(dst, src); }
+  } else if (size_in_bytes == 1) {
+    if (is_signed) { z_lgb(dst, src); } else { z_llgc(dst, src); }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Store the low size_in_bytes (8, 4, 2 or 1) bytes of src to dst.
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
+  if (size_in_bytes == 8) {
+    z_stg(src, dst);
+  } else if (size_in_bytes == 4) {
+    z_st(src, dst);
+  } else if (size_in_bytes == 2) {
+    z_sth(src, dst);
+  } else if (size_in_bytes == 1) {
+    z_stc(src, dst);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
+// a high-order summand in register tmp.
+//
+// return value: <  0: No split required, si20 actually has property uimm12.
+//               >= 0: Split performed. Use return value as uimm12 displacement and
+//                     tmp as index register.
+//
+// fixed_codelen: emit a fixed-size (10 byte) sequence for the high part,
+//                regardless of its value (needed when the caller must know
+//                the code size up front).
+// accumulate:    add the high part into tmp (tmp += high) instead of
+//                overwriting tmp with it; the high part is staged in Z_R0.
+int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
+  assert(Immediate::is_simm20(si20_offset), "sanity");
+  int lg_off = (int)si20_offset &  0x0fff; // Punch out low-order 12 bits, always positive.
+  int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
+  assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
+         !Displacement::is_shortDisp(si20_offset), "unexpected offset values");
+  assert((lg_off+ll_off) == si20_offset, "offset splitup error");
+
+  Register work = accumulate? Z_R0 : tmp;
+
+  if (fixed_codelen) {          // Len of code = 10 = 4 + 6.
+    z_lghi(work, ll_off>>12);   // Implicit sign extension.
+    z_slag(work, work, 12);
+  } else {                      // Len of code = 0..10.
+    if (ll_off == 0) { return -1; }
+    // ll_off has 8 significant bits (at most) plus sign.
+    if ((ll_off & 0x0000f000) == 0) {    // Non-zero bits only in upper halfbyte.
+      z_llilh(work, ll_off >> 16);
+      if (ll_off < 0) {                  // Sign-extension required.
+        z_lgfr(work, work);
+      }
+    } else {
+      if ((ll_off & 0x000f0000) == 0) {  // Non-zero bits only in lower halfbyte.
+        z_llill(work, ll_off);
+      } else {                           // Non-zero bits in both halfbytes.
+        z_lghi(work, ll_off>>12);        // Implicit sign extension.
+        z_slag(work, work, 12);
+      }
+    }
+  }
+  if (accumulate) { z_algr(tmp, work); } // len of code += 4
+  return lg_off;
+}
+
+void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
+  if (Displacement::is_validDisp(si20)) {
+    z_ley(t, si20, a);
+  } else {
+    // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
+    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
+    // pool loads).
+    bool accumulate    = true;
+    bool fixed_codelen = true;
+    Register work;
+
+    if (fixed_codelen) {
+      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
+    } else {
+      accumulate = (a == tmp);
+    }
+    work = tmp;
+
+    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
+    if (disp12 < 0) {
+      z_le(t, si20, work);
+    } else {
+      if (accumulate) {
+        z_le(t, disp12, work);
+      } else {
+        z_le(t, disp12, work, a);
+      }
+    }
+  }
+}
+
+void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
+  if (Displacement::is_validDisp(si20)) {
+    z_ldy(t, si20, a);
+  } else {
+    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
+    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
+    // pool loads).
+    bool accumulate    = true;
+    bool fixed_codelen = true;
+    Register work;
+
+    if (fixed_codelen) {
+      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
+    } else {
+      accumulate = (a == tmp);
+    }
+    work = tmp;
+
+    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
+    if (disp12 < 0) {
+      z_ld(t, si20, work);
+    } else {
+      if (accumulate) {
+        z_ld(t, disp12, work);
+      } else {
+        z_ld(t, disp12, work, a);
+      }
+    }
+  }
+}
+
+// PCrelative TOC access.
+// Returns distance (in bytes) from current position to start of consts section.
+// Returns 0 (zero) if no consts section exists or if it has size zero.
+long MacroAssembler::toc_distance() {
+  CodeSection* cs = code()->consts();
+  return (long)((cs != NULL) ? cs->start()-pc() : 0);
+}
+
+// Implementation on x86/sparc assumes that constant and instruction section are
+// adjacent, but this doesn't hold. Two special situations may occur, that we must
+// be able to handle:
+//   1. const section may be located apart from the inst section.
+//   2. const section may be empty.
+// In both cases, we use the const section's start address to compute the "TOC",
+// this seems to occur only temporarily; in the final step we always seem to end up
+// with the pc-relative variant.
+//
+// PC-relative offset could be +/-2**32 -> use long for disp.
+// Furthermore: makes no sense to have special code for
+// adjacent const and inst sections.
+void MacroAssembler::load_toc(Register Rtoc) {
+  // Simply use distance from start of const section (should be patched in the end).
+  long disp = toc_distance();
+
+  RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
+  relocate(rspec);
+  z_larl(Rtoc, RelAddr::pcrel_off32(disp));  // Offset is in halfwords.
+}
+
+// PCrelative TOC access.
+// Load a 64-bit value from anywhere, PC-relative (with relocation of the load instr).
+void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
+  address          pc             = this->pc();
+  ptrdiff_t        total_distance = dataLocation - pc;
+  RelocationHolder rspec          = internal_word_Relocation::spec(dataLocation);
+
+  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
+  assert(total_distance != 0, "sanity");
+
+  // Safety net, active in product builds as well: the target must be reachable
+  // with a 32-bit PC-relative offset. The former 'if (!in_range) guarantee(in_range,...)'
+  // wrapper was redundant -- guarantee() performs the check unconditionally.
+  guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away");
+
+  (this)->relocate(rspec, relocInfo::pcrel_addr_format);
+  z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));  // Offset is in halfwords.
+}
+
+
+// PCrelative TOC access.
+// Load an address from anywhere, PC-relative (with relocation of the load instr).
+// The loaded addr has to be relocated when added to the constant pool.
+void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
+  address          pc             = this->pc();
+  ptrdiff_t        total_distance = addrLocation - pc;
+  RelocationHolder rspec          = internal_word_Relocation::spec(addrLocation);
+
+  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
+
+  // Safety net, active in product builds as well: the target must be reachable
+  // with a 32-bit PC-relative offset. The former 'if (!in_range) guarantee(in_range,...)'
+  // wrapper was redundant -- guarantee() performs the check unconditionally.
+  guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away");
+
+  (this)->relocate(rspec, relocInfo::pcrel_addr_format);
+  z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));  // Offset is in halfwords.
+}
+
+// Generic operation: load a value from memory and test.
+// CondCode indicates the sign (<0, ==0, >0) of the loaded value.
+void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
+  z_lb(dst, a);     // Load sign-extended byte.
+  z_ltr(dst, dst);  // Separate test; no load-and-test instruction for bytes.
+}
+
+// See load_and_test_byte. Chooses the short- or long-displacement form of the
+// halfword load; no load-and-test instruction exists for halfwords either.
+void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
+  int64_t disp = a.disp20();
+  if (Displacement::is_shortDisp(disp)) {
+    z_lh(dst, a);
+  } else if (Displacement::is_longDisp(disp)) {
+    z_lhy(dst, a);
+  } else {
+    guarantee(false, "displacement out of range");
+  }
+  z_ltr(dst, dst);
+}
+
+// Load int and test: LT sets the condition code directly.
+void MacroAssembler::load_and_test_int(Register dst, const Address &a) {
+  z_lt(dst, a);
+}
+
+// Load int, sign-extend to 64 bits, and test: LTGF sets the condition code directly.
+void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) {
+  z_ltgf(dst, a);
+}
+
+// Load long and test: LTG sets the condition code directly.
+void MacroAssembler::load_and_test_long(Register dst, const Address &a) {
+  z_ltg(dst, a);
+}
+
+// Test a bit in memory; result is reflected in the condition code.
+// Big-endian layout: bit 0 is the least significant bit and lives in the
+// byte at displacement +3; bits 24..31 live in the byte at displacement +0.
+// Only bits 0..31 are supported.
+void MacroAssembler::testbit(const Address &a, unsigned int bit) {
+  assert(a.index() == noreg, "no index reg allowed in testbit");
+  if (bit > 31) {
+    ShouldNotReachHere();
+    return;
+  }
+  // Select the byte that contains the bit and the bit's position within it.
+  int byte_disp = 3 - (int)(bit >> 3);
+  unsigned int mask = 1 << (bit & 7);
+  z_tm(a.disp() + byte_disp, a.base(), mask);
+}
+
+// Test a single bit (0..63) in a register. Result is reflected in CC.
+// The 64-bit register is addressed as four 16-bit chunks by the TMxx family.
+void MacroAssembler::testbit(Register r, unsigned int bitPos) {
+  unsigned int mask = 1U << (bitPos & 15);  // Bit position within its halfword.
+  switch (bitPos >> 4) {                    // Which halfword holds the bit.
+    case 0: z_tmll(r, mask); break;  // Bits  0..15.
+    case 1: z_tmlh(r, mask); break;  // Bits 16..31.
+    case 2: z_tmhl(r, mask); break;  // Bits 32..47.
+    case 3: z_tmhh(r, mask); break;  // Bits 48..63.
+    default: ShouldNotReachHere(); break;
+  }
+}
+
+// Clear a register, i.e. load const zero into reg.
+// whole_reg: Clear 64 bits if true, 32 bits otherwise.
+// set_cc:    Use an instruction that sets the condition code, if true.
+// Returns the length (in bytes) of the generated instruction(s).
+int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
+  unsigned int start_off = offset();
+  if (set_cc) {
+    // XOR with itself zeroes the register and sets CC.
+    if (whole_reg) { z_xgr(r, r); } else { z_xr(r, r); }
+  } else {
+    // Load zero without touching CC.
+    if (whole_reg) { z_laz(r, 0, Z_R0); } else { z_lhi(r, 0); }
+  }
+  return offset() - start_off;
+}
+
+#ifdef ASSERT
+// Preset register r with a recognizable debug pattern.
+// The pattern of width 'pattern_len' (1, 2, 4, or 8 bytes) is replicated to
+// fill all 64 bits. Returns the emitted code length in bytes.
+// Note: the cases intentionally fall through, widening the pattern step by
+// step (1 -> 2 -> 4 -> 8 bytes) before the final load.
+int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
+  switch (pattern_len) {
+    case 1:
+      pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
+      // Fall through: widen 2-byte pattern to 4 bytes.
+    case 2:
+      pattern = (pattern & 0x0000ffff)  | ((pattern & 0x0000ffff)<<16);
+      // Fall through: widen 4-byte pattern to 8 bytes.
+    case 4:
+      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
+      // Fall through: load the fully replicated 8-byte pattern.
+    case 8:
+      return load_const_optimized_rtn_len(r, pattern, true);
+      break;
+    default:
+      guarantee(false, "preset_reg: bad len");
+  }
+  return 0;
+}
+#endif
+
+// Clear 'size' bytes of memory described by 'addr'.
+// The index register of 'addr' will not be used!
+// size: number of bytes to clear, at most 256.
+//    !!! DO NOT USE FOR ATOMIC MEMORY CLEARING !!!
+//    !!! Use store_const() instead             !!!
+void MacroAssembler::clear_mem(const Address& addr, unsigned size) {
+  guarantee(size <= 256, "MacroAssembler::clear_mem: size too large");
+
+  // Power-of-two sizes up to 8 map onto a single move-immediate instruction.
+  switch (size) {
+    case 1: z_mvi(addr, 0);   return;
+    case 2: z_mvhhi(addr, 0); return;
+    case 4: z_mvhi(addr, 0);  return;
+    case 8: z_mvghi(addr, 0); return;
+    default: break; // All other sizes fall through to XC.
+  }
+
+  // XOR the area with itself: clears up to 256 bytes in one instruction.
+  z_xc(addr, size, addr);
+}
+
+// Pad with 2-byte NOPs until the code buffer offset is a multiple of 'modulus'.
+void MacroAssembler::align(int modulus) {
+  while (offset() % modulus != 0) z_nop();
+}
+
+// Special version for non-relocateable code if required alignment
+// is larger than CodeEntryAlignment.
+// Aligns the absolute pc, not just the buffer-relative offset.
+void MacroAssembler::align_address(int modulus) {
+  while ((uintptr_t)pc() % modulus != 0) z_nop();
+}
+
+// Compute the address of an argument slot on the interpreter expression stack
+// (base register Z_esp). arg_slot is either a constant slot number or a
+// register holding it; extra_slot_offset is an additional slot-based offset.
+// temp_reg is only needed (and clobbered) when arg_slot is a register.
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         Register temp_reg,
+                                         int64_t extra_slot_offset) {
+  // On Z, we can have index and disp in an Address. So don't call argument_offset,
+  // which issues an unnecessary add instruction.
+  int stackElementSize = Interpreter::stackElementSize;
+  int64_t offset = extra_slot_offset * stackElementSize;
+  const Register argbase = Z_esp;
+  if (arg_slot.is_constant()) {
+    offset += arg_slot.as_constant() * stackElementSize;
+    return Address(argbase, offset);
+  }
+  // else
+  assert(temp_reg != noreg, "must specify");
+  assert(temp_reg != Z_ARG1, "base and index are conflicting");  // NOTE(review): base is Z_esp here, not Z_ARG1 -- confirm intent of this assert.
+  z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3
+  return Address(argbase, temp_reg, offset);
+}
+
+
+//===================================================================
+//===   START   C O N S T A N T S   I N   C O D E   S T R E A M   ===
+//===================================================================
+//===            P A T C H A B L E   C O N S T A N T S            ===
+//===================================================================
+
+
+//---------------------------------------------------
+//  Load (patchable) constant into register
+//---------------------------------------------------
+
+
+// Load absolute address (and try to optimize).
+//   Note: This method is usable only for position-fixed code,
+//         referring to a position-fixed target location.
+//         If not so, relocations and patching must be used.
+// addr == NULL asserts in debug builds; in product builds a dummy LARL is
+// emitted so that size calculation in a scratch buffer still sees an instruction.
+void MacroAssembler::load_absolute_address(Register d, address addr) {
+  assert(addr != NULL, "should not happen");
+  BLOCK_COMMENT("load_absolute_address:");
+  if (addr == NULL) {
+    z_larl(d, pc()); // Dummy emit for size calc.
+    return;
+  }
+
+  // Shortest form first: PC-relative address load if the target is in reach.
+  if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) {
+    z_larl(d, addr);
+    return;
+  }
+
+  load_const_optimized(d, (long)addr);
+}
+
+// Load a 64bit constant.
+// Patchable code sequence, but not atomically patchable.
+// Make sure to keep code size constant -> no value-dependent optimizations.
+// Do not kill condition code.
+void MacroAssembler::load_const(Register t, long x) {
+  Assembler::z_iihf(t, (int)(x >> 32));        // Insert high word ...
+  Assembler::z_iilf(t, (int)(x & 0xffffffff)); // ... then low word.
+}
+
+// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
+// Patchable code sequence, but not atomically patchable.
+// Make sure to keep code size constant -> no value-dependent optimizations.
+// Do not kill condition code.
+void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
+  if (sign_extend) { Assembler::z_lgfi(t, x); }
+  else             { Assembler::z_llilf(t, x); }
+}
+
+// Load narrow oop constant, no decompression.
+void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
+  assert(UseCompressedOops, "must be on to call this method");
+  load_const_32to64(t, a, false /*sign_extend*/);
+}
+
+// Load narrow klass constant, compression required.
+void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
+  assert(UseCompressedClassPointers, "must be on to call this method");
+  narrowKlass encoded_k = Klass::encode_klass(k);
+  load_const_32to64(t, encoded_k, false /*sign_extend*/);
+}
+
+//------------------------------------------------------
+//  Compare (patchable) constant with register.
+//------------------------------------------------------
+
+// Compare narrow oop in reg with narrow oop constant, no decompression.
+void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
+  assert(UseCompressedOops, "must be on to call this method");
+
+  Assembler::z_clfi(oop1, oop2);
+}
+
+// Compare narrow klass in reg with narrow klass constant, no decompression.
+void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
+  assert(UseCompressedClassPointers, "must be on to call this method");
+  narrowKlass encoded_k = Klass::encode_klass(klass2);
+
+  Assembler::z_clfi(klass1, encoded_k);
+}
+
+//----------------------------------------------------------
+//  Check which kind of load_constant we have here.
+//----------------------------------------------------------
+
+// Detection of CPU version dependent load_const sequence.
+// The detection is valid only for code sequences generated by load_const,
+// not load_const_optimized.
+// A load_const is the fixed two-instruction pair IIHF followed by IILF.
+bool MacroAssembler::is_load_const(address a) {
+  unsigned long inst1, inst2;
+  unsigned int  len1,  len2;
+
+  len1 = get_instruction(a, &inst1);
+  len2 = get_instruction(a + len1, &inst2);
+
+  return is_z_iihf(inst1) && is_z_iilf(inst2);
+}
+
+// Detection of CPU version dependent load_const_32to64 sequence.
+// Mostly used for narrow oops and narrow Klass pointers.
+// The detection is valid only for code sequences generated by load_const_32to64.
+bool MacroAssembler::is_load_const_32to64(address pos) {
+  unsigned long inst1;
+
+  // Only the first instruction needs inspection; the formerly declared
+  // second instruction buffer and the stored length were never used.
+  get_instruction(pos, &inst1);
+  return is_z_llilf(inst1);
+}
+
+// The detectors below are valid only for code sequences generated by
+// compare_immediate_narrow_oop / compare_immediate_narrow_klass.
+
+// A 32-bit immediate compare is a CLFI instruction.
+bool MacroAssembler::is_compare_immediate32(address pos) {
+  return is_equal(pos, CLFI_ZOPC, RIL_MASK);
+}
+
+// Narrow oop compares are plain 32-bit immediate compares.
+bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
+  return is_compare_immediate32(pos);
+}
+
+// Narrow klass compares are plain 32-bit immediate compares.
+bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
+  return is_compare_immediate32(pos);
+}
+
+//-----------------------------------
+//  patch the load_constant
+//-----------------------------------
+
+// CPU-version dependent patching of load_const.
+// Overwrites both 32-bit immediates of the IIHF/IILF pair with the halves of x.
+void MacroAssembler::patch_const(address a, long x) {
+  assert(is_load_const(a), "not a load of a constant");
+  set_imm32((address)a, (int) ((x >> 32) & 0xffffffff));
+  set_imm32((address)(a + 6), (int)(x & 0xffffffff));
+}
+
+// Patching the value of CPU version dependent load_const_32to64 sequence.
+// The passed ptr MUST be in compressed format!
+// Returns the length (6 bytes) of the patched instruction.
+int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
+  assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");
+
+  set_imm32(pos, np);
+  return 6;
+}
+
+// Patching the value of CPU version dependent compare_immediate_narrow sequence.
+// The passed ptr MUST be in compressed format!
+// Returns the length (6 bytes) of the patched instruction.
+int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
+  assert(is_compare_immediate32(pos), "not a compressed ptr compare");
+
+  set_imm32(pos, np);
+  return 6;
+}
+
+// Patching the immediate value of CPU version dependent load_narrow_oop sequence.
+// The passed ptr must NOT be in compressed format!
+int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
+  assert(UseCompressedOops, "Can only patch compressed oops");
+
+  narrowOop no = oopDesc::encode_heap_oop(o);
+  return patch_load_const_32to64(pos, no);
+}
+
+// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
+// The passed ptr must NOT be in compressed format!
+int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
+  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
+
+  narrowKlass nk = Klass::encode_klass(k);
+  return patch_load_const_32to64(pos, nk);
+}
+
+// Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence.
+// The passed ptr must NOT be in compressed format!
+int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
+  assert(UseCompressedOops, "Can only patch compressed oops");
+
+  narrowOop no = oopDesc::encode_heap_oop(o);
+  return patch_compare_immediate_32(pos, no);
+}
+
+// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
+// The passed ptr must NOT be in compressed format!
+int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
+  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
+
+  narrowKlass nk = Klass::encode_klass(k);
+  return patch_compare_immediate_32(pos, nk);
+}
+
+//------------------------------------------------------------------------
+//  Extract the constant from a load_constant instruction stream.
+//------------------------------------------------------------------------
+
+// Reassemble the 64-bit constant from the two 32-bit immediates of a
+// load_const sequence (IIHF at a, IILF at a+6).
+long MacroAssembler::get_const(address a) {
+  assert(is_load_const(a), "not a load of a constant");
+  unsigned long hi = (unsigned long)(get_imm32(a, 0) & 0xffffffff);
+  unsigned long lo = (unsigned long)(get_imm32(a, 1) & 0xffffffff);
+  return (long)((hi << 32) | lo);
+}
+
+//--------------------------------------
+//  Store a constant in memory.
+//--------------------------------------
+
+// General emitter to move a constant to memory.
+// The store is atomic.
+//  o Address must be given in RS format (no index register)
+//  o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
+//  o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
+//  o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
+//  o Memory slot must be at least as wide as constant, will assert otherwise.
+//  o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
+// Returns the code offset of the emitted store instruction.
+int MacroAssembler::store_const(const Address &dest, long imm,
+                                unsigned int lm, unsigned int lc,
+                                Register scratch) {
+  int64_t  disp = dest.disp();
+  Register base = dest.base();
+  assert(!dest.has_index(), "not supported");
+  assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory   length not supported");
+  assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported");
+  assert(lm>=lc, "memory slot too small");
+  assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range");
+  assert(Displacement::is_validDisp(disp), "displacement out of range");
+
+  bool is_shortDisp = Displacement::is_shortDisp(disp);
+  int store_offset = -1;
+
+  // For target len == 1 it's easy.
+  if (lm == 1) {
+    store_offset = offset();
+    if (is_shortDisp) {
+      z_mvi(disp, base, imm);
+      return store_offset;
+    } else {
+      z_mviy(disp, base, imm);
+      return store_offset;
+    }
+  }
+
+  // All the "good stuff" takes an unsigned displacement.
+  if (is_shortDisp) {
+    // NOTE: Cannot use clear_mem for imm==0, because it is not atomic.
+
+    store_offset = offset();
+    switch (lm) {
+      case 2:  // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening.
+        z_mvhhi(disp, base, imm);
+        return store_offset;
+      case 4:
+        if (Immediate::is_simm16(imm)) {
+          z_mvhi(disp, base, imm);
+          return store_offset;
+        }
+        break;
+      case 8:
+        if (Immediate::is_simm16(imm)) {
+          z_mvghi(disp, base, imm);
+          return store_offset;
+        }
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+    }
+  }
+
+  //  Can't optimize, so load value and store it.
+  guarantee(scratch != noreg, " need a scratch register here !");
+  if (imm != 0) {
+    load_const_optimized(scratch, imm);  // Preserves CC anyway.
+  } else {
+    // Leave CC alone!!
+    (void) clear_reg(scratch, true, false); // Indicate unused result.
+  }
+
+  // Register-to-memory store; pick the displacement form that fits.
+  store_offset = offset();
+  if (is_shortDisp) {
+    switch (lm) {
+      case 2:
+        z_sth(scratch, disp, Z_R0, base);
+        return store_offset;
+      case 4:
+        z_st(scratch, disp, Z_R0, base);
+        return store_offset;
+      case 8:
+        z_stg(scratch, disp, Z_R0, base);
+        return store_offset;
+      default:
+        ShouldNotReachHere();
+        break;
+    }
+  } else {
+    switch (lm) {
+      case 2:
+        z_sthy(scratch, disp, Z_R0, base);
+        return store_offset;
+      case 4:
+        z_sty(scratch, disp, Z_R0, base);
+        return store_offset;
+      case 8:
+        z_stg(scratch, disp, Z_R0, base);
+        return store_offset;
+      default:
+        ShouldNotReachHere();
+        break;
+    }
+  }
+  return -1; // should not reach here
+}
+
+//===================================================================
+//===       N O T   P A T C H A B L E   C O N S T A N T S         ===
+//===================================================================
+
+// Load constant x into register t with a fast instruction sequence
+// depending on the bits in x. Preserves CC under all circumstances.
+// If emit == false, nothing is emitted; only the length (in bytes) of the
+// would-be sequence is returned. The returned length must match exactly
+// what the emitting call would produce.
+int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) {
+  if (x == 0) {
+    int len;
+    if (emit) {
+      len = clear_reg(t, true, false);  // set_cc == false: CC preserved.
+    } else {
+      len = 4;
+    }
+    return len;
+  }
+
+  if (Immediate::is_simm16(x)) {
+    if (emit) { z_lghi(t, x); }
+    return 4;
+  }
+
+  // 64 bit value: | part1 | part2 | part3 | part4 |
+  // At least one part is not zero!
+  int part1 = ((x >> 32) & 0xffff0000) >> 16;
+  int part2 = (x >> 32) & 0x0000ffff;
+  int part3 = (x & 0xffff0000) >> 16;
+  int part4 = (x & 0x0000ffff);
+
+  // Lower word only (unsigned).
+  if ((part1 == 0) && (part2 == 0)) {
+    if (part3 == 0) {
+      if (emit) z_llill(t, part4);
+      return 4;
+    }
+    if (part4 == 0) {
+      if (emit) z_llilh(t, part3);
+      return 4;
+    }
+    if (emit) z_llilf(t, (int)(x & 0xffffffff));
+    return 6;
+  }
+
+  // Upper word only.
+  if ((part3 == 0) && (part4 == 0)) {
+    if (part1 == 0) {
+      if (emit) z_llihl(t, part2);
+      return 4;
+    }
+    if (part2 == 0) {
+      if (emit) z_llihh(t, part1);
+      return 4;
+    }
+    if (emit) z_llihf(t, (int)(x >> 32));
+    return 6;
+  }
+
+  // Lower word only (signed): upper word is all ones and the low word's
+  // sign bit is set, so LGFI's sign extension reproduces x.
+  if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) {
+    if (emit) z_lgfi(t, (int)(x & 0xffffffff));
+    return 6;
+  }
+
+  int len = 0;
+
+  // General case: load upper word, then insert lower word.
+  if ((part1 == 0) || (part2 == 0)) {
+    if (part1 == 0) {
+      if (emit) z_llihl(t, part2);
+      len += 4;
+    } else {
+      if (emit) z_llihh(t, part1);
+      len += 4;
+    }
+  } else {
+    if (emit) z_llihf(t, (int)(x >> 32));
+    len += 6;
+  }
+
+  if ((part3 == 0) || (part4 == 0)) {
+    if (part3 == 0) {
+      if (emit) z_iill(t, part4);
+      len += 4;
+    } else {
+      if (emit) z_iilh(t, part3);
+      len += 4;
+    }
+  } else {
+    if (emit) z_iilf(t, (int)(x & 0xffffffff));
+    len += 6;
+  }
+  return len;
+}
+
+//=====================================================================
+//===     H I G H E R   L E V E L   B R A N C H   E M I T T E R S   ===
+//=====================================================================
+
+// The four emitters below dispatch to compare_and_branch_optimized with the
+// proper operand length (32/64 bit) and signedness.
+// Note: In the worst case, one of the scratch registers is destroyed!!!
+
+// 32-bit signed compare and branch.
+void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
+  if (x2.is_constant()) {
+    // Right operand is an immediate.
+    compare_and_branch_optimized(r1, (jlong)x2.as_constant(), cond, lbl, /*len64=*/false, /*has_sign=*/true);
+  } else {
+    // Right operand is in a register.
+    compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true);
+  }
+}
+
+// 32-bit unsigned compare and branch.
+void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
+  if (x2.is_constant()) {
+    compare_and_branch_optimized(r1, (jlong)x2.as_constant(), cond, lbl, /*len64=*/false, /*has_sign=*/false);
+  } else {
+    compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false);
+  }
+}
+
+// 64-bit signed compare and branch.
+void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
+  if (x2.is_constant()) {
+    compare_and_branch_optimized(r1, (jlong)x2.as_constant(), cond, lbl, /*len64=*/true, /*has_sign=*/true);
+  } else {
+    compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true);
+  }
+}
+
+// 64-bit unsigned compare and branch.
+void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
+  if (x2.is_constant()) {
+    compare_and_branch_optimized(r1, (jlong)x2.as_constant(), cond, lbl, /*len64=*/true, /*has_sign=*/false);
+  } else {
+    compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false);
+  }
+}
+
+// Generate an optimal branch to the branch target.
+// Optimal means that a relative branch (brc or brcl) is used if the
+// branch distance is short enough. Loading the target address into a
+// register and branching via reg is used as fallback only.
+//
+// Used registers:
+//   Z_R1 - work reg. Holds branch target address.
+//          Used in fallback case only.
+//
+// This version of branch_optimized is good for cases where the target address is known
+// and constant, i.e. is never changed (no relocation, no patching).
+void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) {
+  address branch_origin = pc();
+
+  if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
+    z_brc(cond, branch_addr);   // 16-bit relative branch.
+  } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) {
+    z_brcl(cond, branch_addr);  // 32-bit relative branch.
+  } else {
+    load_const_optimized(Z_R1, branch_addr);  // CC must not get killed by load_const_optimized.
+    z_bcr(cond, Z_R1);
+  }
+}
+
+// This version of branch_optimized is good for cases where the target address
+// is potentially not yet known at the time the code is emitted.
+//
+// One very common case is a branch to an unbound label which is handled here.
+// The caller might know (or hope) that the branch distance is short enough
+// to be encoded in a 16bit relative address. In this case he will pass a
+// NearLabel branch_target.
+// Care must be taken with unbound labels. Each call to target(label) creates
+// an entry in the patch queue for that label to patch all references of the label
+// once it gets bound. Those recorded patch locations must be patchable. Otherwise,
+// an assertion fires at patch time.
+void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) {
+  if (branch_target.is_bound()) {
+    address branch_addr = target(branch_target);
+    branch_optimized(cond, branch_addr);
+  } else {
+    z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time.
+  }
+}
+
+// Generate an optimal compare and branch to the branch target.
+// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
+// branch distance is short enough. Loading the target address into a
+// register and branching via reg is used as fallback only.
+// In the fallback case, Z_R1 may be clobbered (see branch_optimized).
+//
+// Input:
+//   r1 - left compare operand
+//   r2 - right compare operand
+void MacroAssembler::compare_and_branch_optimized(Register r1,
+                                                  Register r2,
+                                                  Assembler::branch_condition cond,
+                                                  address  branch_addr,
+                                                  bool     len64,
+                                                  bool     has_sign) {
+  // Encode (64-bit?, unsigned?) as 0..3 for the instruction selection below.
+  unsigned int casenum = (len64?2:0)+(has_sign?0:1);
+
+  address branch_origin = pc();
+  if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
+    // Fused compare-and-branch instruction.
+    switch (casenum) {
+      case 0: z_crj( r1, r2, cond, branch_addr); break;
+      case 1: z_clrj (r1, r2, cond, branch_addr); break;
+      case 2: z_cgrj(r1, r2, cond, branch_addr); break;
+      case 3: z_clgrj(r1, r2, cond, branch_addr); break;
+      default: ShouldNotReachHere(); break;
+    }
+  } else {
+    // Separate compare, then branch.
+    switch (casenum) {
+      case 0: z_cr( r1, r2); break;
+      case 1: z_clr(r1, r2); break;
+      case 2: z_cgr(r1, r2); break;
+      case 3: z_clgr(r1, r2); break;
+      default: ShouldNotReachHere(); break;
+    }
+    branch_optimized(cond, branch_addr);
+  }
+}
+
+// Generate an optimal compare and branch to the branch target.
+// Optimal means that a relative branch (clgij, brc or brcl) is used if the
+// branch distance is short enough. Loading the target address into a
+// register and branching via reg is used as fallback only.
+// In the fallback case, one scratch register (Z_R0 or Z_R1) may be clobbered.
+//
+// Input:
+//   r1 - left compare operand (in register)
+//   x2 - right compare operand (immediate)
+void MacroAssembler::compare_and_branch_optimized(Register r1,
+                                                  jlong    x2,
+                                                  Assembler::branch_condition cond,
+                                                  Label&   branch_target,
+                                                  bool     len64,
+                                                  bool     has_sign) {
+  address      branch_origin = pc();
+  bool         x2_imm8       = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2));
+  bool         is_RelAddr16  = (branch_target.is_bound() &&
+                                RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin));
+  // Encode (64-bit?, unsigned?) as 0..3 for the instruction selection below.
+  unsigned int casenum       = (len64?2:0)+(has_sign?0:1);
+
+  // Fused compare-and-branch if the immediate fits in 8 bits and the (bound)
+  // target is within 16-bit relative reach.
+  if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) {
+    switch (casenum) {
+      case 0: z_cij( r1, x2, cond, branch_target); break;
+      case 1: z_clij(r1, x2, cond, branch_target); break;
+      case 2: z_cgij(r1, x2, cond, branch_target); break;
+      case 3: z_clgij(r1, x2, cond, branch_target); break;
+      default: ShouldNotReachHere(); break;
+    }
+    return;
+  }
+
+  // Separate compare and branch; pick the cheapest compare for the immediate.
+  if (x2 == 0) {
+    switch (casenum) {
+      case 0: z_ltr(r1, r1); break;
+      case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
+      case 2: z_ltgr(r1, r1); break;
+      case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
+      default: ShouldNotReachHere(); break;
+    }
+  } else {
+    if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) {
+      switch (casenum) {
+        case 0: z_chi(r1, x2); break;
+        case 1: z_chi(r1, x2); break; // positive immediate < 2**15
+        case 2: z_cghi(r1, x2); break;
+        case 3: z_cghi(r1, x2); break; // positive immediate < 2**15
+        default: ShouldNotReachHere(); break; // Was a silent 'break'; casenum is always 0..3 — now consistent with the sibling switches.
+      }
+    } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) {
+      switch (casenum) {
+        case 0: z_cfi( r1, x2); break;
+        case 1: z_clfi(r1, x2); break;
+        case 2: z_cgfi(r1, x2); break;
+        case 3: z_clgfi(r1, x2); break;
+        default: ShouldNotReachHere(); break;
+      }
+    } else {
+      // No instruction with immediate operand possible, so load into register.
+      Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1;
+      load_const_optimized(scratch, x2);
+      switch (casenum) {
+        case 0: z_cr( r1, scratch); break;
+        case 1: z_clr(r1, scratch); break;
+        case 2: z_cgr(r1, scratch); break;
+        case 3: z_clgr(r1, scratch); break;
+        default: ShouldNotReachHere(); break;
+      }
+    }
+  }
+  branch_optimized(cond, branch_target);
+}
+
+// Generate an optimal compare and branch to the branch target.
+// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
+// branch distance is short enough. Loading the target address into a
+// register and branching via reg is used as fallback only.
+//
+// Input:
+//   r1 - left compare operand
+//   r2 - right compare operand
+void MacroAssembler::compare_and_branch_optimized(Register r1,
+                                                  Register r2,
+                                                  Assembler::branch_condition cond,
+                                                  Label&   branch_target,
+                                                  bool     len64,
+                                                  bool     has_sign) {
+  unsigned int casenum = (len64?2:0)+(has_sign?0:1);
+
+  if (branch_target.is_bound()) {
+    address branch_addr = target(branch_target);
+    compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign);
+  } else {
+    {
+      switch (casenum) {
+        case 0: z_cr( r1, r2); break;
+        case 1: z_clr(r1, r2); break;
+        case 2: z_cgr(r1, r2); break;
+        case 3: z_clgr(r1, r2); break;
+        default: ShouldNotReachHere(); break;
+      }
+      branch_optimized(cond, branch_target);
+    }
+  }
+}
+
+//===========================================================================
+//===   END     H I G H E R   L E V E L   B R A N C H   E M I T T E R S   ===
+//===========================================================================
+
+AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int index = oop_recorder()->allocate_metadata_index(obj);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  return AddressLiteral((address)obj, rspec);
+}
+
+AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  return AddressLiteral((address)obj, rspec);
+}
+
+AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->allocate_oop_index(obj);
+  return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
+}
+
+AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
+}
+
+// NOTE: destroys r
// Normalize r to a C-style bool: 0 stays 0, any non-zero value becomes 1.
// Trick: r | (-r) has the sign bit set iff r != 0; shift it down to bit 0.
void MacroAssembler::c2bool(Register r, Register t) {
  z_lcr(t, r);   // t = -r
  z_or(r, t);    // r = -r OR r
  z_srl(r, 31);  // Yields 0 if r was 0, 1 otherwise.
}
+
// Materialize (*delayed_value_addr + offset) as a RegisterOrConstant.
// If the value is already known at code generation time (non-zero), return
// it as a constant and emit no code; otherwise emit an indirect runtime
// load into tmp, solving the code generation ordering problem.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0) {
    // Value already available: fold it into a constant, no code needed.
    return RegisterOrConstant(value + offset);
  }

  BLOCK_COMMENT("delayed_value {");
  // Load indirectly to solve generation ordering problem.
  load_absolute_address(tmp, (address) delayed_value_addr); // tmp = a;
  z_lg(tmp, 0, tmp);                   // tmp = *tmp;

#ifdef ASSERT
  NearLabel L;
  // By the time this code runs, the delayed value must have been filled in.
  compare64_and_branch(tmp, (intptr_t)0L, Assembler::bcondNotEqual, L);
  z_illtrap();
  bind(L);
#endif

  if (offset != 0) {
    z_agfi(tmp, offset);               // tmp = tmp + offset;
  }

  BLOCK_COMMENT("} delayed_value");
  return RegisterOrConstant(tmp);
}
+
+// Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos'
+// and return the resulting instruction.
+// Dest_pos and inst_pos are 32 bit only. These parms can only designate
+// relative positions.
+// Use correct argument types. Do not pre-calculate distance.
unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) {
  int c = 0;  // Case number; only used in the error printout below.
  unsigned long patched_inst = 0;
  if (is_call_pcrelative_short(inst) ||
      is_branch_pcrelative_short(inst) ||
      is_branchoncount_pcrelative_short(inst) ||
      is_branchonindex32_pcrelative_short(inst)) {
    // 4-byte instructions: 16-bit halfword offset in the rightmost halfword.
    c = 1;
    int m = fmask(15, 0);    // simm16(-1, 16, 32);
    int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32);
    patched_inst = (inst & ~m) | v;
  } else if (is_compareandbranch_pcrelative_short(inst)) {
    // 6-byte compare-and-branch: 16-bit offset "in the middle" (bits 16..31).
    c = 2;
    long m = fmask(31, 16);  // simm16(-1, 16, 48);
    long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_branchonindex64_pcrelative_short(inst)) {
    // 6-byte branch-on-index: same offset position as compare-and-branch.
    c = 3;
    long m = fmask(31, 16);  // simm16(-1, 16, 48);
    long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) {
    // 6-byte relative-long call/branch: 32-bit offset in bits 16..47.
    c = 4;
    long m = fmask(31, 0);  // simm32(-1, 16, 48);
    long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions.
    c = 5;
    long m = fmask(31, 0);  // simm32(-1, 16, 48);
    long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else {
    print_dbg_msg(tty, inst, "not a relative branch", 0);
    dump_code_range(tty, inst_pos, 32, "not a pcrelative branch");
    ShouldNotReachHere();
  }

  // Verify the patch: re-extract the offset and compare with the request.
  long new_off = get_pcrel_offset(patched_inst);
  if (new_off != (dest_pos-inst_pos)) {
    tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off);
    print_dbg_msg(tty, inst,         "<- original instruction: branch patching error", 0);
    print_dbg_msg(tty, patched_inst, "<- patched  instruction: branch patching error", 0);
#ifdef LUCY_DBG
    VM_Version::z_SIGSEGV();
#endif
    ShouldNotReachHere();
  }
  return patched_inst;
}
+
+// Only called when binding labels (share/vm/asm/assembler.cpp)
+// Pass arguments as intended. Do not pre-calculate distance.
+void MacroAssembler::pd_patch_instruction(address branch, address target) {
+  unsigned long stub_inst;
+  int           inst_len = get_instruction(branch, &stub_inst);
+
+  set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len);
+}
+
+
+// Extract relative address (aka offset).
+// inv_simm16 works for 4-byte instructions only.
+// compare and branch instructions are 6-byte and have a 16bit offset "in the middle".
long MacroAssembler::get_pcrel_offset(unsigned long inst) {

  if (MacroAssembler::is_pcrelative_short(inst)) {
    // Distinguish 4-byte instructions (high word of the 64-bit container is
    // zero, offset in the low halfwords) from 6-byte instructions whose
    // 16-bit offset sits at bit position 16..31 of the 48-bit encoding.
    if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) {
      return RelAddr::inv_pcrel_off16(inv_simm16(inst));
    } else {
      return RelAddr::inv_pcrel_off16(inv_simm16_48(inst));
    }
  }

  if (MacroAssembler::is_pcrelative_long(inst)) {
    // 32-bit halfword offset.
    return RelAddr::inv_pcrel_off32(inv_simm32(inst));
  }

  print_dbg_msg(tty, inst, "not a pcrelative instruction", 6);
#ifdef LUCY_DBG
  VM_Version::z_SIGSEGV();
#else
  ShouldNotReachHere();
#endif
  return -1;
}
+
// Extract the relative offset from the instruction at pc.
// Debug builds first verify the instruction is pc-relative at all and dump
// the surrounding code range if not; product builds decode unconditionally.
long MacroAssembler::get_pcrel_offset(address pc) {
  unsigned long inst;
  unsigned int  len = get_instruction(pc, &inst);

#ifdef ASSERT
  long offset;
  if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) {
    offset = get_pcrel_offset(inst);
  } else {
    offset = -1;  // Error indication, reported below.
  }

  if (offset == -1) {
    dump_code_range(tty, pc, 32, "not a pcrelative instruction");
#ifdef LUCY_DBG
    VM_Version::z_SIGSEGV();
#else
    ShouldNotReachHere();
#endif
  }
  return offset;
#else
  return get_pcrel_offset(inst);
#endif // ASSERT
}
+
+// Get target address from pc-relative instructions.
+address MacroAssembler::get_target_addr_pcrel(address pc) {
+  assert(is_pcrelative_long(pc), "not a pcrelative instruction");
+  return pc + get_pcrel_offset(pc);
+}
+
+// Patch pc relative load address.
+void MacroAssembler::patch_target_addr_pcrel(address pc, address con) {
+  unsigned long inst;
+  // Offset is +/- 2**32 -> use long.
+  ptrdiff_t distance = con - pc;
+
+  get_instruction(pc, &inst);
+
+  if (is_pcrelative_short(inst)) {
+    *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc);  // Instructions are at least 2-byte aligned, no test required.
+
+    // Some extra safety net.
+    if (!RelAddr::is_in_range_of_RelAddr16(distance)) {
+      print_dbg_msg(tty, inst, "distance out of range (16bit)", 4);
+      dump_code_range(tty, pc, 32, "distance out of range (16bit)");
+      guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16");
+    }
+    return;
+  }
+
+  if (is_pcrelative_long(inst)) {
+    *(int *)(pc+2)   = RelAddr::pcrel_off32(con, pc);
+
+    // Some Extra safety net.
+    if (!RelAddr::is_in_range_of_RelAddr32(distance)) {
+      print_dbg_msg(tty, inst, "distance out of range (32bit)", 6);
+      dump_code_range(tty, pc, 32, "distance out of range (32bit)");
+      guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32");
+    }
+    return;
+  }
+
+  guarantee(false, "not a pcrelative instruction to patch!");
+}
+
+// "Current PC" here means the address just behind the basr instruction.
+address MacroAssembler::get_PC(Register result) {
+  z_basr(result, Z_R0); // Don't branch, just save next instruction address in result.
+  return pc();
+}
+
+// Get current PC + offset.
+// Offset given in bytes, must be even!
+// "Current PC" here means the address of the larl instruction plus the given offset.
+address MacroAssembler::get_PC(Register result, int64_t offset) {
+  address here = pc();
+  z_larl(result, offset/2); // Save target instruction address in result.
+  return here + offset;
+}
+
+// Resize_frame with SP(new) = SP(old) - [offset].
+void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp)
+{
+  assert_different_registers(offset, fp, Z_SP);
+  if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }
+
+  z_sgr(Z_SP, offset);
+  z_stg(fp, _z_abi(callers_sp), Z_SP);
+}
+
+// Resize_frame with SP(new) = [addr].
+void MacroAssembler::resize_frame_absolute(Register addr, Register fp, bool load_fp) {
+  assert_different_registers(addr, fp, Z_SP);
+  if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }
+
+  if (addr != Z_R0) {
+    // Minimize stalls by not using Z_SP immediately after update.
+    z_stg(fp, _z_abi(callers_sp), addr);
+    z_lgr(Z_SP, addr);
+  } else {
+    z_lgr(Z_SP, addr);
+    z_stg(fp, _z_abi(callers_sp), Z_SP);
+  }
+}
+
+// Resize_frame with SP(new) = SP(old) + offset.
void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) {
  assert_different_registers(fp, Z_SP);
  // Optionally fetch the caller's SP (back link) before SP is modified.
  if (load_fp) z_lg(fp, _z_abi(callers_sp), Z_SP);

  // Can the back link slot of the new frame be addressed from the old SP?
  if (Displacement::is_validDisp((int)_z_abi(callers_sp) + offset.constant_or_zero())) {
    // Minimize stalls by first using, then updating Z_SP.
    // Do that only if we have a small positive offset or if ExtImm are available.
    z_stg(fp, Address(Z_SP, offset, _z_abi(callers_sp)));
    add64(Z_SP, offset);
  } else {
    add64(Z_SP, offset);
    z_stg(fp, _z_abi(callers_sp), Z_SP);
  }
}
+
// Push a stack frame of size given in `bytes' (negated frame size if
// bytes_with_inverted_sign) and store the caller's SP as the back link.
// old_sp either already holds the current SP, or receives it if copy_sp.
void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) {
#ifdef ASSERT
  assert_different_registers(bytes, old_sp, Z_SP);
  if (!copy_sp) {
    // Caller claims old_sp already equals the current SP; verify.
    z_cgr(old_sp, Z_SP);
    asm_assert_eq("[old_sp]!=[Z_SP]", 0x211);
  }
#endif
  if (copy_sp) { z_lgr(old_sp, Z_SP); }
  if (bytes_with_inverted_sign) {
    // bytes holds -framesize: address the new frame top via index
    // addressing (Z_SP + bytes), then lower SP the same way.
    z_stg(old_sp, 0, bytes, Z_SP);
    add2reg_with_index(Z_SP, 0, bytes, Z_SP);
  } else {
    z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster.
    z_stg(old_sp, 0, Z_SP);
  }
}
+
// Push a stack frame of `bytes' size (rounded up to frame alignment) and
// store the back link. Returns the actually pushed (aligned) frame size.
unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) {
  long offset = Assembler::align(bytes, frame::alignment_in_bytes);

  if (Displacement::is_validDisp(-offset)) {
    // Minimize stalls by first using, then updating Z_SP.
    // Do that only if we have ExtImm available.
    z_stg(Z_SP, -offset, Z_SP);
    add2reg(Z_SP, -offset);
  } else {
    if (scratch != Z_R0 && scratch != Z_R1) {
      z_stg(Z_SP, -offset, Z_SP);
      add2reg(Z_SP, -offset);
    } else {   // scratch == Z_R0 || scratch == Z_R1
      // NOTE(review): presumably add2reg needs Z_R0/Z_R1 internally for the
      // large offset, so save the old SP in scratch first — confirm.
      z_lgr(scratch, Z_SP);
      add2reg(Z_SP, -offset);
      z_stg(scratch, 0, Z_SP);
    }
  }
  return offset;
}
+
+// Push a frame of size `bytes' plus abi160 on top.
+unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) {
+  BLOCK_COMMENT("push_frame_abi160 {");
+  unsigned int res = push_frame(bytes + frame::z_abi_160_size);
+  BLOCK_COMMENT("} push_frame_abi160");
+  return res;
+}
+
+// Pop current C frame.
// Restores SP from the back link (callers_sp slot) of the current frame.
void MacroAssembler::pop_frame() {
  BLOCK_COMMENT("pop_frame:");
  Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
+  if (allow_relocation) {
+    call_c(entry_point);
+  } else {
+    call_c_static(entry_point);
+  }
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point) {
+  bool allow_relocation = true;
+  call_VM_leaf_base(entry_point, allow_relocation);
+}
+
// Perform a call into the VM runtime (entry_point) with the current thread
// passed as implicit first argument. Sets up and tears down the
// last-Java-frame, optionally checks for pending exceptions, and fetches
// the oop result from the thread if requested.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register last_java_sp,
                                  address  entry_point,
                                  bool     allow_relocation,
                                  bool     check_exceptions) { // Defaults to true.
  // Allow_relocation indicates, if true, that the generated code shall
  // be fit for code relocation or referenced data relocation. In other
  // words: all addresses must be considered variable. PC-relative addressing
  // is not possible then.
  // On the other hand, if (allow_relocation == false), addresses and offsets
  // may be considered stable, enabling us to take advantage of some PC-relative
  // addressing tweaks. These might improve performance and reduce code size.

  // Determine last_java_sp register.
  if (!last_java_sp->is_valid()) {
    last_java_sp = Z_SP;  // Load Z_SP as SP.
  }

  // Record the Java frame anchor so the VM can walk the stack.
  set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation);

  // ARG1 must hold thread address.
  z_lgr(Z_ARG1, Z_thread);

  address return_pc = NULL;
  if (allow_relocation) {
    return_pc = call_c(entry_point);
  } else {
    return_pc = call_c_static(entry_point);
  }

  reset_last_Java_frame(allow_relocation);

  // C++ interp handles this in the interpreter.
  check_and_handle_popframe(Z_thread);
  check_and_handle_earlyret(Z_thread);

  // Check for pending exceptions.
  if (check_exceptions) {
    // Check for pending exceptions (java_thread is set upon return).
    load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));

    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach.

    Label ok;
    z_bre(ok); // Bcondequal is the same as bcondZero.
    call_stub(StubRoutines::forward_exception_entry());
    bind(ok);
  }

  // Get oop result if there is one and reset the value in the thread.
  if (oop_result->is_valid()) {
    get_vm_result(oop_result);
  }

  _last_calls_return_pc = return_pc;  // Wipe out other (error handling) calls.
}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+                                  Register last_java_sp,
+                                  address  entry_point,
+                                  bool     check_exceptions) { // Defaults to true.
+  bool allow_relocation = true;
+  call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions);
+}
+
+// VM calls without explicit last_java_sp.
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
+  // Call takes possible detour via InterpreterMacroAssembler.
+  call_VM_base(oop_result, noreg, entry_point, true, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg_1);
+  call_VM(oop_result, entry_point, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg_1);
+  assert(arg_2 != Z_ARG2, "smashed argument");
+  lgr_if_needed(Z_ARG3, arg_2);
+  call_VM(oop_result, entry_point, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
+                             Register arg_3, bool check_exceptions) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg_1);
+  assert(arg_2 != Z_ARG2, "smashed argument");
+  lgr_if_needed(Z_ARG3, arg_2);
+  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
+  lgr_if_needed(Z_ARG4, arg_3);
+  call_VM(oop_result, entry_point, check_exceptions);
+}
+
+// VM static calls without explicit last_java_sp.
+
+void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) {
+  // Call takes possible detour via InterpreterMacroAssembler.
+  call_VM_base(oop_result, noreg, entry_point, false, check_exceptions);
+}
+
+void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2,
+                                    Register arg_3, bool check_exceptions) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg_1);
+  assert(arg_2 != Z_ARG2, "smashed argument");
+  lgr_if_needed(Z_ARG3, arg_2);
+  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
+  lgr_if_needed(Z_ARG4, arg_3);
+  call_VM_static(oop_result, entry_point, check_exceptions);
+}
+
+// VM calls with explicit last_java_sp.
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) {
+  // Call takes possible detour via InterpreterMacroAssembler.
+  call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
+   // Z_ARG1 is reserved for the thread.
+   lgr_if_needed(Z_ARG2, arg_1);
+   call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
+                             Register arg_2, bool check_exceptions) {
+   // Z_ARG1 is reserved for the thread.
+   lgr_if_needed(Z_ARG2, arg_1);
+   assert(arg_2 != Z_ARG2, "smashed argument");
+   lgr_if_needed(Z_ARG3, arg_2);
+   call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
+                             Register arg_2, Register arg_3, bool check_exceptions) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg_1);
+  assert(arg_2 != Z_ARG2, "smashed argument");
+  lgr_if_needed(Z_ARG3, arg_2);
+  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
+  lgr_if_needed(Z_ARG4, arg_3);
+  call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
+}
+
+// VM leaf calls.
+
+void MacroAssembler::call_VM_leaf(address entry_point) {
+  // Call takes possible detour via InterpreterMacroAssembler.
+  call_VM_leaf_base(entry_point, true);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
+  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
+  call_VM_leaf(entry_point);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
+  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
+  assert(arg_2 != Z_ARG1, "smashed argument");
+  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
+  call_VM_leaf(entry_point);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
+  assert(arg_2 != Z_ARG1, "smashed argument");
+  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
+  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
+  if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
+  call_VM_leaf(entry_point);
+}
+
+// Static VM leaf calls.
+// Really static VM leaf calls are never patched.
+
+void MacroAssembler::call_VM_leaf_static(address entry_point) {
+  // Call takes possible detour via InterpreterMacroAssembler.
+  call_VM_leaf_base(entry_point, false);
+}
+
+void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) {
+  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
+  call_VM_leaf_static(entry_point);
+}
+
+void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) {
+  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
+  assert(arg_2 != Z_ARG1, "smashed argument");
+  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
+  call_VM_leaf_static(entry_point);
+}
+
+void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
+  assert(arg_2 != Z_ARG1, "smashed argument");
+  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
+  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
+  if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
+  call_VM_leaf_static(entry_point);
+}
+
+// Don't use detour via call_c(reg).
+address MacroAssembler::call_c(address function_entry) {
+  load_const(Z_R1, function_entry);
+  return call(Z_R1);
+}
+
+// Variant for really static (non-relocatable) calls which are never patched.
address MacroAssembler::call_c_static(address function_entry) {
  // Load the target directly (not via a patchable constant) — the emitted
  // code therefore must never be relocated or patched.
  load_absolute_address(Z_R1, function_entry);
#if 0 // def ASSERT
  // Disabled self-check: verify that the call site did not move by
  // comparing against a freshly materialized constant; trap on mismatch.
  load_const_optimized(Z_R0, function_entry);
  z_cgr(Z_R1, Z_R0);
  z_brc(bcondEqual, 3);
  z_illtrap(0xba);
#endif
  return call(Z_R1);
}
+
+address MacroAssembler::call_c_opt(address function_entry) {
+  bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */);
+  _last_calls_return_pc = success ? pc() : NULL;
+  return _last_calls_return_pc;
+}
+
+// Identify a call_far_patchable instruction: LARL + LG + BASR
+//
+//    nop                   ; optionally, if required for alignment
+//    lgrl rx,A(TOC entry)  ; PC-relative access into constant pool
+//    basr Z_R14,rx         ; end of this instruction must be aligned to a word boundary
+//
+// Code pattern will eventually get patched into variant2 (see below for detection code).
+//
+bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) {
+  address iaddr = instruction_addr;
+
+  // Check for the actual load instruction.
+  if (!is_load_const_from_toc(iaddr)) { return false; }
+  iaddr += load_const_from_toc_size();
+
+  // Check for the call (BASR) instruction, finally.
+  assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch");
+  return is_call_byregister(iaddr);
+}
+
+// Identify a call_far_patchable instruction: BRASL
+//
// Code pattern that suits atomic patching:
+//    nop                       ; Optionally, if required for alignment.
+//    nop    ...                ; Multiple filler nops to compensate for size difference (variant0 is longer).
+//    nop                       ; For code pattern detection: Prepend each BRASL with a nop.
+//    brasl  Z_R14,<reladdr>    ; End of code must be 4-byte aligned !
+bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) {
+  const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size());
+
+  // Check for correct number of leading nops.
+  address iaddr;
+  for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) {
+    if (!is_z_nop(iaddr)) { return false; }
+  }
+  assert(iaddr == call_addr, "sanity");
+
+  // --> Check for call instruction.
+  if (is_call_far_pcrelative(call_addr)) {
+    assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch");
+    return true;
+  }
+
+  return false;
+}
+
+// Emit a NOT mt-safely patchable 64 bit absolute call.
+// If toc_offset == -2, then the destination of the call (= target) is emitted
+//                      to the constant pool and a runtime_call relocation is added
+//                      to the code buffer.
+// If toc_offset != -2, target must already be in the constant pool at
+//                      _ctableStart+toc_offset (a caller can retrieve toc_offset
+//                      from the runtime_call relocation).
+// Special handling of emitting to scratch buffer when there is no constant pool.
+// Slightly changed code pattern. We emit an additional nop if we would
+// not end emitting at a word aligned address. This is to ensure
+// an atomically patchable displacement in brasl instructions.
+//
+// A call_far_patchable comes in different flavors:
+//  - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register)
//  - LGRL(CP) / BR          (address in constant pool, pc-relative access)
+//  - BRASL                  (relative address of call target coded in instruction)
+// All flavors occupy the same amount of space. Length differences are compensated
+// by leading nops, such that the instruction sequence always ends at the same
+// byte offset. This is required to keep the return offset constant.
+// Furthermore, the return address (the end of the instruction sequence) is forced
+// to be on a 4-byte boundary. This is required for atomic patching, should we ever
+// need to patch the call target of the BRASL flavor.
+// RETURN value: false, if no constant pool entry could be allocated, true otherwise.
bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) {
  // Get current pc and ensure word alignment for end of instr sequence.
  const address start_pc = pc();
  const intptr_t       start_off = offset();
  assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address");
  const ptrdiff_t      dist      = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop.
  const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit();
  const bool emit_relative_call  = !emit_target_to_pool &&
                                   RelAddr::is_in_range_of_RelAddr32(dist) &&
                                   ReoptimizeCallSequences &&
                                   !code_section()->scratch_emit();

  if (emit_relative_call) {
    // Add padding to get the same size as below.
    const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size();
    unsigned int current_padding;
    for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); }
    assert(current_padding == padding, "sanity");

    // relative call: len = 2(nop) + 6 (brasl)
    // CodeBlob resize cannot occur in this case because
    // this call is emitted into pre-existing space.
    z_nop(); // Prepend each BRASL with a nop.
    z_brasl(Z_R14, target);
  } else {
    // absolute call: Get address from TOC.
    // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8}
    if (emit_target_to_pool) {
      // When emitting the call for the first time, we do not need to use
      // the pc-relative version. It will be patched anyway, when the code
      // buffer is copied.
      // Relocation is not needed when !ReoptimizeCallSequences.
      relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none;
      AddressLiteral dest(target, rt);
      // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills
      // inst_mark(). Reset if possible.
      bool reset_mark = (inst_mark() == pc());
      tocOffset = store_oop_in_toc(dest);
      if (reset_mark) { set_inst_mark(); }
      if (tocOffset == -1) {
        return false; // Couldn't create constant pool entry.
      }
    }
    assert(offset() == start_off, "emit no code before this point!");

    // Compute the position of the TOC slot that holds the target address.
    address tocPos = pc() + tocOffset;
    if (emit_target_to_pool) {
      tocPos = code()->consts()->start() + tocOffset;
    }
    // Load the target from the TOC slot and call via register.
    load_long_pcrelative(Z_R14, tocPos);
    z_basr(Z_R14, Z_R14);
  }

#ifdef ASSERT
  // Assert that we can identify the emitted call.
  assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call");
  assert(offset() == start_off+call_far_patchable_size(), "wrong size");

  if (emit_target_to_pool) {
    assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target,
           "wrong encoding of dest address");
  }
#endif
  return true; // success
}
+
+// Identify a call_far_patchable instruction.
+// For more detailed information see header comment of call_far_patchable.
+bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) {
+  return is_call_far_patchable_variant2_at(instruction_addr)  || // short version: BRASL
+         is_call_far_patchable_variant0_at(instruction_addr);    // long version LARL + LG + BASR
+}
+
+// Does the call_far_patchable instruction use a pc-relative encoding
+// of the call destination?
+bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) {
+  // Variant 2 is pc-relative.
+  return is_call_far_patchable_variant2_at(instruction_addr);
+}
+
+bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) {
+  // Prepend each BRASL with a nop.
+  return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size());  // Match at position after one nop required.
+}
+
+// Set destination address of a call_far_patchable instruction.
// Re-emits the complete call sequence in place with the new destination
// (NOT mt-safe, see the emitter's header comment), then flushes the ICache.
void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) {
  ResourceMark rm;

  // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit).
  int code_size = MacroAssembler::call_far_patchable_size();
  CodeBuffer buf(instruction_addr, code_size);
  MacroAssembler masm(&buf);
  masm.call_far_patchable(dest, tocOffset);
  ICache::invalidate_range(instruction_addr, code_size); // Empty on z.
}
+
+// Get dest address of a call_far_patchable instruction.
// Note: the ctable parameter is unused here; kept for interface compatibility.
address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) {
  // Dynamic TOC: absolute address in constant pool.
  // Check variant2 first, it is more frequent.

  // Relative address encoded in call instruction.
  if (is_call_far_patchable_variant2_at(instruction_addr)) {
    return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop.

  // Absolute address in constant pool.
  } else if (is_call_far_patchable_variant0_at(instruction_addr)) {
    address iaddr = instruction_addr;

    // The load instruction encodes the distance to its TOC slot; the slot
    // itself holds the absolute destination address.
    long    tocOffset = get_load_const_from_toc_offset(iaddr);
    address tocLoc    = iaddr + tocOffset;
    return *(address *)(tocLoc);
  } else {
    // Neither flavor matched: dump diagnostics and give up.
    fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr);
    fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n",
            *(unsigned long*)instruction_addr,
            *(unsigned long*)(instruction_addr+8),
            call_far_patchable_size());
    Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size());
    ShouldNotReachHere();
    return NULL;
  }
}
+
+// Emit a single alignment nop if the patchable call emitted at pc would
+// otherwise require one (see call_far_patchable_requires_alignment_nop).
+void MacroAssembler::align_call_far_patchable(address pc) {
+  if (!call_far_patchable_requires_alignment_nop(pc)) { return; }
+  z_nop();
+}
+
+// Intentionally empty on this platform: no earlyret handling is done here.
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
+}
+
+// Intentionally empty on this platform: no popframe handling is done here.
+void MacroAssembler::check_and_handle_popframe(Register java_thread) {
+}
+
+// Read from the polling page.
+// Use TM or TMY instruction, depending on read offset.
+//   offset = 0: Use TM, safepoint polling.
+//   offset < 0: Use TMY, profiling safepoint polling.
+// Note: the two forms use distinct masks (mask_safepoint vs. mask_profiling),
+// which is what is_load_from_polling_page() keys on when recognizing them.
+void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) {
+  if (Immediate::is_uimm12(offset)) {
+    z_tm(offset, polling_page_address, mask_safepoint);
+  } else {
+    // Offset does not fit the short TM displacement; use the long-displacement TMY.
+    z_tmy(offset, polling_page_address, mask_profiling);
+  }
+}
+
+// Check whether z_instruction is a read access to the polling page
+// which was emitted by load_from_polling_page(..).
+//   instr_loc - address of the instruction to inspect.
+// Returns true only for a TM/TMY shaped exactly like the emitter produces:
+//   TM  0(ra), mask_safepoint   (safepoint poll, 4-byte form)
+//   TMY d(ra), mask_profiling   (profiling poll, d < 0, 6-byte form)
+bool MacroAssembler::is_load_from_polling_page(address instr_loc) {
+  unsigned long z_instruction;
+  unsigned int  ilen = get_instruction(instr_loc, &z_instruction);
+
+  if (ilen == 2) { return false; } // It's none of the allowed instructions.
+
+  if (ilen == 4) {
+    if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail.
+
+    int ms = inv_mask(z_instruction,8,32);  // mask
+    int ra = inv_reg(z_instruction,16,32);  // base register
+    int ds = inv_uimm12(z_instruction);     // displacement
+
+    if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) {
+      return false; // It's not a z_tm(0, ra, mask_safepoint). Fail.
+    }
+
+  } else { /* if (ilen == 6) */
+
+    assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y).");
+
+    if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail.
+
+    int ms = inv_mask(z_instruction,8,48);  // mask
+    int ra = inv_reg(z_instruction,16,48);  // base register
+    int ds = inv_simm20(z_instruction);     // displacement
+
+    // Validate the decoded fields, analogous to the TM case above (this check
+    // was missing; the decoded values were previously unused). The emitter
+    // produces TMY only for profiling polls: negative displacement,
+    // nonzero base register, mask_profiling.
+    if (!(ds < 0 && ra != 0 && ms == mask_profiling)) {
+      return false; // It's not a z_tmy(d<0, ra, mask_profiling). Fail.
+    }
+  }
+
+  return true;
+}
+
+// Extract poll address from instruction and ucontext.
+//   instr_loc - address of the faulting TM/TMY poll instruction.
+//   ucontext  - signal context holding the general register values at fault time.
+// Returns base-register content + displacement, i.e. the effective address
+// the poll instruction accessed. Guarantees failure for any other instruction.
+address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) {
+  assert(ucontext != NULL, "must have ucontext");
+  ucontext_t* uc = (ucontext_t*) ucontext;
+  unsigned long z_instruction;
+  unsigned int ilen = get_instruction(instr_loc, &z_instruction);
+
+  if (ilen == 4 && is_z_tm(z_instruction)) {
+    int ra = inv_reg(z_instruction, 16, 32);  // base register
+    int ds = inv_uimm12(z_instruction);       // displacement
+    address addr = (address)uc->uc_mcontext.gregs[ra];
+    return addr + ds;
+  } else if (ilen == 6 && is_z_tmy(z_instruction)) {
+    int ra = inv_reg(z_instruction, 16, 48);  // base register
+    int ds = inv_simm20(z_instruction);       // displacement
+    address addr = (address)uc->uc_mcontext.gregs[ra];
+    return addr + ds;
+  }
+
+  ShouldNotReachHere();
+  return NULL;
+}
+
+// Extract poll register (the base register of the TM/TMY poll access)
+// from the instruction at instr_loc. Guarantees failure for anything
+// that is not a TM or TMY instruction.
+uint MacroAssembler::get_poll_register(address instr_loc) {
+  unsigned long z_instr;
+  unsigned int  ilen = get_instruction(instr_loc, &z_instr);
+
+  // TM (4 bytes) and TMY (6 bytes) both carry the base register in the
+  // same bit field; only the instruction length differs.
+  if (ilen == 4) {
+    if (is_z_tm(z_instr)) {
+      return (uint)inv_reg(z_instr, 16, 32);  // base register
+    }
+  } else if (ilen == 6) {
+    if (is_z_tmy(z_instr)) {
+      return (uint)inv_reg(z_instr, 16, 48);  // base register
+    }
+  }
+
+  ShouldNotReachHere();
+  return 0;
+}
+
+// Not supported on this platform: callers must never reach this code path
+// (enforced by ShouldNotCallThis).
+bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) {
+  ShouldNotCallThis();
+  return false;
+}
+
+// Write serialization page so VM thread can do a pseudo remote membar
+// We use the current thread pointer to calculate a thread specific
+// offset to write to within the page. This minimizes bus traffic
+// due to cache line collision.
+//   thread - register holding the JavaThread pointer (read only).
+//   tmp1   - temp; receives the serialization page base address.
+//   tmp2   - temp; receives the thread-specific in-page offset.
+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
+  assert_different_registers(tmp1, tmp2);
+  z_sllg(tmp2, thread, os::get_serialize_page_shift_count());
+  load_const_optimized(tmp1, (long) os::get_memory_serialize_page());
+
+  // Mask the shifted thread pointer down to the in-page offset.
+  // Use the short AND-immediate form when the mask fits 16 bits;
+  // in both cases zero-extend afterwards.
+  int mask = os::get_serialize_page_mask();
+  if (Immediate::is_uimm16(mask)) {
+    z_nill(tmp2, mask);
+    z_llghr(tmp2, tmp2);
+  } else {
+    z_nilf(tmp2, mask);
+    z_llgfr(tmp2, tmp2);
+  }
+
+  // Release barrier, then store into page base + offset.
+  z_release();
+  z_st(Z_R0, 0, tmp2, tmp1);
+}
+
+// Don't rely on register locking, always use Z_R1 as scratch register instead.
+// Touch the stack page at SP - offset to trigger stack-overflow detection.
+//   offset - positive distance below SP to bang at.
+void MacroAssembler::bang_stack_with_offset(int offset) {
+  // Stack grows down, caller passes positive offset.
+  assert(offset > 0, "must bang with positive offset");
+  if (Displacement::is_validDisp(-offset)) {
+    // Negated offset fits the TMY displacement field: bang directly off SP.
+    z_tmy(-offset, Z_SP, mask_stackbang);
+  } else {
+    // Displacement too large: materialize the address in Z_R1 first.
+    add2reg(Z_R1, -offset, Z_SP);    // Do not destroy Z_SP!!!
+    z_tm(0, Z_R1, mask_stackbang);  // Just banging.
+  }
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
+// Bump-pointer allocation from the current thread's TLAB.
+//   obj               - result: address of the newly allocated object.
+//   var_size_in_bytes - allocation size in a register, or noreg to use the constant.
+//   con_size_in_bytes - allocation size as a constant (used iff var_size_in_bytes == noreg).
+//   t1                - temp; holds the new top ("end") during the bump.
+//   slow_case         - branched to when the TLAB cannot satisfy the request.
+void MacroAssembler::tlab_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register t1,
+                                   Label& slow_case) {
+  assert_different_registers(obj, var_size_in_bytes, t1);
+  Register end = t1;
+  Register thread = Z_thread;
+
+  z_lg(obj, Address(thread, JavaThread::tlab_top_offset()));
+  if (var_size_in_bytes == noreg) {
+    z_lay(end, Address(obj, con_size_in_bytes));
+  } else {
+    z_lay(end, Address(obj, var_size_in_bytes));
+  }
+  // New top must not exceed the TLAB end, else take the slow path.
+  z_cg(end, Address(thread, JavaThread::tlab_end_offset()));
+  branch_optimized(bcondHigh, slow_case);
+
+  // Update the tlab top pointer.
+  z_stg(end, Address(thread, JavaThread::tlab_top_offset()));
+
+  // Recover var_size_in_bytes if necessary.
+  // (end == obj + size, so end - obj restores the size.)
+  if (var_size_in_bytes == end) {
+    z_sgr(var_size_in_bytes, obj);
+  }
+}
+
+// Emitter for interface method lookup.
+//   input: recv_klass, intf_klass, itable_index
+//   output: method_result
+//   kills: itable_index, temp1_reg, Z_R0, Z_R1
+// TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs.
+// If the register is still not needed then, remove it.
+// Walks the receiver klass's itableOffset entries looking for intf_klass;
+// branches to no_such_interface if the table ends (NULL entry) first.
+void MacroAssembler::lookup_interface_method(Register           recv_klass,
+                                             Register           intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register           method_result,
+                                             Register           temp1_reg,
+                                             Register           temp2_reg,
+                                             Label&             no_such_interface) {
+
+  const Register vtable_len = temp1_reg;    // Used to compute itable_entry_addr.
+  const Register itable_entry_addr = Z_R1_scratch;
+  const Register itable_interface = Z_R0_scratch;
+
+  BLOCK_COMMENT("lookup_interface_method {");
+
+  // Load start of itable entries into itable_entry_addr.
+  // The itable starts right after the vtable, so scale the vtable length
+  // by the vtable entry size to skip over it.
+  z_llgf(vtable_len, Address(recv_klass, InstanceKlass::vtable_length_offset()));
+  z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes()));
+
+  // Loop over all itable entries until desired interfaceOop(Rinterface) found.
+  const int vtable_base_offset = in_bytes(InstanceKlass::vtable_start_offset());
+
+  add2reg_with_index(itable_entry_addr,
+                     vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(),
+                     recv_klass, vtable_len);
+
+  const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize;
+  Label     search;
+
+  bind(search);
+
+  // Handle IncompatibleClassChangeError.
+  // If the entry is NULL then we've reached the end of the table
+  // without finding the expected interface, so throw an exception.
+  load_and_test_long(itable_interface, Address(itable_entry_addr));
+  z_bre(no_such_interface);
+
+  add2reg(itable_entry_addr, itable_offset_search_inc);
+  z_cgr(itable_interface, intf_klass);
+  z_brne(search);
+
+  // Entry found and itable_entry_addr points to it, get offset of vtable for interface.
+
+  // itable_entry_addr already advanced past the matching entry, so subtract
+  // one search increment to address the entry's offset field.
+  const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() -
+                                    itableOffsetEntry::interface_offset_in_bytes()) -
+                                   itable_offset_search_inc;
+
+  // Compute itableMethodEntry and get method and entry point
+  // we use addressing with index and displacement, since the formula
+  // for computing the entry's offset has a fixed and a dynamic part,
+  // the latter depending on the matched interface entry and on the case,
+  // that the itable index has been passed as a register, not a constant value.
+  int method_offset = itableMethodEntry::method_offset_in_bytes();
+                           // Fixed part (displacement), common operand.
+  Register itable_offset;  // Dynamic part (index register).
+
+  if (itable_index.is_register()) {
+     // Compute the method's offset in that register, for the formula, see the
+     // else-clause below.
+     itable_offset = itable_index.as_register();
+
+     z_sllg(itable_offset, itable_offset, exact_log2(itableMethodEntry::size() * wordSize));
+     z_agf(itable_offset, vtable_offset_offset, itable_entry_addr);
+  } else {
+    itable_offset = Z_R1_scratch;
+    // Displacement increases.
+    method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant();
+
+    // Load index from itable.
+    z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr);
+  }
+
+  // Finally load the method's oop.
+  z_lg(method_result, method_offset, itable_offset, recv_klass);
+  BLOCK_COMMENT("} lookup_interface_method");
+}
+
+// Lookup for virtual method invocation.
+//   recv_klass    - receiver's klass (read only).
+//   vtable_index  - vtable slot, constant or register; a register index is
+//                   clobbered (scaled in place).
+//   method_result - result: the Method* loaded from the vtable slot.
+void MacroAssembler::lookup_virtual_method(Register           recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register           method_result) {
+  assert_different_registers(recv_klass, vtable_index.register_or_noreg());
+  assert(vtableEntry::size() * wordSize == wordSize,
+         "else adjust the scaling in the code below");
+
+  BLOCK_COMMENT("lookup_virtual_method {");
+
+  const int base = in_bytes(Klass::vtable_start_offset());
+
+  if (vtable_index.is_constant()) {
+    // Load with base + disp.
+    Address vtable_entry_addr(recv_klass,
+                              vtable_index.as_constant() * wordSize +
+                              base +
+                              vtableEntry::method_offset_in_bytes());
+
+    z_lg(method_result, vtable_entry_addr);
+  } else {
+    // Shift index properly and load with base + index + disp.
+    Register vindex = vtable_index.as_register();
+    Address  vtable_entry_addr(recv_klass, vindex,
+                               base + vtableEntry::method_offset_in_bytes());
+
+    z_sllg(vindex, vindex, exact_log2(wordSize));
+    z_lg(method_result, vtable_entry_addr);
+  }
+  BLOCK_COMMENT("} lookup_virtual_method");
+}
+
+// Factor out code to call ic_miss_handler.
+// Generate code to call the inline cache miss handler.
+//
+// In most cases, this code will be generated out-of-line.
+// The method parameters are intended to provide some variability.
+//   ICM          - Label which has to be bound to the start of useful code (past any traps).
+//   trapMarker   - Marking byte for the generated illtrap instructions (if any).
+//                  Any value except 0x00 is supported.
+//                  = 0x00 - do not generate illtrap instructions.
+//                         use nops to fill ununsed space.
+//   requiredSize - required size of the generated code. If the actually
+//                  generated code is smaller, use padding instructions to fill up.
+//                  = 0 - no size requirement, no padding.
+//   scratch      - scratch register to hold branch target address.
+//
+//  The method returns the code offset of the bound label.
+unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) {
+  intptr_t startOffset = offset();
+
+  // Prevent entry at content_begin().
+  if (trapMarker != 0) {
+    z_illtrap(trapMarker);
+  }
+
+  // Load address of inline cache miss code into scratch register
+  // and branch to cache miss handler.
+  BLOCK_COMMENT("IC miss handler {");
+  BIND(ICM);
+  unsigned int   labelOffset = offset();
+  AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub());
+
+  load_const_optimized(scratch, icmiss);
+  z_br(scratch);
+
+  // Fill unused space.
+  // Pad with nops (trapMarker == 0) or illtraps until requiredSize is reached.
+  if (requiredSize > 0) {
+    while ((offset() - startOffset) < requiredSize) {
+      if (trapMarker == 0) {
+        z_nop();
+      } else {
+        z_illtrap(trapMarker);
+      }
+    }
+  }
+  BLOCK_COMMENT("} IC miss handler");
+  return labelOffset;
+}
+
+// Emit the unverified entry point (UEP) of an nmethod: null-check the
+// receiver if needed, then compare the cached klass against the receiver's
+// klass and branch to ic_miss on mismatch.
+void MacroAssembler::nmethod_UEP(Label& ic_miss) {
+  Register ic_reg       = as_Register(Matcher::inline_cache_reg_encode());
+  int      klass_offset = oopDesc::klass_offset_in_bytes();
+  // Explicit receiver null check, unless the implicit check via the
+  // klass load below is sufficient.
+  if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
+    if (VM_Version::has_CompareBranch()) {
+      z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss);
+    } else {
+      z_ltgr(Z_ARG1, Z_ARG1);
+      z_bre(ic_miss);
+    }
+  }
+  // Compare cached class against klass from receiver.
+  compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false);
+  z_brne(ic_miss);
+}
+
+// Fast path of the subtype check: decide via the super-check display when
+// possible, otherwise fall through / branch to the slow path.
+//   sub_klass          - klass to test (read only).
+//   super_klass        - klass to test against (read only).
+//   temp1_reg          - temp; required iff super_check_offset must be loaded.
+//   L_success/L_failure/L_slow_path
+//                      - branch targets; NULL means "fall through here".
+//   super_check_offset - offset of the check word, or -1 (the default encoded
+//                        as constant_or_zero() == -1) to load it at runtime.
+void MacroAssembler::check_klass_subtype_fast_path(Register   sub_klass,
+                                                   Register   super_klass,
+                                                   Register   temp1_reg,
+                                                   Label*     L_success,
+                                                   Label*     L_failure,
+                                                   Label*     L_slow_path,
+                                                   RegisterOrConstant super_check_offset) {
+
+  const int sc_offset  = in_bytes(Klass::secondary_super_cache_offset());
+  const int sco_offset = in_bytes(Klass::super_check_offset_offset());
+
+  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+  bool need_slow_path = (must_load_sco ||
+                         super_check_offset.constant_or_zero() == sc_offset);
+
+  // Input registers must not overlap.
+  assert_different_registers(sub_klass, super_klass, temp1_reg);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp1_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  const Register Rsuper_check_offset = temp1_reg;
+
+  // NULL labels fall through; count them to validate the caller's usage.
+  NearLabel L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1 ||
+         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+         "at most one NULL in the batch, usually");
+
+  BLOCK_COMMENT("check_klass_subtype_fast_path {");
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface. Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success);
+
+  // Check the supertype display, which is uint.
+  if (must_load_sco) {
+    z_llgf(Rsuper_check_offset, sco_offset, super_klass);
+    super_check_offset = RegisterOrConstant(Rsuper_check_offset);
+  }
+  Address super_check_addr(sub_klass, super_check_offset, 0);
+  z_cg(super_klass, super_check_addr); // compare w/ displayed supertype
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label)                                                \
+  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
+  else                            { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/
+
+  if (super_check_offset.is_register()) {
+    branch_optimized(Assembler::bcondEqual, *L_success);
+    z_cfi(super_check_offset.as_register(), sc_offset);
+    if (L_failure == &L_fallthrough) {
+      branch_optimized(Assembler::bcondEqual, *L_slow_path);
+    } else {
+      branch_optimized(Assembler::bcondNotEqual, *L_failure);
+      final_jmp(*L_slow_path);
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      branch_optimized(Assembler::bcondEqual, *L_success);
+    } else {
+      branch_optimized(Assembler::bcondNotEqual, *L_slow_path);
+      final_jmp(*L_success);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      branch_optimized(Assembler::bcondEqual, *L_success);
+    } else {
+      branch_optimized(Assembler::bcondNotEqual, *L_failure);
+      final_jmp(*L_success);
+    }
+  }
+
+  bind(L_fallthrough);
+#undef final_jmp
+  BLOCK_COMMENT("} check_klass_subtype_fast_path");
+  // fallthru (to slow path)
+}
+
+// Slow path of the subtype check: scan the secondary supers array of
+// Rsubklass for Rsuperklass; on a hit, update the secondary super cache.
+//   Rsubklass   - klass to test (read only).
+//   Rsuperklass - klass to test against (read only).
+//   Rarray_ptr  - temp; walks the secondary supers array.
+//   Rlength     - temp; array length, then the (negative) scan index.
+//   L_success/L_failure - branch targets; NULL means "fall through here".
+// Also kills Z_R1 (used as the loop increment/limit for BRXLG).
+void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
+                                                   Register Rsuperklass,
+                                                   Register Rarray_ptr,  // tmp
+                                                   Register Rlength,     // tmp
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  // Input registers must not overlap.
+  // Also check for R1 which is explicitely used here.
+  assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength);
+  NearLabel L_fallthrough, L_loop;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  const int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+
+  const int length_offset = Array<Klass*>::length_offset_in_bytes();
+  const int base_offset   = Array<Klass*>::base_offset_in_bytes();
+
+  // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label)                                                \
+  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
+  else                            branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/
+
+  NearLabel loop_iterate, loop_count, match;
+
+  BLOCK_COMMENT("check_klass_subtype_slow_path {");
+  z_lg(Rarray_ptr, ss_offset, Rsubklass);
+
+  load_and_test_int(Rlength, Address(Rarray_ptr, length_offset));
+  branch_optimized(Assembler::bcondZero, *L_failure);
+
+  // Oops in table are NO MORE compressed.
+  z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match.
+  z_bre(match);                               // Shortcut for array length = 1.
+
+  // No match yet, so we must walk the array's elements.
+  z_lngfr(Rlength, Rlength);
+  z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array
+  z_llill(Z_R1, BytesPerWord);               // Set increment/end index.
+  add2reg(Rlength, 2 * BytesPerWord);        // start index  = -(n-2)*BytesPerWord
+  z_slgr(Rarray_ptr, Rlength);               // start addr: +=  (n-2)*BytesPerWord
+  z_bru(loop_count);
+
+  BIND(loop_iterate);
+  z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match.
+  z_bre(match);
+  BIND(loop_count);
+  z_brxlg(Rlength, Z_R1, loop_iterate);
+
+  // Rsuperklass not found among secondary super classes -> failure.
+  branch_optimized(Assembler::bcondAlways, *L_failure);
+
+  // Got a hit. Return success (zero result). Set cache.
+  // Cache load doesn't happen here. For speed it is directly emitted by the compiler.
+
+  BIND(match);
+
+  z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.
+
+  final_jmp(*L_success);
+
+  // Exit to the surrounding code.
+  BIND(L_fallthrough);
+#undef final_jmp
+  BLOCK_COMMENT("} check_klass_subtype_slow_path");
+}
+
+// Emitter for combining fast and slow path.
+// Branches to L_success if sub_klass is a subtype of super_klass,
+// falls through otherwise. temp1_reg and temp2_reg are killed.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp1_reg,
+                                         Register temp2_reg,
+                                         Label&   L_success) {
+  NearLabel failure;
+  BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name()));
+  // Fast path first; on an undecided result it falls through to the slow path.
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg,
+                                &L_success, &failure, NULL);
+  check_klass_subtype_slow_path(sub_klass, super_klass,
+                                temp1_reg, temp2_reg, &L_success, NULL);
+  BIND(failure);
+  BLOCK_COMMENT("} check_klass_subtype");
+}
+
+// Increment a counter at counter_address when the eq condition code is
+// set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
+void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) {
+  Label l;
+  z_brne(l);   // Skip the increment unless cc == eq on entry.
+  load_const(tmp1_reg, counter_address);
+  add2mem_32(Address(tmp1_reg), 1, tmp2_reg);
+  // The add clobbered the condition code; re-establish eq (which is what
+  // it must have been, since we only reach here via the eq path).
+  z_cr(tmp1_reg, tmp1_reg); // Set cc to eq.
+  bind(l);
+}
+
+// Semantics are dependent on the slow_case label:
+//   If the slow_case label is not NULL, failure to biased-lock the object
+//   transfers control to the location of the slow_case label. If the
+//   object could be biased-locked, control is transferred to the done label.
+//   The condition code is unpredictable.
+//
+//   If the slow_case label is NULL, failure to biased-lock the object results
+//   in a transfer of control to the done label with a condition code of not_equal.
+//   If the biased-lock could be successfully obtained, control is transfered to
+//   the done label with a condition code of equal.
+//   It is mandatory to react on the condition code At the done label.
+//
+// Registers:
+//   obj_reg   - the object to lock (read only).
+//   mark_reg  - holds the object's mark word on entry; killed.
+//   temp_reg  - temp; killed.
+//   temp2_reg - temp; killed (may be Z_R0).
+// Falls through to cas_label (end of this emitter) when CAS locking
+// should be attempted instead.
+void MacroAssembler::biased_locking_enter(Register  obj_reg,
+                                          Register  mark_reg,
+                                          Register  temp_reg,
+                                          Register  temp2_reg,    // May be Z_RO!
+                                          Label    &done,
+                                          Label    *slow_case) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg);
+
+  Label cas_label; // Try, if implemented, CAS locking. Fall thru to slow path otherwise.
+
+  BLOCK_COMMENT("biased_locking_enter {");
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid.
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits.
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits,
+         "biased locking makes assumptions about bit layout");
+  z_lr(temp_reg, mark_reg);
+  z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place);
+  z_chi(temp_reg, markOopDesc::biased_lock_pattern);
+  z_brne(cas_label);  // Try cas if object is not biased, i.e. cannot be biased locked.
+
+  // Compute (prototype | thread) ^ mark, masked by ~age: result is zero
+  // (cc == eq below) iff the object is already biased toward this thread
+  // in the current epoch.
+  load_prototype_header(temp_reg, obj_reg);
+  load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place));
+
+  z_ogr(temp_reg, Z_thread);
+  z_xgr(temp_reg, mark_reg);
+  z_ngr(temp_reg, temp2_reg);
+  if (PrintBiasedLockingStatistics) {
+    increment_counter_eq((address) BiasedLocking::biased_lock_entry_count_addr(), mark_reg, temp2_reg);
+    // Restore mark_reg.
+    z_lg(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
+  }
+  branch_optimized(Assembler::bcondEqual, done);  // Biased lock obtained, return success.
+
+  Label try_revoke_bias;
+  Label try_rebias;
+  Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
+
+  //----------------------------------------------------------------------------
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  z_tmll(temp_reg, markOopDesc::biased_lock_mask_in_place);
+  z_brnaz(try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  z_tmll(temp_reg, markOopDesc::epoch_mask_in_place);
+  z_brnaz(try_rebias);
+
+  //----------------------------------------------------------------------------
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place |
+         markOopDesc::epoch_mask_in_place);
+  z_lgr(temp_reg, Z_thread);
+  z_llgfr(mark_reg, mark_reg);
+  z_ogr(temp_reg, mark_reg);
+
+  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+  // CAS: expected = unbiased header (mark_reg), new = header | thread.
+  z_csg(mark_reg, temp_reg, 0, obj_reg);
+
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+
+  if (PrintBiasedLockingStatistics) {
+    increment_counter_eq((address) BiasedLocking::anonymously_biased_lock_entry_count_addr(),
+                         temp_reg, temp2_reg);
+  }
+  if (slow_case != NULL) {
+    branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way.
+  }
+  branch_optimized(Assembler::bcondAlways, done);           // Biased lock status given in condition code.
+
+  //----------------------------------------------------------------------------
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+
+  z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+  load_prototype_header(temp_reg, obj_reg);
+  z_llgfr(mark_reg, mark_reg);
+
+  z_ogr(temp_reg, Z_thread);
+
+  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+  // CAS: expected = current (masked) header, new = prototype | thread.
+  z_csg(mark_reg, temp_reg, 0, obj_reg);
+
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+
+  if (PrintBiasedLockingStatistics) {
+    increment_counter_eq((address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg, temp2_reg);
+  }
+  if (slow_case != NULL) {
+    branch_optimized(Assembler::bcondNotEqual, *slow_case);  // Biased lock not obtained, need to go the long way.
+  }
+  z_bru(done);           // Biased lock status given in condition code.
+
+  //----------------------------------------------------------------------------
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  load_prototype_header(temp_reg, obj_reg);
+
+  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+  z_csg(mark_reg, temp_reg, 0, obj_reg);
+
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (PrintBiasedLockingStatistics) {
+    // z_cgr(mark_reg, temp2_reg);
+    increment_counter_eq((address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg, temp2_reg);
+  }
+
+  bind(cas_label);
+  BLOCK_COMMENT("} biased_locking_enter");
+}
+
+// Emit the biased-locking unlock fast path.
+//   mark_addr - register holding the object address (mark word is at offset 0).
+//   temp_reg  - scratch register, clobbered.
+//   done      - branch target taken when the lock is biased (unlock is a no-op then).
+void MacroAssembler::biased_locking_exit(Register mark_addr, Register temp_reg, Label& done) {
+  // Check for biased locking unlock case, which is a no-op
+  // Note: we do not have to check the thread ID for two reasons.
+  // First, the interpreter checks for IllegalMonitorStateException at
+  // a higher level. Second, if the bias was revoked while we held the
+  // lock, the object could not be rebiased toward another thread, so
+  // the bias bit would be clear.
+  BLOCK_COMMENT("biased_locking_exit {");
+
+  // Load the mark word and isolate the biased-lock pattern bits.
+  z_lg(temp_reg, 0, mark_addr);
+  z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place);
+
+  // If the pattern matches, the lock is biased: nothing to do, branch to done.
+  z_chi(temp_reg, markOopDesc::biased_lock_pattern);
+  z_bre(done);
+  BLOCK_COMMENT("} biased_locking_exit");
+}
+
+// Compiler fast-path monitor enter (used by C2 and the native wrapper).
+//   oop        - object to lock.
+//   box        - BasicLock box on the stack; its displaced-header slot is written.
+//   temp1/temp2- scratch registers, clobbered.
+//   try_bias   - attempt biased locking first.
+// On exit, the condition code indicates success (EQ) or failure (NE); on NE the
+// caller branches to the slow-path runtime call (_complete_monitor_locking_Java).
+void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
+  Register displacedHeader = temp1;
+  Register currentHeader = temp1;
+  Register temp = temp2;
+  NearLabel done, object_has_monitor;
+
+  BLOCK_COMMENT("compiler_fast_lock_object {");
+
+  // Load markOop from oop into mark.
+  z_lg(displacedHeader, 0, oop);
+
+  if (try_bias) {
+    biased_locking_enter(oop, displacedHeader, temp, Z_R0, done);
+  }
+
+  // Handle existing monitor.
+  if ((EmitSync & 0x01) == 0) {
+    // The object has an existing monitor iff (mark & monitor_value) != 0.
+    guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
+    z_lr(temp, displacedHeader); // 32-bit copy suffices: monitor_value fits in the low half-word.
+    z_nill(temp, markOopDesc::monitor_value);
+    z_brne(object_has_monitor);
+  }
+
+  // Set mark to markOop | markOopDesc::unlocked_value.
+  z_oill(displacedHeader, markOopDesc::unlocked_value);
+
+  // Load Compare Value application register.
+
+  // Initialize the box (must happen before we update the object mark).
+  z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box);
+
+  // Memory Fence (in cmpxchgd)
+  // Compare object markOop with mark and if equal exchange scratch1 with object markOop.
+
+  // If the compare-and-swap succeeded, then we found an unlocked object and we
+  // have now locked it.
+  z_csg(displacedHeader, box, 0, oop);
+  assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture.
+  z_bre(done);
+
+  // We did not see an unlocked object so try the fast recursive case.
+
+  // Check if the stored mark is our own SP (recursive stack lock):
+  // (mark - SP) masked with the page/lock mask must be zero.
+  z_sgr(currentHeader, Z_SP);
+  load_const_optimized(temp, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+
+  z_ngr(currentHeader, temp);
+  //   z_brne(done);
+  //   z_release();
+  // Store the result into the box: 0 indicates a recursive lock, non-zero failure.
+  // The condition code from z_ngr is the function's result.
+  z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box);
+
+  z_bru(done);
+
+  if ((EmitSync & 0x01) == 0) {
+    Register zero = temp;
+    Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
+    bind(object_has_monitor);
+    // The object's monitor m is unlocked iff m->owner == NULL,
+    // otherwise m->owner may contain a thread or a stack address.
+    //
+    // Try to CAS m->owner from NULL to current thread.
+    z_lghi(zero, 0);
+    // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
+    z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
+    // Store a non-null value into the box.
+    z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
+#ifdef ASSERT
+      z_brne(done);
+      // We've acquired the monitor, check some invariants.
+      // Invariant 1: _recursions should be 0.
+      asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
+                              "monitor->_recursions should be 0", -1);
+      z_ltgr(zero, zero); // Set CR=EQ.
+#endif
+  }
+  bind(done);
+
+  BLOCK_COMMENT("} compiler_fast_lock_object");
+  // If locking was successful, CR should indicate 'EQ'.
+  // The compiler or the native wrapper generates a branch to the runtime call
+  // _complete_monitor_locking_Java.
+}
+
+// Compiler fast-path monitor exit, the counterpart of compiler_fast_lock_object.
+//   oop        - object to unlock.
+//   box        - BasicLock box; its displaced-header slot is read.
+//   temp1/temp2- scratch registers, clobbered.
+//   try_bias   - check the biased-locking no-op unlock case first.
+// On exit, CC == EQ indicates success, CC == NE failure (caller goes to runtime).
+void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
+  Register displacedHeader = temp1;
+  Register currentHeader = temp2;
+  Register temp = temp1;
+  Register monitor = temp2;
+
+  Label done, object_has_monitor;
+
+  BLOCK_COMMENT("compiler_fast_unlock_object {");
+
+  if (try_bias) {
+    biased_locking_exit(oop, currentHeader, done);
+  }
+
+  // Find the lock address and load the displaced header from the stack.
+  // if the displaced header is zero, we have a recursive unlock.
+  load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+  z_bre(done);
+
+  // Handle existing monitor.
+  if ((EmitSync & 0x02) == 0) {
+    // The object has an existing monitor iff (mark & monitor_value) != 0.
+    z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
+    guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
+    z_nill(currentHeader, markOopDesc::monitor_value);
+    z_brne(object_has_monitor);
+  }
+
+  // Check if it is still a light weight lock, this is true if we see
+  // the stack address of the basicLock in the markOop of the object
+  // copy box to currentHeader such that csg does not kill it.
+  z_lgr(currentHeader, box);
+  z_csg(currentHeader, displacedHeader, 0, oop);
+  z_bru(done); // Csg sets CR as desired.
+
+  // Handle existing monitor.
+  if ((EmitSync & 0x02) == 0) {
+    bind(object_has_monitor);
+    z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);    // CurrentHeader is tagged with monitor_value set.
+    // The monitor can only be released if it is not recursively held and
+    // is owned by this thread with empty EntryList and cxq; otherwise go
+    // to the runtime (CC stays NE from the failing test).
+    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+    z_brne(done);
+    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+    z_brne(done);
+    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+    z_brne(done);
+    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+    z_brne(done);
+    z_release();
+    // Release the monitor: temp is known to be 0 from the last load_and_test.
+    z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
+  }
+
+  bind(done);
+
+  BLOCK_COMMENT("} compiler_fast_unlock_object");
+  // flag == EQ indicates success
+  // flag == NE indicates failure
+}
+
+// Write to card table for modification at store_addr - register is destroyed afterwards.
+// Emits the CardTable post-barrier: marks the card covering store_addr as dirty (0).
+//   store_addr - address that was stored to; clobbered (becomes the card address).
+//   tmp        - scratch register for the card table base.
+void MacroAssembler::card_write_barrier_post(Register store_addr, Register tmp) {
+  CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableForRS ||
+         bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
+  assert_different_registers(store_addr, tmp);
+  // Card index = store_addr >> card_shift, then add the byte map base
+  // to obtain the card's address.
+  z_srlg(store_addr, store_addr, CardTableModRefBS::card_shift);
+  load_absolute_address(tmp, (address)bs->byte_map_base);
+  z_agr(store_addr, tmp);
+  z_mvi(0, store_addr, 0); // Store byte 0.
+}
+
+#if INCLUDE_ALL_GCS
+
+//------------------------------------------------------
+// General G1 pre-barrier generator.
+// Purpose: record the previous value if it is not null.
+// All non-tmps are preserved.
+//------------------------------------------------------
+void MacroAssembler::g1_write_barrier_pre(Register           Robj,
+                                          RegisterOrConstant offset,
+                                          Register           Rpre_val,      // Ideally, this is a non-volatile register.
+                                          Register           Rval,          // Will be preserved.
+                                          Register           Rtmp1,         // If Rpre_val is volatile, either Rtmp1
+                                          Register           Rtmp2,         // or Rtmp2 has to be non-volatile..
+                                          bool               pre_val_needed // Save Rpre_val across runtime call, caller uses it.
+                                       ) {
+  Label callRuntime, filtered;
+  const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active());
+  const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
+  const int index_offset  = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index());
+  assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp<i> must be Z_R0!!
+
+  BLOCK_COMMENT("g1_write_barrier_pre {");
+
+  // Is marking active?
+  // Note: value is loaded for test purposes only. No further use here.
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+    load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
+  } else {
+    guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
+  }
+  z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
+
+  // Do we need to load the previous value into Rpre_val?
+  if (Robj != noreg) {
+    // Load the previous value...
+    Register ixReg = offset.is_register() ? offset.register_or_noreg() : Z_R0;
+    if (UseCompressedOops) {
+      z_llgf(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
+    } else {
+      z_lg(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
+    }
+  }
+  assert(Rpre_val != noreg, "must have a real register");
+
+  // Is the previous value NULL?
+  // Note: pre_val is loaded, decompressed and stored (directly or via runtime call).
+  //       Register contents is preserved across runtime call if caller requests to do so.
+  z_ltgr(Rpre_val, Rpre_val);
+  z_bre(filtered); // previous value is NULL, so we don't need to record it.
+
+  // Decode the oop now. We know it's not NULL.
+  if (Robj != noreg && UseCompressedOops) {
+    oop_decoder(Rpre_val, Rpre_val, /*maybeNULL=*/false);
+  }
+
+  // OK, it's not filtered, so we'll need to call enqueue.
+
+  // We can store the original value in the thread's buffer
+  // only if index > 0. Otherwise, we need runtime to handle.
+  // (The index field is typed as size_t.)
+  Register Rbuffer = Rtmp1, Rindex = Rtmp2;
+
+  z_lg(Rbuffer, buffer_offset, Z_thread);
+
+  load_and_test_long(Rindex, Address(Z_thread, index_offset));
+  z_bre(callRuntime); // If index == 0, goto runtime.
+
+  add2reg(Rindex, -wordSize); // Decrement index.
+  z_stg(Rindex, index_offset, Z_thread);
+
+  // Record the previous value.
+  z_stg(Rpre_val, 0, Rbuffer, Rindex);
+  z_bru(filtered);  // We are done.
+
+  // Note: these assignments execute at code-generation time, not at runtime.
+  Rbuffer = noreg;  // end of life
+  Rindex  = noreg;  // end of life
+
+  bind(callRuntime);
+
+  // Save Rpre_val (result) over runtime call.
+  // Requires Rtmp1, Rtmp2, or Rpre_val to be non-volatile.
+  Register Rpre_save = Rpre_val;
+  if (pre_val_needed && Rpre_val->is_volatile()) {
+    guarantee(!Rtmp1->is_volatile() || !Rtmp2->is_volatile(), "oops!");
+    Rpre_save = !Rtmp1->is_volatile() ? Rtmp1 : Rtmp2;
+  }
+  lgr_if_needed(Rpre_save, Rpre_val);
+
+  // Preserve inputs by spilling them into the top frame.
+  // Each volatile register is spilled to a fixed slot derived from its
+  // encoding, so the restore below finds it again.
+  if (Robj != noreg && Robj->is_volatile()) {
+    z_stg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
+  }
+  if (offset.is_register() && offset.as_register()->is_volatile()) {
+    Register Roff = offset.as_register();
+    z_stg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
+  }
+  if (Rval != noreg && Rval->is_volatile()) {
+    z_stg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
+  }
+
+  // Push frame to protect top frame with return pc and spilled register values.
+  save_return_pc();
+  push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
+
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, Z_thread);
+
+  pop_frame();
+  restore_return_pc();
+
+  // Restore spilled values.
+  if (Robj != noreg && Robj->is_volatile()) {
+    z_lg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
+  }
+  if (offset.is_register() && offset.as_register()->is_volatile()) {
+    Register Roff = offset.as_register();
+    z_lg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
+  }
+  if (Rval != noreg && Rval->is_volatile()) {
+    z_lg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
+  }
+
+  // Restore Rpre_val (result) after runtime call.
+  lgr_if_needed(Rpre_val, Rpre_save);
+
+  bind(filtered);
+  BLOCK_COMMENT("} g1_write_barrier_pre");
+}
+
+// General G1 post-barrier generator.
+// Purpose: Store cross-region card.
+void MacroAssembler::g1_write_barrier_post(Register Rstore_addr,
+                                           Register Rnew_val,
+                                           Register Rtmp1,
+                                           Register Rtmp2,
+                                           Register Rtmp3) {
+  Label callRuntime, filtered;
+
+  assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3.
+
+  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
+
+  BLOCK_COMMENT("g1_write_barrier_post {");
+
+  // Does store cross heap regions?
+  // It does if the two addresses specify different grain addresses.
+  if (G1RSBarrierRegionFilter) {
+    if (VM_Version::has_DistinctOpnds()) {
+      z_xgrk(Rtmp1, Rstore_addr, Rnew_val);
+    } else {
+      z_lgr(Rtmp1, Rstore_addr);
+      z_xgr(Rtmp1, Rnew_val);
+    }
+    z_srag(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes);
+    z_bre(filtered);
+  }
+
+  // Crosses regions, storing NULL?
+#ifdef ASSERT
+  z_ltgr(Rnew_val, Rnew_val);
+  asm_assert_ne("null oop not allowed (G1)", 0x255); // TODO: also on z? Checked by caller on PPC64, so following branch is obsolete:
+  z_bre(filtered);  // Safety net: don't break if we have a NULL oop.
+#endif
+  Rnew_val = noreg; // end of lifetime
+
+  // Storing region crossing non-NULL, is card already dirty?
+  assert(sizeof(*bs->byte_map_base) == sizeof(jbyte), "adjust this code");
+  assert_different_registers(Rtmp1, Rtmp2, Rtmp3);
+  // Make sure not to use Z_R0 for any of these registers.
+  Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3;
+  Register Rbase      = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3;
+
+  // calculate address of card
+  load_const_optimized(Rbase, (address)bs->byte_map_base);        // Card table base.
+  z_srlg(Rcard_addr, Rstore_addr, CardTableModRefBS::card_shift); // Index into card table.
+  add2reg_with_index(Rcard_addr, 0, Rcard_addr, Rbase);           // Explicit calculation needed for cli.
+  Rbase = noreg; // end of lifetime
+
+  // Filter young.
+  assert((unsigned int)G1SATBCardTableModRefBS::g1_young_card_val() <= 255, "otherwise check this code");
+  z_cli(0, Rcard_addr, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+  z_bre(filtered);
+
+  // Check the card value. If dirty, we're done.
+  // This also avoids false sharing of the (already dirty) card.
+  z_sync(); // Required to support concurrent cleaning.
+  assert((unsigned int)CardTableModRefBS::dirty_card_val() <= 255, "otherwise check this code");
+  z_cli(0, Rcard_addr, CardTableModRefBS::dirty_card_val()); // Reload after membar.
+  z_bre(filtered);
+
+  // Storing a region crossing, non-NULL oop, card is clean.
+  // Dirty card and log.
+  z_mvi(0, Rcard_addr, CardTableModRefBS::dirty_card_val());
+
+  // Enqueue the card address into the thread-local dirty card queue.
+  // If Rcard_addr collides with the queue registers, evacuate it to Z_R0.
+  Register Rcard_addr_x = Rcard_addr;
+  Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
+  Register Rqueue_buf   = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1;
+  const int qidx_off    = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_index());
+  const int qbuf_off    = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
+  if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) {
+    Rcard_addr_x = Z_R0_scratch;  // Register shortage. We have to use Z_R0.
+  }
+  lgr_if_needed(Rcard_addr_x, Rcard_addr);
+
+  load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off));
+  z_bre(callRuntime); // Index == 0 then jump to runtime.
+
+  z_lg(Rqueue_buf, qbuf_off, Z_thread);
+
+  add2reg(Rqueue_index, -wordSize); // Decrement index.
+  z_stg(Rqueue_index, qidx_off, Z_thread);
+
+  z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card.
+  z_bru(filtered);
+
+  bind(callRuntime);
+
+  // TODO: do we need a frame? Introduced to be on the safe side.
+  bool needs_frame = true;
+
+  // VM call need frame to access(write) O register.
+  if (needs_frame) {
+    save_return_pc();
+    push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
+  }
+
+  // Save the live input values.
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr_x, Z_thread);
+
+  if (needs_frame) {
+    pop_frame();
+    restore_return_pc();
+  }
+
+  bind(filtered);
+
+  BLOCK_COMMENT("} g1_write_barrier_post");
+}
+#endif // INCLUDE_ALL_GCS
+
+// Last_Java_sp must comply to the rules in frame_s390.hpp.
+// Record the current Java frame in the thread's frame anchor.
+//   last_Java_sp     - SP to publish (must comply with frame_s390.hpp rules).
+//   last_Java_pc     - PC to publish, or noreg to leave last_Java_pc untouched.
+//   allow_relocation - choose relocatable vs. static assertion code.
+void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
+  BLOCK_COMMENT("set_last_Java_frame {");
+
+  // Always set last_Java_pc and flags first because once last_Java_sp
+  // is visible has_last_Java_frame is true and users will look at the
+  // rest of the fields. (Note: flags should always be zero before we
+  // get here so doesn't need to be set.)
+
+  // Verify that last_Java_pc was zeroed on return to Java.
+  if (allow_relocation) {
+    asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
+                            Z_thread,
+                            "last_Java_pc not zeroed before leaving Java",
+                            0x200);
+  } else {
+    asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
+                                   Z_thread,
+                                   "last_Java_pc not zeroed before leaving Java",
+                                   0x200);
+  }
+
+  // When returning from calling out from Java mode the frame anchor's
+  // last_Java_pc will always be set to NULL. It is set here so that
+  // if we are doing a call to native (not VM) that we capture the
+  // known pc and don't have to rely on the native call having a
+  // standard frame linkage where we can find the pc.
+  if (last_Java_pc!=noreg) {
+    z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
+  }
+
+  // This membar release is not required on z/Architecture, since the sequence of stores
+  // is maintained. Nevertheless, we leave it in to document the required ordering.
+  // The implementation of z_release() should be empty.
+  // z_release();
+
+  // Publishing last_Java_sp last makes the anchor visible atomically.
+  z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
+  BLOCK_COMMENT("} set_last_Java_frame");
+}
+
+// Clear the thread's frame anchor (last_Java_sp and last_Java_pc) after
+// returning to Java. Asserts that last_Java_sp was actually set.
+//   allow_relocation - choose relocatable vs. static assertion code.
+void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
+  BLOCK_COMMENT("reset_last_Java_frame {");
+
+  if (allow_relocation) {
+    asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
+                               Z_thread,
+                               "SP was not set, still zero",
+                               0x202);
+  } else {
+    asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
+                                      Z_thread,
+                                      "SP was not set, still zero",
+                                      0x202);
+  }
+
+  // _last_Java_sp = 0
+  // Clearing storage must be atomic here, so don't use clear_mem()!
+  store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
+
+  // _last_Java_pc = 0
+  store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
+
+  BLOCK_COMMENT("} reset_last_Java_frame");
+}
+
+// Make the top interpreter/Java frame at 'sp' the thread's last Java frame.
+//   sp   - SP of the frame to publish.
+//   tmp1 - scratch; receives the current PC before it is stored in the anchor.
+void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
+  assert_different_registers(sp, tmp1);
+
+  // We cannot trust that code generated by the C++ compiler saves R14
+  // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
+  // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
+  // Therefore we load the PC into tmp1 and let set_last_Java_frame() save
+  // it into the frame anchor.
+  get_PC(tmp1);
+  set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation);
+}
+
+// Store a new JavaThreadState into the current thread.
+// The preceding z_release() documents that prior stores must be visible
+// before the state change becomes observable.
+void MacroAssembler::set_thread_state(JavaThreadState new_state) {
+  z_release();
+
+  // The state value is stored as a 4-byte immediate; verify it fits.
+  assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction");
+  assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int");
+  store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false);
+}
+
+// Fetch the oop result a VM call left in the thread's vm_result field,
+// then clear the field so stale results cannot leak into later calls.
+void MacroAssembler::get_vm_result(Register oop_result) {
+  verify_thread();
+
+  const Address vm_result_addr(Z_thread, JavaThread::vm_result_offset());
+  z_lg(oop_result, vm_result_addr);
+  clear_mem(vm_result_addr, sizeof(void*));
+
+  verify_oop(oop_result);
+}
+
+// Fetch the (non-oop) metadata result a VM call left in vm_result_2,
+// then clear the field.
+void MacroAssembler::get_vm_result_2(Register result) {
+  verify_thread();
+
+  const Address vm_result_2_addr(Z_thread, JavaThread::vm_result_2_offset());
+  z_lg(result, vm_result_2_addr);
+  clear_mem(vm_result_2_addr, sizeof(void*));
+}
+
+// We require that C code which does not return a value in vm_result will
+// leave it undisturbed.
+// Deposit an oop result into the thread's vm_result field.
+void MacroAssembler::set_vm_result(Register oop_result) {
+  const Address vm_result_addr(Z_thread, JavaThread::vm_result_offset());
+  z_stg(oop_result, vm_result_addr);
+}
+
+// Explicit null checks (used for method handle code).
+// Explicit null check for 'reg' (used for method handle code).
+//   reg    - the (possibly NULL) base register; clobbered only on the throw path.
+//   tmp    - scratch for the probing load (may be Z_R0).
+//   offset - access offset used to decide whether an OS trap will cover it.
+void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) {
+  if (ImplicitNullChecks) {
+    if (needs_explicit_null_check((intptr_t)offset)) {
+      // Provoke OS NULL exception if reg = NULL by
+      // accessing M[reg] w/o changing any registers.
+      z_lg(tmp, 0, reg);
+    }
+    // Otherwise nothing to do: the (later) access of M[reg + offset]
+    // will provoke the OS NULL exception itself if reg = NULL.
+  } else {
+    NearLabel not_null;
+
+    compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, not_null);
+
+    // reg was 0: branch to the NPE throw entry. The target address goes
+    // into reg itself (tmp==Z_R0 is allowed, so tmp can't hold the address).
+    address exception_entry = Interpreter::throw_NullPointerException_entry();
+    load_absolute_address(reg, exception_entry);
+    z_br(reg);
+
+    bind(not_null);
+  }
+}
+
+//-------------------------------------
+//  Compressed Klass Pointers
+//-------------------------------------
+
+// Klass oop manipulations if compressed.
+// Klass oop manipulations if compressed.
+// Encode the klass pointer in 'src' (or 'dst' if src == noreg) into 'dst':
+// subtract the narrow-klass base, then shift right. The input must not be NULL.
+void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+  Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible.
+  address  base    = Universe::narrow_klass_base();
+  int      shift   = Universe::narrow_klass_shift();
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+
+  BLOCK_COMMENT("cKlass encoder {");
+
+#ifdef ASSERT
+  Label ok;
+  z_tmll(current, KlassAlignmentInBytes-1); // Check alignment.
+  z_brc(Assembler::bcondAllZero, ok);
+  // The plain disassembler does not recognize illtrap. It instead displays
+  // a 32-bit value. Issuing two illtraps assures the disassembler finds
+  // the proper beginning of the next instruction.
+  z_illtrap(0xee);
+  z_illtrap(0xee);
+  bind(ok);
+#endif
+
+  // Subtract the base. Pick the cheapest instruction for the base's bit pattern.
+  if (base != NULL) {
+    unsigned int base_h = ((unsigned long)base)>>32;
+    unsigned int base_l = (unsigned int)((unsigned long)base);
+    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+      lgr_if_needed(dst, current);
+      z_aih(dst, -((int)base_h));     // Base has no set bits in lower half.
+    } else if ((base_h == 0) && (base_l != 0)) {
+      lgr_if_needed(dst, current);
+      z_agfi(dst, -(int)base_l);
+    } else {
+      load_const(Z_R0, base);
+      lgr_if_needed(dst, current);
+      z_sgr(dst, Z_R0);
+    }
+    current = dst;
+  }
+  if (shift != 0) {
+    assert (LogKlassAlignmentInBytes == shift, "decode alg wrong");
+    z_srlg(dst, current, shift);
+    current = dst;
+  }
+  lgr_if_needed(dst, current); // Move may be required (if neither base nor shift != 0).
+
+  BLOCK_COMMENT("} cKlass encoder");
+}
+
+// This function calculates the size of the code generated by
+//   decode_klass_not_null(register dst, Register src)
+// when (Universe::heap() != NULL). Hence, if the instructions
+// it generates change, then this method needs to be updated.
+// Returns the byte size of the code emitted by decode_klass_not_null(Register).
+// Must be kept exactly in sync with that emitter; decode_klass_not_null
+// asserts the match at code-generation time.
+int MacroAssembler::instr_size_for_decode_klass_not_null() {
+  address  base    = Universe::narrow_klass_base();
+  int shift_size   = Universe::narrow_klass_shift() == 0 ? 0 : 6; /* sllg */
+  int addbase_size = 0;
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+
+  // Account for the base-addition instruction actually chosen by the emitter.
+  if (base != NULL) {
+    unsigned int base_h = ((unsigned long)base)>>32;
+    unsigned int base_l = (unsigned int)((unsigned long)base);
+    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+      addbase_size += 6; /* aih */
+    } else if ((base_h == 0) && (base_l != 0)) {
+      addbase_size += 6; /* algfi */
+    } else {
+      addbase_size += load_const_size();
+      addbase_size += 4; /* algr */
+    }
+  }
+#ifdef ASSERT
+  // tmll (4) + brc (4) + illtrap (2): alignment-check code in debug builds.
+  addbase_size += 10;
+  addbase_size += 2; // Extra sigill.
+#endif
+  return addbase_size + shift_size;
+}
+
+// !!! If the instructions that get generated here change
+//     then function instr_size_for_decode_klass_not_null()
+//     needs to get updated.
+// This variant of decode_klass_not_null() must generate predictable code!
+// The code must only depend on globally known parameters.
+// Decode the narrow klass in 'dst' in place: shift left, then add the
+// narrow-klass base. The input must not be NULL (0).
+// Emits a fixed-size sequence; see instr_size_for_decode_klass_not_null().
+void MacroAssembler::decode_klass_not_null(Register dst) {
+  address  base    = Universe::narrow_klass_base();
+  int      shift   = Universe::narrow_klass_shift();
+  int      beg_off = offset();
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+
+  BLOCK_COMMENT("cKlass decoder (const size) {");
+
+  if (shift != 0) { // Shift required?
+    z_sllg(dst, dst, shift);
+  }
+  // Add the base. Pick the cheapest instruction for the base's bit pattern.
+  if (base != NULL) {
+    unsigned int base_h = ((unsigned long)base)>>32;
+    unsigned int base_l = (unsigned int)((unsigned long)base);
+    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+      z_aih(dst, base_h);     // Base has no set bits in lower half.
+    } else if ((base_h == 0) && (base_l != 0)) {
+      z_algfi(dst, base_l);   // Base has no set bits in upper half.
+    } else {
+      load_const(Z_R0, base); // Base has set bits everywhere.
+      z_algr(dst, Z_R0);
+    }
+  }
+
+#ifdef ASSERT
+  Label ok;
+  z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
+  z_brc(Assembler::bcondAllZero, ok);
+  // The plain disassembler does not recognize illtrap. It instead displays
+  // a 32-bit value. Issuing two illtraps assures the disassembler finds
+  // the proper beginning of the next instruction.
+  z_illtrap(0xd1);
+  z_illtrap(0xd1);
+  bind(ok);
+#endif
+  assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch.");
+
+  BLOCK_COMMENT("} cKlass decoder (const size)");
+}
+
+// This variant of decode_klass_not_null() is for cases where
+//  1) the size of the generated instructions may vary
+//  2) the result is (potentially) stored in a register different from the source.
+// Decode the narrow klass in 'src' into 'dst' (src == noreg means in place).
+// Variable-size variant: shift left, then add the narrow-klass base.
+// The input must not be NULL (0).
+void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+  address base  = Universe::narrow_klass_base();
+  int     shift = Universe::narrow_klass_shift();
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+
+  BLOCK_COMMENT("cKlass decoder {");
+
+  if (src == noreg) src = dst;
+
+  if (shift != 0) { // Shift or at least move required?
+    z_sllg(dst, src, shift);
+  } else {
+    lgr_if_needed(dst, src);
+  }
+
+  // Add the base. Pick the cheapest instruction for the base's bit pattern.
+  if (base != NULL) {
+    unsigned int base_h = ((unsigned long)base)>>32;
+    unsigned int base_l = (unsigned int)((unsigned long)base);
+    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+      z_aih(dst, base_h);     // Base has no set bits in lower half.
+    } else if ((base_h == 0) && (base_l != 0)) {
+      z_algfi(dst, base_l);   // Base has no set bits in upper half.
+    } else {
+      load_const_optimized(Z_R0, base); // Base has set bits everywhere.
+      z_algr(dst, Z_R0);
+    }
+  }
+
+#ifdef ASSERT
+  Label ok;
+  z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
+  z_brc(Assembler::bcondAllZero, ok);
+  // The plain disassembler does not recognize illtrap. It instead displays
+  // a 32-bit value. Issuing two illtraps assures the disassembler finds
+  // the proper beginning of the next instruction.
+  z_illtrap(0xd2);
+  z_illtrap(0xd2);
+  bind(ok);
+#endif
+  BLOCK_COMMENT("} cKlass decoder");
+}
+
+// Load the klass pointer from 'mem' into 'klass', decompressing it when
+// compressed class pointers are in use. No null check is performed.
+void MacroAssembler::load_klass(Register klass, Address mem) {
+  if (!UseCompressedClassPointers) {
+    z_lg(klass, mem);
+  } else {
+    // Zero-extending 32-bit load of the narrow klass.
+    z_llgf(klass, mem);
+    // Attention: no null check here!
+    decode_klass_not_null(klass);
+  }
+}
+
+// Load the klass pointer of the object in 'src_oop' into 'klass',
+// decompressing it when compressed class pointers are in use.
+// No null check on src_oop is performed.
+void MacroAssembler::load_klass(Register klass, Register src_oop) {
+  if (!UseCompressedClassPointers) {
+    z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop);
+  } else {
+    // Zero-extending 32-bit load of the narrow klass.
+    z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop);
+    // Attention: no null check here!
+    decode_klass_not_null(klass);
+  }
+}
+
+// Load the prototype mark word of Rsrc_oop's klass into Rheader
+// (klass load followed by prototype_header load; Rheader is clobbered
+// as an intermediate for the klass pointer).
+void MacroAssembler::load_prototype_header(Register Rheader, Register Rsrc_oop) {
+  assert_different_registers(Rheader, Rsrc_oop);
+  load_klass(Rheader, Rsrc_oop);
+  z_lg(Rheader, Address(Rheader, Klass::prototype_header_offset()));
+}
+
+// Store the klass pointer 'klass' into the object at 'dst_oop'.
+// With compressed class pointers, 'ck' (or 'klass' itself if ck == noreg)
+// receives the encoded value before the 32-bit store.
+void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
+  if (!UseCompressedClassPointers) {
+    z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
+    return;
+  }
+  assert_different_registers(dst_oop, klass, Z_R0);
+  Register narrow = (ck == noreg) ? klass : ck;
+  encode_klass_not_null(narrow, klass);
+  z_st(narrow, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
+}
+
+// Fill the klass gap of object 'd' with 's' (only exists with compressed
+// class pointers; a no-op otherwise).
+void MacroAssembler::store_klass_gap(Register s, Register d) {
+  if (!UseCompressedClassPointers) { return; }
+  assert(s != d, "not enough registers");
+  z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
+}
+
+// Compare klass ptr in memory against klass ptr in register.
+//
+// Rop1            - klass in register, always uncompressed.
+// disp            - Offset of klass in memory, compressed/uncompressed, depending on runtime flag.
+// Rbase           - Base address of cKlass in memory.
+// maybeNULL       - True if Rop1 possibly is a NULL.
+void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) {
+
+  BLOCK_COMMENT("compare klass ptr {");
+
+  if (UseCompressedClassPointers) {
+    const int shift = Universe::narrow_klass_shift();
+    address   base  = Universe::narrow_klass_base();
+
+    assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift");
+    assert_different_registers(Rop1, Z_R0);
+    assert_different_registers(Rop1, Rbase, Z_R1);
+
+    // First encode register oop and then compare with cOop in memory.
+    // This sequence saves an unnecessary cOop load and decode.
+    if (base == NULL) {
+      if (shift == 0) {
+        z_cl(Rop1, disp, Rbase);     // Unscaled
+      } else {
+        z_srlg(Z_R0, Rop1, shift);   // ZeroBased
+        z_cl(Z_R0, disp, Rbase);
+      }
+    } else {                         // HeapBased
+#ifdef ASSERT
+      bool     used_R0 = true;
+      bool     used_R1 = true;
+#endif
+      Register current = Rop1;
+      Label    done;
+
+      if (maybeNULL) {       // NULL ptr must be preserved!
+        // A NULL klass encodes to 0; skip base subtraction and shift and
+        // compare 0 directly (Z_R0 is 0 at 'done' in that case).
+        z_ltgr(Z_R0, current);
+        z_bre(done);
+        current = Z_R0;
+      }
+
+      // Subtract the base using the cheapest instruction for its bit pattern
+      // (mirrors the logic of encode_klass_not_null).
+      unsigned int base_h = ((unsigned long)base)>>32;
+      unsigned int base_l = (unsigned int)((unsigned long)base);
+      if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+        lgr_if_needed(Z_R0, current);
+        z_aih(Z_R0, -((int)base_h));     // Base has no set bits in lower half.
+      } else if ((base_h == 0) && (base_l != 0)) {
+        lgr_if_needed(Z_R0, current);
+        z_agfi(Z_R0, -(int)base_l);
+      } else {
+        int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
+        add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement.
+      }
+
+      if (shift != 0) {
+        z_srlg(Z_R0, Z_R0, shift);
+      }
+      bind(done);
+      z_cl(Z_R0, disp, Rbase);
+#ifdef ASSERT
+      // Poison the scratch registers in debug builds to expose stale uses.
+      if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
+      if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
+#endif
+    }
+  } else {
+    // Uncompressed: plain 64-bit compare against memory.
+    z_clg(Rop1, disp, Z_R0, Rbase);
+  }
+  BLOCK_COMMENT("} compare klass ptr");
+}
+
+//---------------------------
+//  Compressed oops
+//---------------------------
+
+// Encode heap oop in place; NULL is permitted and preserved.
+void MacroAssembler::encode_heap_oop(Register oop) {
+  oop_encoder(oop, oop, true /*maybe null*/);
+}
+
+// Encode heap oop in place; the oop is guaranteed to be non-NULL (saves the NULL check).
+void MacroAssembler::encode_heap_oop_not_null(Register oop) {
+  oop_encoder(oop, oop, false /*not null*/);
+}
+
+// Called with something derived from the oop base. e.g. oop_base>>3.
+// Returns 0, or a negative offset x such that (oop_base - x) has exactly one
+// non-zero 16-bit chunk and can thus be materialized cheaply.
+int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) {
+  assert(oop_base != 0, "This is for HeapBased cOops only");
+
+  // Count the non-zero 16-bit chunks of the base.
+  unsigned int nonzero_chunks = 0;
+  for (int chunk = 0; chunk < 4; chunk++) {
+    if (((oop_base >> (16*chunk)) & 0xffff) != 0) { nonzero_chunks++; }
+  }
+
+  if (nonzero_chunks != 1) { // Check if oop_base is just a few pages shy of a power of 2.
+    uint64_t pow2_offset = 0x10000 - (oop_base & 0xffff);
+    if (pow2_offset < 0x8000) {  // This might not be necessary.
+      // Re-count with the candidate offset applied.
+      uint64_t adjusted_base = oop_base + pow2_offset;
+      nonzero_chunks = 0;
+      for (int chunk = 0; chunk < 4; chunk++) {
+        if (((adjusted_base >> (16*chunk)) & 0xffff) != 0) { nonzero_chunks++; }
+      }
+      if (nonzero_chunks == 1) {
+        assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register");
+        return -pow2_offset;
+      }
+    }
+  }
+  return 0;
+}
+
+// If base address is offset from a straight power of two by just a few pages,
+// return this offset to the caller for a possible later composite add.
+// TODO/FIX: will only work correctly for 4k pages.
+//
+// Loads (oop_base - pow2_offset) into Rbase; the caller must add the returned
+// pow2_offset back in to reconstruct the exact base.
+int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) {
+  int pow2_offset = get_oop_base_pow2_offset(oop_base);
+
+  load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible.
+
+  return pow2_offset;
+}
+
+// Load the two's complement of (an approximation of) oop_base into Rbase.
+// Adding Rbase plus the returned offset to an address subtracts the exact base.
+int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) {
+  int offset = get_oop_base(Rbase, oop_base);
+  z_lcgr(Rbase, Rbase);   // Negate: Rbase = -(oop_base - offset).
+  return -offset;
+}
+
+// Compare compressed oop in memory against oop in register.
+// Rop1            - Oop in register.
+// mem             - Location (base/index/disp) of cOop in memory.
+// maybeNULL       - True if Rop1 possibly is a NULL.
+//
+// Kills Z_R0 and Z_R1 (poisoned under ASSERT). Sets the condition code for the caller.
+void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) {
+  Register Rbase  = mem.baseOrR0();
+  Register Rindex = mem.indexOrR0();
+  int64_t  disp   = mem.disp();
+
+  const int shift = Universe::narrow_oop_shift();
+  address   base  = Universe::narrow_oop_base();
+
+  assert(UseCompressedOops, "must be on to call this method");
+  assert(Universe::heap() != NULL, "java heap must be initialized to call this method");
+  assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
+  assert_different_registers(Rop1, Z_R0);
+  assert_different_registers(Rop1, Rbase, Z_R1);
+  assert_different_registers(Rop1, Rindex, Z_R1);
+
+  BLOCK_COMMENT("compare heap oop {");
+
+  // First encode register oop and then compare with cOop in memory.
+  // This sequence saves an unnecessary cOop load and decode.
+  if (base == NULL) {
+    if (shift == 0) {
+      z_cl(Rop1, disp, Rindex, Rbase);  // Unscaled
+    } else {
+      z_srlg(Z_R0, Rop1, shift);        // ZeroBased
+      z_cl(Z_R0, disp, Rindex, Rbase);
+    }
+  } else {                              // HeapBased
+#ifdef ASSERT
+    bool  used_R0 = true;
+    bool  used_R1 = true;
+#endif
+    Label done;
+    int   pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
+
+    if (maybeNULL) {       // NULL ptr must be preserved!
+      z_ltgr(Z_R0, Rop1);  // Copy and test: a NULL oop compares as encoded 0.
+      z_bre(done);
+    }
+
+    // Subtract base by adding its complement, then scale.
+    add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1);
+    z_srlg(Z_R0, Z_R0, shift);
+
+    bind(done);
+    z_cl(Z_R0, disp, Rindex, Rbase);
+#ifdef ASSERT
+    // Poison scratch registers to expose accidental use of stale values.
+    if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
+    if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
+#endif
+  }
+  BLOCK_COMMENT("} compare heap oop");
+}
+
+// Load heap oop and decompress, if necessary. The loaded oop may be NULL.
+void  MacroAssembler::load_heap_oop(Register dest, const Address &a) {
+  if (UseCompressedOops) {
+    z_llgf(dest, a.disp(), a.indexOrR0(), a.baseOrR0()); // Zero-extending 32-bit load.
+    oop_decoder(dest, dest, true);                       // May be NULL.
+  } else {
+    z_lg(dest, a.disp(), a.indexOrR0(), a.baseOrR0());
+  }
+}
+
+// Load heap oop and decompress, if necessary. The loaded oop may be NULL.
+void MacroAssembler::load_heap_oop(Register dest, int64_t disp, Register base) {
+  if (UseCompressedOops) {
+    z_llgf(dest, disp, base);     // Zero-extending 32-bit load.
+    oop_decoder(dest, dest, true); // May be NULL.
+  } else {
+    z_lg(dest, disp, base);
+  }
+}
+
+// Load heap oop and decompress, if necessary. The oop is guaranteed to be non-NULL,
+// allowing the decoder to omit the NULL check.
+void MacroAssembler::load_heap_oop_not_null(Register dest, int64_t disp, Register base) {
+  if (UseCompressedOops) {
+    z_llgf(dest, disp, base);       // Zero-extending 32-bit load.
+    oop_decoder(dest, dest, false); // Guaranteed not NULL.
+  } else {
+    z_lg(dest, disp, base);
+  }
+}
+
+// Compress, if necessary, and store oop to heap.
+// Note: kills Roop when UseCompressedOops (it is encoded in place).
+void MacroAssembler::store_heap_oop(Register Roop, RegisterOrConstant offset, Register base) {
+  Register Ridx = offset.is_register() ? offset.register_or_noreg() : Z_R0; // Z_R0 as index means no index.
+  if (UseCompressedOops) {
+    assert_different_registers(Roop, offset.register_or_noreg(), base);
+    encode_heap_oop(Roop);
+    z_st(Roop, offset.constant_or_zero(), Ridx, base);   // 32-bit store of the narrow oop.
+  } else {
+    z_stg(Roop, offset.constant_or_zero(), Ridx, base);  // 64-bit store of the full oop.
+  }
+}
+
+// Compress, if necessary, and store oop to heap. Oop is guaranteed to be not NULL.
+// Note: kills Roop when UseCompressedOops (it is encoded in place).
+void MacroAssembler::store_heap_oop_not_null(Register Roop, RegisterOrConstant offset, Register base) {
+  Register Ridx = offset.is_register() ? offset.register_or_noreg() : Z_R0; // Z_R0 as index means no index.
+  if (UseCompressedOops) {
+    assert_different_registers(Roop, offset.register_or_noreg(), base);
+    encode_heap_oop_not_null(Roop);
+    z_st(Roop, offset.constant_or_zero(), Ridx, base);   // 32-bit store of the narrow oop.
+  } else {
+    z_stg(Roop, offset.constant_or_zero(), Ridx, base);  // 64-bit store of the full oop.
+  }
+}
+
+// Store NULL oop to heap. 'zero' is expected to hold the value 0 (NULL encodes
+// to 0 in both compressed and uncompressed form, so no encoding is needed).
+void MacroAssembler::store_heap_oop_null(Register zero, RegisterOrConstant offset, Register base) {
+  Register Ridx = offset.is_register() ? offset.register_or_noreg() : Z_R0; // Z_R0 as index means no index.
+  if (UseCompressedOops) {
+    z_st(zero, offset.constant_or_zero(), Ridx, base);
+  } else {
+    z_stg(zero, offset.constant_or_zero(), Ridx, base);
+  }
+}
+
+//-------------------------------------------------
+// Encode compressed oop. Generally usable encoder.
+//-------------------------------------------------
+// Rsrc - contains regular oop on entry. It remains unchanged.
+// Rdst - contains compressed oop on exit.
+// Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged.
+//
+// Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality.
+// Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance.
+//
+// only32bitValid is set, if later code only uses the lower 32 bits. In this
+// case we must not fix the upper 32 bits.
+void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL,
+                                 Register Rbase, int pow2_offset, bool only32bitValid) {
+
+  const address oop_base  = Universe::narrow_oop_base();
+  const int     oop_shift = Universe::narrow_oop_shift();
+  const bool    disjoint  = Universe::narrow_oop_base_disjoint();
+
+  assert(UseCompressedOops, "must be on to call this method");
+  assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder");
+  assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
+
+  // A disjoint base needs no subtraction for encoding: its bits are removed
+  // by the 32-bit truncation (llgfr) below, just like in the zeroBased case.
+  if (disjoint || (oop_base == NULL)) {
+    BLOCK_COMMENT("cOop encoder zeroBase {");
+    if (oop_shift == 0) {
+      if (oop_base != NULL && !only32bitValid) {
+        z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again.
+      } else {
+        lgr_if_needed(Rdst, Rsrc);
+      }
+    } else {
+      z_srlg(Rdst, Rsrc, oop_shift);
+      if (oop_base != NULL && !only32bitValid) {
+        z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
+      }
+    }
+    BLOCK_COMMENT("} cOop encoder zeroBase");
+    return;
+  }
+
+  bool used_R0 = false;
+  bool used_R1 = false;
+
+  BLOCK_COMMENT("cOop encoder general {");
+  assert_different_registers(Rdst, Z_R1);
+  assert_different_registers(Rsrc, Rbase);
+  if (maybeNULL) {
+    Label done;
+    // We reorder shifting and subtracting, so that we can compare
+    // and shift in parallel:
+    //
+    // cycle 0:  potential LoadN, base = <const>
+    // cycle 1:  base = !base     dst = src >> 3,    cmp cr = (src != 0)
+    // cycle 2:  if (cr) br,      dst = dst + base + offset
+
+    // Get oop_base components.
+    if (pow2_offset == -1) {  // -1 means: base not preloaded by the caller.
+      if (Rdst == Rbase) {
+        // Resolve the register clash by diverting Rbase or Rdst to a scratch register.
+        if (Rdst == Z_R1 || Rsrc == Z_R1) {
+          Rbase = Z_R0;
+          used_R0 = true;
+        } else {
+          Rdst = Z_R1;
+          used_R1 = true;
+        }
+      }
+      if (Rbase == Z_R1) {
+        used_R1 = true;
+      }
+      // Base is pre-shifted here, matching the reordered shift-then-subtract below.
+      pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift);
+    }
+    assert_different_registers(Rdst, Rbase);
+
+    // Check for NULL oop (must be left alone) and shift.
+    if (oop_shift != 0) {  // Shift out alignment bits
+      if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set.
+        z_srag(Rdst, Rsrc, oop_shift);  // Arithmetic shift sets the condition code.
+      } else {
+        z_srlg(Rdst, Rsrc, oop_shift);
+        z_ltgr(Rsrc, Rsrc);  // This is the recommended way of testing for zero.
+        // This probably is faster, as it does not write a register. No!
+        // z_cghi(Rsrc, 0);
+      }
+    } else {
+      z_ltgr(Rdst, Rsrc);   // Move NULL to result register.
+    }
+    z_bre(done);            // NULL: result is already 0, skip base subtraction.
+
+    // Subtract oop_base components.
+    if ((Rdst == Z_R0) || (Rbase == Z_R0)) {
+      z_algr(Rdst, Rbase);  // Z_R0 cannot serve as index register in add2reg_with_index.
+      if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); }
+    } else {
+      add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst);
+    }
+    if (!only32bitValid) {
+      z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
+    }
+    bind(done);
+
+  } else {  // not null
+    // Get oop_base components.
+    if (pow2_offset == -1) {  // -1 means: base not preloaded by the caller.
+      pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base);
+    }
+
+    // Subtract oop_base components and shift.
+    if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) {
+      // Don't use lay instruction.
+      if (Rdst == Rsrc) {
+        z_algr(Rdst, Rbase);
+      } else {
+        lgr_if_needed(Rdst, Rbase);
+        z_algr(Rdst, Rsrc);
+      }
+      if (pow2_offset != 0) add2reg(Rdst, pow2_offset);
+    } else {
+      add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc);
+    }
+    if (oop_shift != 0) {   // Shift out alignment bits.
+      z_srlg(Rdst, Rdst, oop_shift);
+    }
+    if (!only32bitValid) {
+      z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
+    }
+  }
+#ifdef ASSERT
+  // Poison scratch registers to expose accidental use of stale values.
+  if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); }
+  if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); }
+#endif
+  BLOCK_COMMENT("} cOop encoder general");
+}
+
+//-------------------------------------------------
+// decode compressed oop. Generally usable decoder.
+//-------------------------------------------------
+// Rsrc - contains compressed oop on entry.
+// Rdst - contains regular oop on exit.
+// Rdst and Rsrc may indicate same register.
+// Rdst must not be the same register as Rbase, if Rbase was preloaded (before call).
+// Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch.
+// Rbase - register to use for the base
+// pow2_offset - offset of base to nice value. If -1, base must be loaded.
+// For performance, it is good to
+//  - avoid Z_R0 for any of the argument registers.
+//  - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance.
+//  - avoid Z_R1 for Rdst if Rdst == Rbase.
+void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) {
+
+  const address oop_base  = Universe::narrow_oop_base();
+  const int     oop_shift = Universe::narrow_oop_shift();
+  const bool    disjoint  = Universe::narrow_oop_base_disjoint();
+
+  assert(UseCompressedOops, "must be on to call this method");
+  assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder");
+  assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes),
+         "cOop encoder detected bad shift");
+
+  // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary.
+
+  if (oop_base != NULL) {
+    unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff;
+    unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff;
+    unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff;
+    if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) {
+      BLOCK_COMMENT("cOop decoder disjointBase {");
+      // We do not need to load the base. Instead, we can install the upper bits
+      // with an OR instead of an ADD.
+      Label done;
+
+      // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
+      if (maybeNULL) {  // NULL ptr must be preserved!
+        z_slag(Rdst, Rsrc, oop_shift);  // Arithmetic shift sets the condition code.
+        z_bre(done);                    // NULL stays NULL: skip installing the base bits.
+      } else {
+        z_sllg(Rdst, Rsrc, oop_shift);  // Logical shift leaves condition code alone.
+      }
+      // Install the base with the cheapest OR-immediate form available.
+      if ((oop_base_hl != 0) && (oop_base_hh != 0)) {
+        z_oihf(Rdst, oop_base_hf);
+      } else if (oop_base_hl != 0) {
+        z_oihl(Rdst, oop_base_hl);
+      } else {
+        assert(oop_base_hh != 0, "not heapbased mode");
+        z_oihh(Rdst, oop_base_hh);
+      }
+      bind(done);
+      BLOCK_COMMENT("} cOop decoder disjointBase");
+    } else {
+      BLOCK_COMMENT("cOop decoder general {");
+      // There are three decode steps:
+      //   scale oop offset (shift left)
+      //   get base (in reg) and pow2_offset (constant)
+      //   add base, pow2_offset, and oop offset
+      // The following register overlap situations may exist:
+      // Rdst == Rsrc,  Rbase any other
+      //   not a problem. Scaling in-place leaves Rbase undisturbed.
+      //   Loading Rbase does not impact the scaled offset.
+      // Rdst == Rbase, Rsrc  any other
+      //   scaling would destroy a possibly preloaded Rbase. Loading Rbase
+      //   would destroy the scaled offset.
+      //   Remedy: use Rdst_tmp if Rbase has been preloaded.
+      //           use Rbase_tmp if base has to be loaded.
+      // Rsrc == Rbase, Rdst  any other
+      //   Only possible without preloaded Rbase.
+      //   Loading Rbase does not destroy compressed oop because it was scaled into Rdst before.
+      // Rsrc == Rbase, Rdst == Rbase
+      //   Only possible without preloaded Rbase.
+      //   Loading Rbase would destroy compressed oop. Scaling in-place is ok.
+      //   Remedy: use Rbase_tmp.
+      //
+      Label    done;
+      Register Rdst_tmp       = Rdst;
+      Register Rbase_tmp      = Rbase;
+      bool     used_R0        = false;
+      bool     used_R1        = false;
+      bool     base_preloaded = pow2_offset >= 0;  // pow2_offset >= 0 implies caller loaded Rbase.
+      guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller");
+      assert(oop_shift != 0, "room for optimization");
+
+      // Check if we need to use scratch registers.
+      if (Rdst == Rbase) {
+        assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg");
+        if (Rdst != Rsrc) {
+          if (base_preloaded) { Rdst_tmp  = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
+          else                { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
+        } else {
+          Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1;
+        }
+      }
+      if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase);
+
+      // Scale oop and check for NULL.
+      // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
+      if (maybeNULL) {  // NULL ptr must be preserved!
+        z_slag(Rdst_tmp, Rsrc, oop_shift);  // Arithmetic shift sets the condition code.
+        z_bre(done);                        // NULL stays NULL: skip adding the base.
+      } else {
+        z_sllg(Rdst_tmp, Rsrc, oop_shift);  // Logical shift leaves condition code alone.
+      }
+
+      // Get oop_base components.
+      if (!base_preloaded) {
+        pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base);
+      }
+
+      // Add up all components.
+      if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) {
+        z_algr(Rdst_tmp, Rbase_tmp);  // Z_R0 cannot serve as index register in add2reg_with_index.
+        if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); }
+      } else {
+        add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp);
+      }
+
+      bind(done);
+      lgr_if_needed(Rdst, Rdst_tmp);  // Copy result from temp, if one was used.
+#ifdef ASSERT
+      // NOTE(review): used_R0/used_R1 are never set to true above, so this
+      // debug poisoning is currently inert — confirm whether the scratch
+      // selection logic should update them (compare with oop_encoder).
+      if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); }
+      if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); }
+#endif
+      BLOCK_COMMENT("} cOop decoder general");
+    }
+  } else {
+    BLOCK_COMMENT("cOop decoder zeroBase {");
+    if (oop_shift == 0) {
+      lgr_if_needed(Rdst, Rsrc);
+    } else {
+      z_sllg(Rdst, Rsrc, oop_shift);
+    }
+    BLOCK_COMMENT("} cOop decoder zeroBase");
+  }
+}
+
+// Load the java mirror (java.lang.Class instance) of the method's holder class:
+//   method -> ConstMethod -> ConstantPool -> pool holder (Klass) -> java mirror.
+// 'mirror' serves as the intermediate register for each hop; 'method' stays unchanged.
+void MacroAssembler::load_mirror(Register mirror, Register method) {
+  mem2reg_opt(mirror, Address(method, Method::const_offset()));
+  mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
+  mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
+  mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
+}
+
+//---------------------------------------------------------------
+//---  Operations on arrays.
+//---------------------------------------------------------------
+
+// Compiler ensures base is doubleword aligned and cnt is #doublewords.
+// Emitter does not KILL cnt and base arguments, since they need to be copied to
+// work registers anyway.
+// Actually, only r0, r1, and r5 are killed.
+// Returns the size (in bytes) of the generated code.
+unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len) {
+  // Src_addr is evenReg.
+  // Src_len is odd_Reg.
+
+  int      block_start = offset();
+  Register tmp_reg  = src_len; // Holds target instr addr for EX.
+  Register dst_len  = Z_R1;    // Holds dst len  for MVCLE.
+  Register dst_addr = Z_R0;    // Holds dst addr for MVCLE.
+
+  Label doXC, doMVCLE, done;
+
+  BLOCK_COMMENT("Clear_Array {");
+
+  // Check for zero len and convert to long.
+  z_ltgfr(src_len, cnt_arg);      // Remember casted value for doSTG case.
+  z_bre(done);                    // Nothing to do if len == 0.
+
+  // Prefetch data to be cleared.
+  if (VM_Version::has_Prefetch()) {
+    z_pfd(0x02,   0, Z_R0, base_pointer_arg);
+    z_pfd(0x02, 256, Z_R0, base_pointer_arg);
+  }
+
+  z_sllg(dst_len, src_len, 3);    // #bytes to clear.
+  z_cghi(src_len, 32);            // Check for len <= 256 bytes (<=32 DW).
+  z_brnh(doXC);                   // If so, use executed XC to clear.
+
+  // MVCLE: initialize long arrays (general case).
+  bind(doMVCLE);
+  z_lgr(dst_addr, base_pointer_arg);
+  clear_reg(src_len, true, false); // Src len of MVCLE is zero.
+
+  // With source length 0, MVCLE fills the destination with the pad byte (0).
+  MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
+  z_bru(done);
+
+  // XC: initialize short arrays.
+  Label XC_template; // Instr template, never exec directly!
+    bind(XC_template);
+    z_xc(0,0,base_pointer_arg,0,base_pointer_arg);  // XC dst,dst zeroes the bytes.
+
+  bind(doXC);
+    add2reg(dst_len, -1);             // Get #bytes-1 for EXECUTE.
+    if (VM_Version::has_ExecuteExtensions()) {
+      z_exrl(dst_len, XC_template);   // Execute XC with var. len.
+    } else {
+      z_larl(tmp_reg, XC_template);
+      z_ex(dst_len,0,Z_R0,tmp_reg);   // Execute XC with var. len.
+    }
+    // z_bru(done);      // fallthru
+
+  bind(done);
+
+  BLOCK_COMMENT("} Clear_Array");
+
+  int block_end = offset();
+  return block_end - block_start;
+}
+
+// Compiler ensures base is doubleword aligned and cnt is count of doublewords.
+// Emitter does not KILL any arguments nor work registers.
+// Emitter generates up to 16 XC instructions, depending on the array length.
+// Returns the size (in bytes) of the generated code.
+unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
+  int  block_start    = offset();
+  int  off;
+  int  lineSize_Bytes = AllocatePrefetchStepSize;
+  int  lineSize_DW    = AllocatePrefetchStepSize>>LogBytesPerWord;
+  bool doPrefetch     = VM_Version::has_Prefetch();
+  int  XC_maxlen      = 256;                             // An XC clears at most 256 bytes.
+  int  numXCInstr     = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0;
+
+  BLOCK_COMMENT("Clear_Array_Const {");
+  assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only");
+
+  // Do less prefetching for very short arrays.
+  if (numXCInstr > 0) {
+    // Prefetch only some cache lines, then begin clearing.
+    if (doPrefetch) {
+      if (cnt*BytesPerWord <= lineSize_Bytes/4) {  // If less than 1/4 of a cache line to clear,
+        z_pfd(0x02, 0, Z_R0, base);                // prefetch just the first cache line.
+      } else {
+        assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines");
+        for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) {
+          z_pfd(0x02, off*lineSize_Bytes, Z_R0, base);
+        }
+      }
+    }
+
+    for (off=0; off<(numXCInstr-1); off++) {
+      z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base);  // XC dst,dst zeroes the bytes.
+
+      // Prefetch some cache lines in advance.
+      if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) {
+        z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base);
+      }
+    }
+    // Clear the remaining tail (< XC_maxlen bytes), if any.
+    if (off*XC_maxlen < cnt*BytesPerWord) {
+      z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base);
+    }
+  }
+  BLOCK_COMMENT("} Clear_Array_Const");
+
+  int block_end = offset();
+  return block_end - block_start;
+}
+
+// Compiler ensures base is doubleword aligned and cnt is #doublewords.
+// Emitter does not KILL cnt and base arguments, since they need to be copied to
+// work registers anyway.
+// Actually, only r0, r1, r4, and r5 (which are work registers) are killed.
+//
+// For very large arrays, exploit MVCLE H/W support.
+// MVCLE instruction automatically exploits H/W-optimized page mover.
+// - Bytes up to next page boundary are cleared with a series of XC to self.
+// - All full pages are cleared with the page mover H/W assist.
+// - Remaining bytes are again cleared by a series of XC to self.
+//
+// Returns the size (in bytes) of the generated code.
+unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len) {
+  // Src_addr is evenReg.
+  // Src_len is odd_Reg.
+
+  int      block_start = offset();
+  Register dst_len  = Z_R1;      // Holds dst len  for MVCLE.
+  Register dst_addr = Z_R0;      // Holds dst addr for MVCLE.
+
+  BLOCK_COMMENT("Clear_Array_Const_Big {");
+
+  // Get len to clear.
+  load_const_optimized(dst_len, (long)cnt*8L);  // in Bytes = #DW*8
+
+  // Prepare other args to MVCLE.
+  z_lgr(dst_addr, base_pointer_arg);
+  // Indicate unused result.
+  (void) clear_reg(src_len, true, false);  // Src len of MVCLE is zero.
+
+  // Clear. With source length 0, MVCLE fills the destination with the pad byte (0).
+  MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
+  BLOCK_COMMENT("} Clear_Array_Const_Big");
+
+  int block_end = offset();
+  return block_end - block_start;
+}
+
+// Allocator.
+// Copies cnt_reg doublewords from src_reg to dst_reg (regions must be disjoint).
+// Returns the size (in bytes) of the generated code.
+unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
+                                                           Register cnt_reg,
+                                                           Register tmp1_reg, Register tmp2_reg) {
+  // Tmp1 is oddReg.
+  // Tmp2 is evenReg.
+
+  int block_start = offset();
+  Label doMVC, doMVCLE, done, MVC_template;
+
+  BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {");
+
+  // Check for zero len and convert to long.
+  z_ltgfr(cnt_reg, cnt_reg);      // Remember casted value for doSTG case.
+  z_bre(done);                    // Nothing to do if len == 0.
+
+  z_sllg(Z_R1, cnt_reg, 3);       // Dst len in bytes. calc early to have the result ready.
+
+  z_cghi(cnt_reg, 32);            // Check for len <= 256 bytes (<=32 DW).
+  z_brnh(doMVC);                  // If so, use executed MVC to clear.
+
+  bind(doMVCLE);                  // A lot of data (more than 256 bytes).
+  // Prep dest reg pair.
+  z_lgr(Z_R0, dst_reg);           // dst addr
+  // Dst len already in Z_R1.
+  // Prep src reg pair.
+  z_lgr(tmp2_reg, src_reg);       // src addr
+  z_lgr(tmp1_reg, Z_R1);          // Src len same as dst len.
+
+  // Do the copy.
+  move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache.
+  z_bru(done);                         // All done.
+
+  bind(MVC_template);             // Just some data (not more than 256 bytes).
+  z_mvc(0, 0, dst_reg, 0, src_reg);  // Template, executed below with the real length.
+
+  bind(doMVC);
+
+  // Compute len-1 for EXECUTE; without execute extensions, also point Z_R1 at the template.
+  if (VM_Version::has_ExecuteExtensions()) {
+    add2reg(Z_R1, -1);
+  } else {
+    add2reg(tmp1_reg, -1, Z_R1);
+    z_larl(Z_R1, MVC_template);
+  }
+
+  if (VM_Version::has_Prefetch()) {
+    z_pfd(1,  0,Z_R0,src_reg);
+    z_pfd(2,  0,Z_R0,dst_reg);
+    //    z_pfd(1,256,Z_R0,src_reg);    // Assume very short copy.
+    //    z_pfd(2,256,Z_R0,dst_reg);
+  }
+
+  // Execute the MVC template with the variable length (len-1).
+  if (VM_Version::has_ExecuteExtensions()) {
+    z_exrl(Z_R1, MVC_template);
+  } else {
+    z_ex(tmp1_reg, 0, Z_R0, Z_R1);
+  }
+
+  bind(done);
+
+  BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint");
+
+  int block_end = offset();
+  return block_end - block_start;
+}
+
+//------------------------------------------------------
+//   Special String Intrinsics. Implementation
+//------------------------------------------------------
+
+// Intrinsics for CompactStrings
+
+// Compress char[] to byte[]. odd_reg contains cnt. Kills dst. Early clobber: result
+// The result is the number of characters copied before the first incompatible character was found.
+// If tmp2 is provided and the compression fails, the compression stops exactly at this point and the result is precise.
+//
+// Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
+// - Different number of characters may have been written to dead array (if tmp2 not provided).
+// - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
+//
+// Returns the size (in bytes) of the generated code.
+unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register odd_reg,
+                                             Register even_reg, Register tmp, Register tmp2) {
+  int block_start = offset();
+  Label Lloop1, Lloop2, Lslow, Ldone;
+  const Register addr2 = dst, ind1 = result, mask = tmp;
+  const bool precise = (tmp2 != noreg);
+
+  BLOCK_COMMENT("string_compress {");
+
+  z_sll(odd_reg, 1);       // Number of bytes to read. (Must be a positive simm32.)
+  clear_reg(ind1);         // Index to read.
+  z_llilf(mask, 0xFF00FF00);  // Selects the high byte of each char: non-zero => not compressible.
+  z_ahi(odd_reg, -16);     // Last possible index for fast loop.
+  z_brl(Lslow);
+
+  // ind1: index, even_reg: index increment, odd_reg: index limit
+  z_iihf(mask, 0xFF00FF00);  // Extend the mask to all four chars of a doubleword.
+  z_lhi(even_reg, 16);
+
+  bind(Lloop1); // 8 Characters per iteration.
+  z_lg(Z_R0, Address(src, ind1));
+  z_lg(Z_R1, Address(src, ind1, 8));
+  if (precise) {
+    // Check all 8 chars up front; bail to the slow loop before writing anything.
+    if (VM_Version::has_DistinctOpnds()) {
+      z_ogrk(tmp2, Z_R0, Z_R1);
+    } else {
+      z_lgr(tmp2, Z_R0);
+      z_ogr(tmp2, Z_R1);
+    }
+    z_ngr(tmp2, mask);
+    z_brne(Lslow);         // Failed fast case, retry slowly.
+  }
+  // Store the low byte of each of the 8 chars (STCM(H) mask 0x5 selects bytes 1 and 3).
+  z_stcmh(Z_R0, 5, 0, addr2);
+  z_stcm(Z_R0, 5, 2, addr2);
+  if (!precise) { z_ogr(Z_R0, Z_R1); }  // Imprecise mode: collect high bytes, check after stores.
+  z_stcmh(Z_R1, 5, 4, addr2);
+  z_stcm(Z_R1, 5, 6, addr2);
+  if (!precise) {
+    z_ngr(Z_R0, mask);
+    z_brne(Ldone);         // Failed (more than needed was written).
+  }
+  z_aghi(addr2, 8);
+  z_brxle(ind1, even_reg, Lloop1);  // ind1 += even_reg; loop while ind1 <= odd_reg.
+
+  bind(Lslow);
+  // Compute index limit and skip if negative.
+  z_ahi(odd_reg, 16-2);    // Last possible index for slow loop.
+  z_lhi(even_reg, 2);
+  z_cr(ind1, odd_reg);
+  z_brh(Ldone);
+
+  bind(Lloop2); // 1 Character per iteration.
+  z_llh(Z_R0, Address(src, ind1));
+  z_tmll(Z_R0, 0xFF00);    // Test the high byte of the char.
+  z_brnaz(Ldone);          // Failed slow case: Return number of written characters.
+  z_stc(Z_R0, Address(addr2));
+  z_aghi(addr2, 1);
+  z_brxle(ind1, even_reg, Lloop2);
+
+  bind(Ldone);             // result = ind1 = 2*cnt
+  z_srl(ind1, 1);          // Convert byte index back to #chars.
+
+  BLOCK_COMMENT("} string_compress");
+
+  return offset() - block_start;
+}
+
+// Inflate byte[] to char[] using the TROT (translate one to two) instruction
+// with a pre-generated translation table.
+// Returns the size (in bytes) of the generated code.
+unsigned int MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) {
+  int block_start = offset();
+
+  BLOCK_COMMENT("string_inflate {");
+
+  Register stop_char = Z_R0;
+  Register table     = Z_R1;
+  Register src_addr  = tmp;
+
+  // TROT requires an even/odd register pair for destination address and length.
+  assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
+  assert(dst->encoding()%2 == 0, "must be even reg");
+  assert(cnt->encoding()%2 == 1, "must be odd reg");
+  assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");
+
+  StubRoutines::zarch::generate_load_trot_table_addr(this, table);  // kills Z_R0 (if ASSERT)
+  clear_reg(stop_char);  // Stop character. Not used here, but initialized to have a defined value.
+  lgr_if_needed(src_addr, src);
+  z_llgfr(cnt, cnt);     // # src characters, must be a positive simm32.
+
+  // Translate using the TROT table loaded above.
+  translate_ot(dst, src_addr, /* mask = */ 0x0001);
+
+  BLOCK_COMMENT("} string_inflate");
+
+  return offset() - block_start;
+}
+
+// Inflate byte[] to char[]. odd_reg contains cnt. Kills src.
+// Returns the size (in bytes) of the generated code.
+unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register odd_reg,
+                                            Register even_reg, Register tmp) {
+  int block_start = offset();
+
+  BLOCK_COMMENT("string_inflate {");
+
+  Label Lloop1, Lloop2, Lslow, Ldone;
+  const Register addr1 = src, ind2 = tmp;
+
+  z_sll(odd_reg, 1);       // Number of bytes to write. (Must be a positive simm32.)
+  clear_reg(ind2);         // Index to write.
+  z_ahi(odd_reg, -16);     // Last possible index for fast loop.
+  z_brl(Lslow);
+
+  // ind2: index, even_reg: index increment, odd_reg: index limit
+  // Clear work registers: ICM below only fills the selected byte positions,
+  // so the untouched positions provide the zero-extension of each char.
+  clear_reg(Z_R0);
+  clear_reg(Z_R1);
+  z_lhi(even_reg, 16);
+
+  bind(Lloop1); // 8 Characters per iteration.
+  // ICM(H) with mask 0x5 inserts the source bytes into the low byte of each
+  // 16-bit char slot of Z_R0/Z_R1.
+  z_icmh(Z_R0, 5, 0, addr1);
+  z_icmh(Z_R1, 5, 4, addr1);
+  z_icm(Z_R0, 5, 2, addr1);
+  z_icm(Z_R1, 5, 6, addr1);
+  z_aghi(addr1, 8);
+  z_stg(Z_R0, Address(dst, ind2));
+  z_stg(Z_R1, Address(dst, ind2, 8));
+  z_brxle(ind2, even_reg, Lloop1);  // ind2 += even_reg; loop while ind2 <= odd_reg.
+
+  bind(Lslow);
+  // Compute index limit and skip if negative.
+  z_ahi(odd_reg, 16-2);    // Last possible index for slow loop.
+  z_lhi(even_reg, 2);
+  z_cr(ind2, odd_reg);
+  z_brh(Ldone);
+
+  bind(Lloop2); // 1 Character per iteration.
+  z_llc(Z_R0, Address(addr1));      // Zero-extending byte load.
+  z_sth(Z_R0, Address(dst, ind2));  // Store as 16-bit char.
+  z_aghi(addr1, 1);
+  z_brxle(ind2, even_reg, Lloop2);
+
+  bind(Ldone);
+
+  BLOCK_COMMENT("} string_inflate");
+
+  return offset() - block_start;
+}
+
+// Kills src.
+// Test whether [src, src+cnt) contains a byte with the sign bit (0x80) set,
+// i.e. a negative byte value. result := 1 if found, 0 otherwise.
+// Fast loop: ORs two 8-byte words and ANDs with 0x8080...80 to test
+// 16 bytes per iteration. Slow loop: CLI-tests remaining bytes one by one.
+//   src      - used as running pointer (killed).
+//   cnt      - number of bytes; must be a positive simm32.
+//   odd_reg  - address limit (killed); even_reg - address increment (killed).
+//   tmp      - holds the 0x80.. mask (killed). Also kills Z_R0, Z_R1.
+// Returns the size of the emitted code.
+unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt,
+                                           Register odd_reg, Register even_reg, Register tmp) {
+  int block_start = offset();
+  Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
+  const Register addr = src, mask = tmp;
+
+  BLOCK_COMMENT("has_negatives {");
+
+  z_llgfr(Z_R1, cnt);      // Number of bytes to read. (Must be a positive simm32.)
+  z_llilf(mask, 0x80808080);
+  z_lhi(result, 1);        // Assume true.
+  // Last possible addr for fast loop.
+  z_lay(odd_reg, -16, Z_R1, src);
+  z_chi(cnt, 16);
+  z_brl(Lslow);
+
+  // ind1: index, even_reg: index increment, odd_reg: index limit
+  z_iihf(mask, 0x80808080); // Widen the mask to 64 bits for the 8-byte loads.
+  z_lghi(even_reg, 16);
+
+  bind(Lloop1); // 16 bytes per iteration.
+  z_lg(Z_R0, Address(addr));
+  z_lg(Z_R1, Address(addr, 8));
+  z_ogr(Z_R0, Z_R1);
+  z_ngr(Z_R0, mask);
+  z_brne(Ldone);           // If found return 1.
+  z_brxlg(addr, even_reg, Lloop1);
+
+  bind(Lslow);
+  z_aghi(odd_reg, 16-1);   // Last possible addr for slow loop.
+  z_lghi(even_reg, 1);
+  z_cgr(addr, odd_reg);
+  z_brh(Lnotfound);
+
+  bind(Lloop2); // 1 byte per iteration.
+  z_cli(Address(addr), 0x80);
+  z_brnl(Ldone);           // Byte >= 0x80 -> negative found; result preset to 1.
+  z_brxlg(addr, even_reg, Lloop2);
+
+  bind(Lnotfound);
+  z_lhi(result, 0);
+
+  bind(Ldone);
+
+  BLOCK_COMMENT("} has_negatives");
+
+  return offset() - block_start;
+}
+
+// kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result
+// Compare two strings with String.compareTo semantics: if equal up to the
+// shorter length, return the (character) length difference; otherwise the
+// difference of the first unequal characters.
+//   cnt1/cnt2 - lengths of str1/str2 (in bytes for UTF16 operands).
+//   ae        - operand encodings (StrIntrinsicNode::LL/UU/LU/UL).
+// Returns the size of the emitted code.
+unsigned int MacroAssembler::string_compare(Register str1, Register str2,
+                                            Register cnt1, Register cnt2,
+                                            Register odd_reg, Register even_reg, Register result, int ae) {
+  int block_start = offset();
+
+  assert_different_registers(str1, cnt1, cnt2, odd_reg, even_reg, result);
+  assert_different_registers(str2, cnt1, cnt2, odd_reg, even_reg, result);
+
+  // If strings are equal up to min length, return the length difference.
+  const Register diff = result, // Pre-set result with length difference.
+                 min  = cnt1,   // min number of bytes
+                 tmp  = cnt2;
+
+  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
+  // we interchange str1 and str2 in the UL case and negate the result.
+  // Like this, str1 is always latin1 encoded, except for the UU case.
+  // In addition, we need to 0-extend (or sign-extend, which is equivalent
+  // here since the values are non-negative) when using 64 bit registers.
+  const bool used_as_LU = (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL);
+
+  BLOCK_COMMENT("string_compare {");
+
+  if (used_as_LU) {
+    z_srl(cnt2, 1);        // cnt2 counts UTF16 bytes; convert to #characters.
+  }
+
+  // See if the lengths are different, and calculate min in cnt1.
+  // Save diff in case we need it for a tie-breaker.
+
+  // diff = cnt1 - cnt2
+  if (VM_Version::has_DistinctOpnds()) {
+    z_srk(diff, cnt1, cnt2);
+  } else {
+    z_lr(diff, cnt1);
+    z_sr(diff, cnt2);
+  }
+  if (str1 != str2) {
+    if (VM_Version::has_LoadStoreConditional()) {
+      z_locr(min, cnt2, Assembler::bcondHigh); // min = (cnt1 > cnt2) ? cnt2 : cnt1
+    } else {
+      Label Lskip;
+      z_brl(Lskip);    // min ok if cnt1 < cnt2
+      z_lr(min, cnt2); // min = cnt2
+      bind(Lskip);
+    }
+  }
+
+  if (ae == StrIntrinsicNode::UU) {
+    z_sra(diff, 1);        // Convert byte difference to character difference.
+  }
+  if (str1 != str2) {
+    Label Ldone;
+    if (used_as_LU) {
+      // Loop which searches the first difference character by character.
+      Label Lloop;
+      const Register ind1 = Z_R1,
+                     ind2 = min;
+      int stride1 = 1, stride2 = 2; // See comment above.
+
+      // ind1: index, even_reg: index increment, odd_reg: index limit
+      z_llilf(ind1, (unsigned int)(-stride1));
+      z_lhi(even_reg, stride1);
+      add2reg(odd_reg, -stride1, min);
+      clear_reg(ind2); // kills min
+
+      bind(Lloop);
+      z_brxh(ind1, even_reg, Ldone); // Past limit: equal over min length; diff (== result) decides.
+      z_llc(tmp, Address(str1, ind1));
+      z_llh(Z_R0, Address(str2, ind2));
+      z_ahi(ind2, stride2);
+      z_sr(tmp, Z_R0);
+      z_bre(Lloop);
+
+      z_lr(result, tmp); // Difference of first mismatched characters.
+
+    } else {
+      // Use clcle in fast loop (only for same encoding).
+      z_lgr(Z_R0, str1);
+      z_lgr(even_reg, str2);
+      z_llgfr(Z_R1, min);      // len in bytes (left operand)
+      z_llgfr(odd_reg, min);   // len in bytes (right operand)
+
+      if (ae == StrIntrinsicNode::LL) {
+        compare_long_ext(Z_R0, even_reg, 0);
+      } else {
+        compare_long_uni(Z_R0, even_reg, 0);
+      }
+      z_bre(Ldone);
+      // Mismatch: Z_R0/even_reg point at the first unequal elements; reload them.
+      z_lgr(Z_R1, Z_R0);
+      if (ae == StrIntrinsicNode::LL) {
+        z_llc(Z_R0, Address(even_reg));
+        z_llc(result, Address(Z_R1));
+      } else {
+        z_llh(Z_R0, Address(even_reg));
+        z_llh(result, Address(Z_R1));
+      }
+      z_sr(result, Z_R0);
+    }
+
+    // Otherwise, return the difference between the first mismatched chars.
+    bind(Ldone);
+  }
+
+  if (ae == StrIntrinsicNode::UL) {
+    z_lcr(result, result); // Negate result (see note above).
+  }
+
+  BLOCK_COMMENT("} string_compare");
+
+  return offset() - block_start;
+}
+
+// Compare two arrays (or string slices) for equality.
+// result := 1 if all elements are equal, 0 otherwise.
+//   is_array_equ - true: Arrays.equals style. The references themselves are
+//                  compared (same/NULL checks), lengths are loaded and
+//                  compared, and the array element base offset is applied.
+//                  false: ary1/ary2 are data addresses, limit is the length.
+//   is_byte      - true for byte elements, false for char elements.
+// Operands of <= 256 bytes are compared with one EXecuted CLC; longer ones
+// with CLCLE (bytes) / CLCLU (chars).
+// Kills odd_reg (byte count), even_reg, Z_R0, Z_R1.
+// Returns the size of the emitted code.
+unsigned int MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
+                                          Register odd_reg, Register even_reg, Register result, bool is_byte) {
+  int block_start = offset();
+
+  BLOCK_COMMENT("array_equals {");
+
+  assert_different_registers(ary1, limit, odd_reg, even_reg);
+  assert_different_registers(ary2, limit, odd_reg, even_reg);
+
+  Label Ldone, Ldone_true, Ldone_false, Lclcle, CLC_template;
+  int base_offset = 0;
+
+  if (ary1 != ary2) {
+    if (is_array_equ) {
+      base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
+
+      // Return true if the same array.
+      compareU64_and_branch(ary1, ary2, Assembler::bcondEqual, Ldone_true);
+
+      // Return false if one of them is NULL.
+      compareU64_and_branch(ary1, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
+      compareU64_and_branch(ary2, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
+
+      // Load the lengths of arrays.
+      z_llgf(odd_reg, Address(ary1, arrayOopDesc::length_offset_in_bytes()));
+
+      // Return false if the two arrays are not equal length.
+      z_c(odd_reg, Address(ary2, arrayOopDesc::length_offset_in_bytes()));
+      z_brne(Ldone_false);
+
+      // string len in bytes (right operand)
+      if (!is_byte) {
+        // Compare #chars against 128 (== 256 bytes) before scaling.
+        z_chi(odd_reg, 128);
+        z_sll(odd_reg, 1); // preserves flags
+        z_brh(Lclcle);
+      } else {
+        compareU32_and_branch(odd_reg, (intptr_t)256, Assembler::bcondHigh, Lclcle);
+      }
+    } else {
+      z_llgfr(odd_reg, limit); // Need to zero-extend prior to using the value.
+      compareU32_and_branch(limit, (intptr_t)256, Assembler::bcondHigh, Lclcle);
+    }
+
+
+    // Use clc instruction for up to 256 bytes.
+    {
+      Register str1_reg = ary1,
+          str2_reg = ary2;
+      if (is_array_equ) {
+        str1_reg = Z_R1;
+        str2_reg = even_reg;
+        add2reg(str1_reg, base_offset, ary1); // string addr (left operand)
+        add2reg(str2_reg, base_offset, ary2); // string addr (right operand)
+      }
+      z_ahi(odd_reg, -1); // Clc uses decremented limit. Also compare result to 0.
+      z_brl(Ldone_true);  // Zero-length operands are trivially equal.
+      // Note: We could jump to the template if equal.
+
+      assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
+      z_exrl(odd_reg, CLC_template); // Execute CLC with the actual length.
+      z_bre(Ldone_true);
+      // fall through
+
+      bind(Ldone_false);
+      clear_reg(result);
+      z_bru(Ldone);
+
+      bind(CLC_template);
+      z_clc(0, 0, str1_reg, 0, str2_reg);
+    }
+
+    // Use clcle instruction.
+    {
+      bind(Lclcle);
+      add2reg(even_reg, base_offset, ary2); // string addr (right operand)
+      add2reg(Z_R0, base_offset, ary1);     // string addr (left operand)
+
+      z_lgr(Z_R1, odd_reg); // string len in bytes (left operand)
+      if (is_byte) {
+        compare_long_ext(Z_R0, even_reg, 0);
+      } else {
+        compare_long_uni(Z_R0, even_reg, 0);
+      }
+      z_lghi(result, 0); // Preserve flags.
+      z_brne(Ldone);
+    }
+  }
+  // fall through
+
+  bind(Ldone_true);
+  z_lghi(result, 1); // All characters are equal.
+  bind(Ldone);
+
+  BLOCK_COMMENT("} array_equals");
+
+  return offset() - block_start;
+}
+
+// kill: haycnt, needlecnt, odd_reg, even_reg; early clobber: result
+// Find the first occurrence of needle in haystack; result := character
+// index of the match, or -1 if not found.
+//   needlecnt - needle length register, or noreg if the length is the
+//               compile-time constant needlecntval.
+//   ae        - operand encodings (StrIntrinsicNode); LU is not supported.
+// Strategy: SRST/SRSTU locates candidate positions of the first needle
+// character; the needle tail is then matched with an EXecuted CLC (short
+// needles), CLCLE/CLCLU (needles > 256 bytes), or a character loop (UL).
+// Returns the size of the emitted code.
+unsigned int MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
+                                            Register needle, Register needlecnt, int needlecntval,
+                                            Register odd_reg, Register even_reg, int ae) {
+  int block_start = offset();
+
+  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
+  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
+  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
+  Label L_needle1, L_Found, L_NotFound;
+
+  BLOCK_COMMENT("string_indexof {");
+
+  // Trivial case: same register for needle and haystack -> match at index 0.
+  if (needle == haystack) {
+    z_lhi(result, 0);
+  } else {
+
+  // Load first character of needle (R0 used by search_string instructions).
+  if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); }
+
+  // Compute last haystack addr to use if no match gets found.
+  if (needlecnt != noreg) { // variable needlecnt
+    z_ahi(needlecnt, -1); // Remaining characters after first one.
+    z_sr(haycnt, needlecnt); // Compute index succeeding last element to compare.
+    if (n_csize == 2) { z_sll(needlecnt, 1); } // In bytes.
+  } else { // constant needlecnt
+    assert((needlecntval & 0x7fff) == needlecntval, "must be positive simm16 immediate");
+    // Compute index succeeding last element to compare.
+    if (needlecntval != 1) { z_ahi(haycnt, 1 - needlecntval); }
+  }
+
+  z_llgfr(haycnt, haycnt); // Clear high half.
+  z_lgr(result, haystack); // Final result will be computed from needle start pointer.
+  if (h_csize == 2) { z_sll(haycnt, 1); } // Scale to number of bytes.
+  z_agr(haycnt, haystack); // Point to address succeeding last element (haystack+scale*(haycnt-needlecnt+1)).
+
+  if (h_csize != n_csize) {
+    assert(ae == StrIntrinsicNode::UL, "Invalid encoding");
+
+    if (needlecnt != noreg || needlecntval != 1) {
+      if (needlecnt != noreg) {
+        compare32_and_branch(needlecnt, (intptr_t)0, Assembler::bcondEqual, L_needle1);
+      }
+
+      // Main Loop: UL version (now we have at least 2 characters).
+      Label L_OuterLoop, L_InnerLoop, L_Skip;
+      bind(L_OuterLoop); // Search for 1st 2 characters.
+      z_lgr(Z_R1, haycnt);
+      MacroAssembler::search_string_uni(Z_R1, result);
+      z_brc(Assembler::bcondNotFound, L_NotFound);
+      z_lgr(result, Z_R1);
+
+      // Compare the needle tail byte (latin1) against char (UTF16).
+      z_lghi(Z_R1, n_csize);
+      z_lghi(even_reg, h_csize);
+      bind(L_InnerLoop);
+      z_llgc(odd_reg, Address(needle, Z_R1));
+      z_ch(odd_reg, Address(result, even_reg));
+      z_brne(L_Skip);
+      if (needlecnt != noreg) { z_cr(Z_R1, needlecnt); } else { z_chi(Z_R1, needlecntval - 1); }
+      z_brnl(L_Found);
+      z_aghi(Z_R1, n_csize);
+      z_aghi(even_reg, h_csize);
+      z_bru(L_InnerLoop);
+
+      bind(L_Skip);
+      z_aghi(result, h_csize); // This is the new address we want to use for comparing.
+      z_bru(L_OuterLoop);
+    }
+
+  } else {
+    const intptr_t needle_bytes = (n_csize == 2) ? ((needlecntval - 1) << 1) : (needlecntval - 1);
+    Label L_clcle;
+
+    if (needlecnt != noreg || (needlecntval != 1 && needle_bytes <= 256)) {
+      if (needlecnt != noreg) {
+        compare32_and_branch(needlecnt, 256, Assembler::bcondHigh, L_clcle);
+        z_ahi(needlecnt, -1); // remaining bytes -1 (for CLC)
+        z_brl(L_needle1);
+      }
+
+      // Main Loop: clc version (now we have at least 2 characters).
+      Label L_OuterLoop, CLC_template;
+      bind(L_OuterLoop); // Search for 1st 2 characters.
+      z_lgr(Z_R1, haycnt);
+      if (h_csize == 1) {
+        MacroAssembler::search_string(Z_R1, result);
+      } else {
+        MacroAssembler::search_string_uni(Z_R1, result);
+      }
+      z_brc(Assembler::bcondNotFound, L_NotFound);
+      z_lgr(result, Z_R1);
+
+      if (needlecnt != noreg) {
+        assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
+        z_exrl(needlecnt, CLC_template); // Execute CLC with the actual tail length.
+      } else {
+        z_clc(h_csize, needle_bytes -1, Z_R1, n_csize, needle);
+      }
+      z_bre(L_Found);
+      z_aghi(result, h_csize); // This is the new address we want to use for comparing.
+      z_bru(L_OuterLoop);
+
+      if (needlecnt != noreg) {
+        bind(CLC_template);
+        z_clc(h_csize, 0, Z_R1, n_csize, needle);
+      }
+    }
+
+    if (needlecnt != noreg || needle_bytes > 256) {
+      bind(L_clcle);
+
+      // Main Loop: clcle version (now we have at least 256 bytes).
+      Label L_OuterLoop, CLC_template;
+      bind(L_OuterLoop); // Search for 1st 2 characters.
+      z_lgr(Z_R1, haycnt);
+      if (h_csize == 1) {
+        MacroAssembler::search_string(Z_R1, result);
+      } else {
+        MacroAssembler::search_string_uni(Z_R1, result);
+      }
+      z_brc(Assembler::bcondNotFound, L_NotFound);
+
+      add2reg(Z_R0, n_csize, needle);
+      add2reg(even_reg, h_csize, Z_R1);
+      z_lgr(result, Z_R1);
+      if (needlecnt != noreg) {
+        z_llgfr(Z_R1, needlecnt); // needle len in bytes (left operand)
+        z_llgfr(odd_reg, needlecnt);
+      } else {
+        load_const_optimized(Z_R1, needle_bytes);
+        if (Immediate::is_simm16(needle_bytes)) { z_lghi(odd_reg, needle_bytes); } else { z_lgr(odd_reg, Z_R1); }
+      }
+      if (h_csize == 1) {
+        compare_long_ext(Z_R0, even_reg, 0);
+      } else {
+        compare_long_uni(Z_R0, even_reg, 0);
+      }
+      z_bre(L_Found);
+
+      if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); } // Reload.
+      z_aghi(result, h_csize); // This is the new address we want to use for comparing.
+      z_bru(L_OuterLoop);
+    }
+  }
+
+  if (needlecnt != noreg || needlecntval == 1) {
+    bind(L_needle1);
+
+    // Single needle character version.
+    if (h_csize == 1) {
+      MacroAssembler::search_string(haycnt, result);
+    } else {
+      MacroAssembler::search_string_uni(haycnt, result);
+    }
+    z_lgr(result, haycnt);
+    z_brc(Assembler::bcondFound, L_Found);
+  }
+
+  bind(L_NotFound);
+  add2reg(result, -1, haystack); // Return -1.
+
+  bind(L_Found); // Return index (or -1 in fallthrough case).
+  z_sgr(result, haystack);
+  if (h_csize == 2) { z_srag(result, result, exact_log2(sizeof(jchar))); } // Byte offset -> char index.
+  } // needle != haystack
+  BLOCK_COMMENT("} string_indexof");
+
+  return offset() - block_start;
+}
+
+// early clobber: result
+// Find the first occurrence of a single character in haystack.
+// result := character index of the match, or -1 if not found.
+//   needle  - register with the character, or noreg to use the immediate
+//             needleChar instead.
+//   is_byte - true: latin1 haystack (SRST); false: UTF16 haystack (SRSTU).
+// Kills odd_reg, even_reg, Z_R0. Returns the size of the emitted code.
+unsigned int MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
+                                                 Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte) {
+  int block_start = offset();
+
+  BLOCK_COMMENT("string_indexof_char {");
+
+  // Trivial case: same register for needle and haystack -> match at index 0.
+  if (needle == haystack) {
+    z_lhi(result, 0);
+  } else {
+
+  Label Ldone;
+
+  z_llgfr(odd_reg, haycnt);  // Preset loop ctr/searchrange end.
+  if (needle == noreg) {
+    load_const_optimized(Z_R0, (unsigned long)needleChar);
+  } else {
+    if (is_byte) {
+      z_llgcr(Z_R0, needle); // First (and only) needle char.
+    } else {
+      z_llghr(Z_R0, needle); // First (and only) needle char.
+    }
+  }
+
+  if (!is_byte) {
+    z_agr(odd_reg, odd_reg); // Calc #bytes to be processed with SRSTU.
+  }
+
+  z_lgr(even_reg, haystack); // haystack addr
+  z_agr(odd_reg, haystack);  // First char after range end.
+  z_lghi(result, -1);
+
+  if (is_byte) {
+    MacroAssembler::search_string(odd_reg, even_reg);
+  } else {
+    MacroAssembler::search_string_uni(odd_reg, even_reg);
+  }
+  z_brc(Assembler::bcondNotFound, Ldone);
+  // Found: odd_reg holds the match address; convert to an index.
+  if (is_byte) {
+    if (VM_Version::has_DistinctOpnds()) {
+      z_sgrk(result, odd_reg, haystack);
+    } else {
+      z_sgr(odd_reg, haystack);
+      z_lgr(result, odd_reg);
+    }
+  } else {
+    z_slgr(odd_reg, haystack);
+    z_srlg(result, odd_reg, exact_log2(sizeof(jchar))); // Byte offset -> char index.
+  }
+
+  bind(Ldone);
+  } // needle != haystack
+  BLOCK_COMMENT("} string_indexof_char");
+
+  return offset() - block_start;
+}
+
+
+//-------------------------------------------------
+//   Constants (scalar and oop) in constant pool
+//-------------------------------------------------
+
+// Add a non-relocated constant to the CP (constant pool / TOC).
+// Returns the offset of the entry relative to the start of the constants
+// section, or -1 if no entry could be created.
+int MacroAssembler::store_const_in_toc(AddressLiteral& val) {
+  long value = val.value();
+  address tocPos = long_constant(value);
+
+  if (tocPos == NULL) {
+    // long_constant() could not create a constant pool entry. Return a
+    // "fatal" offset, just in case subsequently generated access code
+    // is executed.
+    return -1;
+  }
+  return (int)(tocPos - code()->consts()->start());
+}
+
+// Add a relocated constant to the CP.
+// Returns the TOC offset where the address is stored, or -1 if no
+// constant pool entry could be created.
+int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) {
+  // Use RelocationHolder::none for the constant pool entry. Otherwise we
+  // will end up with a failing NativeCall::verify(x), where x is the
+  // address of the constant pool entry.
+  address tocPos = address_constant((address)oop.value(), RelocationHolder::none);
+
+  if (tocPos == NULL) {
+    // No constant entry has been created. Return a "fatal" offset, just in
+    // case subsequently generated access code is executed.
+    return -1;
+  }
+
+  const int tocOffset = (int)(tocPos - code()->consts()->start());
+  RelocationHolder rsp = oop.rspec();
+  Relocation*      rel = rsp.reloc();
+
+  if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) {
+    // Store toc_offset in relocation, used by call_far_patchable.
+    ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset);
+  }
+  relocate(rsp); // Relocate at the load's pc.
+
+  return tocOffset;
+}
+
+// Store the constant in the TOC and emit a pc-relative load of it into dst.
+// Returns false if no constant pool entry could be allocated.
+bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
+  const int tocOffset = store_const_in_toc(a);
+  if (tocOffset == -1) {
+    return false;
+  }
+  address cp_start = code()->consts()->start();
+  assert(cp_start != NULL, "Please add CP address");
+
+  load_long_pcrelative(dst, cp_start + tocOffset);
+  return true;
+}
+
+// Store the (relocated) oop address in the TOC and emit a pc-relative load
+// of it into dst. Returns false if no constant pool entry could be allocated.
+bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
+  const int tocOffset = store_oop_in_toc(a);
+  if (tocOffset == -1) {
+    return false;
+  }
+  address cp_start = code()->consts()->start();
+  assert(cp_start != NULL, "Please add CP address");
+
+  load_addr_pcrelative(dst, cp_start + tocOffset);
+  return true;
+}
+
+// If the instruction sequence at the given pc is a load_const_from_toc
+// sequence, return the value currently stored at the referenced position
+// in the TOC.
+intptr_t MacroAssembler::get_const_from_toc(address pc) {
+
+  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
+
+  long    offset  = get_load_const_from_toc_offset(pc);
+  address dataLoc = NULL;
+  if (is_load_const_from_toc_pcrelative(pc)) {
+    dataLoc = pc + offset;   // Offset is relative to the load instruction itself.
+  } else {
+    CodeBlob* cb = CodeCache::find_blob_unsafe(pc);   // Else we get assertion if nmethod is zombie.
+    assert(cb && cb->is_nmethod(), "sanity");
+    nmethod* nm = (nmethod*)cb;
+    dataLoc = nm->ctable_begin() + offset;   // Offset is relative to the constant table.
+  }
+  return *(intptr_t *)dataLoc;
+}
+
+// If the instruction sequence at the given pc is a load_const_from_toc
+// sequence, copy the passed-in new_data value into the referenced
+// position in the TOC.
+//   cb - expected enclosing CodeBlob; only used for an assertion.
+void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) {
+  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
+
+  long    offset = MacroAssembler::get_load_const_from_toc_offset(pc);
+  address dataLoc = NULL;
+  if (is_load_const_from_toc_pcrelative(pc)) {
+    dataLoc = pc+offset;   // Offset is relative to the load instruction itself.
+  } else {
+    nmethod* nm = CodeCache::find_nmethod(pc);
+    assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob");
+    dataLoc = nm->ctable_begin() + offset;   // Offset is relative to the constant table.
+  }
+  if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary.
+    *(unsigned long *)dataLoc = new_data;
+  }
+}
+
+// Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc
+// site. Verify by calling is_load_const_from_toc() before!!
+// Offset is +/- 2**32 -> use long.
+// Returns the pc-relative offset encoded in the load instruction at "a".
+long MacroAssembler::get_load_const_from_toc_offset(address a) {
+  assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
+  //  expected code sequence:
+  //    z_lgrl(t, simm32);    len = 6
+  unsigned long inst;
+  // The returned instruction length (always 6 here) is not needed; avoid an
+  // unused-variable warning by not storing it.
+  get_instruction(a, &inst);
+  return get_pcrel_offset(inst);
+}
+
+//**********************************************************************************
+//  inspection of generated instruction sequences for a particular pattern
+//**********************************************************************************
+
+// Test whether the instruction at "a" is a pc-relative load from the
+// constant pool (currently: a z_lgrl).
+bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
+#ifdef ASSERT
+  // Debug trap: if the bytes at a+2 decode as a pc-relative call while "a"
+  // itself is a pc-relative load, the site matches the suspicious pattern
+  // "z_lgrl at a, z_brasl at a+2". Dump the surrounding code and crash
+  // deliberately so the situation can be analyzed.
+  // NOTE(review): presumably this guards against misidentifying an
+  // overlapping/garbled instruction sequence -- confirm intent.
+  unsigned long inst;
+  unsigned int  len = get_instruction(a+2, &inst);
+  if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
+    const int range = 128;
+    Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
+    VM_Version::z_SIGSEGV();
+  }
+#endif
+  // expected code sequence:
+  //   z_lgrl(t, relAddr32);    len = 6
+  //TODO: verify accessed data is in CP, if possible.
+  return is_load_pcrelative_long(a);  // TODO: might be too general. Currently, only lgrl is used.
+}
+
+// A load_const_from_toc sequence immediately followed by a call by register.
+bool MacroAssembler::is_load_const_from_toc_call(address a) {
+  if (!is_load_const_from_toc(a)) {
+    return false;
+  }
+  return is_call_byregister(a + load_const_from_toc_size());
+}
+
+// A load_const sequence immediately followed by a call by register.
+bool MacroAssembler::is_load_const_call(address a) {
+  if (!is_load_const(a)) {
+    return false;
+  }
+  return is_call_byregister(a + load_const_size());
+}
+
+//-------------------------------------------------
+//   Emitters for some really CISC instructions
+//-------------------------------------------------
+
+// Move long extended (MVCLE): copy a byte buffer, padding with 'pad'.
+// The instruction may end with CC==3 before the operation is complete,
+// so retry until it reports a final result.
+void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
+  assert(dst->encoding()%2==0, "must be an even/odd register pair");
+  assert(src->encoding()%2==0, "must be an even/odd register pair");
+  assert(pad<256, "must be a padding BYTE");
+
+  Label again;
+  bind(again);
+  Assembler::z_mvcle(dst, src, pad);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Compare logical long extended (CLCLE): byte-wise compare of two buffers,
+// padding the shorter operand with 'pad'. CC==3 means the comparison is
+// incomplete, so retry until a final result is reported.
+void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
+  assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
+  assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
+  assert(pad<256, "must be a padding BYTE");
+
+  Label again;
+  bind(again);
+  Assembler::z_clcle(left, right, pad, Z_R0);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Compare logical long unicode (CLCLU): halfword-wise compare of two
+// buffers, padding the shorter operand with 'pad'. CC==3 means the
+// comparison is incomplete, so retry until a final result is reported.
+void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
+  assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
+  assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
+  assert(pad<=0xfff, "must be a padding HALFWORD");
+  assert(VM_Version::has_ETF2(), "instruction must be available");
+
+  Label again;
+  bind(again);
+  Assembler::z_clclu(left, right, pad, Z_R0);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Search string (SRST): scan from 'start' up to (exclusive) 'end' for the
+// character in Z_R0. SRST may give up with CC==3 after a CPU-determined
+// amount of work, so retry until a final result is reported.
+void MacroAssembler::search_string(Register end, Register start) {
+  assert(end->encoding() != 0, "end address must not be in R0");
+  assert(start->encoding() != 0, "start address must not be in R0");
+
+  Label again;
+  bind(again);
+  Assembler::z_srst(end, start);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Search string unicode (SRSTU): scan from 'start' up to (exclusive) 'end'
+// for the UTF16 character in Z_R0. SRSTU may give up with CC==3 after a
+// CPU-determined amount of work, so retry until a final result is reported.
+void MacroAssembler::search_string_uni(Register end, Register start) {
+  assert(end->encoding() != 0, "end address must not be in R0");
+  assert(start->encoding() != 0, "start address must not be in R0");
+  assert(VM_Version::has_ETF3(), "instruction must be available");
+
+  Label again;
+  bind(again);
+  Assembler::z_srstu(end, start);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Compute message authentication code (KMAC). The instruction may end with
+// CC==3 before the whole buffer is processed, so retry until done.
+void MacroAssembler::kmac(Register srcBuff) {
+  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
+  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
+
+  Label again;
+  bind(again);
+  Assembler::z_kmac(Z_R0, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Compute intermediate message digest (KIMD). The instruction may end with
+// CC==3 before the whole buffer is processed, so retry until done.
+void MacroAssembler::kimd(Register srcBuff) {
+  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
+  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
+
+  Label again;
+  bind(again);
+  Assembler::z_kimd(Z_R0, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Compute last message digest (KLMD). The instruction may end with CC==3
+// before the whole buffer is processed, so retry until done.
+void MacroAssembler::klmd(Register srcBuff) {
+  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
+  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");
+
+  Label again;
+  bind(again);
+  Assembler::z_klmd(Z_R0, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Cipher message (KM). dstBuff and srcBuff may be the same register
+// (encryption in-place), but their storage must not overlap destructively,
+// and neither may overlap the parameter block.
+// The instruction may end with CC==3 before the whole buffer is processed,
+// so retry until done.
+void MacroAssembler::km(Register dstBuff, Register srcBuff) {
+  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
+  assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
+  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
+
+  Label again;
+  bind(again);
+  Assembler::z_km(dstBuff, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Cipher message with chaining (KMC). dstBuff and srcBuff may be the same
+// register (encryption in-place), but their storage must not overlap
+// destructively, and neither may overlap the parameter block.
+// The instruction may end with CC==3 before the whole buffer is processed,
+// so retry until done.
+void MacroAssembler::kmc(Register dstBuff, Register srcBuff) {
+  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
+  assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
+  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
+
+  Label again;
+  bind(again);
+  Assembler::z_kmc(dstBuff, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Checksum (CKSM): accumulate a checksum of the source buffer into crcBuff.
+// The instruction may end with CC==3 before the whole buffer is processed,
+// so retry until done.
+void MacroAssembler::cksm(Register crcBuff, Register srcBuff) {
+  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");
+
+  Label again;
+  bind(again);
+  Assembler::z_cksm(crcBuff, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Translate one to one (TROO). The instruction may end with CC==3 before
+// the whole operand is translated, so retry until done.
+void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) {
+  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
+  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
+
+  Label again;
+  bind(again);
+  Assembler::z_troo(r1, r2, m3);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Translate one to two (TROT). The instruction may end with CC==3 before
+// the whole operand is translated, so retry until done.
+void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) {
+  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
+  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
+
+  Label again;
+  bind(again);
+  Assembler::z_trot(r1, r2, m3);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Translate two to one (TRTO). The instruction may end with CC==3 before
+// the whole operand is translated, so retry until done.
+void MacroAssembler::translate_to(Register r1, Register r2, uint m3) {
+  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
+  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
+
+  Label again;
+  bind(again);
+  Assembler::z_trto(r1, r2, m3);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Translate two to two (TRTT). The instruction may end with CC==3 before
+// the whole operand is translated, so retry until done.
+void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) {
+  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
+  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");
+
+  Label again;
+  bind(again);
+  Assembler::z_trtt(r1, r2, m3);
+  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, again);
+}
+
+// Emit a check of the global safepoint state: branch to slow_path if the
+// state is not _not_synchronized (i.e. a safepoint is pending).
+//   scratch      - temp for the state address; Z_R1 is used if noreg.
+//   may_relocate - if true, emit relocation info for the (absolute) address
+//                  of the state word; pc-relative form only if in reach.
+void MacroAssembler::generate_safepoint_check(Label& slow_path, Register scratch, bool may_relocate) {
+  if (scratch == noreg) scratch = Z_R1;
+  address Astate = SafepointSynchronize::address_of_state();
+  BLOCK_COMMENT("safepoint check:");
+
+  if (may_relocate) {
+    ptrdiff_t total_distance = Astate - this->pc();
+    if (RelAddr::is_in_range_of_RelAddr32(total_distance)) {
+      RelocationHolder rspec = external_word_Relocation::spec(Astate);
+      (this)->relocate(rspec, relocInfo::pcrel_addr_format);
+      load_absolute_address(scratch, Astate);
+    } else {
+      load_const_optimized(scratch, Astate);
+    }
+  } else {
+    load_absolute_address(scratch, Astate);
+  }
+  // Compare only the least significant byte of the 4-byte state word
+  // (big-endian: byte at offset sz_state-1) against _not_synchronized.
+  z_cli(/*SafepointSynchronize::sz_state()*/4-1, scratch, SafepointSynchronize::_not_synchronized);
+  z_brne(slow_path);
+}
+
+
+// Emit receiver-type profiling code that updates the ReceiverTypeData rows at Rdata.
+//
+// Rdata                  - base address of the ReceiverTypeData (MDO slice) to update.
+// Rreceiver_klass        - klass of the actual receiver (never zero).
+// Rwanted_receiver_klass - scratch; holds the klass read from the row under test.
+// Rmatching_row          - scratch; ends up holding the address of the matching
+//                          (or newly claimed) row whose counter is incremented.
+// is_virtual_call        - if true and no row matches or is free, the total
+//                          counter is incremented to record the polymorphic case.
+void MacroAssembler::generate_type_profiling(const Register Rdata,
+                                             const Register Rreceiver_klass,
+                                             const Register Rwanted_receiver_klass,
+                                             const Register Rmatching_row,
+                                             bool is_virtual_call) {
+  const int row_size = in_bytes(ReceiverTypeData::receiver_offset(1)) -
+                       in_bytes(ReceiverTypeData::receiver_offset(0));
+  const int num_rows = ReceiverTypeData::row_limit();
+  NearLabel found_free_row;
+  NearLabel do_increment;
+  NearLabel found_no_slot;
+
+  BLOCK_COMMENT("type profiling {");
+
+  // search for:
+  //    a) The type given in Rwanted_receiver_klass.
+  //    b) The *first* empty row.
+
+  // First search for a) only, just running over b) with no regard.
+  // This is possible because
+  //    wanted_receiver_class == receiver_class  &&  wanted_receiver_class == 0
+  // is never true (receiver_class can't be zero).
+  for (int row_num = 0; row_num < num_rows; row_num++) {
+    // Row_offset should be a well-behaved positive number. The generated code relies
+    // on that wrt constant code size. Add2reg can handle all row_offset values, but
+    // will have to vary generated code size.
+    int row_offset = in_bytes(ReceiverTypeData::receiver_offset(row_num));
+    assert(Displacement::is_shortDisp(row_offset), "Limitation of generated code");
+
+    // Is Rwanted_receiver_klass in this row?
+    if (VM_Version::has_CompareBranch()) {
+      z_lg(Rwanted_receiver_klass, row_offset, Z_R0, Rdata);
+      // Rmatching_row = Rdata + row_offset;
+      add2reg(Rmatching_row, row_offset, Rdata);
+      // if (*row_recv == (intptr_t) receiver_klass) goto fill_existing_slot;
+      compare64_and_branch(Rwanted_receiver_klass, Rreceiver_klass, Assembler::bcondEqual, do_increment);
+    } else {
+      add2reg(Rmatching_row, row_offset, Rdata);
+      z_cg(Rreceiver_klass, row_offset, Z_R0, Rdata);
+      z_bre(do_increment);
+    }
+  }
+
+  // Now that we did not find a match, let's search for b).
+
+  // We could save the first calculation of Rmatching_row if we would search for a) in reverse order.
+  // We would then end up here with Rmatching_row containing the value for row_num == 0.
+  // We would not see much benefit, if any at all, because the CPU can schedule
+  // two instructions together with a branch anyway.
+  for (int row_num = 0; row_num < num_rows; row_num++) {
+    int row_offset = in_bytes(ReceiverTypeData::receiver_offset(row_num));
+
+    // Has this row a zero receiver_klass, i.e. is it empty?
+    if (VM_Version::has_CompareBranch()) {
+      z_lg(Rwanted_receiver_klass, row_offset, Z_R0, Rdata);
+      // Rmatching_row = Rdata + row_offset
+      add2reg(Rmatching_row, row_offset, Rdata);
+      // if (*row_recv == (intptr_t) 0) goto found_free_row
+      compare64_and_branch(Rwanted_receiver_klass, (intptr_t)0, Assembler::bcondEqual, found_free_row);
+    } else {
+      add2reg(Rmatching_row, row_offset, Rdata);
+      load_and_test_long(Rwanted_receiver_klass, Address(Rdata, row_offset));
+      z_bre(found_free_row);  // zero -> Found a free row.
+    }
+  }
+
+  // No match, no empty row found.
+  // Increment total counter to indicate polymorphic case.
+  if (is_virtual_call) {
+    add2mem_64(Address(Rdata, CounterData::count_offset()), 1, Rmatching_row);
+  }
+  z_bru(found_no_slot);
+
+  // Here we found an empty row, but we have not found Rwanted_receiver_klass.
+  // Rmatching_row holds the address to the first empty row.
+  bind(found_free_row);
+  // Store receiver_klass into empty slot.
+  z_stg(Rreceiver_klass, 0, Z_R0, Rmatching_row);
+
+  // Increment the counter of Rmatching_row.
+  bind(do_increment);
+  ByteSize counter_offset = ReceiverTypeData::receiver_count_offset(0) - ReceiverTypeData::receiver_offset(0);
+  add2mem_64(Address(Rmatching_row, counter_offset), 1, Rdata);
+
+  bind(found_no_slot);
+
+  BLOCK_COMMENT("} type profiling");
+}
+
+//---------------------------------------
+// Helpers for Intrinsic Emitters
+//---------------------------------------
+
+/**
+ * uint32_t crc;
+ * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
+ */
+void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
+  assert_different_registers(crc, table, tmp);
+  assert_different_registers(val, table);
+  if (crc == val) {      // Must rotate first to use the unmodified value.
+    rotate_then_insert(tmp, val, 56-2, 63-2, 2, true);  // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
+    z_srl(crc, 8);       // Unsigned shift, clear leftmost 8 bits.
+  } else {
+    z_srl(crc, 8);       // Unsigned shift, clear leftmost 8 bits.
+    rotate_then_insert(tmp, val, 56-2, 63-2, 2, true);  // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
+  }
+  z_x(crc, Address(table, tmp, 0));
+}
+
+/**
+ * uint32_t crc;
+ * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
+ */
+// Convenience wrapper: fold the low byte of crc back into crc itself
+// (the crc == val case of fold_byte_crc32). tmp is a scratch register.
+void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
+  fold_byte_crc32(crc, crc, table, tmp);
+}
+
+/**
+ * Emits code to update CRC-32 with a byte value according to constants in table.
+ *
+ * @param [in,out]crc Register containing the crc.
+ * @param [in]val     Register containing the byte to fold into the CRC.
+ * @param [in]table   Register containing the table of crc constants.
+ *
+ * uint32_t crc;
+ * val = crc_table[(val ^ crc) & 0xFF];
+ * crc = val ^ (crc >> 8);
+ */
+// Note: val is destroyed — it serves as both the folded byte (val ^ crc)
+// and the scratch register of fold_byte_crc32.
+void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
+  z_xr(val, crc);                      // val = val ^ crc (only low byte is used below).
+  fold_byte_crc32(crc, val, table, val);
+}
+
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register pointing to CRC table
+ */
+// Byte-at-a-time CRC update loop over len bytes at buf.
+// data is a scratch register for the current input byte.
+// If invertCRC, crc is complemented before and after the loop (standard
+// CRC-32 pre-/post-conditioning). If len <= 0 (32-bit test), nothing is done,
+// not even the complementing.
+// Side effects: buf is advanced past the processed bytes, len is counted down.
+void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
+                                           Register data, bool invertCRC) {
+  assert_different_registers(crc, buf, len, table, data);
+
+  Label L_mainLoop, L_done;
+  const int mainLoop_stepping = 1;
+
+  // Process all bytes in a single-byte loop.
+  z_ltr(len, len);  // 32-bit test: skip everything for len <= 0.
+  z_brnh(L_done);
+
+  if (invertCRC) {
+    not_(crc, noreg, false); // ~c
+  }
+
+  bind(L_mainLoop);
+    z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
+    add2reg(buf, mainLoop_stepping);        // Advance buffer position.
+    update_byte_crc32(crc, data, table);
+    z_brct(len, L_mainLoop);                // Iterate.
+
+  if (invertCRC) {
+    not_(crc, noreg, false); // ~c
+  }
+
+  bind(L_done);
+}
+
+/**
+ * Emits code to update CRC-32 with a 4-byte value according to constants in table.
+ * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c.
+ *
+ */
+// Folds the 4 bytes at buf+bufDisp into crc via table columns 4..7
+// (big-endian layout), then advances buf by bufInc (if nonzero).
+// t0..t3 are scratch; crc may alias t0 (lgr_if_needed handles that).
+void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
+                                        Register t0,  Register t1,  Register t2,    Register t3) {
+  // This is what we implement (the DOBIG4 part):
+  //
+  // #define DOBIG4 c ^= *++buf4; \
+  //         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+  //             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+  // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+  const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
+  const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
+  const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
+  const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
+
+  // XOR crc with next four bytes of buffer.
+  lgr_if_needed(t0, crc);
+  z_x(t0, Address(buf, bufDisp));
+  if (bufInc != 0) {
+    add2reg(buf, bufInc);
+  }
+
+  // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices.
+  rotate_then_insert(t3, t0, 56-2, 63-2, 2,    true);  // ((c >>  0) & 0xff) << 2
+  rotate_then_insert(t2, t0, 56-2, 63-2, 2-8,  true);  // ((c >>  8) & 0xff) << 2
+  rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true);  // ((c >> 16) & 0xff) << 2
+  rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true);  // ((c >> 24) & 0xff) << 2
+
+  // Load pre-calculated table values.
+  // Use columns 4..7 for big-endian.
+  z_ly(t3, Address(table, t3, (intptr_t)ix0));
+  z_ly(t2, Address(table, t2, (intptr_t)ix1));
+  z_ly(t1, Address(table, t1, (intptr_t)ix2));
+  z_ly(t0, Address(table, t0, (intptr_t)ix3));
+
+  // Calculate new crc from table values.
+  z_xr(t2, t3);
+  z_xr(t0, t1);
+  z_xr(t0, t2);  // Now crc contains the final checksum value.
+  lgr_if_needed(crc, t0);
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register pointing to CRC table
+ *
+ * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
+ */
+// CRC-32 kernel, processing two 4-byte words per main loop iteration.
+// crc is pre- and post-complemented here; the tail (< 8 bytes) is handled
+// by the byte loop, which must therefore NOT complement again (invertCRC=false).
+void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
+                                        Register t0,  Register t1,  Register t2,  Register t3) {
+  assert_different_registers(crc, buf, len, table);
+
+  Label L_mainLoop, L_tail;
+  Register  data = t0;
+  Register  ctr  = Z_R0;
+  const int mainLoop_stepping = 8;
+  const int tailLoop_stepping = 1;
+  const int log_stepping      = exact_log2(mainLoop_stepping);
+
+  // Don't test for len <= 0 here. This pathological case should not occur anyway.
+  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
+  // The situation itself is detected and handled correctly by the conditional branches
+  // following aghi(len, -stepping) and aghi(len, +stepping).
+
+  not_(crc, noreg, false);             // 1s complement of crc
+
+#if 0
+  {
+    // Pre-mainLoop alignment did not show any positive effect on performance.
+    // We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.
+
+    z_cghi(len, mainLoop_stepping);    // Alignment is useless for short data streams.
+    z_brnh(L_tail);
+
+    // Align buf to word (4-byte) boundary.
+    z_lcr(ctr, buf);
+    rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
+    z_sgfr(len, ctr);                  // Remaining len after alignment.
+
+    update_byteLoop_crc32(crc, buf, ctr, table, data, false);
+  }
+#endif
+
+  // Check for short (<mainLoop_stepping bytes) buffer.
+  z_srag(ctr, len, log_stepping);      // ctr = number of 8-byte chunks.
+  z_brnh(L_tail);
+
+  z_lrvr(crc, crc);             // Revert byte order because we are dealing with big-endian data.
+  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
+
+  BIND(L_mainLoop);
+    update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3);
+    update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3);
+    z_brct(ctr, L_mainLoop);    // Iterate.
+
+  z_lrvr(crc, crc);        // Revert byte order back to original.
+
+  // Process last few (<8) bytes of buffer.
+  BIND(L_tail);
+  update_byteLoop_crc32(crc, buf, len, table, data, false);
+
+  not_(crc, noreg, false); // 1s complement of crc
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register pointing to CRC table
+ *
+ * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
+ */
+// CRC-32 kernel, processing one 4-byte word per main loop iteration.
+// Same structure as kernel_crc32_2word, with stepping 4 instead of 8.
+void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
+                                        Register t0,  Register t1,  Register t2,  Register t3) {
+  assert_different_registers(crc, buf, len, table);
+
+  Label L_mainLoop, L_tail;
+  Register  data = t0;
+  Register  ctr  = Z_R0;
+  const int mainLoop_stepping = 4;
+  const int log_stepping      = exact_log2(mainLoop_stepping);
+
+  // Don't test for len <= 0 here. This pathological case should not occur anyway.
+  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
+  // The situation itself is detected and handled correctly by the conditional branches
+  // following aghi(len, -stepping) and aghi(len, +stepping).
+
+  not_(crc, noreg, false); // 1s complement of crc
+
+  // Check for short (<4 bytes) buffer.
+  z_srag(ctr, len, log_stepping);      // ctr = number of 4-byte chunks.
+  z_brnh(L_tail);
+
+  z_lrvr(crc, crc);          // Revert byte order because we are dealing with big-endian data.
+  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
+
+  BIND(L_mainLoop);
+    update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
+    z_brct(ctr, L_mainLoop); // Iterate.
+  z_lrvr(crc, crc);          // Revert byte order back to original.
+
+  // Process last few (<4) bytes of buffer.
+  BIND(L_tail);
+  update_byteLoop_crc32(crc, buf, len, table, data, false);
+
+  not_(crc, noreg, false); // 1s complement of crc
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register pointing to CRC table
+ */
+// CRC-32 kernel, byte-at-a-time fallback. The byte loop does the
+// pre-/post-complementing itself (invertCRC=true). t1..t3 are unused here,
+// kept for signature parity with the word-based kernels.
+void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
+                                        Register t0,  Register t1,  Register t2,  Register t3) {
+  assert_different_registers(crc, buf, len, table);
+  Register data = t0;
+
+  update_byteLoop_crc32(crc, buf, len, table, data, true);
+}
+
+// Update crc with exactly one byte from buf (CRC32.update(int b) intrinsic).
+// len is unused except for the register-distinctness check.
+void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) {
+  assert_different_registers(crc, buf, len, table, tmp);
+
+  not_(crc, noreg, false); // ~c
+
+  z_llgc(tmp, Address(buf, (intptr_t)0));  // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
+  update_byte_crc32(crc, tmp, table);
+
+  not_(crc, noreg, false); // ~c
+}
+
+//
+// Code for BigInteger::multiplyToLen() intrinsic.
+//
+
+// dest_lo += src1 + src2
+// dest_hi += carry1 + carry2
+// Z_R7 is destroyed !
+void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo,
+                                     Register src1, Register src2) {
+  clear_reg(Z_R7);
+  z_algr(dest_lo, src1);
+  z_alcgr(dest_hi, Z_R7);
+  z_algr(dest_lo, src2);
+  z_alcgr(dest_hi, Z_R7);
+}
+
+// Multiply 64 bit by 64 bit first loop.
+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart,
+                                           Register x_xstart,
+                                           Register y, Register y_idx,
+                                           Register z,
+                                           Register carry,
+                                           Register product,
+                                           Register idx, Register kdx) {
+  // jlong carry, x[], y[], z[];
+  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+  //   huge_128 product = y[idx] * x[xstart] + carry;
+  //   z[kdx] = (jlong)product;
+  //   carry  = (jlong)(product >>> 64);
+  // }
+  // z[xstart] = carry;
+
+  Label L_first_loop, L_first_loop_exit;
+  Label L_one_x, L_one_y, L_multiply;
+
+  z_aghi(xstart, -1);
+  z_brl(L_one_x);   // Special case: length of x is 1.
+
+  // Load next two integers of x.
+  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
+  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));
+
+
+  bind(L_first_loop);
+
+  z_aghi(idx, -1);
+  z_brl(L_first_loop_exit);
+  z_aghi(idx, -1);
+  z_brl(L_one_y);
+
+  // Load next two integers of y.
+  z_sllg(Z_R1_scratch, idx, LogBytesPerInt);
+  mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0));
+
+
+  bind(L_multiply);
+
+  Register multiplicand = product->successor();
+  Register product_low = multiplicand;
+
+  lgr_if_needed(multiplicand, x_xstart);
+  z_mlgr(product, y_idx);     // multiplicand * y_idx -> product::multiplicand
+  clear_reg(Z_R7);
+  z_algr(product_low, carry); // Add carry to result.
+  z_alcgr(product, Z_R7);     // Add carry of the last addition.
+  add2reg(kdx, -2);
+
+  // Store result.
+  z_sllg(Z_R7, kdx, LogBytesPerInt);
+  reg2mem_opt(product_low, Address(z, Z_R7, 0));
+  lgr_if_needed(carry, product);
+  z_bru(L_first_loop);
+
+
+  bind(L_one_y); // Load one 32 bit portion of y as (0,value).
+
+  clear_reg(y_idx);
+  mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false);
+  z_bru(L_multiply);
+
+
+  bind(L_one_x); // Load one 32 bit portion of x as (0,value).
+
+  clear_reg(x_xstart);
+  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
+  z_bru(L_first_loop);
+
+  bind(L_first_loop_exit);
+}
+
+// Multiply 64 bit by 64 bit and add 128 bit.
+void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
+                                            Register z,
+                                            Register yz_idx, Register idx,
+                                            Register carry, Register product,
+                                            int offset) {
+  // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
+  // z[kdx] = (jlong)product;
+
+  Register multiplicand = product->successor();
+  Register product_low = multiplicand;
+
+  z_sllg(Z_R7, idx, LogBytesPerInt);
+  mem2reg_opt(yz_idx, Address(y, Z_R7, offset));
+
+  lgr_if_needed(multiplicand, x_xstart);
+  z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
+  mem2reg_opt(yz_idx, Address(z, Z_R7, offset));
+
+  add2_with_carry(product, product_low, carry, yz_idx);
+
+  z_sllg(Z_R7, idx, LogBytesPerInt);
+  reg2mem_opt(product_low, Address(z, Z_R7, offset));
+
+}
+
+// Multiply 128 bit by 128 bit. Unrolled inner loop.
+void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
+                                             Register y, Register z,
+                                             Register yz_idx, Register idx,
+                                             Register jdx,
+                                             Register carry, Register product,
+                                             Register carry2) {
+  // jlong carry, x[], y[], z[];
+  // int kdx = ystart+1;
+  // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
+  //   huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
+  //   z[kdx+idx+1] = (jlong)product;
+  //   jlong carry2 = (jlong)(product >>> 64);
+  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
+  //   z[kdx+idx] = (jlong)product;
+  //   carry = (jlong)(product >>> 64);
+  // }
+  // idx += 2;
+  // if (idx > 0) {
+  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
+  //   z[kdx+idx] = (jlong)product;
+  //   carry = (jlong)(product >>> 64);
+  // }
+
+  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
+
+  // scale the index
+  lgr_if_needed(jdx, idx);
+  and_imm(jdx, 0xfffffffffffffffcL);
+  rshift(jdx, 2);
+
+
+  bind(L_third_loop);
+
+  z_aghi(jdx, -1);
+  z_brl(L_third_loop_exit);
+  add2reg(idx, -4);
+
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
+  lgr_if_needed(carry2, product);
+
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
+  lgr_if_needed(carry, product);
+  z_bru(L_third_loop);
+
+
+  bind(L_third_loop_exit);  // Handle any left-over operand parts.
+
+  and_imm(idx, 0x3);
+  z_brz(L_post_third_loop_done);
+
+  Label L_check_1;
+
+  z_aghi(idx, -2);
+  z_brl(L_check_1);
+
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
+  lgr_if_needed(carry, product);
+
+
+  bind(L_check_1);
+
+  add2reg(idx, 0x2);
+  and_imm(idx, 0x1);
+  z_aghi(idx, -1);
+  z_brl(L_post_third_loop_done);
+
+  Register   multiplicand = product->successor();
+  Register   product_low = multiplicand;
+
+  z_sllg(Z_R7, idx, LogBytesPerInt);
+  clear_reg(yz_idx);
+  mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false);
+  lgr_if_needed(multiplicand, x_xstart);
+  z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
+  clear_reg(yz_idx);
+  mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false);
+
+  add2_with_carry(product, product_low, yz_idx, carry);
+
+  z_sllg(Z_R7, idx, LogBytesPerInt);
+  reg2mem_opt(product_low, Address(z, Z_R7, 0), false);
+  rshift(product_low, 32);
+
+  lshift(product, 32);
+  z_ogr(product_low, product);
+  lgr_if_needed(carry, product_low);
+
+  bind(L_post_third_loop_done);
+}
+
+// Code for BigInteger::multiplyToLen() intrinsic: z = x * y (magnitudes).
+// zlen is passed on the stack (5th C argument slot); Z_R7..Z_R13 are
+// saved/restored here, and free ABI slots of the current frame are used
+// instead of push/pop to preserve values across the nested loops.
+void MacroAssembler::multiply_to_len(Register x, Register xlen,
+                                     Register y, Register ylen,
+                                     Register z,
+                                     Register tmp1, Register tmp2,
+                                     Register tmp3, Register tmp4,
+                                     Register tmp5) {
+  ShortBranchVerifier sbv(this);
+
+  assert_different_registers(x, xlen, y, ylen, z,
+                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7);
+  assert_different_registers(x, xlen, y, ylen, z,
+                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8);
+
+  z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
+
+  // In openJdk, we store the argument as 32-bit value to slot.
+  Address zlen(Z_SP, _z_abi(remaining_cargs));  // Int in long on big endian.
+
+  const Register idx = tmp1;
+  const Register kdx = tmp2;
+  const Register xstart = tmp3;
+
+  const Register y_idx = tmp4;
+  const Register carry = tmp5;
+  const Register product  = Z_R0_scratch;
+  const Register x_xstart = Z_R8;
+
+  // First Loop.
+  //
+  //   final static long LONG_MASK = 0xffffffffL;
+  //   int xstart = xlen - 1;
+  //   int ystart = ylen - 1;
+  //   long carry = 0;
+  //   for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+  //     long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
+  //     z[kdx] = (int)product;
+  //     carry = product >>> 32;
+  //   }
+  //   z[xstart] = (int)carry;
+  //
+
+  lgr_if_needed(idx, ylen);  // idx = ylen
+  z_llgf(kdx, zlen);         // C2 does not respect int to long conversion for stub calls, thus load zero-extended.
+  clear_reg(carry);          // carry = 0
+
+  Label L_done;
+
+  lgr_if_needed(xstart, xlen);
+  z_aghi(xstart, -1);
+  z_brl(L_done);             // Nothing to do for xlen == 0.
+
+  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+
+  NearLabel L_second_loop;
+  compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop);
+
+  NearLabel L_carry;
+  z_aghi(kdx, -1);
+  z_brz(L_carry);            // Only one 32-bit slot left: store upper half only.
+
+  // Store lower 32 bits of carry.
+  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
+  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+  rshift(carry, 32);
+  z_aghi(kdx, -1);
+
+
+  bind(L_carry);
+
+  // Store upper 32 bits of carry.
+  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
+  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+
+  // Second and third (nested) loops.
+  //
+  // for (int i = xstart-1; i >= 0; i--) { // Second loop
+  //   carry = 0;
+  //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
+  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
+  //                    (z[k] & LONG_MASK) + carry;
+  //     z[k] = (int)product;
+  //     carry = product >>> 32;
+  //   }
+  //   z[i] = (int)carry;
+  // }
+  //
+  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx
+
+  const Register jdx = tmp1;
+
+  bind(L_second_loop);
+
+  clear_reg(carry);           // carry = 0;
+  lgr_if_needed(jdx, ylen);   // j = ystart+1
+
+  z_aghi(xstart, -1);         // i = xstart-1;
+  z_brl(L_done);
+
+  // Use free slots in the current stackframe instead of push/pop.
+  Address zsave(Z_SP, _z_abi(carg_1));
+  reg2mem_opt(z, zsave);
+
+
+  Label L_last_x;
+
+  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
+  load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j
+  z_aghi(xstart, -1);                           // i = xstart-1;
+  z_brl(L_last_x);                              // Only one 32-bit word of x left.
+
+  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
+  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));
+
+
+  Label L_third_loop_prologue;
+
+  bind(L_third_loop_prologue);
+
+  Address xsave(Z_SP, _z_abi(carg_2));
+  Address xlensave(Z_SP, _z_abi(carg_3));
+  Address ylensave(Z_SP, _z_abi(carg_4));
+
+  reg2mem_opt(x, xsave);
+  reg2mem_opt(xstart, xlensave);
+  reg2mem_opt(ylen, ylensave);
+
+
+  multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);
+
+  mem2reg_opt(z, zsave);
+  mem2reg_opt(x, xsave);
+  mem2reg_opt(xlen, xlensave);   // This is the decrement of the loop counter!
+  mem2reg_opt(ylen, ylensave);
+
+  // Store the 64-bit carry into the two 32-bit slots z[i+1], z[i].
+  add2reg(tmp3, 1, xlen);
+  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
+  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+  z_aghi(tmp3, -1);
+  z_brl(L_done);
+
+  rshift(carry, 32);
+  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
+  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+  z_bru(L_second_loop);
+
+  // Next infrequent code is moved outside loops.
+  bind(L_last_x);
+
+  clear_reg(x_xstart);
+  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
+  z_bru(L_third_loop_prologue);
+
+  bind(L_done);
+
+  z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
+}
+
+#ifndef PRODUCT
+// Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false).
+void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
+  Label ok;
+  if (check_equal) {
+    z_bre(ok);
+  } else {
+    z_brne(ok);
+  }
+  stop(msg, id);
+  bind(ok);
+}
+
+// Assert if CC indicates "low".
+void MacroAssembler::asm_assert_low(const char *msg, int id) {
+  Label ok;
+  z_brnl(ok);
+  stop(msg, id);
+  bind(ok);
+}
+
+// Assert if CC indicates "high".
+void MacroAssembler::asm_assert_high(const char *msg, int id) {
+  Label ok;
+  z_brnh(ok);
+  stop(msg, id);
+  bind(ok);
+}
+
+// Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false)
+// generate non-relocatable code.
+void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) {
+  Label ok;
+  if (check_equal) { z_bre(ok); }
+  else             { z_brne(ok); }
+  stop_static(msg, id);
+  bind(ok);
+}
+
+// Assert that the 4- or 8-byte memory operand at mem_base+mem_offset is zero
+// (check_equal==true) or nonzero (check_equal==false). Clobbers Z_R0.
+// allow_relocation selects between the relocatable and static stop sequences.
+void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
+                                          Register mem_base, const char* msg, int id) {
+  switch (size) {
+    case 4:
+      load_and_test_int(Z_R0, Address(mem_base, mem_offset));
+      break;
+    case 8:
+      load_and_test_long(Z_R0,  Address(mem_base, mem_offset));
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  if (allow_relocation) { asm_assert(check_equal, msg, id); }
+  else                  { asm_assert_static(check_equal, msg, id); }
+}
+
+// Check the condition
+//   expected_size == FP - SP
+// after transformation:
+//   expected_size - FP + SP == 0
+// Destroys Register expected_size if no tmp register is passed.
+// Check the condition
+//   expected_size == FP - SP
+// after transformation:
+//   expected_size - FP + SP == 0
+// Destroys Register expected_size if no tmp register is passed.
+void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) {
+  if (tmp == noreg) {
+    tmp = expected_size;      // Work in place; destroys expected_size as documented.
+  } else if (tmp != expected_size) {
+    z_lgr(tmp, expected_size);
+  }
+  // Fix: the size computation and assertion were previously nested inside the
+  // else branch, so passing tmp == noreg silently emitted no check at all,
+  // contradicting the "destroys expected_size" contract above.
+  z_algr(tmp, Z_SP);          // tmp = expected_size + SP
+  z_slg(tmp, 0, Z_R0, Z_SP);  // tmp -= *SP (saved FP); zero iff frame size matches.
+  asm_assert_eq(msg, id);
+}
+#endif // !PRODUCT
+
+// Verify the thread register (guarded by -XX:+VerifyThread).
+// Not yet implemented on s390; traps if the flag is enabled.
+void MacroAssembler::verify_thread() {
+  if (VerifyThread) {
+    unimplemented("", 117);
+  }
+}
+
+// Plausibility check for oops.
+void MacroAssembler::verify_oop(Register oop, const char* msg) {
+  if (!VerifyOops) return;
+
+  BLOCK_COMMENT("verify_oop {");
+  Register tmp = Z_R0;
+  unsigned int nbytes_save = 6 *8;
+  address entry = StubRoutines::verify_oop_subroutine_entry_address();
+  save_return_pc();
+  push_frame_abi160(nbytes_save);
+  z_stmg(Z_R0, Z_R5, 160, Z_SP);
+
+  z_lgr(Z_ARG2, oop);
+  load_const(Z_ARG1, (address) msg);
+  load_const(Z_R1, entry);
+  z_lg(Z_R1, 0, Z_R1);
+  call_c(Z_R1);
+
+  z_lmg(Z_R0, Z_R5, 160, Z_SP);
+  pop_frame();
+
+  restore_return_pc();
+  BLOCK_COMMENT("} verify_oop ");
+}
+
+// Printable names for the stop() type codes, indexed by type % stop_end.
+const char* MacroAssembler::stop_types[] = {
+  "stop",
+  "untested",
+  "unimplemented",
+  "shouldnotreachhere"
+};
+
+// Runtime target of the stop() call sequences: prints the stop type and
+// message, then terminates the VM via guarantee(false, ...).
+static void stop_on_request(const char* tp, const char* msg) {
+  tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
+  guarantee(false, "Z assembly code requires stop: %s", msg);
+}
+
+// Emit an unconditional VM stop: calls stop_on_request with the type name
+// and msg, then emits illtraps. Relocatable version; see stop_static() /
+// stop_chain() for the non-relocatable, size-reduced variants.
+void MacroAssembler::stop(int type, const char* msg, int id) {
+  BLOCK_COMMENT(err_msg("stop: %s {", msg));
+
+  // Setup arguments.
+  load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
+  load_const(Z_ARG2, (void*) msg);
+  get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address.
+  save_return_pc();    // Saves return pc Z_R14.
+  push_frame_abi160(0);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+  // The plain disassembler does not recognize illtrap. It instead displays
+  // a 32-bit value. Issuing two illtraps assures the disassembler finds
+  // the proper beginning of the next instruction.
+  z_illtrap(); // Illegal instruction.
+  z_illtrap(); // Illegal instruction.
+
+  BLOCK_COMMENT(" } stop");
+}
+
+// Special version of stop() for code size reduction.
+// Reuses the previously generated call sequence, if any.
+// Generates the call sequence on its own, if necessary.
+// Note: This code will work only in non-relocatable code!
+//       The relative address of the data elements (arg1, arg2) must not change.
+//       The reentry point must not move relative to it's users. This prerequisite
+//       should be given for "hand-written" code, if all chain calls are in the same code blob.
+//       Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
+// Special version of stop() for code size reduction.
+// Reuses the previously generated call sequence, if any.
+// Generates the call sequence on its own, if necessary.
+// Note: This code will work only in non-relocatable code!
+//       The relative address of the data elements (arg1, arg2) must not change.
+//       The reentry point must not move relative to its users. This prerequisite
+//       should be given for "hand-written" code, if all chain calls are in the same code blob.
+//       Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
+//
+// Returns the reentry point for subsequent stop calls (or NULL when
+// relocation is allowed, which makes chaining unsafe).
+address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) {
+  BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg));
+
+  // Setup arguments.
+  if (allow_relocation) {
+    // Relocatable version (for comparison purposes). Remove after some time.
+    load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
+    load_const(Z_ARG2, (void*) msg);
+  } else {
+    load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]);
+    load_absolute_address(Z_ARG2, (address)msg);
+  }
+  if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) {
+    BLOCK_COMMENT("branch to reentry point:");
+    z_brc(bcondAlways, reentry);
+  } else {
+    BLOCK_COMMENT("reentry point:");
+    reentry = pc();      // Re-entry point for subsequent stop calls.
+    save_return_pc();    // Saves return pc Z_R14.
+    push_frame_abi160(0);
+    if (allow_relocation) {
+      reentry = NULL;    // Prevent reentry if code relocation is allowed.
+      call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+    } else {
+      call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+    }
+    z_illtrap(); // Illegal instruction as emergency stop, should the above call return.
+  }
+  BLOCK_COMMENT(" } stop_chain");
+
+  return reentry;
+}
+
+// Special version of stop() for code size reduction.
+// Assumes constant relative addresses for data and runtime call.
+// Special version of stop() for code size reduction.
+// Assumes constant relative addresses for data and runtime call
+// (emits a fresh, non-chained, non-relocatable stop sequence).
+void MacroAssembler::stop_static(int type, const char* msg, int id) {
+  stop_chain(NULL, type, msg, id, false);
+}
+
+// Not implemented on s390; traps if ever emitted.
+void MacroAssembler::stop_subroutine() {
+  unimplemented("stop_subroutine", 710);
+}
+
+// Prints msg to stdout from within generated code..
+void MacroAssembler::warn(const char* msg) {
+  RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14);
+  load_absolute_address(Z_R1, (address) warning);
+  load_absolute_address(Z_ARG1, (address) msg);
+  (void) call(Z_R1);
+  RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers);
+}
+
+#ifndef PRODUCT
+
+// Write pattern 0x0101010101010101 in region [low-before, high+after].
+void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) {
+  if (!ZapEmptyStackFields) return;
+  BLOCK_COMMENT("zap memory region {");
+  load_const_optimized(val, 0x0101010101010101);
+  int size = before + after;
+  if (low == high && size < 5 && size > 0) {
+    int offset = -before*BytesPerWord;
+    for (int i = 0; i < size; ++i) {
+      z_stg(val, Address(low, offset));
+      offset +=(1*BytesPerWord);
+    }
+  } else {
+    add2reg(addr, -before*BytesPerWord, low);
+    if (after) {
+#ifdef ASSERT
+      jlong check = after * BytesPerWord;
+      assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !");
+#endif
+      add2reg(high, after * BytesPerWord);
+    }
+    NearLabel loop;
+    bind(loop);
+    z_stg(val, Address(addr));
+    add2reg(addr, 8);
+    compare64_and_branch(addr, high, bcondNotHigh, loop);
+    if (after) {
+      add2reg(high, -after * BytesPerWord);
+    }
+  }
+  BLOCK_COMMENT("} zap memory region");
+}
+#endif // !PRODUCT
+
+// RAII helper: emits a test of *flag_addr and a branch over the code
+// generated between construction and destruction when the flag equals
+// 'value'. The destructor binds the skip target. _rscratch is clobbered.
+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) {
+  _masm = masm;
+  _masm->load_absolute_address(_rscratch, (address)flag_addr);
+  _masm->load_and_test_int(_rscratch, Address(_rscratch));
+  if (value) {
+    _masm->z_brne(_label); // Skip if true, i.e. != 0.
+  } else {
+    _masm->z_bre(_label);  // Skip if false, i.e. == 0.
+  }
+}
+
+// Binds the branch target emitted by the constructor, ending the skipped region.
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1073 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_MACROASSEMBLER_S390_HPP
+#define CPU_S390_VM_MACROASSEMBLER_S390_HPP
+
+#include "asm/assembler.hpp"
+
+#define MODERN_IFUN(name)  ((void (MacroAssembler::*)(Register, int64_t, Register, Register))&MacroAssembler::name)
+#define CLASSIC_IFUN(name) ((void (MacroAssembler::*)(Register, int64_t, Register, Register))&MacroAssembler::name)
+#define MODERN_FFUN(name)  ((void (MacroAssembler::*)(FloatRegister, int64_t, Register, Register))&MacroAssembler::name)
+#define CLASSIC_FFUN(name) ((void (MacroAssembler::*)(FloatRegister, int64_t, Register, Register))&MacroAssembler::name)
+
+class MacroAssembler: public Assembler {
+ public:
+  MacroAssembler(CodeBuffer* code) : Assembler(code) {}
+
+  //
+  // Optimized instruction emitters
+  //
+
+  // Move register if destination register and target register are different.
+  void lr_if_needed(Register rd, Register rs);
+  void lgr_if_needed(Register rd, Register rs);
+  void llgfr_if_needed(Register rd, Register rs);
+  void ldr_if_needed(FloatRegister rd, FloatRegister rs);
+
+  void move_reg_if_needed(Register dest, BasicType dest_type, Register src, BasicType src_type);
+  void move_freg_if_needed(FloatRegister dest, BasicType dest_type, FloatRegister src, BasicType src_type);
+
+  void freg2mem_opt(FloatRegister reg,
+                    int64_t       disp,
+                    Register      index,
+                    Register      base,
+                    void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+                    void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+                    Register      scratch = Z_R0);
+  void freg2mem_opt(FloatRegister reg,
+                    const Address &a, bool is_double = true);
+
+  void mem2freg_opt(FloatRegister reg,
+                    int64_t       disp,
+                    Register      index,
+                    Register      base,
+                    void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+                    void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+                    Register      scratch = Z_R0);
+  void mem2freg_opt(FloatRegister reg,
+                    const Address &a, bool is_double = true);
+
+  void reg2mem_opt(Register reg,
+                   int64_t  disp,
+                   Register index,
+                   Register base,
+                   void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+                   void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
+                   Register scratch = Z_R0);
+  // returns offset of the store instruction
+  int reg2mem_opt(Register reg, const Address &a, bool is_double = true);
+
+  void mem2reg_opt(Register reg,
+                   int64_t  disp,
+                   Register index,
+                   Register base,
+                   void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+                   void (MacroAssembler::*classic)(Register, int64_t, Register, Register));
+  void mem2reg_opt(Register reg, const Address &a, bool is_double = true);
+  void mem2reg_signed_opt(Register reg, const Address &a);
+
+  // AND immediate and set condition code, works for 64 bit immediates/operation as well.
+  void and_imm(Register r, long mask, Register tmp = Z_R0, bool wide = false);
+
+  // 1's complement, 32bit or 64bit. Optimized to exploit distinct operands facility.
+  // Note: The condition code is neither preserved nor correctly set by this code!!!
+  // Note: (wide == false) does not protect the high order half of the target register
+  // from alteration. It only serves as an optimization hint for 32-bit results.
+  void not_(Register r1, Register r2 = noreg, bool wide = false);  // r1 = ~r2
+
+  // Expanded support of all "rotate_then_<logicalOP>" instructions.
+  //
+  // Generalize and centralize rotate_then_<logicalOP> emitter.
+  // Functional description. For details, see Principles of Operation, Chapter 7, "Rotate Then Insert..."
+  //  - Bits  in a register are numbered left (most significant) to right (least significant), i.e. [0..63].
+  //  - Bytes in a register are numbered left (most significant) to right (least significant), i.e. [0..7].
+  //  - Register src is rotated to the left by (nRotate&0x3f) positions.
+  //  - Negative values for nRotate result in a rotation to the right by abs(nRotate) positions.
+  //  - The bits in positions [lBitPos..rBitPos] of the _ROTATED_ src operand take part in the
+  //    logical operation performed on the contents (in those positions) of the dst operand.
+  //  - The logical operation that is performed on the dst operand is one of
+  //     o insert the selected bits (replacing the original contents of those bit positions)
+  //     o and the selected bits with the corresponding bits of the dst operand
+  //     o or  the selected bits with the corresponding bits of the dst operand
+  //     o xor the selected bits with the corresponding bits of the dst operand
+  //  - For clear_dst == true, the destination register is cleared before the bits are inserted.
+  //    For clear_dst == false, only the bit positions that get data inserted from src
+  //    are changed. All other bit positions remain unchanged.
+  //  - For test_only == true,  the result of the logicalOP is only used to set the condition code, dst remains unchanged.
+  //    For test_only == false, the result of the logicalOP replaces the selected bits of dst.
+  //  - src32bit and dst32bit indicate the respective register is used as 32bit value only.
+  //    Knowledge can simplify code generation.
+  //
+  // Here is an important performance note, valid for all <logicalOP>s except "insert":
+  //   Due to the too complex nature of the operation, it cannot be done in a single cycle.
+  //   Timing constraints require the instructions to be cracked into two micro-ops, taking
+  //   one or two cycles each to execute. In some cases, an additional pipeline bubble might get added.
+  //   Macroscopically, that makes up for a three- or four-cycle instruction where you would
+  //   expect just a single cycle.
+  //   It is thus not beneficial from a performance point of view to exploit those instructions.
+  //   Other reasons (code compactness, register pressure, ...) might outweigh this penalty.
+  //
+  unsigned long create_mask(int lBitPos, int rBitPos);
+  void rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
+                        int nRotate, bool src32bit, bool dst32bit, bool oneBits);
+  void rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos, int nRotate,
+                          bool clear_dst);
+  void rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos, int nRotate,
+                       bool test_only);
+  void rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos, int nRotate,
+                      bool test_only); // Parameter name fixed (was misspelled "test_onlyt").
+  void rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos, int nRotate,
+                       bool test_only);
+
+  void add64(Register r1, RegisterOrConstant inc);
+
+  // Helper function to multiply the 64bit contents of a register by a 16bit constant.
+  // The optimization tries to avoid the mghi instruction, since it uses the FPU for
+  // calculation and is thus rather slow.
+  //
+  // There is no handling for special cases, e.g. cval==0 or cval==1.
+  //
+  // Returns len of generated code block.
+  unsigned int mul_reg64_const16(Register rval, Register work, int cval);
+
+  // Generic operation r1 := r2 + imm.
+  void add2reg(Register r1, int64_t imm, Register r2 = noreg);
+  // Generic operation r := b + x + d.
+  void add2reg_with_index(Register r, int64_t d, Register x, Register b = noreg);
+
+  // Add2mem* methods for direct memory increment.
+  void add2mem_32(const Address &a, int64_t imm, Register tmp);
+  void add2mem_64(const Address &a, int64_t imm, Register tmp);
+
+  // *((int8_t*)(dst)) |= imm8
+  inline void or2mem_8(Address& dst, int64_t imm8);
+
+  // Load values by size and signedness.
+  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
+  void store_sized_value(Register src, Address dst, size_t size_in_bytes);
+
+  // Load values with large offsets to base address.
+ private:
+  int  split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate);
+ public:
+  void load_long_largeoffset(Register t, int64_t si20, Register a, Register tmp);
+  void load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp);
+  void load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp);
+
+ private:
+  long toc_distance();
+ public:
+  void load_toc(Register Rtoc);
+  void load_long_pcrelative(Register Rdst, address dataLocation);
+  static int load_long_pcrelative_size() { return 6; }
+  void load_addr_pcrelative(Register Rdst, address dataLocation);
+  static int load_addr_pcrel_size() { return 6; } // Just a LARL.
+
+  // Load a value from memory and test (set CC).
+  void load_and_test_byte    (Register dst, const Address &a);
+  void load_and_test_short   (Register dst, const Address &a);
+  void load_and_test_int     (Register dst, const Address &a);
+  void load_and_test_int2long(Register dst, const Address &a);
+  void load_and_test_long    (Register dst, const Address &a);
+
+  // Test a bit in memory. Result is reflected in CC.
+  void testbit(const Address &a, unsigned int bit);
+  // Test a bit in a register. Result is reflected in CC.
+  void testbit(Register r, unsigned int bitPos);
+
+  // Clear a register, i.e. load const zero into reg. Return len (in bytes) of
+  // generated instruction(s).
+  //   whole_reg: Clear 64 bits if true, 32 bits otherwise.
+  //   set_cc: Use instruction that sets the condition code, if true.
+  int clear_reg(Register r, bool whole_reg = true, bool set_cc = true);
+
+#ifdef ASSERT
+  int preset_reg(Register r, unsigned long pattern, int pattern_len);
+#endif
+
+  // Clear (store zeros) a small piece of memory.
+  // CAUTION: Do not use this for atomic memory clearing. Use store_const() instead.
+  //   addr: Address descriptor of memory to clear.
+  //         Index register will not be used!
+  //   size: Number of bytes to clear.
+  void clear_mem(const Address& addr, unsigned size);
+
+  // Move immediate values to memory. Currently supports 32 and 64 bit stores,
+  // but may be extended to 16 bit store operation, if needed.
+  // For details, see implementation in *.cpp file.
+         int store_const(const Address &dest, long imm,
+                         unsigned int lm, unsigned int lc,
+                         Register scratch = Z_R0);
+  inline int store_const(const Address &dest, long imm,
+                         Register scratch = Z_R0, bool is_long = true);
+
+  // Move/initialize arbitrarily large memory area. No check for destructive overlap.
+  // Being interruptible, these instructions need a retry-loop.
+  void move_long_ext(Register dst, Register src, unsigned int pad);
+
+  void compare_long_ext(Register left, Register right, unsigned int pad);
+  void compare_long_uni(Register left, Register right, unsigned int pad);
+
+  void search_string(Register end, Register start);
+  void search_string_uni(Register end, Register start);
+
+  // Translate instructions
+  // Being interruptible, these instructions need a retry-loop.
+  void translate_oo(Register dst, Register src, uint mask);
+  void translate_ot(Register dst, Register src, uint mask);
+  void translate_to(Register dst, Register src, uint mask);
+  void translate_tt(Register dst, Register src, uint mask);
+
+  // Crypto instructions.
+  // Being interruptible, these instructions need a retry-loop.
+  void cksm(Register crcBuff, Register srcBuff);
+  void km( Register dstBuff, Register srcBuff);
+  void kmc(Register dstBuff, Register srcBuff);
+  void kimd(Register srcBuff);
+  void klmd(Register srcBuff);
+  void kmac(Register srcBuff);
+
+  // nop padding
+  void align(int modulus);
+  void align_address(int modulus);
+
+  //
+  // Constants, loading constants, TOC support
+  //
+  // Safepoint check factored out.
+  void generate_safepoint_check(Label& slow_path, Register scratch = noreg, bool may_relocate = true);
+
+  // Load generic address: d <- base(a) + index(a) + disp(a).
+  inline void load_address(Register d, const Address &a);
+  // Load absolute address (and try to optimize).
+  void load_absolute_address(Register d, address addr);
+
+  // Address of Z_ARG1 and argument_offset.
+  // If temp_reg == arg_slot, arg_slot will be overwritten.
+  Address argument_address(RegisterOrConstant arg_slot,
+                           Register temp_reg = noreg,
+                           int64_t extra_slot_offset = 0);
+
+  // Load a narrow ptr constant (oop or klass ptr).
+  void load_narrow_oop( Register t, narrowOop a);
+  void load_narrow_klass(Register t, Klass* k);
+
+  static bool is_load_const_32to64(address pos);
+  static bool is_load_narrow_oop(address pos)   { return is_load_const_32to64(pos); }
+  static bool is_load_narrow_klass(address pos) { return is_load_const_32to64(pos); }
+
+  static int  load_const_32to64_size()          { return 6; }
+  // These return an instruction size in bytes, so the return type is int
+  // (was erroneously declared bool; cf. compare_immediate_narrow_*_size below).
+  static int  load_narrow_oop_size()            { return load_const_32to64_size(); }
+  static int  load_narrow_klass_size()          { return load_const_32to64_size(); }
+
+  static int  patch_load_const_32to64(address pos, int64_t a);
+  static int  patch_load_narrow_oop(address pos, oop o);
+  static int  patch_load_narrow_klass(address pos, Klass* k);
+
+  // cOops. CLFI exploit.
+  void compare_immediate_narrow_oop(Register oop1, narrowOop oop2);
+  void compare_immediate_narrow_klass(Register op1, Klass* op2);
+  static bool is_compare_immediate32(address pos);
+  static bool is_compare_immediate_narrow_oop(address pos);
+  static bool is_compare_immediate_narrow_klass(address pos);
+  static int  compare_immediate_narrow_size()       { return 6; }
+  static int  compare_immediate_narrow_oop_size()   { return compare_immediate_narrow_size(); }
+  static int  compare_immediate_narrow_klass_size() { return compare_immediate_narrow_size(); }
+  static int  patch_compare_immediate_32(address pos, int64_t a);
+  static int  patch_compare_immediate_narrow_oop(address pos, oop o);
+  static int  patch_compare_immediate_narrow_klass(address pos, Klass* k);
+
+  // Load a 32bit constant into a 64bit register.
+  void load_const_32to64(Register t, int64_t x, bool sign_extend=true);
+  // Load a 64 bit constant.
+         void load_const(Register t, long a);
+  inline void load_const(Register t, void* a);
+  inline void load_const(Register t, Label& L);
+  inline void load_const(Register t, const AddressLiteral& a);
+  // Get the 64 bit constant from a `load_const' sequence.
+  static long get_const(address load_const);
+  // Patch the 64 bit constant of a `load_const' sequence. This is a low level
+  // procedure. It neither flushes the instruction cache nor is it atomic.
+  static void patch_const(address load_const, long x);
+  static int load_const_size() { return 12; }
+
+  // Turn a char into boolean. NOTE: destroys r.
+  void c2bool(Register r, Register t = Z_R0);
+
+  // Optimized version of load_const for constants that do not need to be
+  // loaded by a sequence of instructions of fixed length and that do not
+  // need to be patched.
+  int load_const_optimized_rtn_len(Register t, long x, bool emit);
+  inline void load_const_optimized(Register t, long x);
+  inline void load_const_optimized(Register t, void* a);
+  inline void load_const_optimized(Register t, Label& L);
+  inline void load_const_optimized(Register t, const AddressLiteral& a);
+
+ public:
+
+  //----------------------------------------------------------
+  //            oops in code             -------------
+  //  including compressed oops support  -------------
+  //----------------------------------------------------------
+
+  // Metadata in code that we have to keep track of.
+  AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
+  AddressLiteral constant_metadata_address(Metadata* obj); // find_index
+
+  // allocate_index
+  AddressLiteral allocate_oop_address(jobject obj);
+  // find_index
+  AddressLiteral constant_oop_address(jobject obj);
+  // Uses allocate_oop_address.
+  inline void set_oop         (jobject obj, Register d);
+  // Uses constant_oop_address.
+  inline void set_oop_constant(jobject obj, Register d);
+  // Uses constant_metadata_address.
+  inline bool set_metadata_constant(Metadata* md, Register d);
+
+  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+                                                Register tmp,
+                                                int offset);
+  //
+  // branch, jump
+  //
+
+  // Use one generic function for all branch patches.
+  static unsigned long patched_branch(address dest_pos, unsigned long inst, address inst_pos);
+
+  void pd_patch_instruction(address branch, address target);
+
+  // Extract relative address from "relative" instructions.
+  static long get_pcrel_offset(unsigned long inst);
+  static long get_pcrel_offset(address pc);
+  static address get_target_addr_pcrel(address pc);
+
+  static inline bool is_call_pcrelative_short(unsigned long inst);
+  static inline bool is_call_pcrelative_long(unsigned long inst);
+  static inline bool is_branch_pcrelative_short(unsigned long inst);
+  static inline bool is_branch_pcrelative_long(unsigned long inst);
+  static inline bool is_compareandbranch_pcrelative_short(unsigned long inst);
+  static inline bool is_branchoncount_pcrelative_short(unsigned long inst);
+  static inline bool is_branchonindex32_pcrelative_short(unsigned long inst);
+  static inline bool is_branchonindex64_pcrelative_short(unsigned long inst);
+  static inline bool is_branchonindex_pcrelative_short(unsigned long inst);
+  static inline bool is_branch_pcrelative16(unsigned long inst);
+  static inline bool is_branch_pcrelative32(unsigned long inst);
+  static inline bool is_branch_pcrelative(unsigned long inst);
+  static inline bool is_load_pcrelative_long(unsigned long inst);
+  static inline bool is_misc_pcrelative_long(unsigned long inst);
+  static inline bool is_pcrelative_short(unsigned long inst);
+  static inline bool is_pcrelative_long(unsigned long inst);
+  // PCrelative TOC access. Variants with address argument.
+  static inline bool is_load_pcrelative_long(address iLoc);
+  static inline bool is_pcrelative_short(address iLoc);
+  static inline bool is_pcrelative_long(address iLoc);
+
+  static inline bool is_pcrelative_instruction(address iloc);
+  static inline bool is_load_addr_pcrel(address a);
+
+  static void patch_target_addr_pcrel(address pc, address con);
+  static void patch_addr_pcrel(address pc, address con) {
+    patch_target_addr_pcrel(pc, con); // Just delegate. This is only for nativeInst_s390.cpp.
+  }
+
+  //---------------------------------------------------------
+  //  Some macros for more comfortable assembler programming.
+  //---------------------------------------------------------
+
+  // NOTE: pass NearLabel T to signal that the branch target T will be bound to a near address.
+
+  void compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target);
+  void compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target);
+  void compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target);
+  void compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target);
+
+  void branch_optimized(Assembler::branch_condition cond, address branch_target);
+  void branch_optimized(Assembler::branch_condition cond, Label&  branch_target);
+  void compare_and_branch_optimized(Register r1,
+                                    Register r2,
+                                    Assembler::branch_condition cond,
+                                    address  branch_addr,
+                                    bool     len64,
+                                    bool     has_sign);
+  void compare_and_branch_optimized(Register r1,
+                                    jlong    x2,
+                                    Assembler::branch_condition cond,
+                                    Label&   branch_target,
+                                    bool     len64,
+                                    bool     has_sign);
+  void compare_and_branch_optimized(Register r1,
+                                    Register r2,
+                                    Assembler::branch_condition cond,
+                                    Label&   branch_target,
+                                    bool     len64,
+                                    bool     has_sign);
+
+  //
+  // Support for frame handling
+  //
+  // Specify the register that should be stored as the return pc in the
+  // current frame (default is R14).
+  inline void save_return_pc(Register pc = Z_R14);
+  inline void restore_return_pc();
+
+  // Get current PC.
+  address get_PC(Register result);
+
+  // Get current PC + offset. Offset given in bytes, must be even!
+  address get_PC(Register result, int64_t offset);
+
+  // Resize current frame either relatively wrt to current SP or absolute.
+  void resize_frame_sub(Register offset, Register fp, bool load_fp=true);
+  void resize_frame_absolute(Register addr, Register fp, bool load_fp=true);
+  void resize_frame(RegisterOrConstant offset, Register fp, bool load_fp=true);
+
+  // Push a frame of size bytes, if copy_sp is false, old_sp must already
+  // contain a copy of Z_SP.
+  void push_frame(Register bytes, Register old_sp, bool copy_sp = true, bool bytes_with_inverted_sign = false);
+
+  // Push a frame of size `bytes'. no abi space provided.
+  // Don't rely on register locking, instead pass a scratch register
+  // (Z_R0 by default).
+  // CAUTION! passing registers >= Z_R2 may produce bad results on
+  // old CPUs!
+  unsigned int push_frame(unsigned int bytes, Register scratch = Z_R0);
+
+  // Push a frame of size `bytes' with abi160 on top.
+  unsigned int push_frame_abi160(unsigned int bytes);
+
+  // Pop current C frame.
+  void pop_frame();
+
+  //
+  // Calls
+  //
+
+ private:
+  address _last_calls_return_pc;
+
+ public:
+  // Support for VM calls. This is the base routine called by the
+  // different versions of call_VM_leaf. The interpreter may customize
+  // this version by overriding it for its purposes (e.g., to
+  // save/restore additional registers when doing a VM call).
+  void call_VM_leaf_base(address entry_point);
+  void call_VM_leaf_base(address entry_point, bool allow_relocation);
+
+  // It is imperative that all calls into the VM are handled via the
+  // call_VM macros. They make sure that the stack linkage is setup
+  // correctly. Call_VM's correspond to ENTRY/ENTRY_X entry points
+  // while call_VM_leaf's correspond to LEAF entry points.
+  //
+  // This is the base routine called by the different versions of
+  // call_VM. The interpreter may customize this version by overriding
+  // it for its purposes (e.g., to save/restore additional registers
+  // when doing a VM call).
+
+  // If no last_java_sp is specified (noreg) then SP will be used instead.
+
+  virtual void call_VM_base(
+    Register        oop_result,        // Where an oop-result ends up if any; use noreg otherwise.
+    Register        last_java_sp,      // To set up last_Java_frame in stubs; use noreg otherwise.
+    address         entry_point,       // The entry point.
+    bool            check_exception);  // Flag which indicates if exception should be checked.
+  virtual void call_VM_base(
+    Register        oop_result,       // Where an oop-result ends up if any; use noreg otherwise.
+    Register        last_java_sp,     // To set up last_Java_frame in stubs; use noreg otherwise.
+    address         entry_point,      // The entry point.
+    bool            allow_relocation, // Flag to request generation of relocatable code.
+    bool            check_exception); // Flag which indicates if exception should be checked.
+
+  // Call into the VM.
+  // Passes the thread pointer (in Z_ARG1) as a prepended argument.
+  // Makes sure oop return values are visible to the GC.
+  void call_VM(Register oop_result, address entry_point, bool check_exceptions = true);
+  void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
+  void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+  void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
+               Register arg_3, bool check_exceptions = true);
+
+  void call_VM_static(Register oop_result, address entry_point, bool check_exceptions = true);
+  void call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2,
+                      Register arg_3, bool check_exceptions = true);
+
+  // Overloaded with last_java_sp.
+  void call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions = true);
+  void call_VM(Register oop_result, Register last_java_sp, address entry_point,
+               Register arg_1, bool check_exceptions = true);
+  void call_VM(Register oop_result, Register last_java_sp, address entry_point,
+               Register arg_1, Register arg_2, bool check_exceptions = true);
+  void call_VM(Register oop_result, Register last_java_sp, address entry_point,
+               Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+
+  void call_VM_leaf(address entry_point);
+  void call_VM_leaf(address entry_point, Register arg_1);
+  void call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+  void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+
+  // Really static VM leaf call (never patched).
+  void call_VM_leaf_static(address entry_point);
+  void call_VM_leaf_static(address entry_point, Register arg_1);
+  void call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2);
+  void call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+
+  // Call a C function via its function entry. Updates and returns _last_calls_return_pc.
+  inline address call(Register function_entry);
+  inline address call_c(Register function_entry);
+         address call_c(address function_entry);
+  // Variant for really static (non-relocatable) calls which are never patched.
+         address call_c_static(address function_entry);
+  // TOC or pc-relative call + emits a runtime_call relocation.
+         address call_c_opt(address function_entry);
+
+  inline address call_stub(Register function_entry);
+  inline address call_stub(address  function_entry);
+
+  // Get the pc where the last call will return to. Returns _last_calls_return_pc.
+  inline address last_calls_return_pc();
+
+ private:
+  static bool is_call_far_patchable_variant0_at(address instruction_addr); // Dynamic TOC: load target addr from CP and call.
+  static bool is_call_far_patchable_variant2_at(address instruction_addr); // PC-relative call, prefixed with NOPs.
+
+
+ public:
+  bool           call_far_patchable(address target, int64_t toc_offset);
+  static bool    is_call_far_patchable_at(address inst_start);             // All supported forms of patchable calls.
+  static bool    is_call_far_patchable_pcrelative_at(address inst_start);  // Pc-relative call with leading nops.
+  static bool    is_call_far_pcrelative(address instruction_addr);         // Pure far pc-relative call, with one leading size adjustment nop.
+  static void    set_dest_of_call_far_patchable_at(address inst_start, address target, int64_t toc_offset);
+  static address get_dest_of_call_far_patchable_at(address inst_start, address toc_start);
+
+  void align_call_far_patchable(address pc);
+
+  // PCrelative TOC access.
+
+  // This value is independent of code position - constant for the lifetime of the VM.
+  static int call_far_patchable_size() {
+    return load_const_from_toc_size() + call_byregister_size();
+  }
+
+  static int call_far_patchable_ret_addr_offset() { return call_far_patchable_size(); }
+
+  static bool call_far_patchable_requires_alignment_nop(address pc) {
+    int size = call_far_patchable_size();
+    return ((intptr_t)(pc + size) & 0x03L) != 0;
+  }
+
+  // END OF PCrelative TOC access.
+
+  // Instruction sequence lengths (in bytes) for the various jump/call forms.
+  static int jump_byregister_size()          { return 2; }
+  static int jump_pcrelative_size()          { return 4; }
+  static int jump_far_pcrelative_size()      { return 6; }
+  static int call_byregister_size()          { return 2; }
+  static int call_pcrelative_size()          { return 4; }
+  static int call_far_pcrelative_size()      { return 2 + 6; } // Prepend each BRASL with a nop.
+  static int call_far_pcrelative_size_raw()  { return 6; }     // BRASL size without the leading nop.
+
+  //
+  // Java utilities
+  //
+
+  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+  // The implementation is only non-empty for the InterpreterMacroAssembler,
+  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+  virtual void check_and_handle_popframe(Register java_thread);
+  virtual void check_and_handle_earlyret(Register java_thread);
+
+  // Polling page support.
+  // Distinct marker values, one per poll reason (presumably used so a trap
+  // handler can tell the polls apart — confirm against uses in the .cpp file).
+  enum poll_mask {
+    mask_stackbang = 0xde, // 222 (dec)
+    mask_safepoint = 0x6f, // 111 (dec)
+    mask_profiling = 0xba  // 186 (dec)
+  };
+
+  // Read from the polling page.
+  void load_from_polling_page(Register polling_page_address, int64_t offset = 0);
+
+  // Check if given instruction is a read from the polling page
+  // as emitted by load_from_polling_page.
+  static bool is_load_from_polling_page(address instr_loc);
+  // Extract poll address from instruction and ucontext.
+  static address get_poll_address(address instr_loc, void* ucontext);
+  // Extract poll register from instruction.
+  static uint get_poll_register(address instr_loc);
+
+  // Check if instruction is a write access to the memory serialization page.
+  // NOTE(review): the original comment listed the PPC instructions stw, stwu,
+  // stwx, stwux, which do not exist on s390 — confirm the actual check
+  // against the implementation in the .cpp file.
+  static bool is_memory_serialization(int instruction, JavaThread* thread, void* ucontext);
+
+  // Support for serializing memory accesses between threads.
+  void serialize_memory(Register thread, Register tmp1, Register tmp2);
+
+  // Stack overflow checking
+  void bang_stack_with_offset(int offset);
+
+  // Atomics
+  // -- none?
+
+  void tlab_allocate(Register obj,                // Result: pointer to object after successful allocation
+                     Register var_size_in_bytes,  // Object size in bytes if unknown at compile time; invalid otherwise.
+                     int      con_size_in_bytes,  // Object size in bytes if   known at compile time.
+                     Register t1,                 // temp register
+                     Label&   slow_case);         // Continuation point if fast allocation fails.
+
+  // Emitter for interface method lookup.
+  //   input: recv_klass, intf_klass, itable_index
+  //   output: method_result
+  //   kills: itable_index, temp1_reg, Z_R0, Z_R1
+  void lookup_interface_method(Register           recv_klass,
+                               Register           intf_klass,
+                               RegisterOrConstant itable_index,
+                               Register           method_result,
+                               Register           temp1_reg,
+                               Register           temp2_reg,
+                               Label&             no_such_interface);
+
+  // virtual method calling
+  void lookup_virtual_method(Register             recv_klass,
+                             RegisterOrConstant   vtable_index,
+                             Register             method_result);
+
+  // Factor out code to call ic_miss_handler.
+  unsigned int call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch);
+  void nmethod_UEP(Label& ic_miss);
+
+  // Emitters for "partial subtype" checks.
+
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp1_reg.
+  // If super_check_offset is not -1, temp1_reg is not used and can be noreg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp1_reg,
+                                     Label*   L_success,
+                                     Label*   L_failure,
+                                     Label*   L_slow_path,
+                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // Rarray_ptr and Rlength are killed as temps.
+  // Updates the sub's secondary super cache as necessary.
+  void check_klass_subtype_slow_path(Register Rsubklass,
+                                     Register Rsuperklass, // Was 'Rsuperklas' (typo).
+                                     Register Rarray_ptr,  // tmp
+                                     Register Rlength,     // tmp
+                                     Label* L_success,
+                                     Label* L_failure);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp1_reg,
+                           Register temp2_reg,
+                           Label&   L_success);
+
+  // Increment a counter at counter_address when the eq condition code is set.
+  // Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
+  void increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg);
+  // Biased locking support
+  // Upon entry, obj_reg must contain the target object, and mark_reg
+  // must contain the target object's header.
+  // Destroys mark_reg if an attempt is made to bias an anonymously
+  // biased lock. In this case a failure will go either to the slow
+  // case or fall through with the notEqual condition code set with
+  // the expectation that the slow case in the runtime will be called.
+  // In the fall-through case where the CAS-based lock is done,
+  // mark_reg is not destroyed.
+  void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg,
+                            Register temp2_reg, Label& done, Label* slow_case = NULL);
+  // Upon entry, the base register of mark_addr must contain the oop.
+  // Destroys temp_reg.
+  // NOTE(review): the original comment described an 'allow_delay_slot_filling'
+  // parameter and annulled delay slots; that wording stems from the SPARC port
+  // and does not apply to this signature (no such parameter; z/Architecture
+  // has no branch delay slots).
+  void biased_locking_exit(Register mark_addr, Register temp_reg, Label& done);
+
+  // Compiler-emitted fast-path object lock/unlock.
+  void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking);
+  void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking);
+
+  // Write to card table for modification at store_addr - register is destroyed afterwards.
+  void card_write_barrier_post(Register store_addr, Register tmp);
+
+#if INCLUDE_ALL_GCS
+  // General G1 pre-barrier generator.
+  // Purpose: record the previous value if it is not null.
+  // All non-tmps are preserved.
+  void g1_write_barrier_pre(Register           Robj,
+                            RegisterOrConstant offset,
+                            Register           Rpre_val,        // Ideally, this is a non-volatile register.
+                            Register           Rval,            // Will be preserved.
+                            Register           Rtmp1,           // If Rpre_val is volatile, either Rtmp1
+                            Register           Rtmp2,           // or Rtmp2 has to be non-volatile.
+                            bool               pre_val_needed); // Save Rpre_val across runtime call, caller uses it.
+
+  // General G1 post-barrier generator.
+  // Purpose: Store cross-region card.
+  void g1_write_barrier_post(Register Rstore_addr,
+                             Register Rnew_val,
+                             Register Rtmp1,
+                             Register Rtmp2,
+                             Register Rtmp3);
+#endif // INCLUDE_ALL_GCS
+
+  // Support for last Java frame (but use call_VM instead where possible).
+ private:
+  void set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation);
+  void reset_last_Java_frame(bool allow_relocation);
+  void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation);
+ public:
+  // Public wrappers; the *_static variants are for non-relocatable code
+  // (they pass allow_relocation = false, see the inline file).
+  inline void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
+  inline void set_last_Java_frame_static(Register last_java_sp, Register last_Java_pc);
+  inline void reset_last_Java_frame(void);
+  inline void reset_last_Java_frame_static(void);
+  inline void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1);
+  inline void set_top_ijava_frame_at_SP_as_last_Java_frame_static(Register sp, Register tmp1);
+
+  void set_thread_state(JavaThreadState new_state);
+
+  // Read vm result from thread.
+  void get_vm_result  (Register oop_result);
+  void get_vm_result_2(Register result);
+
+  // Vm result is currently getting hijacked for oop preservation.
+  void set_vm_result(Register oop_result);
+
+  // Support for NULL-checks
+  //
+  // Generates code that causes a NULL OS exception if the content of reg is NULL.
+  // If the accessed location is M[reg + offset] and the offset is known, provide the
+  // offset. No explicit code generation is needed if the offset is within a certain
+  // range (0 <= offset <= page_size).
+  //
+  // %%%%%% Currently not done for z/Architecture
+
+  void null_check(Register reg, Register tmp = Z_R0, int64_t offset = -1);
+  static bool needs_explicit_null_check(intptr_t offset);  // Implemented in shared file ?!
+
+  // Klass oop manipulations if compressed.
+  void encode_klass_not_null(Register dst, Register src = noreg);
+  void decode_klass_not_null(Register dst, Register src);
+  void decode_klass_not_null(Register dst);
+  void load_klass(Register klass, Address mem);
+  void load_klass(Register klass, Register src_oop);
+  void load_prototype_header(Register Rheader, Register Rsrc_oop);
+  void store_klass(Register klass, Register dst_oop, Register ck = noreg); // Klass will get compressed if ck not provided.
+  void store_klass_gap(Register s, Register dst_oop);
+
+  // This function calculates the size of the code generated by
+  //   decode_klass_not_null(register dst)
+  // when (Universe::heap() != NULL). Hence, if the instructions
+  // it generates change, then this method needs to be updated.
+  static int instr_size_for_decode_klass_not_null();
+
+  void encode_heap_oop(Register oop);
+  void encode_heap_oop_not_null(Register oop);
+
+  static int get_oop_base_pow2_offset(uint64_t oop_base);
+  int  get_oop_base(Register Rbase, uint64_t oop_base);
+  int  get_oop_base_complement(Register Rbase, uint64_t oop_base);
+  void compare_heap_oop(Register Rop1, Address mem, bool maybeNULL);
+  void compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL);
+  void load_heap_oop(Register dest, const Address &a);
+  void load_heap_oop(Register d, int64_t si16, Register s1);
+  void load_heap_oop_not_null(Register d, int64_t si16, Register s1);
+  void store_heap_oop(Register Roop, RegisterOrConstant offset, Register base);
+  void store_heap_oop_not_null(Register Roop, RegisterOrConstant offset, Register base);
+  void store_heap_oop_null(Register zero, RegisterOrConstant offset, Register base);
+  void oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL,
+                   Register Rbase = Z_R1, int pow2_offset = -1, bool only32bitValid = false);
+  void oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL,
+                   Register Rbase = Z_R1, int pow2_offset = -1);
+
+  void load_mirror(Register mirror, Register method);
+
+  //--------------------------
+  //---  Operations on arrays.
+  //--------------------------
+  unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len);
+  unsigned int Clear_Array_Const(long cnt, Register base);
+  unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len);
+  unsigned int CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
+                                             Register cnt_reg,
+                                             Register tmp1_reg, Register tmp2_reg);
+
+  //-------------------------------------------
+  // Special String Intrinsics Implementation.
+  //-------------------------------------------
+  // Intrinsics for CompactStrings
+  // Compress char[] to byte[]. odd_reg contains cnt. tmp3 is only needed for precise behavior in failure case. Kills dst.
+  unsigned int string_compress(Register result, Register src, Register dst, Register odd_reg,
+                               Register even_reg, Register tmp, Register tmp2 = noreg);
+
+  // Kills src.
+  unsigned int has_negatives(Register result, Register src, Register cnt,
+                             Register odd_reg, Register even_reg, Register tmp);
+
+  // Inflate byte[] to char[].
+  unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);
+  // Odd_reg contains cnt. Kills src.
+  unsigned int string_inflate(Register src, Register dst, Register odd_reg,
+                              Register even_reg, Register tmp);
+
+  unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
+                              Register odd_reg, Register even_reg, Register result, int ae);
+
+  unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
+                            Register odd_reg, Register even_reg, Register result, bool is_byte);
+
+  unsigned int string_indexof(Register result, Register haystack, Register haycnt,
+                              Register needle, Register needlecnt, int needlecntval,
+                              Register odd_reg, Register even_reg, int ae);
+
+  unsigned int string_indexof_char(Register result, Register haystack, Register haycnt,
+                                   Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte);
+
+  // Emit an oop const to the constant pool and set a relocation info
+  // with address current_pc. Return the TOC offset of the constant.
+  int store_const_in_toc(AddressLiteral& val);
+  int store_oop_in_toc(AddressLiteral& oop);
+  // Emit an oop const to the constant pool via store_oop_in_toc, or
+  // emit a scalar const to the constant pool via store_const_in_toc,
+  // and load the constant into register dst.
+  bool load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc = noreg);
+  // Get CPU version dependent size of load_const sequence.
+  // The returned value is valid only for code sequences
+  // generated by load_const, not load_const_optimized.
+  static int load_const_from_toc_size() {
+    return load_long_pcrelative_size();
+  }
+  bool load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc = noreg);
+  static intptr_t get_const_from_toc(address pc);
+  static void     set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb);
+
+  // Dynamic TOC.
+  static bool is_load_const(address a);
+  static bool is_load_const_from_toc_pcrelative(address a);
+  static bool is_load_const_from_toc(address a) { return is_load_const_from_toc_pcrelative(a); }
+
+  // PCrelative TOC access.
+  static bool is_call_byregister(address a) { return is_z_basr(*(short*)a); }
+  static bool is_load_const_from_toc_call(address a);
+  static bool is_load_const_call(address a);
+  static int load_const_call_size() { return load_const_size() + call_byregister_size(); }
+  static int load_const_from_toc_call_size() { return load_const_from_toc_size() + call_byregister_size(); }
+  // Offset is +/- 2**32 -> use long.
+  static long get_load_const_from_toc_offset(address a);
+
+
+  // Type profiling support (implementation in the .cpp file).
+  void generate_type_profiling(const Register Rdata,
+                               const Register Rreceiver_klass,
+                               const Register Rwanted_receiver_klass,
+                               const Register Rmatching_row,
+                               bool is_virtual_call);
+
+  // Bit operations for single register operands.
+  // Note: both are logical shifts (see macroAssembler_s390.inline.hpp).
+  inline void lshift(Register r, int places, bool doubl = true);   // <<
+  inline void rshift(Register r, int places, bool doubl = true);   // >> (logical)
+
+  //
+  // Debugging
+  //
+
+  // Assert on CC (condition code in CPU state).
+  void asm_assert(bool check_equal, const char* msg, int id) PRODUCT_RETURN;
+  void asm_assert_low(const char *msg, int id) PRODUCT_RETURN;
+  void asm_assert_high(const char *msg, int id) PRODUCT_RETURN;
+  void asm_assert_eq(const char* msg, int id) { asm_assert(true, msg, id); }
+  void asm_assert_ne(const char* msg, int id) { asm_assert(false, msg, id); }
+
+  // Variant for code that must not be relocated (cf. the *_static memory
+  // asserts below, which pass allow_relocation = false).
+  void asm_assert_static(bool check_equal, const char* msg, int id) PRODUCT_RETURN;
+
+ private:
+  // Emit assertions.
+  void asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
+                            Register mem_base, const char* msg, int id) PRODUCT_RETURN;
+
+ public:
+  // Assert that the 4- or 8-byte word at (mem_base + mem_offset) is (or is
+  // not) zero. The plain variants allow relocation, the *_static ones do not.
+  inline void asm_assert_mem4_is_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+    asm_assert_mems_zero(true,  true, 4, mem_offset, mem_base, msg, id);
+  }
+  inline void asm_assert_mem8_is_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+    asm_assert_mems_zero(true,  true, 8, mem_offset, mem_base, msg, id);
+  }
+  inline void asm_assert_mem4_isnot_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+    asm_assert_mems_zero(false, true, 4, mem_offset, mem_base, msg, id);
+  }
+  inline void asm_assert_mem8_isnot_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+    asm_assert_mems_zero(false, true, 8, mem_offset, mem_base, msg, id);
+  }
+
+  inline void asm_assert_mem4_is_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+    asm_assert_mems_zero(true,  false, 4, mem_offset, mem_base, msg, id);
+  }
+  inline void asm_assert_mem8_is_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+    asm_assert_mems_zero(true,  false, 8, mem_offset, mem_base, msg, id);
+  }
+  inline void asm_assert_mem4_isnot_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+    asm_assert_mems_zero(false, false, 4, mem_offset, mem_base, msg, id);
+  }
+  inline void asm_assert_mem8_isnot_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+    asm_assert_mems_zero(false, false, 8, mem_offset, mem_base, msg, id);
+  }
+  void asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) PRODUCT_RETURN;
+
+  // Verify Z_thread contents.
+  void verify_thread();
+
+  // Only if +VerifyOops.
+  void verify_oop(Register reg, const char* s = "broken oop");
+
+  // TODO: verify_method and klass metadata (compare against vptr?).
+  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
+  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}
+
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+ private:
+  // Generate printout in stop().
+  static const char* stop_types[];
+  enum {
+    stop_stop               = 0,
+    stop_untested           = 1,
+    stop_unimplemented      = 2,
+    stop_shouldnotreachhere = 3,
+    stop_end                = 4
+  };
+  // Prints msg and stops execution.
+  void    stop(int type, const char* msg, int id = 0);
+  address stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation); // Non-relocatable code only!!
+  void    stop_static(int type, const char* msg, int id);                                        // Non-relocatable code only!!
+
+ public:
+
+  // Prints msg and stops.
+  address stop_chain(      address reentry, const char* msg = "", int id = 0) { return stop_chain(reentry, stop_stop, msg, id, true); }
+  address stop_chain_static(address reentry, const char* msg = "", int id = 0) { return stop_chain(reentry, stop_stop, msg, id, false); }
+  void stop_static  (const char* msg = "", int id = 0) { stop_static(stop_stop,   msg, id); }
+  void stop         (const char* msg = "", int id = 0) { stop(stop_stop,          msg, id); }
+  void untested     (const char* msg = "", int id = 0) { stop(stop_untested,      msg, id); }
+  void unimplemented(const char* msg = "", int id = 0) { stop(stop_unimplemented, msg, id); }
+  void should_not_reach_here(const char* msg = "", int id = -1) { stop(stop_shouldnotreachhere, msg, id); }
+
+  // Factor out part of stop into subroutine to save space.
+  void stop_subroutine();
+
+  // Prints msg, but don't stop.
+  void warn(const char* msg);
+
+  //-----------------------------
+  //---  basic block tracing code
+  //-----------------------------
+  void trace_basic_block(uint i);
+  void init_basic_block_trace();
+  // Number of bytes a basic block gets larger due to the tracing code macro (worst case).
+  // Currently, worst case is 48 bytes. 64 puts us securely on the safe side.
+  static int basic_blck_trace_blk_size_incr() { return 64; }
+
+  // Write pattern 0x0101010101010101 in region [low-before, high+after].
+  // Low and high may be the same registers. Before and after are
+  // the numbers of 8-byte words.
+  void zap_from_to(Register low, Register high, Register tmp1 = Z_R0, Register tmp2 = Z_R1,
+                   int before = 0, int after = 0) PRODUCT_RETURN;
+
+  // Emitters for CRC32 calculation.
+ private:
+  // Internal helpers for the CRC32 kernels below.
+  void fold_byte_crc32(Register crc, Register table, Register val, Register tmp);
+  void fold_8bit_crc32(Register crc, Register table, Register tmp);
+  void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
+                             Register data, bool invertCRC);
+  void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
+                          Register t0,  Register t1,  Register t2,  Register t3);
+ public:
+  void update_byte_crc32( Register crc, Register val, Register table);
+  void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp);
+  void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
+                          Register t0,  Register t1,  Register t2,  Register t3);
+  void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
+                          Register t0,  Register t1,  Register t2,  Register t3);
+  void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
+                          Register t0,  Register t1,  Register t2,  Register t3);
+
+  // Emitters for BigInteger.multiplyToLen intrinsic
+  // note: length of result array (zlen) is passed on the stack
+ private:
+  void add2_with_carry(Register dest_hi, Register dest_lo,
+                       Register src1, Register src2);
+  void multiply_64_x_64_loop(Register x, Register xstart,
+                             Register x_xstart,
+                             Register y, Register y_idx, Register z,
+                             Register carry, Register product,
+                             Register idx, Register kdx);
+  void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
+                              Register yz_idx, Register idx,
+                              Register carry, Register product, int offset);
+  void multiply_128_x_128_loop(Register x_xstart,
+                               Register y, Register z,
+                               Register yz_idx, Register idx,
+                               Register jdx,
+                               Register carry, Register product,
+                               Register carry2);
+ public:
+  void multiply_to_len(Register x, Register xlen,
+                       Register y, Register ylen,
+                       Register z,
+                       Register tmp1, Register tmp2,
+                       Register tmp3, Register tmp4, Register tmp5);
+};
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of
+ * the flag passed to the constructor, which will be checked at run-time.
+ */
+class SkipIfEqual {
+ private:
+  MacroAssembler* _masm;   // Assembler the skip is emitted into.
+  Label _label;            // Branch target; bound by the destructor.
+
+ public:
+  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value, Register _rscratch);
+  ~SkipIfEqual();
+};
+
+#ifdef ASSERT
+// Return false (e.g. important for our impl. of virtual calls).
+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
+#endif
+
+#endif // CPU_S390_VM_MACROASSEMBLER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.inline.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
+#define CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+#include "runtime/thread.hpp"
+
+// Simplified shift operations for single register operands, constant shift amount.
+// Both are logical shifts (SLLG/SLL, SRLG/SRL); rshift does NOT sign-extend.
+// Note: the header declares the last parameter as 'doubl'; here it is named
+// is_DW (true = 64-bit operation, false = 32-bit).
+inline void MacroAssembler::lshift(Register r, int places, bool is_DW) {
+  if (is_DW) {
+    z_sllg(r, r, places);
+  } else {
+    z_sll(r, places);
+  }
+}
+
+inline void MacroAssembler::rshift(Register r, int places, bool is_DW) {
+  if (is_DW) {
+    z_srlg(r, r, places);
+  } else {
+    z_srl(r, places);
+  }
+}
+
+// *((int8_t*)(dst)) |= imm8
+// Chooses OI or OIY depending on whether the displacement fits the short form.
+inline void MacroAssembler::or2mem_8(Address& dst, int64_t imm8) {
+  if (Displacement::is_shortDisp(dst.disp())) {
+    z_oi(dst, imm8);   // Short-displacement form.
+  } else {
+    z_oiy(dst, imm8);  // Long-displacement form.
+  }
+}
+
+// Convenience overload: store a 4- or 8-byte constant where the memory slot
+// and the constant have the same width. Delegates to the general
+// store_const(dest, imm, lm, lc, scratch) emitter; returns its result.
+inline int MacroAssembler::store_const(const Address &dest, long imm, Register scratch, bool is_long) {
+  const unsigned int width = is_long ? 8 : 4; // Width of both slot and constant.
+  return store_const(dest, imm, width, width, scratch);
+}
+
+// Do not rely on add2reg* emitter.
+// Depending on CmdLine switches and actual parameter values,
+// the generated code may alter the condition code, which is counter-intuitive
+// to the semantics of the "load address" (LA/LAY) instruction.
+// Generic address loading d <- base(a) + index(a) + disp(a)
+inline void MacroAssembler::load_address(Register d, const Address &a) {
+  if (Displacement::is_shortDisp(a.disp())) {
+    z_la(d, a.disp(), a.indexOrR0(), a.baseOrR0());   // Short displacement: LA suffices.
+  } else if (Displacement::is_validDisp(a.disp())) {
+    z_lay(d, a.disp(), a.indexOrR0(), a.baseOrR0());  // Long displacement: need LAY.
+  } else {
+    guarantee(false, "displacement = " SIZE_FORMAT_HEX ", out of range for LA/LAY", a.disp());
+  }
+}
+
+inline void MacroAssembler::load_const(Register t, void* x) {
+  load_const(t, (long)x);
+}
+
+// Load a 64 bit constant encoded by a `Label'.
+// Works for bound as well as unbound labels. For unbound labels, the
+// code will become patched as soon as the label gets bound.
+inline void MacroAssembler::load_const(Register t, Label& L) {
+  load_const(t, target(L));
+}
+
+inline void MacroAssembler::load_const(Register t, const AddressLiteral& a) {
+  assert(t != Z_R0, "R0 not allowed");
+  // First relocate (we don't change the offset in the RelocationHolder,
+  // just pass a.rspec()), then delegate to load_const(Register, long).
+  relocate(a.rspec());
+  load_const(t, (long)a.value());
+}
+
+// Optimized variants. These must not be used where the constant is later
+// patched via relocation (see the assert in the AddressLiteral overload).
+inline void MacroAssembler::load_const_optimized(Register t, long x) {
+  (void) load_const_optimized_rtn_len(t, x, true);
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, void* a) {
+  load_const_optimized(t, (long)a);
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, Label& L) {
+  load_const_optimized(t, target(L));
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, const AddressLiteral& a) {
+  assert(t != Z_R0, "R0 not allowed");
+  assert((relocInfo::relocType)a.rspec().reloc()->type() == relocInfo::none,
+          "cannot relocate optimized load_consts");
+  load_const_optimized(t, a.value());
+}
+
+// Load the (relocated) address of jobject obj into d.
+inline void MacroAssembler::set_oop(jobject obj, Register d) {
+  load_const(d, allocate_oop_address(obj));
+}
+
+inline void MacroAssembler::set_oop_constant(jobject obj, Register d) {
+  load_const(d, constant_oop_address(obj));
+}
+
+// Adds MetaData constant md to TOC and loads it from there.
+// md is added to the oop_recorder, but no relocation is added.
+inline bool MacroAssembler::set_metadata_constant(Metadata* md, Register d) {
+  AddressLiteral a = constant_metadata_address(md);
+  return load_const_from_toc(d, a, d); // Discards the relocation.
+}
+
+
+// Instruction classification. 'inst' is the instruction image as obtained
+// via get_instruction() (see the address-based overloads further below).
+// The trailing comments give offset-field position and width in bits.
+inline bool MacroAssembler::is_call_pcrelative_short(unsigned long inst) {
+  return is_equal(inst, BRAS_ZOPC); // off 16, len 16
+}
+
+inline bool MacroAssembler::is_call_pcrelative_long(unsigned long inst) {
+  return is_equal(inst, BRASL_ZOPC); // off 16, len 32
+}
+
+inline bool MacroAssembler::is_branch_pcrelative_short(unsigned long inst) {
+  // Branch relative, 16-bit offset.
+  return is_equal(inst, BRC_ZOPC); // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branch_pcrelative_long(unsigned long inst) {
+  // Branch relative, 32-bit offset.
+  return is_equal(inst, BRCL_ZOPC); // off 16, len 32
+}
+
+inline bool MacroAssembler::is_compareandbranch_pcrelative_short(unsigned long inst) {
+  // Compare and branch relative, 16-bit offset.
+  return is_equal(inst, CRJ_ZOPC, CMPBRANCH_MASK)  || is_equal(inst, CGRJ_ZOPC, CMPBRANCH_MASK)  ||
+         is_equal(inst, CIJ_ZOPC, CMPBRANCH_MASK)  || is_equal(inst, CGIJ_ZOPC, CMPBRANCH_MASK)  ||
+         is_equal(inst, CLRJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CLGRJ_ZOPC, CMPBRANCH_MASK) ||
+         is_equal(inst, CLIJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CLGIJ_ZOPC, CMPBRANCH_MASK);
+}
+
+inline bool MacroAssembler::is_branchoncount_pcrelative_short(unsigned long inst) {
+  // Branch relative on count, 16-bit offset.
+  return is_equal(inst, BRCT_ZOPC) || is_equal(inst, BRCTG_ZOPC); // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex32_pcrelative_short(unsigned long inst) {
+  // Branch relative on index (32bit), 16-bit offset.
+  return is_equal(inst, BRXH_ZOPC) || is_equal(inst, BRXLE_ZOPC); // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex64_pcrelative_short(unsigned long inst) {
+  // Branch relative on index (64bit), 16-bit offset.
+  return is_equal(inst, BRXHG_ZOPC) || is_equal(inst, BRXLG_ZOPC); // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex_pcrelative_short(unsigned long inst) {
+  return is_branchonindex32_pcrelative_short(inst) ||
+         is_branchonindex64_pcrelative_short(inst);
+}
+
+// A 16-bit pc-relative branch is any of the relative-branch, compare-and-branch,
+// branch-on-count, or branch-on-index forms with a 16-bit offset.
+inline bool MacroAssembler::is_branch_pcrelative16(unsigned long inst) {
+  if (is_branch_pcrelative_short(inst))           return true;
+  if (is_compareandbranch_pcrelative_short(inst)) return true;
+  if (is_branchoncount_pcrelative_short(inst))    return true;
+  return is_branchonindex_pcrelative_short(inst);
+}
+
+// A 32-bit pc-relative branch: only the long relative-branch form qualifies.
+inline bool MacroAssembler::is_branch_pcrelative32(unsigned long inst) {
+  return is_branch_pcrelative_long(inst);
+}
+
+// Any pc-relative branch, regardless of offset width.
+inline bool MacroAssembler::is_branch_pcrelative(unsigned long inst) {
+  return is_branch_pcrelative16(inst) || is_branch_pcrelative32(inst);
+}
+
+// Load relative (LRL/LGRL), 32-bit offset at bit 16.
+inline bool MacroAssembler::is_load_pcrelative_long(unsigned long inst) {
+  const bool is_lrl  = is_equal(inst, LRL_ZOPC,  REL_LONG_MASK);
+  const bool is_lgrl = is_equal(inst, LGRL_ZOPC, REL_LONG_MASK);
+  return is_lrl || is_lgrl;
+}
+
+// Load address / execute relative (LARL/EXRL), 32-bit offset at bit 16.
+inline bool MacroAssembler::is_misc_pcrelative_long(unsigned long inst) {
+  const bool is_larl = is_equal(inst, LARL_ZOPC, REL_LONG_MASK);
+  const bool is_exrl = is_equal(inst, EXRL_ZOPC, REL_LONG_MASK);
+  return is_larl || is_exrl;
+}
+
+// Any pc-relative instruction with a 16-bit offset (branches and short calls).
+inline bool MacroAssembler::is_pcrelative_short(unsigned long inst) {
+  return is_branch_pcrelative16(inst) || is_call_pcrelative_short(inst);
+}
+
+// Any pc-relative instruction with a 32-bit offset (branch, call, load, misc).
+inline bool MacroAssembler::is_pcrelative_long(unsigned long inst) {
+  if (is_branch_pcrelative32(inst)) return true;
+  if (is_call_pcrelative_long(inst)) return true;
+  if (is_load_pcrelative_long(inst)) return true;
+  return is_misc_pcrelative_long(inst);
+}
+
+// Address-based variants: fetch the instruction at iLoc, then check both
+// the opcode pattern and the instruction length.
+inline bool MacroAssembler::is_load_pcrelative_long(address iLoc) {
+  unsigned long inst;
+  unsigned int  len = get_instruction(iLoc, &inst);
+  return (len == 6) && is_load_pcrelative_long(inst);
+}
+
+inline bool MacroAssembler::is_pcrelative_short(address iLoc) {
+  unsigned long inst;
+  unsigned int  len = get_instruction(iLoc, &inst);
+  // Short-offset forms come in both 4-byte and 6-byte encodings.
+  return ((len == 4) || (len == 6)) && is_pcrelative_short(inst);
+}
+
+inline bool MacroAssembler::is_pcrelative_long(address iLoc) {
+  unsigned long inst;
+  unsigned int  len = get_instruction(iLoc, &inst);
+  return (len == 6) && is_pcrelative_long(inst);
+}
+
+// Dynamic TOC. Test for any pc-relative instruction.
+// NOTE(review): unlike the checks above, the instruction length returned by
+// get_instruction() is not checked here.
+inline bool MacroAssembler::is_pcrelative_instruction(address iloc) {
+  unsigned long inst;
+  get_instruction(iloc, &inst);
+  return is_pcrelative_short(inst) ||
+         is_pcrelative_long(inst);
+}
+
+inline bool MacroAssembler::is_load_addr_pcrel(address a) {
+  return is_equal(a, LARL_ZOPC, LARL_MASK);
+}
+
+// Save the return pc in the register that should be stored as the return pc
+// in the current frame (default is R14).
+inline void MacroAssembler::save_return_pc(Register pc) {
+  z_stg(pc, _z_abi16(return_pc), Z_SP);
+}
+
+// Reload Z_R14 from the return_pc slot of the current frame.
+inline void MacroAssembler::restore_return_pc() {
+  z_lg(Z_R14, _z_abi16(return_pc), Z_SP);
+}
+
+// Call a function with given entry.
+// Returns the return pc of the call, also remembered in _last_calls_return_pc.
+inline address MacroAssembler::call(Register function_entry) {
+  assert(function_entry != Z_R0, "function_entry cannot be Z_R0");
+
+  Assembler::z_basr(Z_R14, function_entry);
+  _last_calls_return_pc = pc();
+
+  return _last_calls_return_pc;
+}
+
+// Call a C function via a function entry.
+inline address MacroAssembler::call_c(Register function_entry) {
+  return call(function_entry);
+}
+
+// Call a stub function via a function descriptor, but don't save TOC before
+// call, don't setup TOC and ENV for call, and don't restore TOC after call
+inline address MacroAssembler::call_stub(Register function_entry) {
+  return call_c(function_entry);
+}
+
+inline address MacroAssembler::call_stub(address function_entry) {
+  return call_c(function_entry);
+}
+
+// Get the pc where the last emitted call will return to.
+inline address MacroAssembler::last_calls_return_pc() {
+  return _last_calls_return_pc;
+}
+
+// Wrappers around the private allow_relocation variants. The _static
+// versions of set_last_Java_frame/reset_last_Java_frame pass
+// allow_relocation = false (for non-relocatable code).
+inline void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) {
+  set_last_Java_frame(last_Java_sp, last_Java_pc, true);
+}
+
+inline void MacroAssembler::set_last_Java_frame_static(Register last_Java_sp, Register last_Java_pc) {
+  set_last_Java_frame(last_Java_sp, last_Java_pc, false);
+}
+
+inline void MacroAssembler::reset_last_Java_frame(void) {
+  reset_last_Java_frame(true);
+}
+
+inline void MacroAssembler::reset_last_Java_frame_static(void) {
+  reset_last_Java_frame(false);
+}
+
+inline void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1) {
+  set_top_ijava_frame_at_SP_as_last_Java_frame(sp, tmp1, true);
+}
+
+inline void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame_static(Register sp, Register tmp1) {
+  // The _static variant is for non-relocatable code and must pass
+  // allow_relocation = false, consistent with set_last_Java_frame_static()
+  // and reset_last_Java_frame_static() above. (Bug fix: was 'true', which
+  // made this wrapper identical to the non-static variant.)
+  set_top_ijava_frame_at_SP_as_last_Java_frame(sp, tmp1, false);
+}
+
+#endif // CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/metaspaceShared_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/codeBuffer.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "memory/metaspaceShared.hpp"
+
+// Generate the self-patching vtable method:
+//
+// This method will be called (as any other Klass virtual method) with
+// the Klass itself as the first argument. Example:
+//
+//   oop obj;
+//   int size = obj->klass()->klass_part()->oop_size(this);
+//
+// for which the virtual method call is Klass::oop_size();.
+//
+// The dummy method is called with the Klass object as the first
+// operand, and an object as the second argument.
+//
+
+//=====================================================================
+
+// All of the dummy methods in the vtable are essentially identical,
+// differing only by an ordinal constant, and they bear no releationship
+// to the original method which the caller intended. Also, there needs
+// to be 'vtbl_list_size' instances of the vtable in order to
+// differentiate between the 'vtable_list_size' original Klass objects.
+
+#undef __
+#define __ masm->
+
+void MetaspaceShared::generate_vtable_methods(void** vtbl_list,
+                                              void** vtable,
+                                              char** md_top,
+                                              char* md_end,
+                                              char** mc_top,
+                                              char* mc_end) {
+
+  intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*);
+  *(intptr_t *)(*md_top) = vtable_bytes;  // Record the total dummy-vtable size in the MD region.
+  *md_top += sizeof(intptr_t);
+  void** dummy_vtable = (void**)*md_top;  // The dummy vtable itself lives in the MD region ...
+  *vtable = dummy_vtable;
+  *md_top += vtable_bytes;                // ... and consumes vtable_bytes of it.
+
+  // Get ready to generate dummy methods.
+
+  CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+
+  __ unimplemented();                     // Dummy-method generation is not implemented on s390 yet; aborts if reached.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/methodHandles_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "classfile/javaClasses.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+
+#ifdef PRODUCT
+#define __ _masm->
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
+static RegisterOrConstant constant(int value) {
+  return RegisterOrConstant(value); // Forces the constant overload; a literal 0 would be ambiguous with Register.
+}
+
+// Replace a java.lang.Class oop in klass_reg by the Klass* it mirrors.
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg,
+                                          Register temp_reg, Register temp2_reg) {
+  if (VerifyMethodHandles) {
+    verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class),
+                 temp_reg, temp2_reg, "MH argument is a Class");
+  }
+  __ z_lg(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes()));
+}
+
+
+#ifdef ASSERT
+// Assert helper for the NONZERO macro: fails with the field's name if its offset is 0.
+static int check_nonzero(const char* xname, int x) {
+  assert(x != 0, "%s should be nonzero", xname);
+  return x;
+}
+#define NONZERO(x) check_nonzero(#x, x)
+#else
+#define NONZERO(x) (x)
+#endif
+
+#ifdef ASSERT
+// Emit a runtime check that obj_reg holds a non-null instance of the well-known
+// klass klass_id (or a subclass of it, via the super_check_offset slot); stops
+// with error_message otherwise. Debug builds only (#ifdef ASSERT).
+void MethodHandles::verify_klass(MacroAssembler* _masm,
+                                 Register obj_reg, SystemDictionary::WKID klass_id,
+                                 Register temp_reg, Register temp2_reg,
+                                 const char* error_message) {
+
+  InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id);
+  KlassHandle klass = SystemDictionary::well_known_klass(klass_id);
+
+  assert(temp_reg != Z_R0 && // Is used as base register!
+         temp_reg != noreg && temp2_reg != noreg, "need valid registers!");
+
+  NearLabel L_ok, L_bad;
+
+  BLOCK_COMMENT("verify_klass {");
+
+  __ verify_oop(obj_reg);
+  __ compareU64_and_branch(obj_reg, (intptr_t)0L, Assembler::bcondEqual, L_bad); // Null obj -> fail.
+  __ load_klass(temp_reg, obj_reg);
+  // klass_addr is a klass in all-static SystemDictionaryHandles. Can't get GCed.
+  __ load_const_optimized(temp2_reg, (address)klass_addr);
+  __ z_lg(temp2_reg, Address(temp2_reg));
+  __ compareU64_and_branch(temp_reg, temp2_reg, Assembler::bcondEqual, L_ok);    // Exact match.
+
+  intptr_t super_check_offset = klass->super_check_offset();
+  __ z_lg(temp_reg, Address(temp_reg, super_check_offset));                      // Fast subtype check slot.
+  __ compareU64_and_branch(temp_reg, temp2_reg, Assembler::bcondEqual, L_ok);
+  __ BIND(L_bad);
+  __ stop(error_message);
+  __ BIND(L_ok);
+
+  BLOCK_COMMENT("} verify_klass");
+}
+
+// Emit a runtime check that the MemberName in member_reg has the expected
+// reference kind (extracted from its flags field); stops otherwise. ASSERT only.
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind,
+                                    Register member_reg, Register temp  ) {
+  NearLabel L;
+  BLOCK_COMMENT("verify_ref_kind {");
+
+  __ z_llgf(temp,
+            Address(member_reg,
+                    NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes())));
+  __ z_srl(temp,  java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT);  // Isolate the ref-kind bits ...
+  __ z_nilf(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);   // ... shift, then mask.
+  __ compare32_and_branch(temp, constant(ref_kind), Assembler::bcondEqual, L);
+
+  {
+    char *buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);  // Note: not freed; stop() below embeds buf for use at runtime.
+
+    jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
+    if (ref_kind == JVM_REF_invokeVirtual || ref_kind == JVM_REF_invokeSpecial) {
+      // Could do this for all ref_kinds, but would explode assembly code size.
+      trace_method_handle(_masm, buf);
+    }
+    __ stop(buf);
+  }
+
+  BLOCK_COMMENT("} verify_ref_kind");
+
+  __ bind(L);
+}
+#endif // ASSERT
+
+// Tail-jump into the Method* in 'method', choosing the compiled or interpreted
+// entry point. Takes care of JVMTI interp_only_mode and throws
+// AbstractMethodError if method is NULL. Similar to
+// InterpreterMacroAssembler::jump_from_interpreted.
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target,
+                                            Register temp, bool for_compiler_entry) {
+  assert(method == Z_method, "interpreter calling convention");
+  __ verify_method_ptr(method);
+
+  assert(target != method, "don't you kill the method reg!"); // Typo fix: was "don 't".
+
+  Label L_no_such_method;
+
+  if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
+    // JVMTI events, such as single-stepping, are implemented partly
+    // by avoiding running compiled code in threads for which the
+    // event is enabled. Check here for interp_only_mode if these
+    // events CAN be enabled.
+    __ verify_thread();
+
+    Label run_compiled_code;
+
+    __ load_and_test_int(temp, Address(Z_thread, JavaThread::interp_only_mode_offset()));
+    __ z_bre(run_compiled_code);
+
+    // Null method test is replicated below in compiled case,
+    // it might be able to address across the verify_thread().
+    __ z_ltgr(temp, method);      // method == NULL?
+    __ z_bre(L_no_such_method);
+
+    __ z_lg(target, Address(method, Method::interpreter_entry_offset()));
+    __ z_br(target);
+
+    __ bind(run_compiled_code);
+  }
+
+  // Compiled case, either static or fall-through from runtime conditional.
+  __ z_ltgr(temp, method);        // method == NULL?
+  __ z_bre(L_no_such_method);
+
+  ByteSize offset = for_compiler_entry ?
+                       Method::from_compiled_offset() : Method::from_interpreted_offset();
+  Address method_from(method, offset);
+
+  __ z_lg(target, method_from);
+  __ z_br(target);
+
+  __ bind(L_no_such_method);
+  assert(StubRoutines::throw_AbstractMethodError_entry() != NULL, "not yet generated!");
+  __ load_const_optimized(target, StubRoutines::throw_AbstractMethodError_entry());
+  __ z_br(target);
+}
+
+// Entry path for a lazy method handle: follow MH.form -> LF.vmentry ->
+// MemberName.vmtarget to the invoker Method*, then jump into it.
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+                                        Register recv, Register method_temp,
+                                        Register temp2, Register temp3,
+                                        bool for_compiler_entry) {
+
+  // This is the initial entry point of a lazy method handle.
+  // After type checking, it picks up the invoker from the LambdaForm.
+  assert_different_registers(recv, method_temp, temp2, temp3);
+  assert(method_temp == Z_method, "required register for loading method");
+
+  BLOCK_COMMENT("jump_to_lambda_form {");
+
+  // Load the invoker, as MH -> MH.form -> LF.vmentry
+  __ verify_oop(recv);
+  __ load_heap_oop(method_temp,
+                     Address(recv,
+                             NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())));
+  __ verify_oop(method_temp);
+  __ load_heap_oop(method_temp,
+                     Address(method_temp,
+                             NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())));
+  __ verify_oop(method_temp);
+  // The following assumes that a method is normally compressed in the vmtarget field.
+  __ z_lg(method_temp,
+          Address(method_temp,
+                  NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())));
+
+  if (VerifyMethodHandles && !for_compiler_entry) {
+    // Make sure recv is already on stack.
+    NearLabel L;
+    Address paramSize(temp2, ConstMethod::size_of_parameters_offset());
+
+    __ z_lg(temp2, Address(method_temp, Method::const_offset()));
+    __ load_sized_value(temp2, paramSize, sizeof(u2), /*is_signed*/ false); // u2 parameter count, zero-extended.
+    // if (temp2 != recv) stop
+    __ z_lg(temp2, __ argument_address(temp2, temp2, 0));  // First interpreter stack argument.
+    __ compare64_and_branch(temp2, recv, Assembler::bcondEqual, L);
+    __ stop("receiver not on stack");
+    __ BIND(L);
+  }
+
+  jump_from_method_handle(_masm, method_temp, temp2, Z_R0, for_compiler_entry);
+
+  BLOCK_COMMENT("} jump_to_lambda_form");
+}
+
+// code generation
+// Interpreter entry for a signature-polymorphic intrinsic: verifies the
+// intrinsic id (debug), locates the trailing MH/MemberName argument on the
+// interpreter stack, and dispatches via generate_method_handle_dispatch().
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+                                                                vmIntrinsics::ID iid) {
+  const bool not_for_compiler_entry = false;  // This is the interpreter entry.
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+
+  if (iid == vmIntrinsics::_invokeGeneric || iid == vmIntrinsics::_compiledLambdaForm) {
+    // Perhaps surprisingly, the symbolic references visible to Java
+    // are not directly used. They are linked to Java-generated
+    // adapters via MethodHandleNatives.linkMethod. They all allow an
+    // appendix argument.
+    __ should_not_reach_here();           // Empty stubs make SG sick.
+    return NULL;
+  }
+
+  // Z_R10: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+  // Z_method: method
+  // Z_ARG1 (Gargs): incoming argument list (must preserve)
+  Register Z_R4_param_size = Z_R4;   // size of parameters
+  address code_start = __ pc();      // NOTE(review): appears unused below -- confirm before removing.
+
+  // Here is where control starts out:
+  __ align(CodeEntryAlignment);
+
+  address entry_point = __ pc();
+
+  if (VerifyMethodHandles) {
+    Label L;
+    BLOCK_COMMENT("verify_intrinsic_id {");
+
+    // Supplement to 8139891: _intrinsic_id exceeded 1-byte size limit.
+    if (Method::intrinsic_id_size_in_bytes() == 1) {
+      __ z_cli(Address(Z_method, Method::intrinsic_id_offset_in_bytes()), (int)iid);
+    } else {
+      assert(Method::intrinsic_id_size_in_bytes() == 2, "size error: check Method::_intrinsic_id");
+      __ z_lh(Z_R0_scratch, Address(Z_method, Method::intrinsic_id_offset_in_bytes()));
+      __ z_chi(Z_R0_scratch, (int)iid);
+    }
+    __ z_bre(L);
+
+    if (iid == vmIntrinsics::_linkToVirtual || iid == vmIntrinsics::_linkToSpecial) {
+      // Could do this for all kinds, but would explode assembly code size.
+      trace_method_handle(_masm, "bad Method::intrinsic_id");
+    }
+
+    __ stop("bad Method::intrinsic_id");
+    __ bind(L);
+
+    BLOCK_COMMENT("} verify_intrinsic_id");
+  }
+
+  // First task: Find out how big the argument list is.
+  Address Z_R4_first_arg_addr;
+  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+
+  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic,
+         "must be _invokeBasic or a linkTo intrinsic");
+
+  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    Address paramSize(Z_R1_scratch, ConstMethod::size_of_parameters_offset()); // Indentation fixed (was off by one column).
+
+    __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+    __ load_sized_value(Z_R4_param_size, paramSize, sizeof(u2), /*is_signed*/ false);
+    Z_R4_first_arg_addr = __ argument_address(Z_R4_param_size, Z_R4_param_size, 0);
+  } else {
+    DEBUG_ONLY(Z_R4_param_size = noreg);
+  }
+
+  Register Z_mh = noreg;
+  if (!is_signature_polymorphic_static(iid)) {
+    Z_mh = Z_ARG4;
+    __ z_lg(Z_mh, Z_R4_first_arg_addr);   // The MH is the first stacked argument.
+    DEBUG_ONLY(Z_R4_param_size = noreg);
+  }
+
+  // Z_R4_first_arg_addr is live!
+
+  trace_method_handle_interpreter_entry(_masm, iid);
+
+  if (iid == vmIntrinsics::_invokeBasic) {
+    __ pc(); // just for the block comment
+    generate_method_handle_dispatch(_masm, iid, Z_mh, noreg, not_for_compiler_entry);
+  } else {
+    // Adjust argument list by popping the trailing MemberName argument.
+    Register Z_recv = noreg;
+
+    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+      // Load the receiver (not the MH; the actual MemberName's receiver)
+      // up from the interpreter stack.
+      __ z_lg(Z_recv = Z_R5, Z_R4_first_arg_addr);
+      DEBUG_ONLY(Z_R4_param_size = noreg);
+    }
+
+    Register Z_member = Z_method;  // MemberName ptr; incoming method ptr is dead now
+
+    __ z_lg(Z_member, __ argument_address(constant(1)));  // Trailing argument.
+    __ add2reg(Z_esp, Interpreter::stackElementSize);     // Pop it off the expression stack.
+    generate_method_handle_dispatch(_masm, iid, Z_recv, Z_member, not_for_compiler_entry);
+  }
+
+  return entry_point;
+}
+
+// Dispatch a signature-polymorphic intrinsic: _invokeBasic goes through the
+// LambdaForm; the linkTo* variants resolve the target Method* from the
+// trailing MemberName (vmtarget / vtable / itable) and tail-jump into it.
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+                                                    vmIntrinsics::ID iid,
+                                                    Register receiver_reg,
+                                                    Register member_reg,
+                                                    bool for_compiler_entry) {
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+
+  Register temp1 = for_compiler_entry ? Z_R10 : Z_R6; // Z_R10 must stay intact for interpreter linkage (see below).
+  Register temp2 = Z_R12;
+  Register temp3 = Z_R11;
+  Register temp4 = Z_R13;
+
+  if (for_compiler_entry) {
+    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : Z_ARG1),
+           "only valid assignment");
+  }
+  if (receiver_reg != noreg) {
+    assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg);
+  }
+  if (member_reg != noreg) {
+    assert_different_registers(temp1, temp2, temp3, temp4, member_reg);
+  }
+  if (!for_compiler_entry) {  // Don't trash last SP.
+    assert_different_registers(temp1, temp2, temp3, temp4, Z_R10);
+  }
+
+  if (iid == vmIntrinsics::_invokeBasic) {
+    __ pc(); // Just for the block comment.
+    // Indirect through MH.form.vmentry.vmtarget.
+    jump_to_lambda_form(_masm, receiver_reg, Z_method, Z_R1, temp3, for_compiler_entry);
+    return;
+  }
+
+  // The method is a member invoker used by direct method handles.
+  if (VerifyMethodHandles) {
+    // Make sure the trailing argument really is a MemberName (caller responsibility).
+    verify_klass(_masm, member_reg,
+                 SystemDictionary::WK_KLASS_ENUM_NAME(MemberName_klass),
+                 temp1, temp2,
+                 "MemberName required for invokeVirtual etc.");
+  }
+
+  Address  member_clazz(   member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
+  Address  member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
+  Address  member_vmtarget(member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
+  Register temp1_recv_klass = temp1;
+
+  if (iid != vmIntrinsics::_linkToStatic) {
+    __ verify_oop(receiver_reg);
+    if (iid == vmIntrinsics::_linkToSpecial) {
+      // Don't actually load the klass; just null-check the receiver.
+      __ null_check(receiver_reg);
+    } else {
+      // Load receiver klass itself.
+      __ null_check(receiver_reg, Z_R0, oopDesc::klass_offset_in_bytes());
+      __ load_klass(temp1_recv_klass, receiver_reg);
+      __ verify_klass_ptr(temp1_recv_klass);
+    }
+    BLOCK_COMMENT("check_receiver {");
+    // The receiver for the MemberName must be in receiver_reg.
+    // Check the receiver against the MemberName.clazz.
+    if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+      // Did not load it above...
+      __ load_klass(temp1_recv_klass, receiver_reg);
+      __ verify_klass_ptr(temp1_recv_klass);
+    }
+
+    if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+      NearLabel L_ok;
+      Register temp2_defc = temp2;
+
+      __ load_heap_oop(temp2_defc, member_clazz);
+      load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
+      __ verify_klass_ptr(temp2_defc);
+      __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok);
+      // If we get here, the type check failed!
+      __ stop("receiver class disagrees with MemberName.clazz");
+      __ bind(L_ok);
+    }
+    BLOCK_COMMENT("} check_receiver");
+  }
+  if (iid == vmIntrinsics::_linkToSpecial || iid == vmIntrinsics::_linkToStatic) {
+    DEBUG_ONLY(temp1_recv_klass = noreg);  // These guys didn't load the recv_klass.
+  }
+
+  // Live registers at this point:
+  //   member_reg       - MemberName that was the trailing argument.
+  //   temp1_recv_klass - Klass of stacked receiver, if needed.
+  //   Z_R10            - Interpreter linkage if interpreted.
+
+  bool method_is_live = false;  // Set when Z_method holds the resolved target and a common jump suffices.
+
+  switch (iid) {
+    case vmIntrinsics::_linkToSpecial:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+      }
+      __ z_lg(Z_method, member_vmtarget);  // Target is stored directly in the MemberName.
+      method_is_live = true;
+      break;
+
+    case vmIntrinsics::_linkToStatic:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+      }
+      __ z_lg(Z_method, member_vmtarget);  // Target is stored directly in the MemberName.
+      method_is_live = true;
+      break;
+
+    case vmIntrinsics::_linkToVirtual: {
+      // Same as TemplateTable::invokevirtual, minus the CP setup and profiling.
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+      }
+
+      // Pick out the vtable index from the MemberName, and then we can discard it.
+      Register temp2_index = temp2;
+      __ z_lg(temp2_index, member_vmindex);
+
+      if (VerifyMethodHandles) {
+        // if (member_vmindex < 0) stop
+        NearLabel L_index_ok;
+        __ compare32_and_branch(temp2_index, constant(0), Assembler::bcondNotLow, L_index_ok);
+        __ stop("no virtual index");
+        __ BIND(L_index_ok);
+      }
+
+      // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+      // at this point. And VerifyMethodHandles has already checked clazz, if needed.
+
+      // Get target method and entry point.
+      __ lookup_virtual_method(temp1_recv_klass, temp2_index, Z_method);
+      method_is_live = true;
+      break;
+    }
+
+    case vmIntrinsics::_linkToInterface: {
+      // Same as TemplateTable::invokeinterface, minus the CP setup
+      // and profiling, with different argument motion.
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+      }
+
+      Register temp3_intf = temp3;
+
+      __ load_heap_oop(temp3_intf, member_clazz);
+      load_klass_from_Class(_masm, temp3_intf, temp2, temp4);
+
+      Register Z_index = Z_method;  // Reuse Z_method for the itable index; it is reloaded below.
+
+      __ z_lg(Z_index, member_vmindex);
+
+      if (VerifyMethodHandles) {
+        NearLabel L;
+        // if (member_vmindex < 0) stop
+        __ compare32_and_branch(Z_index, constant(0), Assembler::bcondNotLow, L);
+        __ stop("invalid vtable index for MH.invokeInterface");
+        __ bind(L);
+      }
+
+      // Given interface, index, and recv klass, dispatch to the implementation method.
+      Label L_no_such_interface;
+      __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+                                 // Note: next two args must be the same:
+                                 Z_index, Z_method, temp2, noreg,
+                                 L_no_such_interface);
+      jump_from_method_handle(_masm, Z_method, temp2, Z_R0, for_compiler_entry);
+
+      __ bind(L_no_such_interface);
+
+      // Throw exception.
+      __ load_const_optimized(Z_R1, StubRoutines::throw_IncompatibleClassChangeError_entry());
+      __ z_br(Z_R1);
+      break;
+    }
+
+    default:
+      fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
+      break;
+  }
+
+  if (method_is_live) {
+    // Live at this point: Z_method, O5_savedSP (if interpreted).
+
+    // After figuring out which concrete method to call, jump into it.
+    // Note that this works in the interpreter with no data motion.
+    // But the compiled version will require that rcx_recv be shifted out.
+    jump_from_method_handle(_masm, Z_method, temp1, Z_R0, for_compiler_entry);
+  }
+}
+
+#ifndef PRODUCT
+// Runtime leaf called from the generated trace stub: prints the adapter name,
+// MH oop, sender SP and args, and (with -XX:+Verbose) a description of the
+// caller's frame via frame::describe. Not-PRODUCT only.
+void trace_method_handle_stub(const char* adaptername,
+                              oopDesc* mh,
+                              intptr_t* sender_sp,
+                              intptr_t* args,
+                              intptr_t* tracing_fp) {
+  bool has_mh = (strstr(adaptername, "/static") == NULL &&
+                 strstr(adaptername, "linkTo") == NULL);    // Static linkers don't have MH.
+  const char* mh_reg_name = has_mh ? "Z_R4_mh" : "Z_R4";
+  tty->print_cr("MH %s %s=" INTPTR_FORMAT " sender_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT,
+                adaptername, mh_reg_name,
+                p2i(mh), p2i(sender_sp), p2i(args));
+
+  if (Verbose) {
+    // Dumping last frame with frame::describe.
+
+    JavaThread* p = JavaThread::active();
+
+    ResourceMark rm;
+    PRESERVE_EXCEPTION_MARK; // May not be needed, but safe and inexpensive here.
+    FrameValues values;
+
+    // Note: We want to allow trace_method_handle from any call site.
+    // While trace_method_handle creates a frame, it may be entered
+    // without a valid return PC in Z_R14 (e.g. not just after a call).
+    // Walking that frame could lead to failures due to that invalid PC.
+    // => carefully detect that frame when doing the stack walking.
+
+    // Walk up to the right frame using the "tracing_fp" argument.
+    frame cur_frame = os::current_frame(); // Current C frame.
+
+    while (cur_frame.fp() != tracing_fp) {
+      cur_frame = os::get_sender_for_C_frame(&cur_frame);
+    }
+
+    // Safely create a frame and call frame::describe.
+    intptr_t *dump_sp = cur_frame.sender_sp();
+    intptr_t *dump_fp = cur_frame.link();
+
+    bool walkable = has_mh; // Whether the traced frame should be walkable.
+
+    // The sender for cur_frame is the caller of trace_method_handle.
+    if (walkable) {
+      // The previous definition of walkable may have to be refined
+      // if new call sites cause the next frame constructor to start
+      // failing. Alternatively, frame constructors could be
+      // modified to support the current or future non walkable
+      // frames (but this is more intrusive and is not considered as
+      // part of this RFE, which will instead use a simpler output).
+      frame dump_frame = frame(dump_sp);
+      dump_frame.describe(values, 1);
+    } else {
+      // Robust dump for frames which cannot be constructed from sp/younger_sp
+      // Add descriptions without building a Java frame to avoid issues.
+      values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+      values.describe(-1, dump_sp, "sp");
+    }
+
+    bool has_args = has_mh; // Whether Z_esp is meaningful.
+
+    // Mark args, if seems valid (may not be valid for some adapters).
+    if (has_args) {
+      if ((args >= dump_sp) && (args < dump_fp)) {
+        values.describe(-1, args, "*Z_esp");
+      }
+    }
+
+    // Note: the unextended_sp may not be correct.
+    tty->print_cr("  stack layout:");
+    values.print(p);
+    if (has_mh && mh->is_oop()) {
+      mh->print();
+      if (java_lang_invoke_MethodHandle::is_instance(mh)) {
+        if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) {
+          java_lang_invoke_MethodHandle::form(mh)->print();
+        }
+      }
+    }
+  }
+}
+
+// Emit code that calls trace_method_handle_stub with the current adapter
+// state; preserves the argument registers and the return pc around the call.
+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
+  if (!TraceMethodHandles) { return; }
+
+  BLOCK_COMMENT("trace_method_handle {");
+
+  // Save argument registers (they are used in raise exception stub).
+  __ z_stg(Z_ARG1, Address(Z_SP, 16));
+  __ z_stg(Z_ARG2, Address(Z_SP, 24));
+  __ z_stg(Z_ARG3, Address(Z_SP, 32));
+  __ z_stg(Z_ARG4, Address(Z_SP, 40));
+  __ z_stg(Z_ARG5, Address(Z_SP, 48));
+
+  // Setup arguments.
+  __ z_lgr(Z_ARG2, Z_ARG4); // mh, see generate_method_handle_interpreter_entry()
+  __ z_lgr(Z_ARG3, Z_R10);  // sender_sp
+  __ z_lgr(Z_ARG4, Z_esp);  // args
+  __ load_const_optimized(Z_ARG1, (void *)adaptername);
+  __ z_lgr(Z_ARG5, Z_SP);   // tracing_fp
+  __ save_return_pc();      // saves Z_R14
+  __ push_frame_abi160(0);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub));
+  __ pop_frame();
+  __ restore_return_pc();   // restores to Z_R14
+  // Restore argument registers from the slots they were saved to above.
+  __ z_lg(Z_ARG1, Address(Z_SP, 16));
+  __ z_lg(Z_ARG2, Address(Z_SP, 24));
+  __ z_lg(Z_ARG3, Address(Z_SP, 32));
+  __ z_lg(Z_ARG4, Address(Z_SP, 40));
+  __ z_lg(Z_ARG5, Address(Z_SP, 48)); // Fix: was offset 45 -- misaligned and mismatching the store at offset 48.
+  __ zap_from_to(Z_SP, Z_SP, Z_R0, Z_R1, 50, -1);
+  __ zap_from_to(Z_SP, Z_SP, Z_R0, Z_R1, -1, 5);
+
+  BLOCK_COMMENT("} trace_method_handle");
+}
+#endif // !PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/methodHandles_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Platform-specific definitions for method handles.
+// These definitions are inlined into class MethodHandles.
+
+  // Adapters
+  enum /* platform_dependent_constants */ {
+    adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000))
+  };
+
+  // Additional helper methods for MethodHandles code generation:
+ public:
+  static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg);
+
+  static void verify_klass(MacroAssembler* _masm,
+                           Register obj_reg, SystemDictionary::WKID klass_id,
+                           Register temp_reg, Register temp2_reg,
+                           const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
+
+  // Debug check that mh_reg holds a java.lang.invoke.MethodHandle instance.
+  static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
+                                   Register temp_reg, Register temp2_reg) {
+    verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle),
+                 temp_reg, temp2_reg,
+                 "reference is a MH");
+  }
+
+  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
+  // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+  // Takes care of special dispatch from single stepping too.
+  static void jump_from_method_handle(MacroAssembler* _masm, Register method,
+                                      Register temp, Register temp2,
+                                      bool for_compiler_entry);
+
+  static void jump_to_lambda_form(MacroAssembler* _masm,
+                                  Register recv, Register method_temp,
+                                  Register temp2, Register temp3,
+                                  bool for_compiler_entry);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/nativeInst_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,690 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by JL, LS
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/ostream.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+#define LUCY_DBG
+
+//-------------------------------------
+//  N a t i v e I n s t r u c t i o n
+//-------------------------------------
+
+// Define this switch to prevent identity updates.
+// In high-concurrency scenarios, it is beneficial to prevent
+// identity updates. It has a positive effect on cache line steals
+// and invalidations.
+// Test runs of JVM98, JVM2008, and JBB2005 show a very low frequency
+// of identity updates. Detection is therefore disabled.
+#undef SUPPRESS_IDENTITY_UPDATE
+
+// Sanity check: is this NativeInstruction located at a plausible
+// instruction address? The instruction itself is not decoded.
+void NativeInstruction::verify() {
+  // Make sure code pattern is actually an instruction address.
+  // Do not allow:
+  //  - NULL
+  //  - any address in first page (0x0000 .. 0x0fff)
+  //  - odd address (will cause a "specification exception")
+  address addr = addr_at(0);
+  // Note: the first-page check (masking off the low 12 bits) subsumes the NULL check.
+  if ((addr == 0) || (((unsigned long)addr & ~0x0fff) == 0) || ((intptr_t)addr & 1) != 0) {
+    tty->print_cr(INTPTR_FORMAT ": bad instruction address", p2i(addr));
+    fatal("not an instruction address");
+  }
+}
+
+// Print location and value (hex representation) of current NativeInstruction.
+// If msg != NULL it is appended to the line; filler blanks keep messages for
+// 2-, 4-, and 6-byte instructions aligned in the output.
+void NativeInstruction::print(const char* msg) const {
+  int len = Assembler::instr_len(addr_at(0));  // z/Architecture instructions are 2, 4, or 6 bytes.
+  if (msg == NULL) { // Output line without trailing blanks.
+    switch (len) {
+      case 2: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x",             p2i(addr_at(0)), len, halfword_at(0));                                 break;
+      case 4: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x",       p2i(addr_at(0)), len, halfword_at(0), halfword_at(2));                 break;
+      case 6: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x %4.4x", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), halfword_at(4)); break;
+      default: // Never reached. instr_len() always returns one of the above values. Keep the compiler happy.
+        ShouldNotReachHere();
+        break;
+    }
+  } else { // Output line with filler blanks to have msg aligned.
+    switch (len) {
+      case 2: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x           %s",   p2i(addr_at(0)), len, halfword_at(0), msg);                                 break;
+      case 4: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x      %s",  p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), msg);                 break;
+      case 6: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x %4.4x %s", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), halfword_at(4), msg); break;
+      default: // Never reached. instr_len() always returns one of the above values. Keep the compiler happy.
+        ShouldNotReachHere();
+        break;
+    }
+  }
+}
+// Convenience overload: print without a trailing message.
+void NativeInstruction::print() const {
+  print(NULL);
+}
+
+// Hex-Dump of storage around current NativeInstruction. Also try disassembly.
+// 'range' is forwarded to Assembler::dump_code_range (presumably the number of
+// bytes around the instruction — confirm against its implementation).
+void NativeInstruction::dump(const unsigned int range, const char* msg) const {
+  Assembler::dump_code_range(tty, addr_at(0), range, (msg == NULL) ? "":msg);
+}
+
+// Dump without a caption.
+void NativeInstruction::dump(const unsigned int range) const {
+  dump(range, NULL);
+}
+
+// Dump with default range and no caption.
+void NativeInstruction::dump() const {
+  dump(32, NULL);
+}
+
+// Store a 2-byte value into the instruction stream at 'offset' and make the
+// change visible to instruction fetch via ICache invalidation.
+void NativeInstruction::set_halfword_at(int offset, short i) {
+  address addr = addr_at(offset);
+#ifndef SUPPRESS_IDENTITY_UPDATE
+  *(short*)addr = i;          // Unconditional store (identity-update detection disabled above).
+#else
+  if (*(short*)addr != i) {   // Store only if the value actually changes,
+    *(short*)addr = i;        // avoiding needless cache line invalidation.
+  }
+#endif
+  ICache::invalidate_word(addr);
+}
+
+// Store a 4-byte value into the instruction stream at 'offset' and make the
+// change visible to instruction fetch via ICache invalidation.
+void NativeInstruction::set_word_at(int offset, int i) {
+  address addr = addr_at(offset);
+#ifndef SUPPRESS_IDENTITY_UPDATE
+  *(int*)addr = i;            // Unconditional store (identity-update detection disabled above).
+#else
+  if (*(int*)addr != i) {     // Store only if the value actually changes.
+    *(int*)addr = i;
+  }
+#endif
+  ICache::invalidate_word(addr);
+}
+
+// Store an 8-byte value into the instruction stream at 'offset' and make the
+// change visible to instruction fetch via ICache invalidation.
+void NativeInstruction::set_jlong_at(int offset, jlong i) {
+  address addr = addr_at(offset);
+#ifndef SUPPRESS_IDENTITY_UPDATE
+  *(jlong*)addr = i;          // Unconditional store (identity-update detection disabled above).
+#else
+  if (*(jlong*)addr != i) {   // Store only if the value actually changes.
+    *(jlong*)addr = i;
+  }
+#endif
+  // Don't need to invalidate 2 words here, because
+  // the flush instruction operates on doublewords.
+  ICache::invalidate_word(addr);
+}
+
+#undef  SUPPRESS_IDENTITY_UPDATE
+
+//------------------------------------------------------------
+
+// The halfword pattern used as an "illtrap": main opcode 0x00 is not a valid
+// z/Architecture instruction and raises a SIGILL when executed (see is_illegal()).
+int NativeInstruction::illegal_instruction() {
+  return 0;
+}
+
+// Check whether the 2 bytes preceding the current pc form an illtrap
+// (opcode 0x00). Used from signal handling, where the pc already points
+// past the faulting instruction.
+bool NativeInstruction::is_illegal() {
+  // An instruction with main opcode 0x00 (leftmost byte) is not a valid instruction
+  // (and will never be) and causes a SIGILL where the pc points to the next instruction.
+  // The caller of this method wants to know if such a situation exists at the current pc.
+  //
+  // The result of this method is unsharp with respect to the following facts:
+  // - Stepping backwards in the instruction stream is not possible on z/Architecture.
+  // - z/Architecture instructions are 2, 4, or 6 bytes in length.
+  // - The instruction length is coded in the leftmost two bits of the main opcode.
+  // - The result is exact if the caller knows by some other means that the
+  //   instruction is of length 2.
+  //
+  // If this method returns false, then the 2-byte instruction at *-2 is not a 0x00 opcode.
+  // If this method returns true, then the 2-byte instruction at *-2 is a 0x00 opcode.
+  return halfword_at(-2) == illegal_instruction();
+}
+
+// We use an illtrap for marking a method as not_entrant or zombie.
+// Returns true iff the SIGILL at the current pc was raised by the illtrap
+// patched into the verified entry point of an nmethod (see
+// NativeJump::patch_verified_entry).
+bool NativeInstruction::is_sigill_zombie_not_entrant() {
+  if (!is_illegal()) return false; // Just a quick path.
+
+  // One-sided error of is_illegal tolerable here
+  // (see implementation of is_illegal() for details).
+
+  // Only code inside an nmethod can carry the not_entrant/zombie marker.
+  CodeBlob* cb = CodeCache::find_blob_unsafe(addr_at(0));
+  if (cb == NULL || !cb->is_nmethod()) {
+    return false;
+  }
+
+  nmethod *nm = (nmethod *)cb;
+  // This method is not_entrant or zombie if the illtrap instruction
+  // is located at the verified entry point.
+  // BE AWARE: the current pc (this) points to the instruction after the
+  // "illtrap" location.
+  address sig_addr = ((address) this) - 2;
+  return nm->verified_entry_point() == sig_addr;
+}
+
+// True iff the instruction at the current pc is a long pc-relative branch
+// (as recognized by MacroAssembler::is_branch_pcrelative_long).
+bool NativeInstruction::is_jump() {
+  unsigned long inst;
+  Assembler::get_instruction((address)this, &inst);
+  return MacroAssembler::is_branch_pcrelative_long(inst);
+}
+
+//---------------------------------------------------
+//  N a t i v e I l l e g a l I n s t r u c t i o n
+//---------------------------------------------------
+
+// Patch a 2-byte illtrap (opcode 0x00) at code_pos.
+// set_halfword_at takes care of the ICache invalidation.
+void NativeIllegalInstruction::insert(address code_pos) {
+  NativeIllegalInstruction* nii = (NativeIllegalInstruction*) nativeInstruction_at(code_pos);
+  nii->set_halfword_at(0, illegal_instruction());
+}
+
+//-----------------------
+//  N a t i v e C a l l
+//-----------------------
+
+// Guarantee that the code pattern at this site is a NativeCall.
+void NativeCall::verify() {
+  if (!NativeCall::is_call_at(addr_at(0))) {
+    fatal("this is not a `NativeCall' site");
+  }
+}
+
+// Return the target of this call site. Two patterns are handled:
+//  - pc-relative call (nop + brasl): target is decoded from the
+//    branch displacement;
+//  - otherwise: the target was materialized as a constant, so this site is
+//    reinterpreted as a NativeMovConstReg and its data is the target.
+address NativeCall::destination() const {
+  if (MacroAssembler::is_call_far_pcrelative(instruction_address())) {
+    // Skip the leading alignment nop; 'here' is the address of the brasl itself.
+    address here = addr_at(MacroAssembler::nop_size());
+    return MacroAssembler::get_target_addr_pcrel(here);
+  }
+
+  return (address)((NativeMovConstReg *)this)->data();
+}
+
+// Similar to replace_mt_safe, but just changes the destination. The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times. Thus, the displacement field must be
+// 4-byte-aligned. We enforce this on z/Architecture by inserting a nop
+// instruction in front of 'brasl' when needed.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+void NativeCall::set_destination_mt_safe(address dest) {
+  if (MacroAssembler::is_call_far_pcrelative(instruction_address())) {
+    // 'iaddr' is the brasl instruction, following the alignment nop.
+    address iaddr = addr_at(MacroAssembler::nop_size());
+    // Ensure that patching is atomic hence mt safe.
+    assert(((long)addr_at(MacroAssembler::call_far_pcrelative_size()) & (call_far_pcrelative_displacement_alignment-1)) == 0,
+           "constant must be 4-byte aligned");
+    // Overwrite only the (aligned) 4-byte displacement at the end of the pattern.
+    set_word_at(MacroAssembler::call_far_pcrelative_size() - 4, Assembler::z_pcrel_off(dest, iaddr));
+  } else {
+    // Indirect call: redirect by patching the constant (in the TOC), not the code.
+    assert(MacroAssembler::is_load_const_from_toc(instruction_address()), "unsupported instruction");
+    nativeMovConstReg_at(instruction_address())->set_data(((intptr_t)dest));
+  }
+}
+
+//-----------------------------
+//  N a t i v e F a r C a l l
+//-----------------------------
+
+// Run the generic instruction-address checks first, then guarantee
+// that the code pattern at this site is a NativeFarCall.
+void NativeFarCall::verify() {
+  NativeInstruction::verify();
+  if (!NativeFarCall::is_far_call_at(addr_at(0))) {
+    fatal("not a NativeFarCall");
+  }
+}
+
+// Return the target of this patchable far call, skipping a leading
+// alignment nop if the pattern requires one.
+address NativeFarCall::destination() {
+  assert(MacroAssembler::is_call_far_patchable_at((address)this), "unexpected call type");
+  address ctable = NULL;  // No constant-table base available here.
+  if (MacroAssembler::call_far_patchable_requires_alignment_nop((address)this)) {
+    return MacroAssembler::get_dest_of_call_far_patchable_at(((address)this)+MacroAssembler::nop_size(), ctable);
+  } else {
+    return MacroAssembler::get_dest_of_call_far_patchable_at((address)this, ctable);
+  }
+}
+
+
+// Handles both patterns of patchable far calls.
+// 'toc_offset' is the offset of the call target's constant-table entry,
+// relative to the constant table; it is only used for the TOC-based pattern.
+void NativeFarCall::set_destination(address dest, int toc_offset) {
+  address inst_addr = (address)this;
+
+  // Set new destination (implementation of call may change here).
+  assert(MacroAssembler::is_call_far_patchable_at(inst_addr), "unexpected call type");
+
+  if (!MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) {
+    address ctable = CodeCache::find_blob(inst_addr)->ctable_begin();
+    // Need distance of TOC entry from current instruction.
+    toc_offset = (ctable + toc_offset) - inst_addr;
+    // Call is via constant table entry.
+    MacroAssembler::set_dest_of_call_far_patchable_at(inst_addr, dest, toc_offset);
+  } else {
+    // Here, we have a pc-relative call (brasl).
+    // Be aware: dest may have moved in this case, so really patch the displacement,
+    // when necessary!
+    // This while loop will also consume the nop which always precedes a call_far_pcrelative.
+    // We need to revert this after the loop. Pc-relative calls are always assumed to have a leading nop.
+    unsigned int nop_sz    = MacroAssembler::nop_size();
+    unsigned int nop_bytes = 0;
+    while(MacroAssembler::is_z_nop(inst_addr+nop_bytes)) {
+      nop_bytes += nop_sz;
+    }
+    if (nop_bytes > 0) {
+      // Step back over the last nop so inst_addr points at the
+      // (nop + brasl) call_far_pcrelative pattern.
+      inst_addr += nop_bytes - nop_sz;
+    }
+
+    assert(MacroAssembler::is_call_far_pcrelative(inst_addr), "not a pc-relative call");
+    // Patch only if the target really changed, to avoid needless ICache traffic.
+    address target = MacroAssembler::get_target_addr_pcrel(inst_addr + nop_sz);
+    if (target != dest) {
+      NativeCall *call = nativeCall_at(inst_addr);
+      call->set_destination_mt_safe(dest);
+    }
+  }
+}
+
+//-------------------------------------
+//  N a t i v e M o v C o n s t R e g
+//-------------------------------------
+
+// Do not use an assertion here. Let clients decide whether they only
+// want this when assertions are enabled.
+// Accepts any of the instruction patterns a NativeMovConstReg site can
+// take on this platform (TOC load, inline const, narrow oop/klass load
+// or compare, or a pc-relative instruction), possibly behind leading nops.
+void NativeMovConstReg::verify() {
+  address   loc = addr_at(0);
+
+  // This while loop will also consume the nop which always precedes a
+  // call_far_pcrelative.  We need to revert this after the
+  // loop. Pc-relative calls are always assumed to have a leading nop.
+  unsigned int nop_sz    = MacroAssembler::nop_size();
+  unsigned int nop_bytes = 0;
+  while(MacroAssembler::is_z_nop(loc+nop_bytes)) {
+    nop_bytes += nop_sz;
+  }
+
+  if (nop_bytes > 0) {
+    // A call_far_pcrelative (which includes one leading nop) also passes verification.
+    if (MacroAssembler::is_call_far_pcrelative(loc+nop_bytes-nop_sz)) return;
+    loc += nop_bytes;
+  }
+
+  if (!MacroAssembler::is_load_const_from_toc(loc)            &&    // Load const from TOC.
+      !MacroAssembler::is_load_const(loc)                     &&    // Load const inline.
+      !MacroAssembler::is_load_narrow_oop(loc)                &&    // Load narrow oop.
+      !MacroAssembler::is_load_narrow_klass(loc)              &&    // Load narrow Klass ptr.
+      !MacroAssembler::is_compare_immediate_narrow_oop(loc)   &&    // Compare immediate narrow.
+      !MacroAssembler::is_compare_immediate_narrow_klass(loc) &&    // Compare immediate narrow.
+      !MacroAssembler::is_pcrelative_instruction(loc)) {            // Just to make it run.
+    tty->cr();
+    tty->print_cr("NativeMovConstReg::verify(): verifying addr %p(0x%x), %d leading nops", loc, *(uint*)loc, nop_bytes/nop_sz);
+    tty->cr();
+    ((NativeMovConstReg*)loc)->dump(64, "NativeMovConstReg::verify()");
+#ifdef LUCY_DBG
+    VM_Version::z_SIGSEGV();   // Debug aid: force a SIGSEGV so a core/debugger shows the site.
+#endif
+    fatal("this is not a `NativeMovConstReg' site");
+  }
+}
+
+// Return the address of the instruction following the NativeMovConstReg
+// pattern found at 'offset'. The pattern's length depends on which of the
+// supported instruction forms is present. Unrecognized patterns are fatal.
+address NativeMovConstReg::next_instruction_address(int offset) const  {
+  address inst_addr = addr_at(offset);
+
+  // Load address (which is a constant) pc-relative.
+  if (MacroAssembler::is_load_addr_pcrel(inst_addr))                  { return addr_at(offset+MacroAssembler::load_addr_pcrel_size()); }
+
+  // Load constant from TOC.
+  if (MacroAssembler::is_load_const_from_toc(inst_addr))              { return addr_at(offset+MacroAssembler::load_const_from_toc_size()); }
+
+  // Load constant inline.
+  if (MacroAssembler::is_load_const(inst_addr))                       { return addr_at(offset+MacroAssembler::load_const_size()); }
+
+  // Load constant narrow inline.
+  if (MacroAssembler::is_load_narrow_oop(inst_addr))                  { return addr_at(offset+MacroAssembler::load_narrow_oop_size()); }
+  if (MacroAssembler::is_load_narrow_klass(inst_addr))                { return addr_at(offset+MacroAssembler::load_narrow_klass_size()); }
+
+  // Compare constant narrow inline.
+  if (MacroAssembler::is_compare_immediate_narrow_oop(inst_addr))     { return addr_at(offset+MacroAssembler::compare_immediate_narrow_oop_size()); }
+  if (MacroAssembler::is_compare_immediate_narrow_klass(inst_addr))   { return addr_at(offset+MacroAssembler::compare_immediate_narrow_klass_size()); }
+
+  if (MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) { return addr_at(offset+MacroAssembler::call_far_patchable_size()); }
+
+  // Generic fallback: any other pc-relative instruction advances by its own length.
+  if (MacroAssembler::is_pcrelative_instruction(inst_addr))           { return addr_at(offset+Assembler::instr_len(inst_addr)); }
+
+  ((NativeMovConstReg*)inst_addr)->dump(64, "NativeMovConstReg site is not recognized as such");
+#ifdef LUCY_DBG
+  VM_Version::z_SIGSEGV();   // Debug aid: force a SIGSEGV so a core/debugger shows the site.
+#else
+  guarantee(false, "Not a NativeMovConstReg site");
+#endif
+  return NULL;
+}
+
+// Extract the constant loaded by this NativeMovConstReg.
+// Only inline constants and TOC-based constants carry extractable data;
+// narrow oop/klass patterns are an error here.
+intptr_t NativeMovConstReg::data() const {
+  address loc = addr_at(0);
+  if (MacroAssembler::is_load_const(loc)) {
+    return MacroAssembler::get_const(loc);
+  } else if (MacroAssembler::is_load_narrow_oop(loc)              ||
+             MacroAssembler::is_compare_immediate_narrow_oop(loc) ||
+             MacroAssembler::is_load_narrow_klass(loc)            ||
+             MacroAssembler::is_compare_immediate_narrow_klass(loc)) {
+    ((NativeMovConstReg*)loc)->dump(32, "NativeMovConstReg::data(): cannot extract data from narrow ptr (oop or klass)");
+#ifdef LUCY_DBG
+    VM_Version::z_SIGSEGV();
+#else
+    ShouldNotReachHere();
+#endif
+    // Deliberate NULL dereference: crash hard if the guards above ever fall through.
+    return *(intptr_t *)NULL;
+  } else {
+    // Otherwise, assume data resides in TOC. Is asserted in called method.
+    return MacroAssembler::get_const_from_toc(loc);
+  }
+}
+
+
+// Patch in a new constant.
+//
+// There are situations where we have multiple (hopefully two at most)
+// relocations connected to one instruction. Loading an oop from CP
+// using pcrelative addressing would be one such example. Here we have an
+// oop relocation, modifying the oop itself, and an internal word relocation,
+// modifying the relative address.
+//
+// NativeMovConstReg::set_data is then called once for each relocation. To be
+// able to distinguish between the relocations, we use a rather dirty hack:
+//
+// All calls that deal with an internal word relocation to fix their relative
+// address are on a faked, odd instruction address. The instruction can be
+// found on the next lower, even address.
+//
+// All other calls are "normal", i.e. on even addresses.
+// Patch the constant at this site (see the pattern discussion above) and
+// invalidate the ICache for the patched range. Returns the address of the
+// instruction following the site, for use by the caller's relocation walk.
+// Not MT-safe for the inline-constant case (see comment there).
+address NativeMovConstReg::set_data_plain(intptr_t src, CodeBlob *cb) {
+  unsigned long x = (unsigned long)src;
+  address loc = instruction_address();
+  address next_address;
+
+  if (MacroAssembler::is_load_addr_pcrel(loc)) {
+    MacroAssembler::patch_target_addr_pcrel(loc, (address)src);
+    ICache::invalidate_range(loc, MacroAssembler::load_addr_pcrel_size());
+    next_address = next_instruction_address();
+  } else if (MacroAssembler::is_load_const_from_toc(loc)) {  // Load constant from TOC.
+    // Only the TOC slot is updated; the code itself is unchanged,
+    // so no ICache invalidation is needed here.
+    MacroAssembler::set_const_in_toc(loc, src, cb);
+    next_address = next_instruction_address();
+  } else if (MacroAssembler::is_load_const(loc)) {
+    // Not mt safe, ok in methods like CodeBuffer::copy_code().
+    MacroAssembler::patch_const(loc, x);
+    ICache::invalidate_range(loc, MacroAssembler::load_const_size());
+    next_address = next_instruction_address();
+  }
+  // cOops
+  else if (MacroAssembler::is_load_narrow_oop(loc)) {
+    MacroAssembler::patch_load_narrow_oop(loc, (oop) (void*) x);
+    ICache::invalidate_range(loc, MacroAssembler::load_narrow_oop_size());
+    next_address = next_instruction_address();
+  }
+  // compressed klass ptrs
+  else if (MacroAssembler::is_load_narrow_klass(loc)) {
+    MacroAssembler::patch_load_narrow_klass(loc, (Klass*)x);
+    ICache::invalidate_range(loc, MacroAssembler::load_narrow_klass_size());
+    next_address = next_instruction_address();
+  }
+  // cOops
+  else if (MacroAssembler::is_compare_immediate_narrow_oop(loc)) {
+    MacroAssembler::patch_compare_immediate_narrow_oop(loc, (oop) (void*) x);
+    ICache::invalidate_range(loc, MacroAssembler::compare_immediate_narrow_oop_size());
+    next_address = next_instruction_address();
+  }
+  // compressed klass ptrs
+  else if (MacroAssembler::is_compare_immediate_narrow_klass(loc)) {
+    MacroAssembler::patch_compare_immediate_narrow_klass(loc, (Klass*)x);
+    ICache::invalidate_range(loc, MacroAssembler::compare_immediate_narrow_klass_size());
+    next_address = next_instruction_address();
+  }
+  else if (MacroAssembler::is_call_far_patchable_pcrelative_at(loc)) {
+    assert(ShortenBranches, "Wait a minute! A pc-relative call w/o ShortenBranches?");
+    // This NativeMovConstReg site does not need to be patched. It was
+    // patched when it was converted to a call_pcrelative site
+    // before. The value of the src argument is not related to the
+    // branch target.
+    next_address = next_instruction_address();
+  }
+
+  else {
+    tty->print_cr("WARNING: detected an unrecognized code pattern at loc = %p -> 0x%8.8x %8.8x",
+                  loc, *((unsigned int*)loc), *((unsigned int*)(loc+4)));
+    next_address = next_instruction_address(); // Failure should be handled in next_instruction_address().
+#ifdef LUCY_DBG
+    VM_Version::z_SIGSEGV();   // Debug aid: force a SIGSEGV so a core/debugger shows the site.
+#endif
+  }
+
+  return next_address;
+}
+
+// Divided up in set_data_plain() which patches the instruction in the
+// code stream and set_data() which additionally patches the oop pool
+// if necessary.
+void NativeMovConstReg::set_data(intptr_t src) {
+  // Also store the value into an oop_Relocation cell, if any.
+  CodeBlob *cb = CodeCache::find_blob(instruction_address());
+  address next_address = set_data_plain(src, cb);
+
+  // Keep the oop relocation (if present) consistent with the patched code.
+  relocInfo::update_oop_pool(instruction_address(), next_address, (address)src, cb);
+}
+
+// Patch a narrow (compressed) oop immediate at this site; the site must be
+// either a narrow-oop load or a narrow-oop compare-immediate.
+// The patch_* helpers return the number of bytes to invalidate.
+void NativeMovConstReg::set_narrow_oop(intptr_t data) {
+  const address start = addr_at(0);
+  int           range = 0;
+  if (MacroAssembler::is_load_narrow_oop(start)) {
+    range = MacroAssembler::patch_load_narrow_oop(start, cast_to_oop <intptr_t> (data));
+  } else if (MacroAssembler::is_compare_immediate_narrow_oop(start)) {
+    range = MacroAssembler::patch_compare_immediate_narrow_oop(start, cast_to_oop <intptr_t>(data));
+  } else {
+    fatal("this is not a `NativeMovConstReg::narrow_oop' site");
+  }
+  ICache::invalidate_range(start, range);
+}
+
+// Compressed klass ptrs. Patch a narrow klass constant at this site; the site
+// must be either a narrow-klass load or a narrow-klass compare-immediate.
+// The patch_* helpers return the number of bytes to invalidate.
+void NativeMovConstReg::set_narrow_klass(intptr_t data) {
+  const address start = addr_at(0);
+  int           range = 0;
+  if (MacroAssembler::is_load_narrow_klass(start)) {
+    range = MacroAssembler::patch_load_narrow_klass(start, (Klass*)data);
+  } else if (MacroAssembler::is_compare_immediate_narrow_klass(start)) {
+    range = MacroAssembler::patch_compare_immediate_narrow_klass(start, (Klass*)data);
+  } else {
+    fatal("this is not a `NativeMovConstReg::narrow_klass' site");
+  }
+  ICache::invalidate_range(start, range);
+}
+
+// Patch the pc-relative target address of this site to 'newTarget'.
+// Handles pc-relative address loads, pc-relatively addressed TOC loads,
+// and (as an already-patched no-op case) pc-relative far calls.
+// If copy_back_to_oop_pool is set, an oop relocation covering this site is
+// treated as an error: a pcrel address must not be mirrored into the oop pool.
+// 'passed_nm' is currently unused; it is kept for interface compatibility.
+void NativeMovConstReg::set_pcrel_addr(intptr_t newTarget, CompiledMethod *passed_nm /* = NULL */, bool copy_back_to_oop_pool) {
+  address next_address;
+  address loc = addr_at(0);
+
+  if (MacroAssembler::is_load_addr_pcrel(loc)) {
+    // Load address pc-relative: patch the displacement in place.
+    MacroAssembler::patch_target_addr_pcrel(loc, (address)newTarget);
+    ICache::invalidate_range(loc, MacroAssembler::load_addr_pcrel_size());
+    next_address = loc + MacroAssembler::load_addr_pcrel_size();
+  } else if (MacroAssembler::is_load_const_from_toc_pcrelative(loc)) {  // Load constant from TOC.
+    MacroAssembler::patch_target_addr_pcrel(loc, (address)newTarget);
+    ICache::invalidate_range(loc, MacroAssembler::load_const_from_toc_size());
+    next_address = loc + MacroAssembler::load_const_from_toc_size();
+  } else if (MacroAssembler::is_call_far_patchable_pcrelative_at(loc)) {
+    assert(ShortenBranches, "Wait a minute! A pc-relative call w/o ShortenBranches?");
+    // Already patched when converted to a pc-relative call site; nothing to do.
+    next_address = next_instruction_address();
+  } else {
+    assert(false, "Not a NativeMovConstReg site for set_pcrel_addr");
+    next_address = next_instruction_address(); // Failure should be handled in next_instruction_address().
+  }
+
+  if (copy_back_to_oop_pool) {
+    if (relocInfo::update_oop_pool(instruction_address(), next_address, (address)newTarget, NULL)) {
+      ((NativeMovConstReg*)instruction_address())->dump(64, "NativeMovConstReg::set_pcrel_addr(): found oop reloc for pcrel_addr");
+#ifdef LUCY_DBG
+      VM_Version::z_SIGSEGV();
+#else
+      assert(false, "Ooooops: found oop reloc for pcrel_addr");
+#endif
+    }
+  }
+}
+
+// Patch the data value referenced by this pc-relatively addressed site.
+// For a TOC load, the constant-pool slot itself is updated (not the code).
+// If copy_back_to_oop_pool is set, an oop relocation covering this site is
+// treated as an error. 'passed_nm' is unused here — presumably kept for
+// interface symmetry with other platforms; confirm against callers.
+void NativeMovConstReg::set_pcrel_data(intptr_t newData, CompiledMethod *passed_nm /* = NULL */, bool copy_back_to_oop_pool) {
+  address  next_address;
+  address  loc = addr_at(0);
+
+  if (MacroAssembler::is_load_const_from_toc(loc) ) {  // Load constant from TOC.
+    // Offset is +/- 2**32 -> use long.
+    long     offset  = MacroAssembler::get_load_const_from_toc_offset(loc);
+    address  target  = MacroAssembler::get_target_addr_pcrel(loc);
+    intptr_t oldData = *(intptr_t*)target;
+    if (oldData != newData) { // Update only if data changes. Prevents cache invalidation.
+      *(intptr_t *)(target) = newData;
+    }
+
+    // ICache::invalidate_range(target, sizeof(unsigned long));  // No ICache invalidate for CP data.
+    next_address = loc + MacroAssembler::load_const_from_toc_size();
+  } else if (MacroAssembler::is_call_far_pcrelative(loc)) {
+    // A pc-relative call carries no data; report and (in debug setups) trap.
+    ((NativeMovConstReg*)loc)->dump(64, "NativeMovConstReg::set_pcrel_data() has a problem: setting data for a pc-relative call?");
+#ifdef LUCY_DBG
+    VM_Version::z_SIGSEGV();
+#else
+    assert(false, "Ooooops: setting data for a pc-relative call");
+#endif
+    next_address = next_instruction_address();
+  } else {
+    assert(false, "Not a NativeMovConstReg site for set_pcrel_data");
+    next_address = next_instruction_address(); // Failure should be handled in next_instruction_address().
+  }
+
+  if (copy_back_to_oop_pool) {
+    if (relocInfo::update_oop_pool(instruction_address(), next_address, (address)newData, NULL)) {
+      ((NativeMovConstReg*)instruction_address())->dump(64, "NativeMovConstReg::set_pcrel_data(): found oop reloc for pcrel_data");
+#ifdef LUCY_DBG
+      VM_Version::z_SIGSEGV();
+#else
+      assert(false, "Ooooops: found oop reloc for pcrel_data");
+#endif
+    }
+  }
+}
+
+#ifdef COMPILER1
+//--------------------------------
+//  N a t i v e M o v R e g M e m
+//--------------------------------
+
+// Guarantee that this site is a NativeMovRegMem pattern:
+// a load-const (materializing the address) followed by one of the
+// recognized load/store instructions.
+// Fix: the dump captions previously said "NativeMovConstReg::verify()",
+// which mislabeled diagnostics produced from this method.
+void NativeMovRegMem::verify() {
+  address l1 = addr_at(0);
+  address l2 = addr_at(MacroAssembler::load_const_size());  // The memory access follows the load-const.
+
+  if (!MacroAssembler::is_load_const(l1)) {
+    tty->cr();
+    tty->print_cr("NativeMovRegMem::verify(): verifying addr " PTR_FORMAT, p2i(l1));
+    tty->cr();
+    ((NativeMovRegMem*)l1)->dump(64, "NativeMovRegMem::verify()");
+    fatal("this is not a `NativeMovRegMem' site");
+  }
+
+  unsigned long inst1;
+  Assembler::get_instruction(l2, &inst1);
+
+  // Accept the full set of load/store forms emitted for this pattern.
+  if (!Assembler::is_z_lb(inst1)                         &&
+      !Assembler::is_z_llgh(inst1)                       &&
+      !Assembler::is_z_lh(inst1)                         &&
+      !Assembler::is_z_l(inst1)                          &&
+      !Assembler::is_z_llgf(inst1)                       &&
+      !Assembler::is_z_lg(inst1)                         &&
+      !Assembler::is_z_le(inst1)                         &&
+      !Assembler::is_z_ld(inst1)                         &&
+      !Assembler::is_z_stc(inst1)                        &&
+      !Assembler::is_z_sth(inst1)                        &&
+      !Assembler::is_z_st(inst1)                         &&
+      !(Assembler::is_z_lgr(inst1) && UseCompressedOops) &&
+      !Assembler::is_z_stg(inst1)                        &&
+      !Assembler::is_z_ste(inst1)                        &&
+      !Assembler::is_z_std(inst1)) {
+    tty->cr();
+    tty->print_cr("NativeMovRegMem::verify(): verifying addr " PTR_FORMAT
+                  ": wrong or missing load or store at " PTR_FORMAT, p2i(l1), p2i(l2));
+    tty->cr();
+    ((NativeMovRegMem*)l1)->dump(64, "NativeMovRegMem::verify()");
+    fatal("this is not a `NativeMovRegMem' site");
+  }
+}
+#endif // COMPILER1
+
+//-----------------------
+//  N a t i v e J u m p
+//-----------------------
+
+// Guarantee that the code pattern at this site is a NativeJump.
+void NativeJump::verify() {
+  if (!NativeJump::is_jump_at(addr_at(0))) {
+    fatal("this is not a `NativeJump' site");
+  }
+}
+
+// Patch atomically with an illtrap.
+// Overwrites the verified entry point with a 2-byte illtrap so that threads
+// entering the method take a SIGILL, which the signal handler resolves via
+// is_sigill_zombie_not_entrant() and dispatches to 'dest'.
+// NOTE(review): 'entry' and 'dest' are unused here except for the COMPILER2
+// assert on 'dest' — presumably the handler derives the target itself; confirm.
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
+  ResourceMark rm;   // The MacroAssembler below is resource-allocated and reclaimed here.
+  int code_size = 2;
+  CodeBuffer cb(verified_entry, code_size + 1);
+  MacroAssembler* a = new MacroAssembler(&cb);
+#ifdef COMPILER2
+  assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
+#endif
+  a->z_illtrap();
+  ICache::invalidate_range(verified_entry, code_size);
+}
+
+#undef LUCY_DBG
+
+//-------------------------------------
+//  N a t i v e G e n e r a l J u m p
+//-------------------------------------
+
+#ifndef PRODUCT
+// Assert that the instruction at this site is a long pc-relative branch (brcl).
+void NativeGeneralJump::verify() {
+  unsigned long inst;
+  Assembler::get_instruction((address)this, &inst);
+  assert(MacroAssembler::is_branch_pcrelative_long(inst), "not a general jump instruction");
+}
+#endif
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
+  uint64_t instr = BRCL_ZOPC |
+                   Assembler::uimm4(Assembler::bcondAlways, 8, 48) |
+                   Assembler::simm32(RelAddr::pcrel_off32(entry, code_pos), 16, 48);
+  *(uint64_t*) code_pos = (instr << 16); // Must shift into big end, then the brcl will be written to code_pos.
+  ICache::invalidate_range(code_pos, instruction_size);
+}
+
+// MT-safely replace the 6-byte instruction at instr_addr with the one prepared
+// in code_buffer, using a single aligned 8-byte store: the upper 6 bytes come
+// from code_buffer, the lower 2 bytes (the 2 bytes after the instruction) are
+// preserved from the current contents.
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
+  assert(((intptr_t)instr_addr & (BytesPerWord-1)) == 0, "requirement for mt safe patching");
+  // Bytes_after_jump cannot change, because we own the Patching_lock.
+  assert(Patching_lock->owned_by_self(), "must hold lock to patch instruction");
+  intptr_t bytes_after_jump = (*(intptr_t*)instr_addr)  & 0x000000000000ffffL; // 2 bytes after jump.
+  intptr_t load_const_bytes = (*(intptr_t*)code_buffer) & 0xffffffffffff0000L;
+  *(intptr_t*)instr_addr = load_const_bytes | bytes_after_jump;
+  ICache::invalidate_range(instr_addr, 6);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/nativeInst_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,673 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by AHa, JL, LS
+
+#ifndef CPU_S390_VM_NATIVEINST_S390_HPP
+#define CPU_S390_VM_NATIVEINST_S390_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+
+class NativeCall;
+class NativeFarCall;
+class NativeMovConstReg;
+class NativeJump;
+#ifndef COMPILER2
+class NativeGeneralJump;
+class NativeMovRegMem;
+#endif
+class NativeInstruction;
+
+NativeCall* nativeCall_before(address return_address);
+NativeCall* nativeCall_at(address instr);
+NativeFarCall* nativeFarCall_before(address return_address);
+NativeFarCall* nativeFarCall_at(address instr);
+NativeMovConstReg* nativeMovConstReg_at(address address);
+NativeMovConstReg* nativeMovConstReg_before(address address);
+NativeJump* nativeJump_at(address address);
+#ifndef COMPILER2
+NativeMovRegMem* nativeMovRegMem_at (address address);
+NativeGeneralJump* nativeGeneralJump_at(address address);
+#endif
+NativeInstruction* nativeInstruction_at(address address);
+
+// We have interface for the following instructions:
+// - NativeInstruction
+//   - NativeCall
+//   - NativeFarCall
+//   - NativeMovConstReg
+//   - NativeMovRegMem
+//   - NativeJump
+//   - NativeGeneralJump
+//   - NativeIllegalInstruction
+// The base class for different kinds of native instruction abstractions.
+// Provides the primitive operations to manipulate code relative to this.
+
+//-------------------------------------
+//  N a t i v e I n s t r u c t i o n
+//-------------------------------------
+
+// NativeInstruction is a typed view onto raw code bytes: objects are never
+// constructed, a code address is simply cast to NativeInstruction* (see
+// nativeInstruction_at below). All accessors therefore operate relative to
+// 'this' as the instruction address.
+class NativeInstruction VALUE_OBJ_CLASS_SPEC {
+  friend class Relocation;
+
+ public:
+
+  enum z_specific_constants {
+    nop_instruction_size = 2
+  };
+
+  bool is_illegal();
+
+  // Bcrl is currently the only accepted instruction here.
+  bool is_jump();
+
+  // We use an illtrap for marking a method as not_entrant or zombie.
+  bool is_sigill_zombie_not_entrant();
+
+  bool is_safepoint_poll() {
+    // Is the current instruction a POTENTIAL read access to the polling page?
+    // The instruction's current arguments are not checked!
+    return MacroAssembler::is_load_from_polling_page(addr_at(0));
+  }
+
+  address get_poll_address(void *ucontext) {
+    // Extract poll address from instruction and ucontext.
+    return MacroAssembler::get_poll_address(addr_at(0), ucontext);
+  }
+
+  uint get_poll_register() {
+    // Extract poll register from instruction.
+    return MacroAssembler::get_poll_register(addr_at(0));
+  }
+
+  bool is_memory_serialization(JavaThread *thread, void *ucontext) {
+    // Is the current instruction a write access of thread to the
+    // memory serialization page?
+    return MacroAssembler::is_memory_serialization(long_at(0), thread, ucontext);
+  }
+
+ public:
+
+  // The output of __ breakpoint_trap().
+  static int illegal_instruction();
+
+  // The address of the currently processed instruction.
+  address instruction_address() const { return addr_at(0); }
+
+ protected:
+  // Address of the byte at the given offset from this instruction.
+  address addr_at(int offset) const { return address(this) + offset; }
+
+  // z/Architecture terminology
+  //   halfword   = 2 bytes
+  //   word       = 4 bytes
+  //   doubleword = 8 bytes
+  unsigned short halfword_at(int offset) const { return *(unsigned short*)addr_at(offset); }
+  int  word_at(int offset)               const { return *(jint*)addr_at(offset); }
+  long long_at(int offset)               const { return *(jlong*)addr_at(offset); }
+  void set_halfword_at(int offset, short i); // Deals with I-cache.
+  void set_word_at(int offset, int i);       // Deals with I-cache.
+  void set_jlong_at(int offset, jlong i);    // Deals with I-cache.
+  void set_addr_at(int offset, address x);   // Deals with I-cache.
+
+  // Debugging aids: print/disassemble the instruction (and surroundings).
+  void print() const;
+  void print(const char* msg) const;
+  void dump() const;
+  void dump(const unsigned int range) const;
+  void dump(const unsigned int range, const char* msg) const;
+
+ public:
+
+  void verify();
+
+  // unit test stuff
+  static void test() {}                        // Override for testing.
+
+  // Cast a code address to a NativeInstruction*, verifying in debug builds.
+  friend NativeInstruction* nativeInstruction_at(address address) {
+    NativeInstruction* inst = (NativeInstruction*)address;
+    #ifdef ASSERT
+      inst->verify();
+    #endif
+    return inst;
+  }
+};
+
+//---------------------------------------------------
+//  N a t i v e I l l e g a l I n s t r u c t i o n
+//---------------------------------------------------
+
+// Abstraction for a 2-byte illegal (trap) instruction, as planted e.g. by
+// NativeJump::patch_verified_entry().
+class NativeIllegalInstruction: public NativeInstruction {
+ public:
+  enum z_specific_constants {
+    instruction_size = 2
+  };
+
+  // Insert illegal opcode at specific address.
+  static void insert(address code_pos);
+};
+
+//-----------------------
+//  N a t i v e C a l l
+//-----------------------
+
+// The NativeCall is an abstraction for accessing/manipulating call
+// instructions. It is used to manipulate inline caches, primitive &
+// dll calls, etc.
+
+// A native call, as defined by this abstraction layer, consists of
+// all instructions required to set up for and actually make the call.
+//
+// On z/Architecture, there exist three different forms of native calls:
+// 1) Call with pc-relative address, 1 instruction
+//    The location of the target function is encoded as relative address
+//    in the call instruction. The short form (BRAS) allows for a
+//    16-bit signed relative address (in 2-byte units). The long form
+//    (BRASL) allows for a 32-bit signed relative address (in 2-byte units).
+// 2) Call with immediate address, 3 or 5 instructions.
+//    The location of the target function is given by an immediate
+//    constant which is loaded into a (scratch) register. Depending on
+//    the hardware capabilities, this takes 2 or 4 instructions.
+//    The call itself is then a "call by register"(BASR) instruction.
+// 3) Call with address from constant pool, 2(3) instructions (with dynamic TOC)
+//    The location of the target function is stored in the constant pool
+//    during compilation. From there it is loaded into a (scratch) register.
+//    The call itself is then a "call by register"(BASR) instruction.
+//
+// When initially generating a call, the compiler uses form 2) (not
+// patchable, target address constant, e.g. runtime calls) or 3) (patchable,
+// target address might eventually get relocated). Later in the process,
+// a call could be transformed into form 1) (also patchable) during ShortenBranches.
+//
+// If a call is/has to be patchable, the instruction sequence generated for it
+// has to be constant in length. Excessive space, created e.g. by ShortenBranches,
+// is allocated to lower addresses and filled with nops. That is necessary to
+// keep the return address constant, no matter what form the call has.
+// Methods dealing with such calls have "patchable" as part of their name.
+
+class NativeCall: public NativeInstruction {
+ public:
+
+  // Offset (in bytes) of the inline-cache load within a java_to_interp stub.
+  static int get_IC_pos_in_java_to_interp_stub() {
+    return 0;
+  }
+
+  enum z_specific_constants {
+    instruction_size                           = 18, // Used in shared code for calls with reloc_info:
+                                                     // value correct if !has_long_displacement_fast().
+    call_far_pcrelative_displacement_offset    =  4, // Includes 2 bytes for the nop.
+    call_far_pcrelative_displacement_alignment =  4
+  };
+
+
+  // Maximum size (in bytes) of a call to an absolute address.
+  // Used when emitting call to deopt handler blob, which is a
+  // "load_const_call". The code pattern is:
+  //   tmpReg := load_const(address);   (* depends on CPU ArchLvl, but is otherwise constant *)
+  //   call(tmpReg);                    (* basr, 2 bytes *)
+  static unsigned int max_instruction_size() {
+    return MacroAssembler::load_const_size() + MacroAssembler::call_byregister_size();
+  }
+
+  // address instruction_address() const { return addr_at(0); }
+
+  // For the ordering of the checks see note at nativeCall_before.
+  address next_instruction_address() const  {
+    address iaddr = instruction_address();
+
+    if (MacroAssembler::is_load_const_call(iaddr)) {
+      // Form 2): load_const, BASR
+      return addr_at(MacroAssembler::load_const_call_size());
+    }
+
+    if (MacroAssembler::is_load_const_from_toc_call(iaddr)) {
+      // Form 3): load_const_from_toc (LARL+LG/LGRL), BASR.
+      return addr_at(MacroAssembler::load_const_from_toc_call_size());
+    }
+
+    if (MacroAssembler::is_call_far_pcrelative(iaddr)) {
+      // Form 1): NOP, BRASL
+      // The BRASL (Branch Relative And Save Long) is patched into the space created
+      // by the load_const_from_toc_call sequence (typically (LARL-LG)/LGRL - BASR).
+      // The BRASL must be positioned such that its end is FW (4-byte) aligned (for atomic patching).
+      // It is achieved by aligning the end of the entire sequence on a 4byte boundary, by inserting
+      // a nop, if required, at the very beginning of the instruction sequence. The nop needs to
+      // be accounted for when calculating the next instruction address. The alignment takes place
+      // already when generating the original instruction sequence. The alignment requirement
+      // makes the size depend on location.
+      // The return address of the call must always be at the end of the instruction sequence.
+      // Inserting the extra alignment nop (or anything else) at the end is not an option.
+      // The patched-in brasl instruction is prepended with a nop to make it easier to
+      // distinguish from a load_const_from_toc_call sequence.
+      return addr_at(MacroAssembler::call_far_pcrelative_size());
+    }
+
+    ((NativeCall*)iaddr)->print();
+    guarantee(false, "Not a NativeCall site");
+    return NULL;
+  }
+
+  address return_address() const {
+    return next_instruction_address();
+  }
+
+  address destination() const;
+
+  void set_destination_mt_safe(address dest);
+
+  void verify_alignment() {} // Yet another real do nothing guy :)
+  void verify();
+
+  // unit test stuff
+  static void test();
+
+  // Creation.
+  friend NativeCall* nativeCall_at(address instr) {
+    NativeCall* call;
+
+    // Make sure not to return garbage.
+    if (NativeCall::is_call_at(instr)) {
+      call = (NativeCall*)instr;
+    } else {
+      call = (NativeCall*)instr;
+      call->print();
+      guarantee(false, "Not a NativeCall site");
+    }
+
+#ifdef ASSERT
+    call->verify();
+#endif
+    return call;
+  }
+
+  // This is a very tricky function to implement. It involves stepping
+  // backwards in the instruction stream. On architectures with variable
+  // instruction length, this is a risky endeavor. From the return address,
+  // you do not know how far to step back to be at a location (your starting
+  // point) that will eventually bring you back to the return address.
+  // Furthermore, it may happen that there are multiple starting points.
+  //
+  // With only a few possible (allowed) code patterns, the risk is lower but
+  // does not diminish completely. Experience shows that there are code patterns
+  // which look like a load_const_from_toc_call @(return address-8), but in
+  // fact are a call_far_pcrelative @(return address-6). The other way around
+  // is possible as well, but was not knowingly observed so far.
+  //
+  // The unpredictability is caused by the pc-relative address field in both
+  // the call_far_pcrelative (BRASL) and the load_const_from_toc (LGRL)
+  // instructions. This field can contain an arbitrary bit pattern.
+  //
+  // Here is a real-world example:
+  // Mnemonics: <not a valid sequence>   LGRL r10,<addr> BASR r14,r10
+  // Hex code:  eb01 9008 007a c498 ffff c4a8 c0e5 ffc1 0dea
+  // Mnemonics: AGSI <mem>,I8  LGRL r9,<addr> BRASL r14,<addr>  correct
+  //
+  // If you first check for a load_const_from_toc_call @(-8), you will find
+  // a false positive. In this example, it is obviously false, because the
+  // preceding bytes do not form a valid instruction pattern. If you first
+  // check for call_far_pcrelative @(-6), you get a true positive - in this
+  // case.
+  //
+  // The following remedy has been implemented/enforced:
+  // 1) Everywhere, the permissible code patterns are checked in the same
+  //    sequence: Form 2) - Form 3) - Form 1).
+  // 2) The call_far_pcrelative, which would ideally be just one BRASL
+  //    instruction, is always prepended with a NOP. This measure avoids
+  //    ambiguities with load_const_from_toc_call.
+  friend NativeCall* nativeCall_before(address return_address) {
+    NativeCall *call = NULL;
+
+    // Make sure not to return garbage
+    address instp = return_address - MacroAssembler::load_const_call_size();
+    if (MacroAssembler::is_load_const_call(instp)) {                 // Form 2)
+      call = (NativeCall*)(instp);                                   // load_const + basr
+    } else {
+      instp = return_address - MacroAssembler::load_const_from_toc_call_size();
+      if (MacroAssembler::is_load_const_from_toc_call(instp)) {      // Form 3)
+        call = (NativeCall*)(instp);                                 // load_const_from_toc + basr
+      } else {
+        instp = return_address - MacroAssembler::call_far_pcrelative_size();
+        if (MacroAssembler::is_call_far_pcrelative(instp)) {         // Form 1)
+          call = (NativeCall*)(instp);                               // brasl (or nop + brasl)
+        } else {
+          call = (NativeCall*)(instp);
+          call->print();
+          guarantee(false, "Not a NativeCall site");
+        }
+      }
+    }
+
+#ifdef ASSERT
+    call->verify();
+#endif
+    return call;
+  }
+
+  // Ordering of checks 2) 3) 1) is relevant!
+  static bool is_call_at(address a) {
+    // Check plain instruction sequence. Do not care about filler or alignment nops.
+    bool b = MacroAssembler::is_load_const_call(a) ||           // load_const + basr
+             MacroAssembler::is_load_const_from_toc_call(a) ||  // load_const_from_toc + basr
+             MacroAssembler::is_call_far_pcrelative(a);         // nop + brasl
+    return b;
+  }
+
+  // Ordering of checks 2) 3) 1) is relevant!
+  static bool is_call_before(address a) {
+    // check plain instruction sequence. Do not care about filler or alignment nops.
+    bool b = MacroAssembler::is_load_const_call(         a - MacroAssembler::load_const_call_size()) ||           // load_const + basr
+             MacroAssembler::is_load_const_from_toc_call(a - MacroAssembler::load_const_from_toc_call_size()) ||  // load_const_from_toc + basr
+             MacroAssembler::is_call_far_pcrelative(     a - MacroAssembler::call_far_pcrelative_size());         // nop+brasl
+    return b;
+  }
+
+  static bool is_call_to(address instr, address target) {
+    // Check whether there is a `NativeCall' at the address `instr'
+    // calling to the address `target'.
+    return is_call_at(instr) && target == ((NativeCall *)instr)->destination();
+  }
+
+  // Does this call site use a pc-relative encoding (Form 1)?
+  bool is_pcrelative() {
+    return MacroAssembler::is_call_far_pcrelative((address)this);
+  }
+};
+
+//-----------------------------
+//  N a t i v e F a r C a l l
+//-----------------------------
+
+// The NativeFarCall is an abstraction for accessing/manipulating native
+// call-anywhere instructions.
+// Used to call native methods which may be loaded anywhere in the address
+// space, possibly out of reach of a call instruction.
+
+// Refer to NativeCall for a description of the supported call forms.
+
+class NativeFarCall: public NativeInstruction {
+
+ public:
+  // We use MacroAssembler::call_far_patchable() for implementing a
+  // call-anywhere instruction.
+
+  // Size and return-address offset are delegated to the MacroAssembler,
+  // since the generated sequence depends on the CPU capabilities.
+  static int instruction_size()      { return MacroAssembler::call_far_patchable_size(); }
+  static int return_address_offset() { return MacroAssembler::call_far_patchable_ret_addr_offset(); }
+
+  // address instruction_address() const { return addr_at(0); }
+
+  address next_instruction_address() const {
+    return addr_at(instruction_size());
+  }
+
+  address return_address() const {
+    return addr_at(return_address_offset());
+  }
+
+  // Returns the NativeFarCall's destination.
+  address destination();
+
+  // Sets the NativeCall's destination, not necessarily mt-safe.
+  // Used when relocating code.
+  void set_destination(address dest, int toc_offset);
+
+  // Checks whether instr points at a NativeFarCall instruction.
+  static bool is_far_call_at(address instr) {
+    // Use compound inspection function which, in addition to instruction sequence,
+    // also checks for expected nops and for instruction alignment.
+    return MacroAssembler::is_call_far_patchable_at(instr);
+  }
+
+  // Does the NativeFarCall implementation use a pc-relative encoding
+  // of the call destination?
+  // Used when relocating code.
+  bool is_pcrelative() {
+    address iaddr = (address)this;
+    assert(is_far_call_at(iaddr), "unexpected call type");
+    return MacroAssembler::is_call_far_patchable_pcrelative_at(iaddr);
+  }
+
+  void verify();
+
+  // Unit tests
+  static void test();
+
+  // Instantiates a NativeFarCall object starting at the given instruction
+  // address and returns the NativeFarCall object.
+  inline friend NativeFarCall* nativeFarCall_at(address instr) {
+    NativeFarCall* call = (NativeFarCall*)instr;
+#ifdef ASSERT
+    call->verify();
+#endif
+    return call;
+  }
+};
+
+
+//-------------------------------------
+//  N a t i v e M o v C o n s t R e g
+//-------------------------------------
+
+// An interface for accessing/manipulating native set_oop imm, reg instructions.
+// (Used to manipulate inlined data references, etc.)
+
+// A native move of a constant into a register, as defined by this abstraction layer,
+// deals with instruction sequences that load "quasi constant" oops into registers
+// for addressing. For multiple causes, those "quasi constant" oops eventually need
+// to be changed (i.e. patched). The reason is quite simple: objects might get moved
+// around in storage. Pc-relative oop addresses have to be patched also if the
+// reference location is moved. That happens when executable code is relocated.
+
+class NativeMovConstReg: public NativeInstruction {
+ public:
+
+  enum z_specific_constants {
+    instruction_size = 10 // Used in shared code for calls with reloc_info.
+  };
+
+  // address instruction_address() const { return addr_at(0); }
+
+  // The current instruction might be located at an offset.
+  address next_instruction_address(int offset = 0) const;
+
+  // (The [set_]data accessor respects oop_type relocs also.)
+  intptr_t data() const;
+
+  // Patch data in code stream.
+  address set_data_plain(intptr_t x, CodeBlob *code);
+  // Patch data in code stream and oop pool if necessary.
+  void set_data(intptr_t x);
+
+  // Patch narrow oop constant in code stream.
+  void set_narrow_oop(intptr_t data);
+  void set_narrow_klass(intptr_t data);
+  // Patch a pc-relative address/data constant; optionally write the value
+  // back to the oop pool of the given CompiledMethod.
+  void set_pcrel_addr(intptr_t addr, CompiledMethod *nm = NULL, bool copy_back_to_oop_pool=false);
+  void set_pcrel_data(intptr_t data, CompiledMethod *nm = NULL, bool copy_back_to_oop_pool=false);
+
+  void verify();
+
+  // unit test stuff
+  static void test();
+
+  // Creation.
+  friend NativeMovConstReg* nativeMovConstReg_at(address address) {
+    NativeMovConstReg* test = (NativeMovConstReg*)address;
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+};
+
+
+#ifdef COMPILER1
+//---------------------------------
+//  N a t i v e M o v R e g M e m
+//---------------------------------
+
+// Interface to manipulate a code sequence that performs a memory access (load/store).
+// The code is the patchable version of memory accesses generated by
+// LIR_Assembler::reg2mem() and LIR_Assembler::mem2reg().
+//
+// Loading the offset for the mem access is target of the manipulation.
+//
+// The instruction sequence looks like this:
+//   iihf        %r1,$bits1              ; load offset for mem access
+//   iilf        %r1,$bits2
+//   [compress oop]                      ; optional, load only
+//   load/store  %r2,0(%r1,%r2)          ; memory access
+
+class NativeMovRegMem;
+inline NativeMovRegMem* nativeMovRegMem_at (address address);
+class NativeMovRegMem: public NativeInstruction {
+ public:
+  // The patchable displacement is materialized by a NativeMovConstReg
+  // (IIHF/IILF pair) at the start of the sequence; delegate to it.
+  intptr_t offset() const {
+    return nativeMovConstReg_at(addr_at(0))->data();
+  }
+  void set_offset(intptr_t x) {
+    nativeMovConstReg_at(addr_at(0))->set_data(x);
+  }
+  void add_offset_in_bytes(intptr_t radd_offset) {
+    set_offset(offset() + radd_offset);
+  }
+  void verify();
+
+ private:
+  friend inline NativeMovRegMem* nativeMovRegMem_at(address address) {
+    NativeMovRegMem* test = (NativeMovRegMem*)address;
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+};
+#endif // COMPILER1
+
+
+//-----------------------
+//  N a t i v e J u m p
+//-----------------------
+
+
+// An interface for accessing/manipulating native jumps
+// An interface for accessing/manipulating native jumps
+class NativeJump: public NativeInstruction {
+ public:
+  enum z_constants {
+    instruction_size = 2 // Size of z_illtrap().
+  };
+
+  // Maximum size (in bytes) of a jump to an absolute address.
+  // Used when emitting branch to an exception handler which is a "load_const_optimized_branch".
+  // Thus, a pessimistic estimate is obtained when using load_const.
+  // code pattern is:
+  //   tmpReg := load_const(address);   (* varying size *)
+  //   jumpTo(tmpReg);                  (* bcr, 2 bytes *)
+  //
+  static unsigned int max_instruction_size() {
+    return MacroAssembler::load_const_size() + MacroAssembler::jump_byregister_size();
+  }
+
+
+//  address instruction_address() const { return addr_at(0); }
+
+  // The jump target is the constant loaded by the leading load_const
+  // sequence; delegate extraction/patching to NativeMovConstReg.
+  address jump_destination() const {
+    return (address)nativeMovConstReg_at(instruction_address())->data();
+  }
+
+  void set_jump_destination(address dest) {
+    nativeMovConstReg_at(instruction_address())->set_data(((intptr_t)dest));
+  }
+
+  // Creation
+  friend NativeJump* nativeJump_at(address address) {
+    NativeJump* jump = (NativeJump*)address;
+    #ifdef ASSERT
+      jump->verify();
+    #endif
+    return jump;
+  }
+
+  // A jump site is either load_const_from_toc+BR or load_const+BR.
+  static bool is_jump_at(address a) {
+    int off = 0;
+    bool b = (MacroAssembler::is_load_const_from_toc(a+off) &&
+              Assembler::is_z_br(*(short*)(a+off + MacroAssembler::load_const_from_toc_size())));
+    b = b || (MacroAssembler::is_load_const(a+off) &&
+              Assembler::is_z_br(*(short*)(a+off + MacroAssembler::load_const_size())));
+    return b;
+  }
+
+  void verify();
+
+  // Unit testing stuff
+  static void test();
+
+  // Insertion of native jump instruction.
+  static void insert(address code_pos, address entry);
+
+  // MT-safe insertion of native jump at verified method entry.
+  static void check_verified_entry_alignment(address entry, address verified_entry) { }
+
+  static void patch_verified_entry(address entry, address verified_entry, address dest);
+};
+
+//-------------------------------------
+//  N a t i v e G e n e r a l J u m p
+//-------------------------------------
+
+// Despite the name, handles only simple branches.
+// On ZARCH_64 BRCL only.
+class NativeGeneralJump;
+inline NativeGeneralJump* nativeGeneralJump_at(address address);
+class NativeGeneralJump: public NativeInstruction {
+ public:
+  enum ZARCH_specific_constants {
+    instruction_size = 6 // BRCL is a 6-byte instruction.
+  };
+
+  address instruction_address() const { return addr_at(0); }
+  // Target = instruction address + pc-relative offset encoded in the BRCL.
+  address jump_destination()    const { return addr_at(0) + MacroAssembler::get_pcrel_offset(addr_at(0)); }
+
+  // Creation
+  friend inline NativeGeneralJump* nativeGeneralJump_at(address addr) {
+    NativeGeneralJump* jump = (NativeGeneralJump*)(addr);
+#ifdef ASSERT
+    jump->verify();
+#endif
+    return jump;
+  }
+
+  // Insertion of native general jump instruction.
+  static void insert_unconditional(address code_pos, address entry);
+
+  // Not implemented on s390; use replace_mt_safe() to repoint a jump.
+  void set_jump_destination(address dest) {
+    Unimplemented();
+    // set_word_at(MacroAssembler::call_far_pcrelative_size()-4, Assembler::z_pcrel_off(dest, addr_at(0)));
+  }
+
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+
+  void verify() PRODUCT_RETURN;
+};
+
+#endif // CPU_S390_VM_NATIVEINST_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/registerMap_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_REGISTERMAP_S390_HPP
+#define CPU_S390_VM_REGISTERMAP_S390_HPP
+
+// Machine-dependent implementation for register maps.
+
+ friend class frame;
+
+ private:
+  // This is the hook for finding a register in a "well-known" location,
+  // such as a register block of a predetermined format.
+  // Since there is none, we just return NULL.
+   address pd_location(VMReg reg) const {return NULL;}
+
+  // No PD state to clear or copy.
+  void pd_clear() {}
+  void pd_initialize() {}
+  void pd_initialize_from(const RegisterMap* map) {}
+
+#endif // CPU_S390_VM_REGISTERMAP_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/registerSaver_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_REGISTERSAVER_S390_HPP
+#define CPU_S390_VM_REGISTERSAVER_S390_HPP
+
+class RegisterSaver {
+  // Used for saving volatile registers.
+
+  // Class declaration moved to separate file to make it available elsewhere.
+  // Implementation remains in sharedRuntime_s390.cpp
+
+ public:
+
+  // Set of registers to be saved.
+  typedef enum {
+    all_registers,
+    all_registers_except_r2,
+    all_integer_registers,
+    all_volatile_registers, // According to ABI calling convention.
+    arg_registers
+  } RegisterSet;
+
+  // Boolean flags to force only argument registers to be saved.
+  // Size (in bytes) of the register save area / of the full save frame
+  // for the given register set.
+  static int live_reg_save_size(RegisterSet reg_set);
+  static int live_reg_frame_size(RegisterSet reg_set);
+  // Specify the register that should be stored as the return pc in the current frame.
+  static OopMap* save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc = Z_R14);
+  static void restore_live_registers(MacroAssembler* masm, RegisterSet reg_set);
+
+  // Generate the OopMap (again, regs where saved before).
+  static OopMap* generate_oop_map(MacroAssembler* masm, RegisterSet reg_set);
+
+  // During deoptimization only the result register need to be restored
+  // all the other values have already been extracted.
+  static void restore_result_registers(MacroAssembler* masm);
+
+  // Constants and data structures:
+
+  typedef enum {
+    int_reg           = 0,
+    float_reg         = 1,
+    excluded_reg      = 2,  // Not saved/restored.
+  } RegisterType;
+
+  typedef enum {
+    reg_size          = 8,
+    half_reg_size     = reg_size / 2,
+  } RegisterConstants;
+
+  // Remember type, number, and VMReg.
+  typedef struct {
+    RegisterType        reg_type;
+    int                 reg_num;
+    VMReg               vmreg;
+  } LiveRegType;
+
+};
+
+#endif // CPU_S390_VM_REGISTERSAVER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/register_definitions_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Make sure the defines don't screw up the declarations later on in this file.
+#define DONT_USE_REGISTER_DEFINES
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/register.hpp"
+#include "register_s390.hpp"
+#include "interp_masm_s390.hpp"
+
+// Provide the out-of-line definitions for the 'noreg'/'fnoreg' sentinel
+// register constants declared in register_s390.hpp.
+REGISTER_DEFINITION(Register, noreg);
+
+REGISTER_DEFINITION(FloatRegister, fnoreg);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/register_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "register_s390.hpp"
+
+
+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * 2;
+const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
+                                          FloatRegisterImpl::number_of_registers * 2;
+
+const char* RegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "Z_R0",  "Z_R1",  "Z_R2",  "Z_R3",  "Z_R4",  "Z_R5",  "Z_R6",  "Z_R7",
+    "Z_R8",  "Z_R9",  "Z_R10", "Z_R11", "Z_R12", "Z_R13", "Z_R14", "Z_R15"
+  };
+  return is_valid() ? names[encoding()] : "noreg";
+}
+
+const char* FloatRegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "Z_F0",  "Z_F1",   "Z_F2",  "Z_F3",   "Z_F4",  "Z_F5",   "Z_F6",  "Z_F7",   "Z_F8",  "Z_F9",
+    "Z_F10", "Z_F11",  "Z_F12", "Z_F13",  "Z_F14", "Z_F15"
+  };
+  return is_valid() ? names[encoding()] : "fnoreg";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/register_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_REGISTER_S390_HPP
+#define CPU_S390_VM_REGISTER_S390_HPP
+
+#include "asm/register.hpp"
+#include "vm_version_s390.hpp"
+
+class Address;
+class VMRegImpl;
+
+typedef VMRegImpl* VMReg;
+
+// Use Register as shortcut.
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+// The implementation of integer registers for z/Architecture.
+
+// z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
+//
+//   r0-r1     General purpose (volatile)
+//   r2        Parameter and return value (volatile)
+//   r3        Parameter (volatile)
+//   r4-r5     Parameters (volatile)
+//   r6        Parameter (nonvolatile)
+//   r7-r11    Locals (nonvolatile)
+//   r12       Local, often used as GOT pointer (nonvolatile)
+//   r13       Local, often used as literal pool pointer (nonvolatile)
+//   r14       return address (volatile)
+//   r15       stack pointer (nonvolatile)
+//
+//   f0,f2,f4,f6 Parameters (volatile)
+//   f1,f3,f5,f7 General purpose (volatile)
+//   f8-f15      General purpose (nonvolatile)
+
+inline Register as_Register(int encoding) {
+  return (Register)(long)encoding;
+}
+
+class RegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers     = 16,
+    number_of_arg_registers = 5
+  };
+
+  // general construction
+  inline friend Register as_Register(int encoding);
+
+  inline VMReg as_VMReg();
+
+  // accessors
+  int   encoding() const      { assert(is_valid(), "invalid register"); return value(); }
+  const char* name() const;
+
+  // testers
+  bool is_valid() const       { return (0 <= (value()&0x7F) && (value()&0x7F) < number_of_registers); }
+  bool is_even() const        { return (encoding() & 1) == 0; }
+  bool is_volatile() const    { return (0 <= (value()&0x7F) && (value()&0x7F) <= 5) || (value()&0x7F)==14; }
+  bool is_nonvolatile() const { return is_valid() && !is_volatile(); }
+
+ public:
+  // derived registers, offsets, and addresses
+  Register predecessor() const { return as_Register((encoding()-1) & (number_of_registers-1)); }
+  Register successor() const   { return as_Register((encoding() + 1) & (number_of_registers-1)); }
+};
+
+// The integer registers of the z/Architecture.
+
+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+
+CONSTANT_REGISTER_DECLARATION(Register, Z_R0,   (0));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R1,   (1));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R2,   (2));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R3,   (3));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R4,   (4));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R5,   (5));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R6,   (6));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R7,   (7));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R8,   (8));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R9,   (9));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R10, (10));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R11, (11));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R12, (12));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));
+
+// Use ConditionRegister as shortcut
+class ConditionRegisterImpl;
+typedef ConditionRegisterImpl* ConditionRegister;
+
+// The implementation of condition register(s) for the z/Architecture.
+
+class ConditionRegisterImpl: public AbstractRegisterImpl {
+ public:
+
+  enum {
+    number_of_registers = 1
+  };
+
+  // accessors
+  int encoding() const {
+    assert(is_valid(), "invalid register"); return value();
+  }
+
+  // testers
+  bool is_valid() const {
+    return (0 <= value() && value() < number_of_registers);
+  }
+  bool is_volatile() const {
+    return true;
+  }
+  bool is_nonvolatile() const {
+    return false;
+  }
+
+  // construction.
+  inline friend ConditionRegister as_ConditionRegister(int encoding);
+
+  inline VMReg as_VMReg();
+};
+
+inline ConditionRegister as_ConditionRegister(int encoding) {
+  assert(encoding >= 0 && encoding < ConditionRegisterImpl::number_of_registers, "bad condition register encoding");
+  return (ConditionRegister)(long)encoding;
+}
+
+// The condition register of the z/Architecture.
+
+CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
+
+// Because z/Architecture has so many registers, #define'ing values for them is
+// beneficial in code size and is worth the cost of some of the
+// dangers of defines.
+// If a particular file has a problem with these defines then it's possible
+// to turn them off in that file by defining
+// DONT_USE_REGISTER_DEFINES. register_definitions_s390.cpp does that
+// so that it's able to provide real definitions of these registers
+// for use in debuggers and such.
+
+#ifndef DONT_USE_REGISTER_DEFINES
+#define noreg ((Register)(noreg_RegisterEnumValue))
+
+#define Z_R0  ((Register)(Z_R0_RegisterEnumValue))
+#define Z_R1  ((Register)(Z_R1_RegisterEnumValue))
+#define Z_R2  ((Register)(Z_R2_RegisterEnumValue))
+#define Z_R3  ((Register)(Z_R3_RegisterEnumValue))
+#define Z_R4  ((Register)(Z_R4_RegisterEnumValue))
+#define Z_R5  ((Register)(Z_R5_RegisterEnumValue))
+#define Z_R6  ((Register)(Z_R6_RegisterEnumValue))
+#define Z_R7  ((Register)(Z_R7_RegisterEnumValue))
+#define Z_R8  ((Register)(Z_R8_RegisterEnumValue))
+#define Z_R9  ((Register)(Z_R9_RegisterEnumValue))
+#define Z_R10 ((Register)(Z_R10_RegisterEnumValue))
+#define Z_R11 ((Register)(Z_R11_RegisterEnumValue))
+#define Z_R12 ((Register)(Z_R12_RegisterEnumValue))
+#define Z_R13 ((Register)(Z_R13_RegisterEnumValue))
+#define Z_R14 ((Register)(Z_R14_RegisterEnumValue))
+#define Z_R15 ((Register)(Z_R15_RegisterEnumValue))
+
+#define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
+#endif // DONT_USE_REGISTER_DEFINES
+
+// Use FloatRegister as shortcut
+class FloatRegisterImpl;
+typedef FloatRegisterImpl* FloatRegister;
+
+// The implementation of float registers for the z/Architecture.
+
+inline FloatRegister as_FloatRegister(int encoding) {
+  return (FloatRegister)(long)encoding;
+}
+
+class FloatRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers     = 16,
+    number_of_arg_registers = 4
+  };
+
+  // construction
+  inline friend FloatRegister as_FloatRegister(int encoding);
+
+  inline VMReg as_VMReg();
+
+  // accessors
+  int encoding() const                                {
+     assert(is_valid(), "invalid register"); return value();
+  }
+
+  bool  is_valid() const          { return 0 <= value() && value() < number_of_registers; }
+  bool is_volatile() const        { return (0 <= (value()&0x7F) && (value()&0x7F) <= 7); }
+  bool is_nonvolatile() const     { return (8 <= (value()&0x7F) && (value()&0x7F) <= 15); }
+
+  const char* name() const;
+
+  FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
+};
+
+// The float registers of z/Architecture.
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1));
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F0,  (0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F1,  (1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F2,  (2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F3,  (3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F4,  (4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F5,  (5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F6,  (6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F7,  (7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F8,  (8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister,  Z_F9,  (9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F10, (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F11, (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F12, (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F13, (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F14, (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15));
+
+#ifndef DONT_USE_REGISTER_DEFINES
+#define fnoreg ((FloatRegister)(fnoreg_FloatRegisterEnumValue))
+#define Z_F0  ((FloatRegister)(   Z_F0_FloatRegisterEnumValue))
+#define Z_F1  ((FloatRegister)(   Z_F1_FloatRegisterEnumValue))
+#define Z_F2  ((FloatRegister)(   Z_F2_FloatRegisterEnumValue))
+#define Z_F3  ((FloatRegister)(   Z_F3_FloatRegisterEnumValue))
+#define Z_F4  ((FloatRegister)(   Z_F4_FloatRegisterEnumValue))
+#define Z_F5  ((FloatRegister)(   Z_F5_FloatRegisterEnumValue))
+#define Z_F6  ((FloatRegister)(   Z_F6_FloatRegisterEnumValue))
+#define Z_F7  ((FloatRegister)(   Z_F7_FloatRegisterEnumValue))
+#define Z_F8  ((FloatRegister)(   Z_F8_FloatRegisterEnumValue))
+#define Z_F9  ((FloatRegister)(   Z_F9_FloatRegisterEnumValue))
+#define Z_F10 ((FloatRegister)(  Z_F10_FloatRegisterEnumValue))
+#define Z_F11 ((FloatRegister)(  Z_F11_FloatRegisterEnumValue))
+#define Z_F12 ((FloatRegister)(  Z_F12_FloatRegisterEnumValue))
+#define Z_F13 ((FloatRegister)(  Z_F13_FloatRegisterEnumValue))
+#define Z_F14 ((FloatRegister)(  Z_F14_FloatRegisterEnumValue))
+#define Z_F15 ((FloatRegister)(  Z_F15_FloatRegisterEnumValue))
+#endif // DONT_USE_REGISTER_DEFINES
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers =
+      (RegisterImpl::number_of_registers +
+      FloatRegisterImpl::number_of_registers)
+      * 2 // register halves
+      + 1 // condition code register
+  };
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+// Single, Double and Quad fp reg classes. These exist to map the ADLC
+// encoding for a floating point register, to the FloatRegister number
+// desired by the macroassembler. A FloatRegister is a small integer
+// passed around as a pointer. For ADLC, an fp register encoding
+// is the actual bit encoding used by the z/Architecture hardware. When ADLC used
+// the macroassembler to generate an instruction that references, e.g., a
+// double fp reg, it passed the bit encoding to the macroassembler via
+// as_FloatRegister, which, for double regs > 30, returns an illegal
+// register number.
+//
+// Therefore we provide the following classes for use by ADLC. Their
+// sole purpose is to convert from z/Architecture register encodings to FloatRegisters.
+// At some future time, we might replace FloatRegister with these classes,
+// hence the definitions of as_xxxFloatRegister as class methods rather
+// than as external inline routines.
+
+class SingleFloatRegisterImpl;
+typedef SingleFloatRegisterImpl *SingleFloatRegister;
+
+class SingleFloatRegisterImpl {
+ public:
+  friend FloatRegister as_SingleFloatRegister(int encoding) {
+    assert(encoding < 32, "bad single float register encoding");
+    return as_FloatRegister(encoding);
+  }
+};
+
+class DoubleFloatRegisterImpl;
+typedef DoubleFloatRegisterImpl *DoubleFloatRegister;
+
+class DoubleFloatRegisterImpl {
+ public:
+  friend FloatRegister as_DoubleFloatRegister(int encoding) {
+    assert(encoding < 32, "bad double float register encoding");
+    return as_FloatRegister(((encoding & 1) << 5) | (encoding & 0x1e));
+  }
+};
+
+class QuadFloatRegisterImpl;
+typedef QuadFloatRegisterImpl *QuadFloatRegister;
+
+class QuadFloatRegisterImpl {
+ public:
+  friend FloatRegister as_QuadFloatRegister(int encoding) {
+    assert(encoding < 32 && ((encoding & 2) == 0), "bad quad float register encoding");
+    return as_FloatRegister(((encoding & 1) << 5) | (encoding & 0x1c));
+  }
+};
+
+
+// Common register declarations used in assembler code.
+REGISTER_DECLARATION(Register,      Z_EXC_OOP, Z_R2);
+REGISTER_DECLARATION(Register,      Z_EXC_PC,  Z_R3);
+REGISTER_DECLARATION(Register,      Z_RET,     Z_R2);
+REGISTER_DECLARATION(Register,      Z_ARG1,    Z_R2);
+REGISTER_DECLARATION(Register,      Z_ARG2,    Z_R3);
+REGISTER_DECLARATION(Register,      Z_ARG3,    Z_R4);
+REGISTER_DECLARATION(Register,      Z_ARG4,    Z_R5);
+REGISTER_DECLARATION(Register,      Z_ARG5,    Z_R6);
+REGISTER_DECLARATION(Register,      Z_SP,     Z_R15);
+REGISTER_DECLARATION(FloatRegister, Z_FRET,    Z_F0);
+REGISTER_DECLARATION(FloatRegister, Z_FARG1,   Z_F0);
+REGISTER_DECLARATION(FloatRegister, Z_FARG2,   Z_F2);
+REGISTER_DECLARATION(FloatRegister, Z_FARG3,   Z_F4);
+REGISTER_DECLARATION(FloatRegister, Z_FARG4,   Z_F6);
+
+#ifndef DONT_USE_REGISTER_DEFINES
+#define Z_EXC_OOP         AS_REGISTER(Register,  Z_R2)
+#define Z_EXC_PC          AS_REGISTER(Register,  Z_R3)
+#define Z_RET             AS_REGISTER(Register,  Z_R2)
+#define Z_ARG1            AS_REGISTER(Register,  Z_R2)
+#define Z_ARG2            AS_REGISTER(Register,  Z_R3)
+#define Z_ARG3            AS_REGISTER(Register,  Z_R4)
+#define Z_ARG4            AS_REGISTER(Register,  Z_R5)
+#define Z_ARG5            AS_REGISTER(Register,  Z_R6)
+#define Z_SP              AS_REGISTER(Register, Z_R15)
+#define Z_FRET            AS_REGISTER(FloatRegister, Z_F0)
+#define Z_FARG1           AS_REGISTER(FloatRegister, Z_F0)
+#define Z_FARG2           AS_REGISTER(FloatRegister, Z_F2)
+#define Z_FARG3           AS_REGISTER(FloatRegister, Z_F4)
+#define Z_FARG4           AS_REGISTER(FloatRegister, Z_F6)
+#endif
+
+// Register declarations to be used in frame manager assembly code.
+// Use only non-volatile registers in order to keep values across C-calls.
+
+// Register to cache the integer value on top of the operand stack.
+REGISTER_DECLARATION(Register, Z_tos,         Z_R2);
+// Register to cache the fp value on top of the operand stack.
+REGISTER_DECLARATION(FloatRegister, Z_ftos,   Z_F0);
+// Expression stack pointer in interpreted java frame.
+REGISTER_DECLARATION(Register, Z_esp,         Z_R7);
+// Address of current thread.
+REGISTER_DECLARATION(Register, Z_thread,      Z_R8);
+// Address of current method. only valid in interpreter_entry.
+REGISTER_DECLARATION(Register, Z_method,      Z_R9);
+// Inline cache register. used by c1 and c2.
+REGISTER_DECLARATION(Register, Z_inline_cache,Z_R9);
+// Frame pointer of current interpreter frame. only valid while
+// executing bytecodes.
+REGISTER_DECLARATION(Register, Z_fp,          Z_R9);
+// Address of the locals array in an interpreted java frame.
+REGISTER_DECLARATION(Register, Z_locals,      Z_R12);
+// Bytecode pointer.
+REGISTER_DECLARATION(Register, Z_bcp,         Z_R13);
+// Bytecode which is dispatched (short lived!).
+REGISTER_DECLARATION(Register, Z_bytecode,    Z_R14);
+#ifndef DONT_USE_REGISTER_DEFINES
+#define Z_tos             AS_REGISTER(Register, Z_R2)
+#define Z_ftos            AS_REGISTER(FloatRegister, Z_F0)
+#define Z_esp             AS_REGISTER(Register, Z_R7)
+#define Z_thread          AS_REGISTER(Register, Z_R8)
+#define Z_method          AS_REGISTER(Register, Z_R9)
+#define Z_inline_cache    AS_REGISTER(Register, Z_R9)
+#define Z_fp              AS_REGISTER(Register, Z_R9)
+#define Z_locals          AS_REGISTER(Register, Z_R12)
+#define Z_bcp             AS_REGISTER(Register, Z_R13)
+#define Z_bytecode        AS_REGISTER(Register, Z_R14)
+#endif
+
+// Temporary registers to be used within frame manager. We can use
+// the nonvolatiles because the call stub has saved them.
+// Use only non-volatile registers in order to keep values across C-calls.
+REGISTER_DECLARATION(Register, Z_tmp_1,  Z_R10);
+REGISTER_DECLARATION(Register, Z_tmp_2,  Z_R11);
+REGISTER_DECLARATION(Register, Z_tmp_3,  Z_R12);
+REGISTER_DECLARATION(Register, Z_tmp_4,  Z_R13);
+#ifndef DONT_USE_REGISTER_DEFINES
+#define Z_tmp_1      AS_REGISTER(Register, Z_R10)
+#define Z_tmp_2      AS_REGISTER(Register, Z_R11)
+#define Z_tmp_3      AS_REGISTER(Register, Z_R12)
+#define Z_tmp_4      AS_REGISTER(Register, Z_R13)
+#endif
+
+// Scratch registers are volatile.
+REGISTER_DECLARATION(Register, Z_R0_scratch, Z_R0);
+REGISTER_DECLARATION(Register, Z_R1_scratch, Z_R1);
+REGISTER_DECLARATION(FloatRegister, Z_fscratch_1, Z_F1);
+#ifndef DONT_USE_REGISTER_DEFINES
+#define Z_R0_scratch  AS_REGISTER(Register, Z_R0)
+#define Z_R1_scratch  AS_REGISTER(Register, Z_R1)
+#define Z_fscratch_1  AS_REGISTER(FloatRegister, Z_F1)
+#endif
+
+
+#endif // CPU_S390_VM_REGISTER_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/relocInfo_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "code/relocInfo.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/safepoint.hpp"
+
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
+  // we don't support splitting of relocations, so o must be zero:
+  assert(o == 0, "tried to split relocations");
+  if (!verify_only) {
+    switch (format()) {
+      case relocInfo::uncompressed_format:
+        nativeMovConstReg_at(addr())->set_data_plain(((intptr_t)x) + o, code());
+        break;
+      case relocInfo::compressed_format:
+        if (type() == relocInfo::metadata_type)
+          nativeMovConstReg_at(addr())->set_narrow_klass(((intptr_t)x) + o);
+        else if (type() == relocInfo::oop_type)
+          nativeMovConstReg_at(addr())->set_narrow_oop(((intptr_t)x) + o);
+        else
+          guarantee(false, "bad relocInfo type for relocInfo::narrow_oop_format");
+        break;
+      case relocInfo::pcrel_addr_format:  // patch target location
+        nativeMovConstReg_at(addr())->set_pcrel_addr(((intptr_t)x) + o, code());
+        break;
+      case relocInfo::pcrel_data_format:  // patch data at target location
+        nativeMovConstReg_at(addr())->set_pcrel_data(((intptr_t)x) + o, code());
+        break;
+      default:
+        assert(false, "not a valid relocInfo format");
+        break;
+    }
+  } else {
+    // TODO: Reading of narrow oops out of code stream is not implemented
+    // (see nativeMovConstReg::data()). Implement this if you want to verify.
+    // assert(x == (address) nativeMovConstReg_at(addr())->data(), "Instructions must match");
+    switch (format()) {
+      case relocInfo::uncompressed_format:
+        break;
+      case relocInfo::compressed_format:
+        break;
+      case relocInfo::pcrel_addr_format:
+        break;
+      case relocInfo::pcrel_data_format:
+        break;
+      default:
+        assert(false, "not a valid relocInfo format");
+        break;
+    }
+  }
+}
+
+address Relocation::pd_call_destination(address orig_addr) {
+  address   inst_addr = addr();
+
+  if (NativeFarCall::is_far_call_at(inst_addr)) {
+    if (!ShortenBranches) {
+      if (MacroAssembler::is_call_far_pcrelative(inst_addr)) {
+        address a1 = MacroAssembler::get_target_addr_pcrel(orig_addr+MacroAssembler::nop_size());
+#ifdef ASSERT
+        address a2 = MacroAssembler::get_target_addr_pcrel(inst_addr+MacroAssembler::nop_size());
+        address a3 = nativeFarCall_at(orig_addr)->destination();
+        address a4 = nativeFarCall_at(inst_addr)->destination();
+        if ((a1 != a3) || (a2 != a4)) {
+          unsigned int range = 128;
+          Assembler::dump_code_range(tty, inst_addr, range, "pc-relative call w/o ShortenBranches?");
+          Assembler::dump_code_range(tty, orig_addr, range, "pc-relative call w/o ShortenBranches?");
+          assert(false, "pc-relative call w/o ShortenBranches?");
+        }
+#endif
+        return a1;
+      }
+      return (address)(-1);
+    }
+    NativeFarCall* call;
+    if (orig_addr == NULL) {
+      call = nativeFarCall_at(inst_addr);
+    } else {
+      if (MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) {
+        call = nativeFarCall_at(orig_addr);
+      } else {
+        call = nativeFarCall_at(orig_addr);  // must access location (in CP) where destination is stored in unmoved code, because load from CP is pc-relative
+      }
+    }
+    return call->destination();
+  }
+
+  if (NativeCall::is_call_at(inst_addr)) {
+    NativeCall* call = nativeCall_at(inst_addr);
+    if (call->is_pcrelative()) {
+      intptr_t off = inst_addr - orig_addr;
+      return (address) (call->destination()-off);
+    }
+  }
+
+  return (address) nativeMovConstReg_at(inst_addr)->data();
+}
+
+void Relocation::pd_set_call_destination(address x) {
+  address inst_addr = addr();
+
+  if (NativeFarCall::is_far_call_at(inst_addr)) {
+    if (!ShortenBranches) {
+      if (MacroAssembler::is_call_far_pcrelative(inst_addr)) {
+        address a1 = MacroAssembler::get_target_addr_pcrel(inst_addr+MacroAssembler::nop_size());
+#ifdef ASSERT
+        address a3 = nativeFarCall_at(inst_addr)->destination();
+        if (a1 != a3) {
+          unsigned int range = 128;
+          Assembler::dump_code_range(tty, inst_addr, range, "pc-relative call w/o ShortenBranches?");
+          assert(false, "pc-relative call w/o ShortenBranches?");
+        }
+#endif
+        nativeFarCall_at(inst_addr)->set_destination(x, 0);
+        return;
+      }
+      assert(x == (address)-1, "consistency check");
+      return;
+    }
+    int toc_offset = -1;
+    if (type() == relocInfo::runtime_call_w_cp_type) {
+      toc_offset = ((runtime_call_w_cp_Relocation *)this)->get_constant_pool_offset();
+    }
+    if (toc_offset>=0) {
+      NativeFarCall* call = nativeFarCall_at(inst_addr);
+      call->set_destination(x, toc_offset);
+      return;
+    }
+  }
+
+  if (NativeCall::is_call_at(inst_addr)) {
+    NativeCall* call = nativeCall_at(inst_addr);
+    if (call->is_pcrelative()) {
+      call->set_destination_mt_safe(x);
+      return;
+    }
+  }
+
+  // constant is absolute, must use x
+  nativeMovConstReg_at(inst_addr)->set_data(((intptr_t)x));
+}
+
+
+// store the new target address into an oop_Relocation cell, if any
+// return indication if update happened.
+bool relocInfo::update_oop_pool(address begin, address end, address newTarget, CodeBlob* cb) {
+
+  //  Try to find the CodeBlob, if not given by caller
+  if (cb == NULL) cb = CodeCache::find_blob(begin);
+#ifdef ASSERT
+  else
+    assert(cb == CodeCache::find_blob(begin), "consistency");
+#endif
+
+  //  'RelocIterator' requires an nmethod
+  nmethod*  nm = cb ? cb->as_nmethod_or_null() : NULL;
+  if (nm != NULL) {
+    RelocIterator iter(nm, begin, end);
+    oop* oop_addr = NULL;
+    Metadata** metadata_addr = NULL;
+    while (iter.next()) {
+      if (iter.type() == relocInfo::oop_type) {
+        oop_Relocation *r = iter.oop_reloc();
+        if (oop_addr == NULL) {
+          oop_addr = r->oop_addr();
+          *oop_addr = (oop)newTarget;
+        } else {
+          assert(oop_addr == r->oop_addr(), "must be only one set-oop here");
+        }
+      }
+      if (iter.type() == relocInfo::metadata_type) {
+        metadata_Relocation *r = iter.metadata_reloc();
+        if (metadata_addr == NULL) {
+          metadata_addr = r->metadata_addr();
+          *metadata_addr = (Metadata*)newTarget;
+        } else {
+          assert(metadata_addr == r->metadata_addr(), "must be only one set-metadata here");
+        }
+      }
+    }
+    return oop_addr || metadata_addr;
+  }
+  return false;
+}
+
+
+address* Relocation::pd_address_in_code() {
+ ShouldNotReachHere();
+ return 0;
+}
+
+address Relocation::pd_get_address_from_code() {
+   return  (address) (nativeMovConstReg_at(addr())->data());
+}
+
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+}
+
+void metadata_Relocation::pd_fix_value(address x) {
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/relocInfo_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_RELOCINFO_S390_HPP
+#define CPU_S390_VM_RELOCINFO_S390_HPP
+
+//----------------------------
+//  relocInfo layout
+//----------------------------
+
+// This description should be contained in code/relocInfo.hpp
+// but was put here to minimize shared code diffs.
+
+// Relocation information for a nmethod is stored in compressed
+// form in an array of element type short int (16 bits).
+// Each array element constitutes one relocInfo record.
+// The layout of one such record is described here.
+
+// +------------+---+---+------------------------------+
+// |    type    |  fmt  |      offset/offset_unit      |
+// +------------+---+---+------------------------------+
+//
+// |<-- value_width (16) ----------------------------->|
+// |<type_width>|<-- nontype_width (12) -------------->|
+//      (4)
+// |            |<--+-->|<-- offset_width (10) ------->|
+//              /       \
+//             /   (2)   \
+//            /<--format->\
+//            |    width  |
+
+
+// only for type == data_prefix_tag:
+// +------------+---+---+------------------------------+
+// |    type    |   |              data                |
+// +------------+---+---+------------------------------+
+// |     15     |<->|<-- datalen_width (11) ---------->|
+//                |
+//                +--datalen_tag (1)
+
+// relocType
+//   The type field holds a value of relocType (which is
+//   an enum of all possible relocation types). Currently,
+//   there are 16 distinct relocation types, requiring
+//   type_width to be (at least) 4.
+// relocFormat
+//   The format field holds a value of relocFormat (which is
+//   an enum of all possible relocation formats). Currently,
+//   there are 4 distinct relocation formats, requiring
+//   format_width to be (at least) 2.
+// offset
+//   Each relocInfo is related to one specific address in the CodeBlob.
+//   The address always points to the first byte of the target instruction.
+//   It does NOT refer directly to the relocation subfield or embedded constant.
+//   offset contains the distance of this relocInfo from the previous one.
+//   offset is scaled by offset_unit (the platform-specific instruction
+//   alignment requirement) to maximize the encodable distance.
+//   To obtain the absolute address in the CodeBlob the relocInfo is
+//   related to, you have to iterate over all relocInfos from the
+//   beginning, and then use RelocIterator::addr() to get the address.
+
+// relocType == data_prefix_tag
+//   These are relocInfo records containing inline data that belongs to
+//   the next non-data relocInfo record. Usage of that inline data is
+//   specific and private to that relocInfo record.
+//   For details refer to code/relocInfo.hpp
+
+
+  // machine-dependent parts of class relocInfo
+ private:
+  enum {
+    // Instructions are HW (2-byte) aligned on z/Architecture.
+    offset_unit        =  2,
+
+    // Encodes Assembler::disp32_operand vs. Assembler::imm64_operand.
+    // (Assembler::call32_operand is used on call instructions only.)
+    format_width       =  2
+  };
+
+ public:
+
+  enum relocFormat {
+    no_format           = 0,
+    uncompressed_format = 0,  // Relocation is for a regular oop.
+    compressed_format   = 1,  // Relocation is for a narrow (compressed) oop or klass.
+                              // Similar to relocInfo::narrow_oop_in_const.
+    pcrel_addr_format   = 2,  // Relocation is for the target LOCATION of a pc-relative instruction.
+    pcrel_data_format   = 3   // Relocation is for the target data of a pc-relative instruction.
+  };
+
+  // Store the new target address into an oop_Relocation cell, if any.
+  // Return indication if update happened.
+  static bool update_oop_pool(address begin, address end, address newTarget, CodeBlob* cb);
+
+#endif // CPU_S390_VM_RELOCINFO_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/runtime_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#ifdef COMPILER2
+#include "asm/macroAssembler.inline.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "code/vmreg.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_s390.hpp"
+#include "opto/runtime.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "vmreg_s390.inline.hpp"
+#endif
+
+#define __ masm->
+
+
+//------------------------------generate_exception_blob---------------------------
+// creates exception blob at the end
+// Using exception blob, this code is jumped from a compiled method.
+// (see emit_exception_handler in s390.ad file)
+//
+// Given an exception pc at a call we call into the runtime for the
+// handler in this method. This handler might merely restore state
+// (i.e. callee save registers), unwind the frame, and jump to the
+// exception handler for the nmethod if there is no Java level handler
+// for the nmethod.
+//
+// This code is entered with a branch.
+//
+// Arguments:
+//   Z_R2(=Z_ARG1): exception oop
+//   Z_R3(=Z_ARG2): exception pc
+//
+// Results:
+//   Z_R2: exception oop
+//   Z_R3: exception pc in caller
+//   destination: exception handler of caller
+//
+// Note: the exception pc MUST be at a call (precise debug information)
+
+void OptoRuntime::generate_exception_blob() {
+
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("exception_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  Register handle_exception = Z_ARG5;
+
+  __ verify_thread();
+  __ z_stg(Z_ARG1/*exception oop*/, Address(Z_thread, JavaThread::exception_oop_offset()));
+  __ z_stg(Z_ARG2/*issuing pc*/,    Address(Z_thread, JavaThread::exception_pc_offset()));
+
+  // Store issuing pc as return pc into
+  // caller's frame. stack-walking needs it. R14 is not valid here,
+  // because this code gets entered with a jump.
+  __ z_stg(Z_ARG2/*issuing pc*/, _z_abi(return_pc), Z_SP);
+
+  // The following call to function OptoRuntime::handle_exception_C
+  // does all the hard work. It checks if an
+  // exception catch exists in the method. If so, it returns the
+  // handler address. If the nmethod has been deoptimized and it had
+  // a handler the handler address is the deopt blob's
+  // unpack_with_exception entry.
+
+  // push a C frame for the exception blob. it is needed for the
+  // C call later on.
+
+  Register saved_sp = Z_R11;
+
+  __ z_lgr(saved_sp, Z_SP);
+
+  // push frame for blob.
+  int frame_size = __ push_frame_abi160(0);
+
+  __ get_PC(Z_R1/*scratch*/);
+  __ set_last_Java_frame(/*sp=*/Z_SP, /*pc=*/Z_R1);
+
+  // This call can lead to deoptimization of the nmethod holding the handler.
+  __ z_lgr(Z_ARG1, Z_thread);   // argument of C function
+  __ call_c(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C));
+
+  __ z_lgr(handle_exception, Z_RET);
+  __ reset_last_Java_frame();
+
+  // Pop the exception blob's C frame that has been pushed before.
+  __ z_lgr(Z_SP, saved_sp);
+
+  // [Z_RET]!=NULL was possible in hotspot5 but not in sapjvm6.
+  // C2I adapter extensions are now removed by a resize in the frame manager
+  // (unwind_initial_activation_pending_exception).
+#ifdef ASSERT
+  __ z_ltgr(handle_exception, handle_exception);
+  __ asm_assert_ne("handler must not be NULL", 0x852);
+#endif
+
+  // Handle_exception contains the handler address. If the associated frame
+  // has been deoptimized then the handler has been patched to jump to
+  // the deoptimization blob.
+
+  // If the exception handler jumps to the deoptimization blob, the
+  // exception pc will be read from there.
+  __ z_lg(Z_ARG2, Address(Z_thread, JavaThread::exception_pc_offset()));
+
+  __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
+
+  // Clear the exception oop so GC no longer processes it as a root.
+  __ clear_mem(Address(Z_thread, JavaThread::exception_oop_offset()),sizeof(intptr_t));
+#ifdef ASSERT
+  __ clear_mem(Address(Z_thread, JavaThread::exception_handler_pc_offset()), sizeof(intptr_t));
+  __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), sizeof(intptr_t));
+#endif
+
+  __ z_br(handle_exception);
+
+  // Make sure all code is generated.
+  masm->flush();
+
+  // Set exception blob.
+  OopMapSet *oop_maps = NULL;
+  _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, frame_size/wordSize);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/s390.ad	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,10802 @@
+//
+// Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2016 SAP SE. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+// z/Architecture Architecture Description File
+
+// Major contributions by AS, JL, LS.
+
+//
+// Following information is derived from private mail communication
+// (Oct. 2011).
+//
+// General branch target alignment considerations
+//
+// z/Architecture does not imply a general branch target alignment requirement.
+// There are side effects and side considerations, though, which may
+// provide some performance benefit. These are:
+//  - Align branch target on octoword (32-byte) boundary
+//    On more recent models (from z9 on), I-fetch is done on an Octoword
+//    (32 bytes at a time) basis. To avoid I-fetching unnecessary
+//    instructions, branch targets should be 32-byte aligend. If this
+//    exact alingment cannot be achieved, having the branch target in
+//    the first doubleword still provides some benefit.
+//  - Avoid branch targets at the end of cache lines (> 64 bytes distance).
+//    Sequential instruction prefetching after the branch target starts
+//    immediately after having fetched the octoword containing the
+//    branch target. When I-fetching crosses a cache line, there may be
+//    a small stall. The worst case: the branch target (at the end of
+//    a cache line) is a L1 I-cache miss and the next line as well.
+//    Then, the entire target line must be filled first (to continue at the
+//    branch target). Only then can the next sequential line be filled.
+//  - Avoid multiple poorly predicted branches in a row.
+//
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+register %{
+
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def" name (register save type, C convention save type,
+//                   ideal register type, encoding);
+//
+// Register Save Types:
+//
+//   NS  = No-Save:     The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method, &
+//                      that they do not need to be saved at call sites.
+//
+//   SOC = Save-On-Call: The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method,
+//                      but that they must be saved at call sites.
+//
+//   SOE = Save-On-Entry: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, but they do not need to be saved at call sites.
+//
+//   AS  = Always-Save: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// z/Architecture register definitions, based on the z/Architecture Principles
+// of Operation, 5th Edition, September 2005, and z/Linux Elf ABI Supplement,
+// 5th Edition, March 2001.
+//
+// For each 64-bit register we must define two registers: the register
+// itself, e.g. Z_R3, and a corresponding virtual other (32-bit-)'half',
+// e.g. Z_R3_H, which is needed by the allocator, but is not used
+// for stores, loads, etc.
+
+  // Integer/Long Registers
+  // ----------------------------
+
+  // z/Architecture has 16 64-bit integer registers.
+
+  // types: v = volatile, nv = non-volatile, s = system
+  reg_def Z_R0   (SOC, SOC, Op_RegI,  0, Z_R0->as_VMReg());   // v   scratch1
+  reg_def Z_R0_H (SOC, SOC, Op_RegI, 99, Z_R0->as_VMReg()->next());
+  reg_def Z_R1   (SOC, SOC, Op_RegI,  1, Z_R1->as_VMReg());   // v   scratch2
+  reg_def Z_R1_H (SOC, SOC, Op_RegI, 99, Z_R1->as_VMReg()->next());
+  reg_def Z_R2   (SOC, SOC, Op_RegI,  2, Z_R2->as_VMReg());   // v   iarg1 & iret
+  reg_def Z_R2_H (SOC, SOC, Op_RegI, 99, Z_R2->as_VMReg()->next());
+  reg_def Z_R3   (SOC, SOC, Op_RegI,  3, Z_R3->as_VMReg());   // v   iarg2
+  reg_def Z_R3_H (SOC, SOC, Op_RegI, 99, Z_R3->as_VMReg()->next());
+  reg_def Z_R4   (SOC, SOC, Op_RegI,  4, Z_R4->as_VMReg());   // v   iarg3
+  reg_def Z_R4_H (SOC, SOC, Op_RegI, 99, Z_R4->as_VMReg()->next());
+  reg_def Z_R5   (SOC, SOC, Op_RegI,  5, Z_R5->as_VMReg());   // v   iarg4
+  reg_def Z_R5_H (SOC, SOC, Op_RegI, 99, Z_R5->as_VMReg()->next());
+  reg_def Z_R6   (SOC, SOE, Op_RegI,  6, Z_R6->as_VMReg());   // v   iarg5
+  reg_def Z_R6_H (SOC, SOE, Op_RegI, 99, Z_R6->as_VMReg()->next());
+  reg_def Z_R7   (SOC, SOE, Op_RegI,  7, Z_R7->as_VMReg());
+  reg_def Z_R7_H (SOC, SOE, Op_RegI, 99, Z_R7->as_VMReg()->next());
+  reg_def Z_R8   (SOC, SOE, Op_RegI,  8, Z_R8->as_VMReg());
+  reg_def Z_R8_H (SOC, SOE, Op_RegI, 99, Z_R8->as_VMReg()->next());
+  reg_def Z_R9   (SOC, SOE, Op_RegI,  9, Z_R9->as_VMReg());
+  reg_def Z_R9_H (SOC, SOE, Op_RegI, 99, Z_R9->as_VMReg()->next());
+  reg_def Z_R10  (SOC, SOE, Op_RegI, 10, Z_R10->as_VMReg());
+  reg_def Z_R10_H(SOC, SOE, Op_RegI, 99, Z_R10->as_VMReg()->next());
+  reg_def Z_R11  (SOC, SOE, Op_RegI, 11, Z_R11->as_VMReg());
+  reg_def Z_R11_H(SOC, SOE, Op_RegI, 99, Z_R11->as_VMReg()->next());
+  reg_def Z_R12  (SOC, SOE, Op_RegI, 12, Z_R12->as_VMReg());
+  reg_def Z_R12_H(SOC, SOE, Op_RegI, 99, Z_R12->as_VMReg()->next());
+  reg_def Z_R13  (SOC, SOE, Op_RegI, 13, Z_R13->as_VMReg());
+  reg_def Z_R13_H(SOC, SOE, Op_RegI, 99, Z_R13->as_VMReg()->next());
+  reg_def Z_R14  (NS,  NS,  Op_RegI, 14, Z_R14->as_VMReg());   // s  return_pc
+  reg_def Z_R14_H(NS,  NS,  Op_RegI, 99, Z_R14->as_VMReg()->next());
+  reg_def Z_R15  (NS,  NS,  Op_RegI, 15, Z_R15->as_VMReg());   // s  SP
+  reg_def Z_R15_H(NS,  NS,  Op_RegI, 99, Z_R15->as_VMReg()->next());
+
+  // Float/Double Registers
+
+  // The rules of ADL require that double registers be defined in pairs.
+  // Each pair must be two 32-bit values, but not necessarily a pair of
+  // single float registers. In each pair, ADLC-assigned register numbers
+  // must be adjacent, with the lower number even. Finally, when the
+  // CPU stores such a register pair to memory, the word associated with
+  // the lower ADLC-assigned number must be stored to the lower address.
+
+  // z/Architecture has 16 64-bit floating-point registers. Each can store a single
+  // or double precision floating-point value.
+
+  // types: v = volatile, nv = non-volatile, s = system
+  reg_def Z_F0   (SOC, SOC, Op_RegF,  0, Z_F0->as_VMReg());   // v   farg1 & fret
+  reg_def Z_F0_H (SOC, SOC, Op_RegF, 99, Z_F0->as_VMReg()->next());
+  reg_def Z_F1   (SOC, SOC, Op_RegF,  1, Z_F1->as_VMReg());
+  reg_def Z_F1_H (SOC, SOC, Op_RegF, 99, Z_F1->as_VMReg()->next());
+  reg_def Z_F2   (SOC, SOC, Op_RegF,  2, Z_F2->as_VMReg());   // v   farg2
+  reg_def Z_F2_H (SOC, SOC, Op_RegF, 99, Z_F2->as_VMReg()->next());
+  reg_def Z_F3   (SOC, SOC, Op_RegF,  3, Z_F3->as_VMReg());
+  reg_def Z_F3_H (SOC, SOC, Op_RegF, 99, Z_F3->as_VMReg()->next());
+  reg_def Z_F4   (SOC, SOC, Op_RegF,  4, Z_F4->as_VMReg());   // v   farg3
+  reg_def Z_F4_H (SOC, SOC, Op_RegF, 99, Z_F4->as_VMReg()->next());
+  reg_def Z_F5   (SOC, SOC, Op_RegF,  5, Z_F5->as_VMReg());
+  reg_def Z_F5_H (SOC, SOC, Op_RegF, 99, Z_F5->as_VMReg()->next());
+  reg_def Z_F6   (SOC, SOC, Op_RegF,  6, Z_F6->as_VMReg());
+  reg_def Z_F6_H (SOC, SOC, Op_RegF, 99, Z_F6->as_VMReg()->next());
+  reg_def Z_F7   (SOC, SOC, Op_RegF,  7, Z_F7->as_VMReg());
+  reg_def Z_F7_H (SOC, SOC, Op_RegF, 99, Z_F7->as_VMReg()->next());
+  reg_def Z_F8   (SOC, SOE, Op_RegF,  8, Z_F8->as_VMReg());
+  reg_def Z_F8_H (SOC, SOE, Op_RegF, 99, Z_F8->as_VMReg()->next());
+  reg_def Z_F9   (SOC, SOE, Op_RegF,  9, Z_F9->as_VMReg());
+  reg_def Z_F9_H (SOC, SOE, Op_RegF, 99, Z_F9->as_VMReg()->next());
+  reg_def Z_F10  (SOC, SOE, Op_RegF, 10, Z_F10->as_VMReg());
+  reg_def Z_F10_H(SOC, SOE, Op_RegF, 99, Z_F10->as_VMReg()->next());
+  reg_def Z_F11  (SOC, SOE, Op_RegF, 11, Z_F11->as_VMReg());
+  reg_def Z_F11_H(SOC, SOE, Op_RegF, 99, Z_F11->as_VMReg()->next());
+  reg_def Z_F12  (SOC, SOE, Op_RegF, 12, Z_F12->as_VMReg());
+  reg_def Z_F12_H(SOC, SOE, Op_RegF, 99, Z_F12->as_VMReg()->next());
+  reg_def Z_F13  (SOC, SOE, Op_RegF, 13, Z_F13->as_VMReg());
+  reg_def Z_F13_H(SOC, SOE, Op_RegF, 99, Z_F13->as_VMReg()->next());
+  reg_def Z_F14  (SOC, SOE, Op_RegF, 14, Z_F14->as_VMReg());
+  reg_def Z_F14_H(SOC, SOE, Op_RegF, 99, Z_F14->as_VMReg()->next());
+  reg_def Z_F15  (SOC, SOE, Op_RegF, 15, Z_F15->as_VMReg());
+  reg_def Z_F15_H(SOC, SOE, Op_RegF, 99, Z_F15->as_VMReg()->next());
+
+
+  // Special Registers
+
+  // Condition Codes Flag Registers
+
+  // z/Architecture has the PSW (program status word) that contains
+  // (among other information) the condition code. We treat this
+  // part of the PSW as a condition register CR. It consists of 4
+  // bits. Floating point instructions influence the same condition register CR.
+
+  reg_def Z_CR(SOC, SOC, Op_RegFlags, 0, Z_CR->as_VMReg());   // volatile
+
+
+// Specify priority of register selection within phases of register
+// allocation. Highest priority is first. A useful heuristic is to
+// give registers a low priority when they are required by machine
+// instructions, and choose no-save registers before save-on-call, and
+// save-on-call before save-on-entry. Registers which participate in
+// fix calling sequences should come last. Registers which are used
+// as pairs must fall on an even boundary.
+
+// It's worth about 1% on SPEC geomean to get this right.
+
+// Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
+// in adGlobals_s390.hpp which defines the <register>_num values, e.g.
+// Z_R3_num. Therefore, Z_R3_num may not be (and in reality is not)
+// the same as Z_R3->encoding()! Furthermore, we cannot make any
+// assumptions on ordering, e.g. Z_R3_num may be less than Z_R2_num.
+// Additionally, the function
+//   static enum RC rc_class(OptoReg::Name reg)
+// maps a given <register>_num value to its chunk type (except for flags)
+// and its current implementation relies on chunk0 and chunk1 having a
+// size of 64 each.
+
+alloc_class chunk0(
+  // chunk0 contains *all* 32 integer registers halves.
+
+  // potential SOE regs
+  Z_R13,Z_R13_H,
+  Z_R12,Z_R12_H,
+  Z_R11,Z_R11_H,
+  Z_R10,Z_R10_H,
+
+  Z_R9,Z_R9_H,
+  Z_R8,Z_R8_H,
+  Z_R7,Z_R7_H,
+
+  Z_R1,Z_R1_H,
+  Z_R0,Z_R0_H,
+
+  // argument registers
+  Z_R6,Z_R6_H,
+  Z_R5,Z_R5_H,
+  Z_R4,Z_R4_H,
+  Z_R3,Z_R3_H,
+  Z_R2,Z_R2_H,
+
+  // special registers
+  Z_R14,Z_R14_H,
+  Z_R15,Z_R15_H
+);
+
+alloc_class chunk1(
+  // Chunk1 contains *all* 64 floating-point registers halves.
+
+  Z_F15,Z_F15_H,
+  Z_F14,Z_F14_H,
+  Z_F13,Z_F13_H,
+  Z_F12,Z_F12_H,
+  Z_F11,Z_F11_H,
+  Z_F10,Z_F10_H,
+  Z_F9,Z_F9_H,
+  Z_F8,Z_F8_H,
+  // scratch register
+  Z_F7,Z_F7_H,
+  Z_F5,Z_F5_H,
+  Z_F3,Z_F3_H,
+  Z_F1,Z_F1_H,
+  // argument registers
+  Z_F6,Z_F6_H,
+  Z_F4,Z_F4_H,
+  Z_F2,Z_F2_H,
+  Z_F0,Z_F0_H
+);
+
+alloc_class chunk2(
+  Z_CR
+);
+
+
+//-------Architecture Description Register Classes-----------------------
+
+// Several register classes are automatically defined based upon
+// information in this architecture description.
+
+// 1) reg_class inline_cache_reg           (as defined in frame section)
+// 2) reg_class compiler_method_oop_reg    (as defined in frame section)
+// 3) reg_class interpreter_method_oop_reg (as defined in frame section)
+// 4) reg_class stack_slots(/* one chunk of stack-based "registers" */)
+
+// Integer Register Classes
+reg_class z_int_reg(
+/*Z_R0*/              // R0
+/*Z_R1*/
+  Z_R2,
+  Z_R3,
+  Z_R4,
+  Z_R5,
+  Z_R6,
+  Z_R7,
+/*Z_R8,*/             // Z_thread
+  Z_R9,
+  Z_R10,
+  Z_R11,
+  Z_R12,
+  Z_R13
+/*Z_R14*/             // return_pc
+/*Z_R15*/             // SP
+);
+
+reg_class z_no_odd_int_reg(
+/*Z_R0*/              // R0
+/*Z_R1*/
+  Z_R2,
+  Z_R3,
+  Z_R4,
+/*Z_R5,*/             // odd part of fix register pair
+  Z_R6,
+  Z_R7,
+/*Z_R8,*/             // Z_thread
+  Z_R9,
+  Z_R10,
+  Z_R11,
+  Z_R12,
+  Z_R13
+/*Z_R14*/             // return_pc
+/*Z_R15*/             // SP
+);
+
+reg_class z_no_arg_int_reg(
+/*Z_R0*/              // R0
+/*Z_R1*/              // scratch
+/*Z_R2*/
+/*Z_R3*/
+/*Z_R4*/
+/*Z_R5*/
+/*Z_R6*/
+  Z_R7,
+/*Z_R8*/              // Z_thread
+  Z_R9,
+  Z_R10,
+  Z_R11,
+  Z_R12,
+  Z_R13
+/*Z_R14*/             // return_pc
+/*Z_R15*/             // SP
+);
+
+reg_class z_rarg1_int_reg(Z_R2);
+reg_class z_rarg2_int_reg(Z_R3);
+reg_class z_rarg3_int_reg(Z_R4);
+reg_class z_rarg4_int_reg(Z_R5);
+reg_class z_rarg5_int_reg(Z_R6);
+
+// Pointer Register Classes
+
+// 64-bit build means 64-bit pointers means hi/lo pairs.
+
+reg_class z_rarg5_ptrN_reg(Z_R6);
+
+reg_class z_rarg1_ptr_reg(Z_R2_H,Z_R2);
+reg_class z_rarg2_ptr_reg(Z_R3_H,Z_R3);
+reg_class z_rarg3_ptr_reg(Z_R4_H,Z_R4);
+reg_class z_rarg4_ptr_reg(Z_R5_H,Z_R5);
+reg_class z_rarg5_ptr_reg(Z_R6_H,Z_R6);
+reg_class z_thread_ptr_reg(Z_R8_H,Z_R8);
+
+reg_class z_ptr_reg(
+/*Z_R0_H,Z_R0*/     // R0
+/*Z_R1_H,Z_R1*/
+  Z_R2_H,Z_R2,
+  Z_R3_H,Z_R3,
+  Z_R4_H,Z_R4,
+  Z_R5_H,Z_R5,
+  Z_R6_H,Z_R6,
+  Z_R7_H,Z_R7,
+/*Z_R8_H,Z_R8,*/    // Z_thread
+  Z_R9_H,Z_R9,
+  Z_R10_H,Z_R10,
+  Z_R11_H,Z_R11,
+  Z_R12_H,Z_R12,
+  Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14*/   // return_pc
+/*Z_R15_H,Z_R15*/   // SP
+);
+
+reg_class z_lock_ptr_reg(
+/*Z_R0_H,Z_R0*/     // R0
+/*Z_R1_H,Z_R1*/
+  Z_R2_H,Z_R2,
+  Z_R3_H,Z_R3,
+  Z_R4_H,Z_R4,
+/*Z_R5_H,Z_R5,*/
+/*Z_R6_H,Z_R6,*/
+  Z_R7_H,Z_R7,
+/*Z_R8_H,Z_R8,*/    // Z_thread
+  Z_R9_H,Z_R9,
+  Z_R10_H,Z_R10,
+  Z_R11_H,Z_R11,
+  Z_R12_H,Z_R12,
+  Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14*/   // return_pc
+/*Z_R15_H,Z_R15*/   // SP
+);
+
+reg_class z_no_arg_ptr_reg(
+/*Z_R0_H,Z_R0*/        // R0
+/*Z_R1_H,Z_R1*/        // scratch
+/*Z_R2_H,Z_R2*/
+/*Z_R3_H,Z_R3*/
+/*Z_R4_H,Z_R4*/
+/*Z_R5_H,Z_R5*/
+/*Z_R6_H,Z_R6*/
+  Z_R7_H, Z_R7,
+/*Z_R8_H,Z_R8*/        // Z_thread
+  Z_R9_H,Z_R9,
+  Z_R10_H,Z_R10,
+  Z_R11_H,Z_R11,
+  Z_R12_H,Z_R12,
+  Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14*/      // return_pc
+/*Z_R15_H,Z_R15*/      // SP
+);
+
+// Special class for storeP instructions, which can store SP or RPC to
+// TLS. (Note: Do not generalize this to "any_reg". If you add
+// another register, such as FP, to this mask, the allocator may try
+// to put a temp in it.)
+// Register class for memory access base registers,
+// This class is a superset of z_ptr_reg including Z_thread.
+reg_class z_memory_ptr_reg(
+/*Z_R0_H,Z_R0*/     // R0
+/*Z_R1_H,Z_R1*/
+  Z_R2_H,Z_R2,
+  Z_R3_H,Z_R3,
+  Z_R4_H,Z_R4,
+  Z_R5_H,Z_R5,
+  Z_R6_H,Z_R6,
+  Z_R7_H,Z_R7,
+  Z_R8_H,Z_R8,      // Z_thread
+  Z_R9_H,Z_R9,
+  Z_R10_H,Z_R10,
+  Z_R11_H,Z_R11,
+  Z_R12_H,Z_R12,
+  Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14*/   // return_pc
+/*Z_R15_H,Z_R15*/   // SP
+);
+
+// Other special pointer regs.
+reg_class z_r1_regP(Z_R1_H,Z_R1);
+reg_class z_r9_regP(Z_R9_H,Z_R9);
+
+
+// Long Register Classes
+
+reg_class z_rarg1_long_reg(Z_R2_H,Z_R2);
+reg_class z_rarg2_long_reg(Z_R3_H,Z_R3);
+reg_class z_rarg3_long_reg(Z_R4_H,Z_R4);
+reg_class z_rarg4_long_reg(Z_R5_H,Z_R5);
+reg_class z_rarg5_long_reg(Z_R6_H,Z_R6);
+
+// Longs in 1 register. Aligned adjacent hi/lo pairs.
+reg_class z_long_reg(
+/*Z_R0_H,Z_R0*/     // R0
+/*Z_R1_H,Z_R1*/
+  Z_R2_H,Z_R2,
+  Z_R3_H,Z_R3,
+  Z_R4_H,Z_R4,
+  Z_R5_H,Z_R5,
+  Z_R6_H,Z_R6,
+  Z_R7_H,Z_R7,
+/*Z_R8_H,Z_R8,*/    // Z_thread
+  Z_R9_H,Z_R9,
+  Z_R10_H,Z_R10,
+  Z_R11_H,Z_R11,
+  Z_R12_H,Z_R12,
+  Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14,*/  // return_pc
+/*Z_R15_H,Z_R15*/   // SP
+);
+
+
+// Special Class for Condition Code Flags Register
+
+reg_class z_condition_reg(
+  Z_CR
+);
+
+// Scratch register for late profiling. Callee saved.
+reg_class z_rscratch2_bits64_reg(Z_R2_H, Z_R2);
+
+
+// Float Register Classes
+
+reg_class z_flt_reg(
+  Z_F0,
+/*Z_F1,*/ // scratch
+  Z_F2,
+  Z_F3,
+  Z_F4,
+  Z_F5,
+  Z_F6,
+  Z_F7,
+  Z_F8,
+  Z_F9,
+  Z_F10,
+  Z_F11,
+  Z_F12,
+  Z_F13,
+  Z_F14,
+  Z_F15
+);
+reg_class z_rscratch1_flt_reg(Z_F1);
+
+// Double precision float registers have virtual `high halves' that
+// are needed by the allocator.
+reg_class z_dbl_reg(
+  Z_F0,Z_F0_H,
+/*Z_F1,Z_F1_H,*/ // scratch
+  Z_F2,Z_F2_H,
+  Z_F3,Z_F3_H,
+  Z_F4,Z_F4_H,
+  Z_F5,Z_F5_H,
+  Z_F6,Z_F6_H,
+  Z_F7,Z_F7_H,
+  Z_F8,Z_F8_H,
+  Z_F9,Z_F9_H,
+  Z_F10,Z_F10_H,
+  Z_F11,Z_F11_H,
+  Z_F12,Z_F12_H,
+  Z_F13,Z_F13_H,
+  Z_F14,Z_F14_H,
+  Z_F15,Z_F15_H
+);
+reg_class z_rscratch1_dbl_reg(Z_F1,Z_F1_H);
+
+%}
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define 'name --> value' mappings to inform the ADLC of an integer valued name.
+// Current support includes integer values in the range [0, 0x7FFFFFFF].
+// Format:
+//        int_def  <name>         (<int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+//        #define  <name>   (<expression>)
+//        // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//        assert(<name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+definitions %{
+  // The default cost (of an ALU instruction).
+  int_def DEFAULT_COST      (   100,     100);
+  int_def DEFAULT_COST_LOW  (    80,      80);
+  int_def DEFAULT_COST_HIGH (   120,     120);
+  int_def HUGE_COST         (1000000, 1000000);
+
+  // Put an advantage on REG_MEM vs. MEM+REG_REG operations.
+  int_def ALU_REG_COST      (   100, DEFAULT_COST);
+  int_def ALU_MEMORY_COST   (   150,          150);
+
+  // Memory refs are twice as expensive as run-of-the-mill.
+  int_def MEMORY_REF_COST_HI (   220, 2 * DEFAULT_COST+20);
+  int_def MEMORY_REF_COST    (   200, 2 * DEFAULT_COST);
+  int_def MEMORY_REF_COST_LO (   180, 2 * DEFAULT_COST-20);
+
+  // Branches are even more expensive.
+  int_def BRANCH_COST       (   300, DEFAULT_COST * 3);
+  int_def CALL_COST         (   300, DEFAULT_COST * 3);
+%}
+
+source %{
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str)
+#define BIND(label)        __ bind(label)
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define BIND(label)        __ bind(label); BLOCK_COMMENT(#label ":")
+#endif
+
+#define __ _masm.
+
+#define Z_DISP_SIZE Immediate::is_uimm12((long)opnd_array(1)->disp(ra_,this,2)) ?  4 : 6
+#define Z_DISP3_SIZE 6
+
+// Tertiary op of a LoadP or StoreP encoding.
+#define REGP_OP true
+
+// Given a register encoding, produce an Integer Register object.
+static Register reg_to_register_object(int register_encoding);
+
+// ****************************************************************************
+
+// REQUIRED FUNCTIONALITY
+
+// !!!!! Special hack to get all type of calls to specify the byte offset
+//       from the start of the call to the point where the return address
+//       will point.
+
+int MachCallStaticJavaNode::ret_addr_offset() {
+  if (_method) {
+    return 8;
+  } else {
+    return MacroAssembler::call_far_patchable_ret_addr_offset();
+  }
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset() {
+  // Consider size of receiver type profiling (C2 tiers).
+  int profile_receiver_type_size = 0;
+
+  int vtable_index = this->_vtable_index;
+  if (vtable_index == -4) {
+    return 14 + profile_receiver_type_size;
+  } else {
+    assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
+    return 36 + profile_receiver_type_size;
+  }
+}
+
+int MachCallRuntimeNode::ret_addr_offset() {
+  return 12 + MacroAssembler::call_far_patchable_ret_addr_offset();
+}
+
+// Compute padding required for nodes which need alignment
+//
+// The addresses of the call instructions needs to be 4-byte aligned to
+// ensure that they don't span a cache line so that they are atomically patchable.
+// The actual calls get emitted at different offsets within the node emitters.
+// ins_alignment needs to be set to 2 which means that up to 1 nop may get inserted.
+
+int CallStaticJavaDirect_dynTOCNode::compute_padding(int current_offset) const {
+  return (0 - current_offset) & 2;
+}
+
+int CallDynamicJavaDirect_dynTOCNode::compute_padding(int current_offset) const {
+  return (6 - current_offset) & 2;
+}
+
+int CallRuntimeDirectNode::compute_padding(int current_offset) const {
+  return (12 - current_offset) & 2;
+}
+
+int CallLeafDirectNode::compute_padding(int current_offset) const {
+  return (12 - current_offset) & 2;
+}
+
+int CallLeafNoFPDirectNode::compute_padding(int current_offset) const {
+  return (12 - current_offset) & 2;
+}
+
+// Indicate if the safepoint node needs the polling page as an input.
+// Since z/Architecture does not have absolute addressing, it does.
+bool SafePointNode::needs_polling_address_input() {
+  return true;
+}
+
+void emit_nop(CodeBuffer &cbuf) {
+  MacroAssembler _masm(&cbuf);
+  __ z_nop();
+}
+
+// Emit an interrupt that is caught by the debugger (for debugging compiler).
+void emit_break(CodeBuffer &cbuf) {
+  MacroAssembler _masm(&cbuf);
+  __ z_illtrap();
+}
+
+#if !defined(PRODUCT)
+void MachBreakpointNode::format(PhaseRegAlloc *, outputStream *os) const {
+  os->print("TA");
+}
+#endif
+
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  emit_break(cbuf);
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+static inline void z_emit16(CodeBuffer &cbuf, long value) {
+  // 32bit instructions may become sign extended.
+  assert(value >= 0, "unintended sign extension (int->long)");
+  assert(value < (1L << 16), "instruction too large");
+  *((unsigned short*)(cbuf.insts_end())) = (unsigned short)value;
+  cbuf.set_insts_end(cbuf.insts_end() + sizeof(unsigned short));
+}
+
+static inline void z_emit32(CodeBuffer &cbuf, long value) {
+  // 32bit instructions may become sign extended.
+  assert(value < (1L << 32), "instruction too large");
+  *((unsigned int*)(cbuf.insts_end())) = (unsigned int)value;
+  cbuf.set_insts_end(cbuf.insts_end() + sizeof(unsigned int));
+}
+
+static inline void z_emit48(CodeBuffer &cbuf, long value) {
+  // 32bit instructions may become sign extended.
+  assert(value >= 0, "unintended sign extension (int->long)");
+  assert(value < (1L << 48), "instruction too large");
+  value = value<<16;
+  memcpy(cbuf.insts_end(), (unsigned char*)&value, 6);
+  cbuf.set_insts_end(cbuf.insts_end() + 6);
+}
+
+static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) {
+  if (value < 0) {
+    // There obviously has been an unintended sign extension (int->long). Revert it.
+    value = (long)((unsigned long)((unsigned int)value));
+  }
+
+  if (value < (1L << 16)) { // 2-byte instruction
+    z_emit16(cbuf, value);
+    return 2;
+  }
+
+  if (value < (1L << 32)) { // 4-byte instruction, might be unaligned store
+    z_emit32(cbuf, value);
+    return 4;
+  }
+
+  // 6-byte instruction, probably unaligned store.
+  z_emit48(cbuf, value);
+  return 6;
+}
+
+// Check effective address (at runtime) for required alignment.
+static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index, Register base, int alignment) {
+  MacroAssembler _masm(&cbuf);
+
+  __ z_lay(Z_R0, disp, index, base);
+  __ z_nill(Z_R0, alignment-1);
+  __ z_brc(Assembler::bcondEqual, +3);
+  __ z_illtrap();
+}
+
+int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
+                    PhaseRegAlloc* ra_, bool is_native_call = false) {
+  __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
+  address old_mark = __ inst_mark();
+  unsigned int start_off = __ offset();
+
+  if (is_native_call) {
+    ShouldNotReachHere();
+  }
+
+  if (rtype == relocInfo::runtime_call_w_cp_type) {
+    assert((__ offset() & 2) == 0, "misaligned emit_call_reloc");
+    address call_addr = __ call_c_opt((address)entry_point);
+    if (call_addr == NULL) {
+      Compile::current()->env()->record_out_of_memory_failure();
+      return -1;
+    }
+  } else {
+    assert(rtype == relocInfo::none || rtype == relocInfo::opt_virtual_call_type ||
+           rtype == relocInfo::static_call_type, "unexpected rtype");
+    __ relocate(rtype);
+    // BRASL must be prepended with a nop to identify it in the instruction stream.
+    __ z_nop();
+    __ z_brasl(Z_R14, (address)entry_point);
+  }
+
+  unsigned int ret_off = __ offset();
+
+  return (ret_off - start_off);
+}
+
+// Emit a relocated Java call (opt_virtual or static) to `entry_point' and
+// return the number of bytes emitted.
+// Fix: removed the unused local `old_mark' (set_inst_mark() already
+// records the mark; the saved value was never read).
+static int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
+  __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
+  unsigned int start_off = __ offset();
+
+  relocInfo::relocType rtype = rspec.type();
+  assert(rtype == relocInfo::opt_virtual_call_type || rtype == relocInfo::static_call_type,
+         "unexpected rtype");
+
+  __ relocate(rspec);
+  // BRASL must be prepended with a nop to identify it in the instruction stream.
+  __ z_nop();
+  __ z_brasl(Z_R14, (address)entry_point);
+
+  unsigned int ret_off = __ offset();
+
+  return (ret_off - start_off);
+}
+
+//=============================================================================
+
+// The constant table base lives in any pointer register.
+const RegMask& MachConstantBaseNode::_out_RegMask = _Z_PTR_REG_mask;
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  return 0;  // absolute addressing, no offset
+}
+
+// No postalloc expansion needed for the constant base on this platform.
+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
+void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
+  ShouldNotReachHere();
+}
+
+// Even with PC-relative TOC addressing, we still need this node.
+// Float loads/stores do not support PC-relative addresses.
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  MacroAssembler _masm(&cbuf);
+  // Load the TOC into the register the allocator assigned to this node.
+  Register Rtoc = as_Register(ra_->get_encode(this));
+  __ load_toc(Rtoc);
+}
+
+// Fixed size; must match what emit() above produces (load_toc -> LARL).
+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
+  // PCrelative TOC access.
+  return 6;   // sizeof(LARL)
+}
+
+#if !defined(PRODUCT)
+// Debug-only disassembly-style printout of the node.
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+  Register r = as_Register(ra_->get_encode(this));
+  st->print("LARL    %s,&constant_pool # MachConstantBaseNode", r->name());
+}
+#endif
+
+//=============================================================================
+
+#if !defined(PRODUCT)
+// Debug-only printout mirroring the code sequence MachPrologNode::emit()
+// generates (nops, thread verification, stack bang, push_frame).
+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  Compile* C = ra_->C;
+  st->print_cr("--- MachPrologNode ---");
+  st->print("\t");
+  for (int i = 0; i < OptoPrologueNops; i++) {
+    st->print_cr("NOP"); st->print("\t");
+  }
+
+  if (VerifyThread) {
+    st->print_cr("Verify_Thread");
+    st->print("\t");
+  }
+
+  long framesize = C->frame_size_in_bytes();
+  int bangsize   = C->bang_size_in_bytes();
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them. But be
+  // careful, because some VM calls (such as call site linkage) can
+  // use several kilobytes of stack. But the stack safety zone should
+  // account for that. See bugs 4446381, 4468289, 4497237.
+  if (C->need_stack_bang(bangsize) && UseStackBanging) {
+    st->print_cr("# stack bang"); st->print("\t");
+  }
+  st->print_cr("push_frame %d", (int)-framesize);
+  st->print("\t");
+}
+#endif
+
+// Emit the method prolog: optional stack bang, save return pc, push the
+// frame, and (if present) initialize the constant table base offset.
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  MacroAssembler _masm(&cbuf);
+
+  __ verify_thread();
+
+  size_t framesize = C->frame_size_in_bytes();
+  size_t bangsize  = C->bang_size_in_bytes();
+
+  assert(framesize % wordSize == 0, "must preserve wordSize alignment");
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them. But be
+  // careful, because some VM calls (such as call site linkage) can
+  // use several kilobytes of stack. But the stack safety zone should
+  // account for that. See bugs 4446381, 4468289, 4497237.
+  if (C->need_stack_bang(bangsize) && UseStackBanging) {
+    __ generate_stack_overflow_check(bangsize);
+  }
+
+  assert(Immediate::is_uimm32((long)framesize), "to do: choose suitable types!");
+  // Save return pc before the frame is pushed.
+  __ save_return_pc();
+
+  // The z/Architecture abi is already accounted for in `framesize' via the
+  // 'out_preserve_stack_slots' declaration.
+  __ push_frame((unsigned int)framesize/*includes JIT ABI*/);
+
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    Compile::ConstantTable& constant_table = C->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
+}
+
+uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
+  // Variable size. Determine dynamically.
+  return MachNode::size(ra_);
+}
+
+int MachPrologNode::reloc() const {
+  // Return number of relocatable values contained in this instruction.
+  return 1; // One reloc entry for load_const(toc).
+}
+
+//=============================================================================
+
+#if !defined(PRODUCT)
+// Debug-only printout mirroring MachEpilogNode::emit() below.
+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+  os->print_cr("epilog");
+  os->print("\t");
+  if (do_polling() && ra_->C->is_method_compilation()) {
+    os->print_cr("load_from_polling_page Z_R1_scratch");
+    os->print("\t");
+  }
+}
+#endif
+
+// Emit the method epilog: pop the frame, restore the return pc, and touch
+// the safepoint polling page if this compile does return polling.
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  Compile* C = ra_->C;
+  __ verify_thread();
+
+  // If this does safepoint polling, then do it here.
+  bool need_polling = do_polling() && C->is_method_compilation();
+
+  // Touch the polling page.
+  // Part 1: get the page's address.
+  if (need_polling) {
+    AddressLiteral pp(os::get_polling_page());
+    __ load_const_optimized(Z_R1_scratch, pp);
+  }
+
+  // Pop frame, restore return_pc, and all stuff needed by interpreter.
+  // Pop frame by add instead of load (a penny saved is a penny got :-).
+  int frame_size_in_bytes = Assembler::align((C->frame_slots() << LogBytesPerInt), frame::alignment_in_bytes);
+  int retPC_offset        = frame_size_in_bytes + _z_abi16(return_pc);
+  if (Displacement::is_validDisp(retPC_offset)) {
+    // Offset fits in a displacement: load return pc first, then pop the frame.
+    __ z_lg(Z_R14, retPC_offset, Z_SP);
+    __ add2reg(Z_SP, frame_size_in_bytes);
+  } else {
+    // Offset too large: pop the frame first, then restore the return pc.
+    __ add2reg(Z_SP, frame_size_in_bytes);
+    __ restore_return_pc();
+  }
+
+  // Touch the polling page,
+  // part 2: touch the page now.
+  if (need_polling) {
+    // We need to mark the code position where the load from the safepoint
+    // polling page was emitted as relocInfo::poll_return_type here.
+    __ relocate(relocInfo::poll_return_type);
+    __ load_from_polling_page(Z_R1_scratch);
+  }
+}
+
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+  // variable size. determine dynamically.
+  return MachNode::size(ra_);
+}
+
+int MachEpilogNode::reloc() const {
+  // Return number of relocatable values contained in this instruction.
+  return 1; // One for load_from_polling_page.
+}
+
+const Pipeline * MachEpilogNode::pipeline() const {
+  return MachNode::pipeline_class();
+}
+
+// Offset (within this node's code) of the safepoint poll instruction.
+int MachEpilogNode::safepoint_offset() const {
+  assert(do_polling(), "no return for this epilog node");
+  return 0;
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float, rc_stack.
+// Figure out which register class each belongs in: rc_int, rc_float, rc_stack.
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+
+static enum RC rc_class(OptoReg::Name reg) {
+  // Return the register class for the given register. The given register
+  // reg is a <register>_num value, which is an index into the MachRegisterNumbers
+  // enumeration in adGlobals_s390.hpp.
+
+  if (reg == OptoReg::Bad) {
+    return rc_bad;
+  }
+
+  // We have 32 integer register halves, starting at index 0.
+  if (reg < 32) {
+    return rc_int;
+  }
+
+  // We have 32 floating-point register halves, starting at index 32.
+  if (reg < 32+32) {
+    return rc_float;
+  }
+
+  // Between float regs & stack are the flags regs.
+  assert(reg >= OptoReg::stack0(), "blow up if spilling flags");
+  return rc_stack;
+}
+
+// Returns size as obtained from z_emit_instr.
+static unsigned int z_ld_st_helper(CodeBuffer *cbuf, const char *op_str, unsigned long opcode,
+                                   int reg, int offset, bool do_print, outputStream *os) {
+
+  if (cbuf) {
+    if (opcode > (1L<<32)) {
+      return z_emit_inst(*cbuf, opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 48) |
+                         Assembler::simm20(offset) | Assembler::reg(Z_R0, 12, 48) | Assembler::regz(Z_SP, 16, 48));
+    } else {
+      return z_emit_inst(*cbuf, opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 32) |
+                         Assembler::uimm12(offset, 20, 32) | Assembler::reg(Z_R0, 12, 32) | Assembler::regz(Z_SP, 16, 32));
+    }
+  }
+
+#if !defined(PRODUCT)
+  if (do_print) {
+    os->print("%s    %s,#%d[,SP]\t # MachCopy spill code",op_str, Matcher::regName[reg], offset);
+  }
+#endif
+  return (opcode > (1L << 32)) ? 6 : 4;
+}
+
+static unsigned int z_mvc_helper(CodeBuffer *cbuf, int len, int dst_off, int src_off, bool do_print, outputStream *os) {
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    __ z_mvc(dst_off, len-1, Z_SP, src_off, Z_SP);
+  }
+
+#if !defined(PRODUCT)
+  else if (do_print) {
+    os->print("MVC     %d(%d,SP),%d(SP)\t # MachCopy spill code",dst_off, len, src_off);
+  }
+#endif
+
+  return 6;
+}
+
+// Emit (cbuf != NULL), size (do_size), or print spill/copy code moving a
+// value between the register/stack locations the allocator assigned.
+// Returns the number of code bytes emitted (or that would be emitted).
+// NOTE: the returned sizes must agree exactly with the emitted code, since
+// this same routine backs both emit() and size().
+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *os) const {
+  // Get registers to move.
+  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
+  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
+  OptoReg::Name dst_hi = ra_->get_reg_second(this);
+  OptoReg::Name dst_lo = ra_->get_reg_first(this);
+
+  enum RC src_hi_rc = rc_class(src_hi);
+  enum RC src_lo_rc = rc_class(src_lo);
+  enum RC dst_hi_rc = rc_class(dst_hi);
+  enum RC dst_lo_rc = rc_class(dst_lo);
+
+  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
+  // A valid high half means we are moving a 64-bit quantity.
+  bool is64 = (src_hi_rc != rc_bad);
+  assert(!is64 ||
+         ((src_lo&1) == 0 && src_lo+1 == src_hi && (dst_lo&1) == 0 && dst_lo+1 == dst_hi),
+         "expected aligned-adjacent pairs");
+
+  // Generate spill code!
+
+  if (src_lo == dst_lo && src_hi == dst_hi) {
+    return 0;            // Self copy, no move.
+  }
+
+  int  src_offset = ra_->reg2offset(src_lo);
+  int  dst_offset = ra_->reg2offset(dst_lo);
+  bool print = !do_size;
+  // Do the offsets fit in short (12-bit) displacements?
+  bool src12 = Immediate::is_uimm12(src_offset);
+  bool dst12 = Immediate::is_uimm12(dst_offset);
+
+  const char   *mnemo = NULL;
+  unsigned long opc = 0;
+
+  // Memory->Memory Spill. Use Z_R0 to hold the value.
+  if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+
+    assert(!is64 || (src_hi_rc==rc_stack && dst_hi_rc==rc_stack),
+           "expected same type of move for high parts");
+
+    // Both offsets short: a single MVC does the copy without a scratch reg.
+    if (src12 && dst12) {
+      return z_mvc_helper(cbuf, is64 ? 8 : 4, dst_offset, src_offset, print, os);
+    }
+
+    int r0 = Z_R0_num;
+    if (is64) {
+      return z_ld_st_helper(cbuf, "LG  ", LG_ZOPC, r0, src_offset, print, os) +
+             z_ld_st_helper(cbuf, "STG ", STG_ZOPC, r0, dst_offset, print, os);
+    }
+
+    return z_ld_st_helper(cbuf, "LY   ", LY_ZOPC, r0, src_offset, print, os) +
+           z_ld_st_helper(cbuf, "STY  ", STY_ZOPC, r0, dst_offset, print, os);
+  }
+
+  // Check for float->int copy. Requires a trip through memory.
+  if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
+    Unimplemented();  // Unsafe, do not remove!
+  }
+
+  // Check for integer reg-reg copy.
+  if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
+    if (cbuf) {
+      MacroAssembler _masm(cbuf);
+      Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
+      Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
+      __ z_lgr(Rdst, Rsrc);
+      return 4;   // LGR is 4 bytes.
+    }
+#if !defined(PRODUCT)
+    // else
+    if (print) {
+      os->print("LGR     %s,%s\t # MachCopy spill code", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+    }
+#endif
+    return 4;
+  }
+
+  // Check for integer store.
+  if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
+    assert(!is64 || (src_hi_rc==rc_int && dst_hi_rc==rc_stack),
+           "expected same type of move for high parts");
+
+    if (is64) {
+      return z_ld_st_helper(cbuf, "STG ", STG_ZOPC, src_lo, dst_offset, print, os);
+    }
+
+    // else
+    mnemo = dst12 ? "ST  " : "STY ";
+    opc = dst12 ? ST_ZOPC : STY_ZOPC;
+
+    return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+  }
+
+  // Check for integer load
+  // Always load cOops zero-extended. That doesn't hurt int loads.
+  if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
+
+    assert(!is64 || (dst_hi_rc==rc_int && src_hi_rc==rc_stack),
+           "expected same type of move for high parts");
+
+    mnemo = is64 ? "LG  " : "LLGF";
+    opc = is64 ? LG_ZOPC : LLGF_ZOPC;
+
+    return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+  }
+
+  // Check for float reg-reg copy.
+  if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
+    if (cbuf) {
+      MacroAssembler _masm(cbuf);
+      FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
+      FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
+      __ z_ldr(Rdst, Rsrc);
+      return 2;   // LDR is 2 bytes.
+    }
+#if !defined(PRODUCT)
+    // else
+    if (print) {
+      os->print("LDR      %s,%s\t # MachCopy spill code", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+    }
+#endif
+    return 2;
+  }
+
+  // Check for float store.
+  if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
+    assert(!is64 || (src_hi_rc==rc_float && dst_hi_rc==rc_stack),
+           "expected same type of move for high parts");
+
+    if (is64) {
+      mnemo = dst12 ? "STD  " : "STDY ";
+      opc = dst12 ? STD_ZOPC : STDY_ZOPC;
+      return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+    }
+    // else
+
+    mnemo = dst12 ? "STE  " : "STEY ";
+    opc = dst12 ? STE_ZOPC : STEY_ZOPC;
+    return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+  }
+
+  // Check for float load.
+  if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
+    assert(!is64 || (dst_hi_rc==rc_float && src_hi_rc==rc_stack),
+           "expected same type of move for high parts");
+
+    if (is64) {
+      mnemo = src12 ? "LD   " : "LDY  ";
+      opc = src12 ? LD_ZOPC : LDY_ZOPC;
+      return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+    }
+    // else
+
+    mnemo = src12 ? "LE   " : "LEY  ";
+    opc = src12 ? LE_ZOPC : LEY_ZOPC;
+    return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+  }
+
+  // --------------------------------------------------------------------
+  // Check for hi bits still needing moving. Only happens for misaligned
+  // arguments to native calls.
+  if (src_hi == dst_hi) {
+    return 0;               // Self copy, no move.
+  }
+
+  assert(is64 && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
+  Unimplemented();  // Unsafe, do not remove!
+
+  return 0; // never reached, but make the compiler shut up!
+}
+
+#if !defined(PRODUCT)
+// Debug-only printout: reuse implementation() in print mode when register
+// assignments are available; otherwise just show the node's inputs.
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+  if (ra_ && ra_->node_regs_max_index() > 0) {
+    implementation(NULL, ra_, false, os);
+  } else {
+    if (req() == 2 && in(1)) {
+      os->print("N%d = N%d\n", _idx, in(1)->_idx);
+    } else {
+      const char *c = "(";
+      os->print("N%d = ", _idx);
+      for (uint i = 1; i < req(); ++i) {
+        os->print("%sN%d", c, in(i)->_idx);
+        c = ", ";
+      }
+      os->print(")");
+    }
+  }
+}
+#endif
+
+// Emit the spill/copy code.
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  implementation(&cbuf, ra_, false, NULL);
+}
+
+// Size of the spill/copy code; implementation() in sizing mode.
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+  return implementation(NULL, ra_, true, NULL);
+}
+
+//=============================================================================
+
+#if !defined(PRODUCT)
+void MachNopNode::format(PhaseRegAlloc *, outputStream *os) const {
+  os->print("NOP     # pad for alignment (%d nops, %d bytes)", _count, _count*MacroAssembler::nop_size());
+}
+#endif
+
+// Emit _count 2-byte nops (alignment padding).
+void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  // NOTE(review): the tty->print calls below are a low-buffer-space
+  // diagnostic that is active even in product builds (only suppressed
+  // during scratch-buffer sizing); presumably intentional, but confirm.
+  int rem_space = 0;
+  if (!(ra_->C->in_scratch_emit_size())) {
+    rem_space = cbuf.insts()->remaining();
+    if (rem_space <= _count*2 + 8) {
+      tty->print("NopNode: _count = %3.3d, remaining space before = %d", _count, rem_space);
+    }
+  }
+
+  for (int i = 0; i < _count; i++) {
+    __ z_nop();
+  }
+
+  if (!(ra_->C->in_scratch_emit_size())) {
+    if (rem_space <= _count*2 + 8) {
+      int rem_space2 = cbuf.insts()->remaining();
+      tty->print_cr(", after = %d", rem_space2);
+    }
+  }
+}
+
+// Must match emit(): each z_nop() is 2 bytes.
+uint MachNopNode::size(PhaseRegAlloc *ra_) const {
+   return 2 * _count;
+}
+
+#if !defined(PRODUCT)
+// Debug-only printout of the box-lock address computation (reg = SP + offset).
+// Fix: the original computed ra_->reg2offset(...) unconditionally, i.e.
+// dereferenced ra_ BEFORE the (ra_ && ...) NULL check below, crashing when
+// called with ra_ == NULL. Dereference only after the check.
+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+  if (ra_ != NULL) {
+    int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+    if (ra_->node_regs_max_index() > 0) {
+      int reg = ra_->get_reg_first(this);
+      os->print("ADDHI  %s, SP, %d\t//box node", Matcher::regName[reg], offset);
+    } else {
+      os->print("ADDHI  N%d = SP + %d\t// box node", _idx, offset);
+    }
+  } else {
+    // No register allocation info: the stack offset cannot be computed.
+    os->print("ADDHI  N%d = SP + ?\t// box node", _idx);
+  }
+}
+#endif
+
+// Take care of the size function, if you make changes here!
+// Emit: reg = SP + offset (address of the on-stack lock box) via LAY.
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  int reg = ra_->get_encode(this);
+  __ z_lay(as_Register(reg), offset, Z_SP);
+}
+
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_)
+  return 6;   // Must match emit() above: LAY is 6 bytes.
+}
+
+ %} // end source section
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source_hpp %{
+
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ }% and source_hpp %{ }% freely as needed.
+
+//--------------------------------------------------------------
+// Used for optimization in Compile::Shorten_branches
+//--------------------------------------------------------------
+
+// Helper queried by the shared C2 output phase; s390 needs no call
+// trampoline stubs, so both sizes are zero.
+class CallStubImpl {
+ public:
+
+  // call trampolines
+  // Size of call trampoline stub. For add'l comments, see size_java_to_interp().
+  static uint size_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+
+  // call trampolines
+  // Number of relocations needed by a call trampoline stub.
+  static uint reloc_call_trampoline() {
+    return 0; // No call trampolines on this platform.
+  }
+};
+
+%} // end source_hpp section
+
+source %{
+
+#if !defined(PRODUCT)
+// Debug-only printout of the unverified entry point code sequence.
+void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+  os->print_cr("---- MachUEPNode ----");
+  os->print_cr("\tTA");
+  os->print_cr("\tload_const Z_R1, SharedRuntime::get_ic_miss_stub()");
+  os->print_cr("\tBR(Z_R1)");
+  os->print_cr("\tTA  # pad with illtraps");
+  os->print_cr("\t...");
+  os->print_cr("\tTA");
+  os->print_cr("\tLTGR    Z_R2, Z_R2");
+  os->print_cr("\tBRU     ic_miss");
+}
+#endif
+
+// Emit the unverified entry point: check that the inline cache klass
+// matches the receiver's klass; on mismatch jump to the ic_miss stub.
+void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  const int ic_miss_offset = 2;
+
+  // Inline_cache contains a klass.
+  Register ic_klass = as_Register(Matcher::inline_cache_reg_encode());
+  // ARG1 is the receiver oop.
+  Register R2_receiver = Z_ARG1;
+  int      klass_offset = oopDesc::klass_offset_in_bytes();
+  AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub());
+  Register R1_ic_miss_stub_addr = Z_R1_scratch;
+
+  // Null check of receiver.
+  // This is the null check of the receiver that actually should be
+  // done in the caller. It's here because in case of implicit null
+  // checks we get it for free.
+  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()),
+         "second word in oop should not require explicit null check.");
+  if (!ImplicitNullChecks) {
+    Label valid;
+    if (VM_Version::has_CompareBranch()) {
+      // Compare-and-branch fuses the null test and branch into one insn.
+      __ z_cgij(R2_receiver, 0, Assembler::bcondNotEqual, valid);
+    } else {
+      __ z_ltgr(R2_receiver, R2_receiver);
+      __ z_bre(valid);
+    }
+    // The ic_miss_stub will handle the null pointer exception.
+    __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
+    __ z_br(R1_ic_miss_stub_addr);
+    __ bind(valid);
+  }
+
+  // Check whether this method is the proper implementation for the class of
+  // the receiver (ic miss check).
+  {
+    Label valid;
+    // Compare cached class against klass from receiver.
+    // This also does an implicit null check!
+    __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
+    __ z_bre(valid);
+    // The inline cache points to the wrong method. Call the
+    // ic_miss_stub to find the proper method.
+    __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
+    __ z_br(R1_ic_miss_stub_addr);
+    __ bind(valid);
+  }
+
+}
+
+uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
+  // Determine size dynamically.
+  return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+%} // interrupt source section
+
+source_hpp %{ // Header information of the source block.
+
+// Platform hooks for the exception and deopt handler stubs appended after
+// the method's code (emitters defined in the following source section).
+class HandlerImpl {
+ public:
+
+  static int emit_exception_handler(CodeBuffer &cbuf);
+  static int emit_deopt_handler(CodeBuffer& cbuf);
+
+  static uint size_exception_handler() {
+    return NativeJump::max_instruction_size();
+  }
+
+  // Must be the exact size of the emitted deopt handler (see the
+  // corresponding assert in emit_deopt_handler()).
+  static uint size_deopt_handler() {
+    return NativeCall::max_instruction_size();
+  }
+};
+
+%} // end source_hpp section
+
+source %{
+
+// This exception handler code snippet is placed after the method's
+// code. It is the return point if an exception occurred. it jumps to
+// the exception blob.
+//
+// If the method gets deoptimized, the method and this code snippet
+// get patched.
+//
+// 1) Trampoline code gets patched into the end of this exception
+//   handler. the trampoline code jumps to the deoptimization blob.
+//
+// 2) The return address in the method's code will get patched such
+//   that it jumps to the trampoline.
+//
+// 3) The handler will get patched such that it does not jump to the
+//   exception blob, but to an entry in the deoptimization blob being
+//   aware of the exception.
+// Emit the exception handler stub; returns its offset in the stub section,
+// or 0 if the stub could not be allocated (CodeBuffer::expand failed).
+int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
+  Register temp_reg = Z_R1;
+  MacroAssembler _masm(&cbuf);
+
+  address base = __ start_a_stub(size_exception_handler());
+  if (base == NULL) {
+    return 0;          // CodeBuffer::expand failed
+  }
+
+  int offset = __ offset();
+  // Use unconditional pc-relative jump with 32-bit range here.
+  __ load_const_optimized(temp_reg, (address)OptoRuntime::exception_blob()->content_begin());
+  __ z_br(temp_reg);
+
+  // Stub may be shorter than the reserved size, but never longer.
+  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+
+  __ end_a_stub();
+
+  return offset;
+}
+
+// Emit deopt handler code.
+// Returns the stub's offset, or 0 on allocation failure.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+  MacroAssembler _masm(&cbuf);
+  address        base = __ start_a_stub(size_deopt_handler());
+
+  if (base == NULL) {
+    return 0;  // CodeBuffer::expand failed
+  }
+
+  int offset = __ offset();
+
+  // Size_deopt_handler() must be exact on zarch, so for simplicity
+  // we do not use load_const_opt here.
+  __ load_const(Z_R1, SharedRuntime::deopt_blob()->unpack());
+  __ call(Z_R1);
+  assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
+
+  __ end_a_stub();
+  return offset;
+}
+
+//=============================================================================
+
+
+// Given a register encoding, produce an Integer Register object.
+// Given a register encoding, produce an Integer Register object.
+static Register reg_to_register_object(int register_encoding) {
+  assert(Z_R12->encoding() == Z_R12_enc, "wrong coding");
+  return as_Register(register_encoding);
+}
+
+// Is the ideal opcode supported on this platform/configuration?
+// Rules disabled here must also be guarded by matching predicates in
+// the instruct definitions.
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode)) return false;
+
+  switch (opcode) {
+    case Op_CountLeadingZerosI:
+    case Op_CountLeadingZerosL:
+    case Op_CountTrailingZerosI:
+    case Op_CountTrailingZerosL:
+      // Implementation requires FLOGR instruction.
+      return UseCountLeadingZerosInstruction;
+
+    case Op_ReverseBytesI:
+    case Op_ReverseBytesL:
+      return UseByteReverseInstruction;
+
+    // PopCount supported by H/W from z/Architecture G5 (z196) on.
+    case Op_PopCountI:
+    case Op_PopCountL:
+      return UsePopCountInstruction && VM_Version::has_PopCount();
+
+    case Op_StrComp:
+      return SpecialStringCompareTo;
+    case Op_StrEquals:
+      return SpecialStringEquals;
+    case Op_StrIndexOf:
+    case Op_StrIndexOfChar:
+      return SpecialStringIndexOf;
+
+    case Op_GetAndAddI:
+    case Op_GetAndAddL:
+      return true;
+      // return VM_Version::has_AtomicMemWithImmALUOps();
+    case Op_GetAndSetI:
+    case Op_GetAndSetL:
+    case Op_GetAndSetP:
+    case Op_GetAndSetN:
+      return true;  // General CAS implementation, always available.
+
+    default:
+      return true;  // Per default match rules are supported.
+                    // BUT: make sure match rule is not disabled by a false predicate!
+  }
+
+  // Unreachable: every switch path above returns (kept to satisfy
+  // compilers that require a return at the end of the function).
+  return true;  // Per default match rules are supported.
+                // BUT: make sure match rule is not disabled by a false predicate!
+}
+
+// Vector variant of the above; no vector-specific restrictions yet.
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // TODO
+  // Identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
+// Not used on s390; traps in debug builds (return only for the compiler).
+int Matcher::regnum_to_fpu_offset(int regnum) {
+  ShouldNotReachHere();
+  return regnum - 32; // The FP registers are in the second chunk.
+}
+
+const bool Matcher::has_predicated_vectors(void) {
+  return false;
+}
+
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  return default_pressure_threshold;
+}
+
+const bool Matcher::convL2FSupported(void) {
+  return true; // False means that conversion is done by runtime call.
+}
+
+//----------SUPERWORD HELPERS----------------------------------------
+
+// Vector width in bytes.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  assert(MaxVectorSize == 8, "");
+  return 8;
+}
+
+// Vector ideal reg. 8-byte vectors live in long registers.
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize == 8 && size == 8, "");
+  return Op_RegL;
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+
+const int Matcher::min_vector_size(const BasicType bt) {
+  return max_vector_size(bt); // Same as max.
+}
+
+// Vector shifts are not supported; this must not be reached.
+const int Matcher::vector_shift_count_ideal_reg(int size) {
+  fatal("vector shift is not supported");
+  return Node::NotAMachineReg;
+}
+
+// z/Architecture does support misaligned store/load at minimal extra cost.
+const bool Matcher::misaligned_vectors_ok() {
+  return true;
+}
+
+// Not yet ported to z/Architecture.
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
+// RETURNS: whether this branch offset is short enough that a short
+// branch can be used.
+//
+// If the platform does not provide any short branch variants, then
+// this method should return `false' for offset 0.
+//
+// `Compile::Fill_buffer' will decide on basis of this information
+// whether to do the pass `Compile::Shorten_branches' at all.
+//
+// And `Compile::Shorten_branches' will decide on basis of this
+// information whether to replace particular branch sites by short
+// ones.
+// Is `offset' (in bytes) reachable by a short branch form (rule/br_size
+// are unused here; all short branches share the same 16-bit reach)?
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // On zarch short branches use a 16 bit signed immediate that
+  // is the pc-relative offset in halfword (= 2 bytes) units.
+  return Assembler::is_within_range_of_RelAddr16((address)((long)offset), (address)0);
+}
+
+// Can any 64-bit constant be materialized cheaply?
+const bool Matcher::isSimpleConstant64(jlong value) {
+  // Probably always true, even if a temp register is required.
+  return true;
+}
+
+// Should correspond to setting above
+const bool Matcher::init_array_count_is_in_bytes = false;
+
+// Suppress CMOVL. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
+const int Matcher::long_cmove_cost() { return ConditionalMoveLimit; }
+
+// Suppress CMOVF. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
+const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
+
+// Does the CPU require postalloc expand (see block.cpp for description of postalloc expand)?
+const bool Matcher::require_postalloc_expand = false;
+
+// Do we need to mask the count passed to shift instructions or does
+// the cpu only look at the lower 5/6 bits anyway?
+// 32bit shifts mask in emitter, 64bit shifts need no mask.
+// Constant shift counts are handled in Ideal phase.
+const bool Matcher::need_masked_shift_count = false;
+
+// Set this as clone_shift_expressions.
+// Complex addressing only pays off when no base/shift adjustment is needed.
+bool Matcher::narrow_oop_use_complex_address() {
+  if (Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0) return true;
+  return false;
+}
+
+bool Matcher::narrow_klass_use_complex_address() {
+  NOT_LP64(ShouldNotCallThis());
+  assert(UseCompressedClassPointers, "only for compressed klass code");
+  // TODO HS25: z port if (MatchDecodeNodes) return true;
+  return false;
+}
+
+bool Matcher::const_oop_prefer_decode() {
+  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
+  return Universe::narrow_oop_base() == NULL;
+}
+
+bool Matcher::const_klass_prefer_decode() {
+  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
+  return Universe::narrow_klass_base() == NULL;
+}
+
+// Is it better to copy float constants, or load them directly from memory?
+// Most RISCs will have to materialize an address into a
+// register first, so they would do better to copy the constant from stack.
+const bool Matcher::rematerialize_float_constants = false;
+
+// If CPU can load and store mis-aligned doubles directly then no fixup is
+// needed. Else we split the double into 2 integer pieces and move it
+// piece-by-piece. Only happens when passing doubles into C code as the
+// Java calling convention forces doubles to be aligned.
+const bool Matcher::misaligned_doubles_ok = true;
+
+// Advertise here if the CPU requires explicit rounding operations
+// to implement the UseStrictFP mode.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+// Do floats take an entire double register or just half?
+//
+// A float in resides in a zarch double register. When storing it by
+// z_std, it cannot be restored in C-code by reloading it as a double
+// and casting it into a float afterwards.
+bool Matcher::float_in_double() { return false; }
+
+// Do ints take an entire long register or just half?
+// The relevant question is how the int is callee-saved:
+// the whole long is written but de-opt'ing will have to extract
+// the relevant 32 bits.
+const bool Matcher::int_in_long = true;
+
+// Constants for c2c and c calling conventions.
+
+const MachRegisterNumbers z_iarg_reg[5] = {
+  Z_R2_num, Z_R3_num, Z_R4_num, Z_R5_num, Z_R6_num
+};
+
+const MachRegisterNumbers z_farg_reg[4] = {
+  Z_F0_num, Z_F2_num, Z_F4_num, Z_F6_num
+};
+
+const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
+
+const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
+
+// Return whether or not this register is ever used as an argument. This
+// function is used on startup to build the trampoline stubs in generateOptoStub.
+// Registers not mentioned will be killed by the VM call in the trampoline, and
+// arguments in those registers will not be available to the callee.
+bool Matcher::can_be_java_arg(int reg) {
+  // True for every register contained in z_iarg_reg[] and z_farg_reg[],
+  // plus their virtual halves. The halves must be included so the
+  // trampoline stubs use STDs and LDs instead of STWs and LWs.
+  switch (reg) {
+    // Integer argument registers and their virtual halves.
+    case Z_R2_num: case Z_R2_H_num:
+    case Z_R3_num: case Z_R3_H_num:
+    case Z_R4_num: case Z_R4_H_num:
+    case Z_R5_num: case Z_R5_H_num:
+    case Z_R6_num: case Z_R6_H_num:
+      return true;
+    // Float argument registers and their virtual halves.
+    case Z_F0_num: case Z_F0_H_num:
+    case Z_F2_num: case Z_F2_H_num:
+    case Z_F4_num: case Z_F4_H_num:
+    case Z_F6_num: case Z_F6_H_num:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// A register that can carry a Java argument is also spillable.
+bool Matcher::is_spillable_arg(int reg) {
+  return can_be_java_arg(reg);
+}
+
+// Never emit an assembler stub for long division by a constant divisor.
+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
+  return false;
+}
+
+// Register for DIVI projection of divmodI
+RegMask Matcher::divI_proj_mask() {
+  return _Z_RARG4_INT_REG_mask;
+}
+
+// Register for MODI projection of divmodI
+RegMask Matcher::modI_proj_mask() {
+  return _Z_RARG3_INT_REG_mask;
+}
+
+// Register for DIVL projection of divmodL
+RegMask Matcher::divL_proj_mask() {
+  return _Z_RARG4_LONG_REG_mask;
+}
+
+// Register for MODL projection of divmodL
+RegMask Matcher::modL_proj_mask() {
+  return _Z_RARG3_LONG_REG_mask;
+}
+
+// Copied from sparc.
+// An empty mask: no dedicated SP-save register for method handle invokes.
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+  return RegMask();
+}
+
+const bool Matcher::convi2l_type_required = true;
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  // Only base+offset addresses are cloned; shifts are computed into registers.
+  return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+// No platform-specific reshaping of address expressions on this platform.
+void Compile::reshape_address(AddPNode* addp) {
+}
+
+%} // source
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to output
+// byte streams. Encoding classes are parameterized macros used by
+// Machine Instruction Nodes in order to generate the bit encoding of the
+// instruction. Operands specify their base encoding interface with the
+// interface keyword. Four interfaces are currently supported:
+// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
+// operand to generate a function which returns its register number when
+// queried. CONST_INTER causes an operand to generate a function which
+// returns the value of the constant when queried. MEMORY_INTER causes an
+// operand to generate four functions which return the Base Register, the
+// Index Register, the Scale Value, and the Offset Value of the operand when
+// queried. COND_INTER causes an operand to generate six functions which
+// return the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional instruction.
+//
+// Instructions specify two basic values for encoding. Again, a function
+// is available to check if the constant displacement is an oop. They use the
+// ins_encode keyword to specify their encoding classes (which must be
+// a sequence of enc_class names, and their parameters, specified in
+// the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode. Only the opcode sections which a particular instruction
+// needs for encoding need to be specified.
+encode %{
+  // Emit a guaranteed stop for mach nodes whose encoding has not been written yet.
+  enc_class enc_unimplemented %{
+    MacroAssembler _masm(&cbuf);
+    __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
+  %}
+
+  // In debug builds, emit a marker for encodings that exist but were never exercised.
+  enc_class enc_untested %{
+#ifdef ASSERT
+    MacroAssembler _masm(&cbuf);
+    __ untested("Untested mach node encoding in AD file.");
+#endif
+  %}
+
+  // Generic emitters for z/Architecture instruction formats. $primary holds
+  // the opcode template; Assembler::reg/simm/uimm insert an operand field at
+  // the given bit position, relative to the given instruction length in bits.
+  // The asserts check the opcode's length-indicator bits against the format.
+
+  // RR format: 2-byte instruction, two 4-bit register fields.
+  enc_class z_rrform(iRegI dst, iRegI src) %{
+    assert((($primary >> 14) & 0x03) == 0, "Instruction format error");
+    assert( ($primary >> 16)         == 0, "Instruction format error");
+    z_emit16(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,16) |
+             Assembler::reg($src$$reg,12,16));
+  %}
+
+  // RRE format: 4-byte instruction, register fields in the low halfword.
+  enc_class z_rreform(iRegI dst1, iRegI src2) %{
+    assert((($primary >> 30) & 0x03) == 2, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst1$$reg,24,32) |
+             Assembler::reg($src2$$reg,28,32));
+  %}
+
+  // RRF format: 4-byte instruction with a third register field.
+  enc_class z_rrfform(iRegI dst1, iRegI src2, iRegI src3) %{
+    assert((($primary >> 30) & 0x03) == 2, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst1$$reg,24,32) |
+             Assembler::reg($src2$$reg,28,32) |
+             Assembler::reg($src3$$reg,16,32));
+  %}
+
+  // RI format: 4-byte instruction, register + signed 16-bit immediate.
+  enc_class z_riform_signed(iRegI dst, immI16 src) %{
+    assert((($primary>>30) & 0x03) == 2, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,32) |
+             Assembler::simm16($src$$constant,16,32));
+  %}
+
+  // RI format: 4-byte instruction, register + unsigned 16-bit immediate.
+  enc_class z_riform_unsigned(iRegI dst, uimmI16 src) %{
+    assert((($primary>>30) & 0x03) == 2, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,32) |
+             Assembler::uimm16($src$$constant,16,32));
+  %}
+
+  // RIE format: 6-byte instruction, two registers + signed 16-bit immediate.
+  enc_class z_rieform_d(iRegI dst1, iRegI src3, immI src2) %{
+    assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst1$$reg,8,48) |
+             Assembler::reg($src3$$reg,12,48) |
+             Assembler::simm16($src2$$constant,16,48));
+  %}
+
+  // RIL format: 6-byte instruction, register + signed 32-bit immediate.
+  enc_class z_rilform_signed(iRegI dst, immL32 src) %{
+    assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::simm32($src$$constant,16,48));
+  %}
+
+  // RIL format: 6-byte instruction, register + unsigned 32-bit immediate.
+  enc_class z_rilform_unsigned(iRegI dst, uimmL32 src) %{
+    assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::uimm32($src$$constant,16,48));
+  %}
+
+  // RSY format with a constant shift amount encoded as 20-bit displacement.
+  enc_class z_rsyform_const(iRegI dst, iRegI src1, immI src2) %{
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::reg($src1$$reg,12,48) |
+             Assembler::simm20($src2$$constant))
+  %}
+
+  // RSY format with the shift amount in a register (displacement is zero).
+  enc_class z_rsyform_reg_reg(iRegI dst, iRegI src, iRegI shft) %{
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::reg($src$$reg,12,48) |
+             Assembler::reg($shft$$reg,16,48) |
+             Assembler::simm20(0));
+  %}
+
+  // RX format: 4-byte instruction, base + index + unsigned 12-bit displacement.
+  enc_class z_rxform_imm_reg_reg(iRegL dst, immL con, iRegL src1, iRegL src2) %{
+    assert((($primary>>30) & 0x03) == 1, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,32) |
+             Assembler::reg($src1$$reg,12,32) |
+             Assembler::reg($src2$$reg,16,32) |
+             Assembler::uimm12($con$$constant,20,32));
+  %}
+
+  // RX format without an index register.
+  enc_class z_rxform_imm_reg(iRegL dst, immL con, iRegL src) %{
+    assert((($primary>>30) & 0x03) == 1, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,32) |
+             Assembler::reg($src$$reg,16,32) |
+             Assembler::uimm12($con$$constant,20,32));
+  %}
+
+  // RXY format: 6-byte instruction, base + index + signed 20-bit displacement.
+  enc_class z_rxyform_imm_reg_reg(iRegL dst, immL con, iRegL src1, iRegL src2) %{
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::reg($src1$$reg,12,48) |
+             Assembler::reg($src2$$reg,16,48) |
+             Assembler::simm20($con$$constant));
+  %}
+
+  // RXY format without an index register.
+  enc_class z_rxyform_imm_reg(iRegL dst, immL con, iRegL src) %{
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::reg($src$$reg,16,48) |
+             Assembler::simm20($con$$constant));
+  %}
+
+  // Direct memory arithmetic.
+  // SIY format: memory operand (base + 20-bit disp) with signed 8-bit immediate.
+  enc_class z_siyform(memoryRSY mem, immI8 src) %{
+    int      disp = $mem$$disp;
+    Register base = reg_to_register_object($mem$$base);
+    int      con  = $src$$constant;
+
+    assert(VM_Version::has_MemWithImmALUOps(), "unsupported CPU");
+    z_emit_inst(cbuf, $primary |
+                Assembler::regz(base,16,48) |
+                Assembler::simm20(disp) |
+                Assembler::simm8(con,8,48));
+  %}
+
+  // SIL format: memory operand (base + 12-bit disp) with signed 16-bit immediate.
+  enc_class z_silform(memoryRS mem, immI16 src) %{
+    z_emit_inst(cbuf, $primary |
+                Assembler::regz(reg_to_register_object($mem$$base),16,48) |
+                Assembler::uimm12($mem$$disp,20,48) |
+                Assembler::simm16($src$$constant,32,48));
+  %}
+
+  // Encoder for FP ALU reg/mem instructions (support only short displacements).
+  // Opcodes > 2^32 are 6-byte (48-bit) formats, others 4-byte (32-bit).
+  enc_class z_form_rt_memFP(RegF dst, memoryRX mem) %{
+    Register Ridx = $mem$$index$$Register;
+    if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
+    if ($primary > (1L << 32)) {
+      z_emit_inst(cbuf, $primary |
+                  Assembler::reg($dst$$reg, 8, 48) |
+                  Assembler::uimm12($mem$$disp, 20, 48) |
+                  Assembler::reg(Ridx, 12, 48) |
+                  Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
+    } else {
+      z_emit_inst(cbuf, $primary |
+                  Assembler::reg($dst$$reg, 8, 32) |
+                  Assembler::uimm12($mem$$disp, 20, 32) |
+                  Assembler::reg(Ridx, 12, 32) |
+                  Assembler::regz(reg_to_register_object($mem$$base), 16, 32));
+    }
+  %}
+
+  // Reg/mem encoder: long (48-bit) opcodes take a signed 20-bit displacement,
+  // short (32-bit) opcodes only an unsigned 12-bit displacement.
+  enc_class z_form_rt_mem(iRegI dst, memory mem) %{
+    Register Ridx = $mem$$index$$Register;
+    if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
+    if ($primary > (1L<<32)) {
+      z_emit_inst(cbuf, $primary |
+                  Assembler::reg($dst$$reg, 8, 48) |
+                  Assembler::simm20($mem$$disp) |
+                  Assembler::reg(Ridx, 12, 48) |
+                  Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
+    } else {
+      z_emit_inst(cbuf, $primary |
+                  Assembler::reg($dst$$reg, 8, 32) |
+                  Assembler::uimm12($mem$$disp, 20, 32) |
+                  Assembler::reg(Ridx, 12, 32) |
+                  Assembler::regz(reg_to_register_object($mem$$base), 16, 32));
+    }
+  %}
+
+  // Reg/mem encoder choosing the best form for the displacement:
+  //  - short disp (12-bit):   $secondary opcode,
+  //  - valid disp (20-bit):   $primary (long-displacement) opcode,
+  //  - otherwise:             materialize the disp in Z_R1 and use it as index.
+  enc_class z_form_rt_mem_opt(iRegI dst, memory mem) %{
+    int isize = $secondary > 1L << 32 ? 48 : 32;
+    Register Ridx = $mem$$index$$Register;
+    if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
+
+    if (Displacement::is_shortDisp((long)$mem$$disp)) {
+      z_emit_inst(cbuf, $secondary |
+                  Assembler::reg($dst$$reg, 8, isize) |
+                  Assembler::uimm12($mem$$disp, 20, isize) |
+                  Assembler::reg(Ridx, 12, isize) |
+                  Assembler::regz(reg_to_register_object($mem$$base), 16, isize));
+    } else if (Displacement::is_validDisp((long)$mem$$disp)) {
+      z_emit_inst(cbuf, $primary |
+                  Assembler::reg($dst$$reg, 8, 48) |
+                  Assembler::simm20($mem$$disp) |
+                  Assembler::reg(Ridx, 12, 48) |
+                  Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
+    } else {
+        MacroAssembler _masm(&cbuf);
+        __ load_const_optimized(Z_R1_scratch, $mem$$disp);
+        if (Ridx != Z_R0) { __ z_agr(Z_R1_scratch, Ridx); }
+        z_emit_inst(cbuf, $secondary |
+                    Assembler::reg($dst$$reg, 8, isize) |
+                    Assembler::uimm12(0, 20, isize) |
+                    Assembler::reg(Z_R1_scratch, 12, isize) |
+                    Assembler::regz(reg_to_register_object($mem$$base), 16, isize));
+    }
+  %}
+
+  // Unconditional branch, long (relative-long) form.
+  enc_class z_enc_brul(Label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label* p = $lbl$$label;
+
+    // 'p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    // Use a bound dummy label in that case.
+    Label d;
+    __ bind(d);
+    Label& l = (NULL == p) ? d : *(p);
+    __ z_brul(l);
+  %}
+
+  // Unconditional branch, short form.
+  enc_class z_enc_bru(Label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label* p = $lbl$$label;
+
+    // 'p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    // Use a bound dummy label in that case.
+    Label d;
+    __ bind(d);
+    Label& l = (NULL == p) ? d : *(p);
+    __ z_bru(l);
+  %}
+
+  // Conditional branch, long (relative-long) form.
+  enc_class z_enc_branch_con_far(cmpOp cmp, Label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label* p = $lbl$$label;
+
+    // 'p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    // Use a bound dummy label in that case.
+    Label d;
+    __ bind(d);
+    Label& l = (NULL == p) ? d : *(p);
+    __ z_brcl((Assembler::branch_condition)$cmp$$cmpcode, l);
+  %}
+
+  // Conditional branch, short form.
+  enc_class z_enc_branch_con_short(cmpOp cmp, Label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label* p = $lbl$$label;
+
+    // 'p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    // Use a bound dummy label in that case.
+    Label d;
+    __ bind(d);
+    Label& l = (NULL == p) ? d : *(p);
+    __ z_brc((Assembler::branch_condition)$cmp$$cmpcode, l);
+  %}
+
+  // Fused compare-and-branch, reg/reg, short branch range.
+  // $primary selects the concrete compare flavor (signed/unsigned, int/long).
+  enc_class z_enc_cmpb_regreg(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
+    MacroAssembler _masm(&cbuf);
+    Label* p = $lbl$$label;
+
+    // 'p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    // Use a bound dummy label in that case.
+    Label d;
+    __ bind(d);
+    Label& l = (NULL == p) ? d : *(p);
+    Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode;
+    unsigned long instr = $primary;
+    if (instr == CRJ_ZOPC) {
+      __ z_crj($src1$$Register, $src2$$Register, cc, l);
+    } else if (instr == CLRJ_ZOPC) {
+      __ z_clrj($src1$$Register, $src2$$Register, cc, l);
+    } else if (instr == CGRJ_ZOPC) {
+      __ z_cgrj($src1$$Register, $src2$$Register, cc, l);
+    } else {
+      guarantee(instr == CLGRJ_ZOPC, "opcode not implemented");
+      __ z_clgrj($src1$$Register, $src2$$Register, cc, l);
+    }
+  %}
+
+  // Far variant: separate compare followed by a relative-long conditional branch.
+  enc_class z_enc_cmpb_regregFar(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
+    MacroAssembler _masm(&cbuf);
+    Label* p = $lbl$$label;
+
+    // 'p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    // Use a bound dummy label in that case.
+    Label d;
+    __ bind(d);
+    Label& l = (NULL == p) ? d : *(p);
+
+    unsigned long instr = $primary;
+    if (instr == CR_ZOPC) {
+      __ z_cr($src1$$Register, $src2$$Register);
+    } else if (instr == CLR_ZOPC) {
+      __ z_clr($src1$$Register, $src2$$Register);
+    } else if (instr == CGR_ZOPC) {
+      __ z_cgr($src1$$Register, $src2$$Register);
+    } else {
+      guarantee(instr == CLGR_ZOPC, "opcode not implemented");
+      __ z_clgr($src1$$Register, $src2$$Register);
+    }
+
+    __ z_brcl((Assembler::branch_condition)$cmp$$cmpcode, l);
+  %}
+
+  // Fused compare-and-branch, reg/immediate, short branch range.
+  enc_class z_enc_cmpb_regimm(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
+    MacroAssembler _masm(&cbuf);
+    Label* p = $lbl$$label;
+
+    // 'p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    // Use a bound dummy label in that case.
+    Label d;
+    __ bind(d);
+    Label& l = (NULL == p) ? d : *(p);
+
+    Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode;
+    unsigned long instr = $primary;
+    if (instr == CIJ_ZOPC) {
+      __ z_cij($src1$$Register, $src2$$constant, cc, l);
+    } else if (instr == CLIJ_ZOPC) {
+      __ z_clij($src1$$Register, $src2$$constant, cc, l);
+    } else if (instr == CGIJ_ZOPC) {
+      __ z_cgij($src1$$Register, $src2$$constant, cc, l);
+    } else {
+      guarantee(instr == CLGIJ_ZOPC, "opcode not implemented");
+      __ z_clgij($src1$$Register, $src2$$constant, cc, l);
+    }
+  %}
+
+  // Far variant: separate compare-immediate followed by a relative-long branch.
+  enc_class z_enc_cmpb_regimmFar(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
+    MacroAssembler _masm(&cbuf);
+    Label* p = $lbl$$label;
+
+    // 'p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    // Use a bound dummy label in that case.
+    Label d;
+    __ bind(d);
+    Label& l = (NULL == p) ? d : *(p);
+
+    unsigned long instr = $primary;
+    if (instr == CHI_ZOPC) {
+      __ z_chi($src1$$Register, $src2$$constant);
+    } else if (instr == CLFI_ZOPC) {
+      __ z_clfi($src1$$Register, $src2$$constant);
+    } else if (instr == CGHI_ZOPC) {
+      __ z_cghi($src1$$Register, $src2$$constant);
+    } else {
+      guarantee(instr == CLGFI_ZOPC, "opcode not implemented");
+      __ z_clgfi($src1$$Register, $src2$$constant);
+    }
+
+    __ z_brcl((Assembler::branch_condition)$cmp$$cmpcode, l);
+  %}
+
+  // Call from Java to runtime.
+  enc_class z_enc_java_to_runtime_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+
+    // Save return pc before call to the place where we need it, since
+    // callee doesn't.
+    unsigned int start_off = __ offset();
+    // Compute size of "larl + stg + call_c_opt".
+    const int size_of_code = 6 + 6 + MacroAssembler::call_far_patchable_size();
+    // Load Z_R14 with the pc of the instruction following this sequence.
+    __ get_PC(Z_R14, size_of_code);
+    __ save_return_pc();
+    assert(__ offset() - start_off == 12, "bad prelude len: %d", __ offset() - start_off);
+
+    assert((__ offset() & 2) == 0, "misaligned z_enc_java_to_runtime_call");
+    address call_addr = __ call_c_opt((address)$meth$$method);
+    if (call_addr == NULL) {
+      // Bail out on constant-pool/code-buffer exhaustion; compilation is retried.
+      Compile::current()->env()->record_out_of_memory_failure();
+      return;
+    }
+
+#ifdef ASSERT
+    // Plausibility check for size_of_code assumptions.
+    unsigned int actual_ret_off = __ offset();
+    assert(start_off + size_of_code == actual_ret_off, "wrong return_pc");
+#endif
+  %}
+
+  enc_class z_enc_java_static_call(method meth) %{
+    // Call to fixup routine. Fixup routine uses ScopeDesc info to determine
+    // whom we intended to call.
+    MacroAssembler _masm(&cbuf);
+    int ret_offset = 0;
+
+    if (!_method) {
+      // A runtime call (no resolved Java callee).
+      ret_offset = emit_call_reloc(_masm, $meth$$method,
+                                   relocInfo::runtime_call_w_cp_type, ra_);
+    } else {
+      int method_index = resolved_method_index(cbuf);
+      if (_optimized_virtual) {
+        ret_offset = emit_call_reloc(_masm, $meth$$method,
+                                     opt_virtual_call_Relocation::spec(method_index));
+      } else {
+        ret_offset = emit_call_reloc(_masm, $meth$$method,
+                                     static_call_Relocation::spec(method_index));
+      }
+    }
+    assert(__ inst_mark() != NULL, "emit_call_reloc must set_inst_mark()");
+
+    if (_method) { // Emit stub for static call.
+      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
+      // Stub allocation can fail if the code cache is full; abort this compile.
+      if (stub == NULL) {
+        ciEnv::current()->record_failure("CodeCache is full");
+        return;
+      }
+    }
+  %}
+
+  // Java dynamic call
+  enc_class z_enc_java_dynamic_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+    unsigned int start_off = __ offset();
+
+    int vtable_index = this->_vtable_index;
+    // NOTE(review): -4 presumably is Method::invalid_vtable_index,
+    // i.e. "use an inline cache" — confirm against method.hpp.
+    if (vtable_index == -4) {
+      Register ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
+      address virtual_call_oop_addr = NULL;
+
+      // Preload the IC register with the non-oop sentinel; the IC stub
+      // patches the real cached value here later.
+      AddressLiteral empty_ic((address) Universe::non_oop_word());
+      virtual_call_oop_addr = __ pc();
+      bool success = __ load_const_from_toc(ic_reg, empty_ic);
+      if (!success) {
+        Compile::current()->env()->record_out_of_memory_failure();
+        return;
+      }
+
+      // Call to fixup routine. Fixup routine uses ScopeDesc info
+      // to determine who we intended to call.
+      int method_index = resolved_method_index(cbuf);
+      __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
+      unsigned int ret_off = __ offset();
+      assert(__ offset() - start_off == 6, "bad prelude len: %d", __ offset() - start_off);
+      // NOTE(review): ret_off is computed but never read afterwards.
+      ret_off += emit_call_reloc(_masm, $meth$$method, relocInfo::none, ra_);
+      assert(_method, "lazy_constant may be wrong when _method==null");
+    } else {
+      assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
+      // Go through the vtable. Get receiver klass. Receiver already
+      // checked for non-null. If we'll go thru a C2I adapter, the
+      // interpreter expects method in Z_method.
+      // Use Z_method to temporarily hold the klass oop. Z_R1_scratch is destroyed
+      // by load_heap_oop_not_null.
+      __ load_klass(Z_method, Z_R2);
+
+      int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
+      int v_off        = entry_offset + vtableEntry::method_offset_in_bytes();
+
+      if (Displacement::is_validDisp(v_off) ) {
+        // Can use load instruction with large offset.
+        __ z_lg(Z_method, Address(Z_method /*class oop*/, v_off /*method offset*/));
+      } else {
+        // Worst case, must load offset into register.
+        __ load_const(Z_R1_scratch, v_off);
+        __ z_lg(Z_method, Address(Z_method /*class oop*/, Z_R1_scratch /*method offset*/));
+      }
+      // NOTE: for vtable dispatches, the vtable entry will never be
+      // null. However it may very well end up in handle_wrong_method
+      // if the method is abstract for the particular class.
+      __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
+      // Call target. Either compiled code or C2I adapter.
+      __ z_basr(Z_R14, Z_R1_scratch);
+      // NOTE(review): ret_off is unused here as well.
+      unsigned int ret_off = __ offset();
+    }
+  %}
+
+  // Conditional move reg->reg: LOCGR if available, else branch-around + LGR.
+  enc_class z_enc_cmov_reg(cmpOp cmp, iRegI dst, iRegI src) %{
+    MacroAssembler _masm(&cbuf);
+    Register Rdst = reg_to_register_object($dst$$reg);
+    Register Rsrc = reg_to_register_object($src$$reg);
+
+    // Don't emit code if operands are identical (same register).
+    if (Rsrc != Rdst) {
+      Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode;
+
+      if (VM_Version::has_LoadStoreConditional()) {
+        __ z_locgr(Rdst, Rsrc, cc);
+      } else {
+        // Branch if not (cmp cr).
+        Label done;
+        __ z_brc(Assembler::inverse_condition(cc), done);
+        __ z_lgr(Rdst, Rsrc); // Used for int and long+ptr.
+        __ bind(done);
+      }
+    }
+  %}
+
+  // Conditional move of a 16-bit immediate: branch-around + load immediate.
+  enc_class z_enc_cmov_imm(cmpOp cmp, iRegI dst, immI16 src) %{
+    MacroAssembler _masm(&cbuf);
+    Register Rdst = reg_to_register_object($dst$$reg);
+    int      Csrc = $src$$constant;
+    Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode;
+    Label done;
+    // Branch if not (cmp cr).
+    __ z_brc(Assembler::inverse_condition(cc), done);
+    if (Csrc == 0) {
+      // Don't set CC.
+      __ clear_reg(Rdst, true, false);  // Use for int, long & ptr.
+    } else {
+      __ z_lghi(Rdst, Csrc); // Use for int, long & ptr.
+    }
+    __ bind(done);
+  %}
+
+  // Materialize the condition code as a boolean: 1 if equal, 0 otherwise.
+  enc_class z_enc_cctobool(iRegI res) %{
+    MacroAssembler _masm(&cbuf);
+    Register Rres = reg_to_register_object($res$$reg);
+
+    if (VM_Version::has_LoadStoreConditional()) {
+      __ load_const_optimized(Z_R0_scratch, 0L); // false (failed)
+      __ load_const_optimized(Rres, 1L);         // true  (succeed)
+      __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual);
+    } else {
+      Label done;
+      __ load_const_optimized(Rres, 0L); // false (failed)
+      __ z_brne(done);                   // Assume true to be the common case.
+      __ load_const_optimized(Rres, 1L); // true  (succeed)
+      __ bind(done);
+    }
+  %}
+
+  // Compare-and-swap (32-bit). Condition code signals success/failure.
+  enc_class z_enc_casI(iRegI compare_value, iRegI exchange_value, iRegP addr_ptr) %{
+    MacroAssembler _masm(&cbuf);
+    Register Rcomp = reg_to_register_object($compare_value$$reg);
+    Register Rnew  = reg_to_register_object($exchange_value$$reg);
+    Register Raddr = reg_to_register_object($addr_ptr$$reg);
+
+    __ z_cs(Rcomp, Rnew, 0, Raddr);
+  %}
+
+  // Compare-and-swap (64-bit). Condition code signals success/failure.
+  enc_class z_enc_casL(iRegL compare_value, iRegL exchange_value, iRegP addr_ptr) %{
+    MacroAssembler _masm(&cbuf);
+    Register Rcomp = reg_to_register_object($compare_value$$reg);
+    Register Rnew  = reg_to_register_object($exchange_value$$reg);
+    Register Raddr = reg_to_register_object($addr_ptr$$reg);
+
+    __ z_csg(Rcomp, Rnew, 0, Raddr);
+  %}
+
+  // Atomic exchange (32-bit), implemented as a compare-and-swap retry loop.
+  enc_class z_enc_SwapI(memoryRSY mem, iRegI dst, iRegI tmp) %{
+    MacroAssembler _masm(&cbuf);
+    Register Rdst = reg_to_register_object($dst$$reg);
+    Register Rtmp = reg_to_register_object($tmp$$reg);
+    guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
+    Label    retry;
+
+    // Iterate until swap succeeds.
+    __ z_llgf(Rtmp, $mem$$Address);  // current contents
+    __ bind(retry);
+      // On compare mismatch, CSY refreshes Rtmp with the current memory value.
+      __ z_csy(Rtmp, Rdst, $mem$$Address); // Try to store new value.
+      __ z_brne(retry);                    // Yikes, concurrent update, need to retry.
+    __ z_lgr(Rdst, Rtmp);                  // Exchanged value from memory is return value.
+  %}
+
+  // Atomic exchange (64-bit), implemented as a compare-and-swap retry loop.
+  enc_class z_enc_SwapL(memoryRSY mem, iRegL dst, iRegL tmp) %{
+    MacroAssembler _masm(&cbuf);
+    Register Rdst = reg_to_register_object($dst$$reg);
+    Register Rtmp = reg_to_register_object($tmp$$reg);
+    guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
+    Label    retry;
+
+    // Iterate until swap succeeds.
+    __ z_lg(Rtmp, $mem$$Address);  // current contents
+    __ bind(retry);
+      // On compare mismatch, CSG refreshes Rtmp with the current memory value.
+      __ z_csg(Rtmp, Rdst, $mem$$Address); // Try to store new value.
+      __ z_brne(retry);                    // Yikes, concurrent update, need to retry.
+    __ z_lgr(Rdst, Rtmp);                  // Exchanged value from memory is return value.
+  %}
+
+%} // encode
+
+source %{
+
+  // Check whether outs are all Stores. If so, we can omit clearing the upper
+  // 32 bits after encoding.
+  static bool all_outs_are_Stores(const Node *n) {
+    for (DUIterator_Fast imax, k = n->fast_outs(imax); k < imax; k++) {
+      Node *out = n->fast_out(k);
+      if (!out->is_Mach() || out->as_Mach()->ideal_Opcode() != Op_StoreN) {
+        // Most other outs are SpillCopy, but there are various others.
+        // jvm98 has around 9% Encodes where we return false.
+        return false;
+      }
+    }
+    return true;
+  }
+
+%} // source
+
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+
+frame %{
+  // What direction does stack grow in (assumed to be same for native & Java).
+  stack_direction(TOWARDS_LOW);
+
+  // These two registers define part of the calling convention between
+  // compiled code and the interpreter.
+
+  // Inline Cache Register
+  inline_cache_reg(Z_R9); // Z_inline_cache
+
+  // Argument pointer for I2C adapters
+  //
+  // Tos is loaded in run_compiled_code to Z_ARG5=Z_R6.
+  // interpreter_arg_ptr_reg(Z_R6);
+
+  // Temporary in compiled entry-points
+  // compiler_method_oop_reg(Z_R1);//Z_R1_scratch
+
+  // Method Oop Register when calling interpreter
+  interpreter_method_oop_reg(Z_R9);//Z_method
+
+  // Optional: name the operand used by cisc-spilling to access
+  // [stack_pointer + offset].
+  cisc_spilling_operand_name(indOffset12);
+
+  // Number of stack slots consumed by a Monitor enter.
+  sync_stack_slots(frame::jit_monitor_size_in_4_byte_units);
+
+  // Compiled code's Frame Pointer
+  //
+  // z/Architecture stack pointer
+  frame_pointer(Z_R15); // Z_SP
+
+  // Interpreter stores its frame pointer in a register which is
+  // stored to the stack by I2CAdaptors. I2CAdaptors convert from
+  // interpreted java to compiled java.
+  //
+  // Z_state holds pointer to caller's cInterpreter.
+  interpreter_frame_pointer(Z_R7); // Z_state
+
+  // Use alignment_in_bytes instead of log_2_of_alignment_in_bits.
+  stack_alignment(frame::alignment_in_bytes);
+
+  in_preserve_stack_slots(frame::jit_in_preserve_size_in_4_byte_units);
+
+  // A `slot' is assumed 4 bytes here!
+  // out_preserve_stack_slots(frame::jit_out_preserve_size_in_4_byte_units);
+
+  // Number of outgoing stack slots killed above the
+  // out_preserve_stack_slots for calls to C. Supports the var-args
+  // backing area for register parms.
+  varargs_C_out_slots_killed(((frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size));
+
+  // The after-PROLOG location of the return address. Location of
+  // return address specifies a type (REG or STACK) and a number
+  // representing the register number (i.e. - use a register name) or
+  // stack slot.
+  return_addr(REG Z_R14);
+
+  // This is the body of the function
+  //
+  // void Matcher::calling_convention(OptoRegPair* sig /* array of ideal regs */,
+  //                                  uint length      /* length of array */,
+  //                                  bool is_outgoing)
+  //
+  // The `sig' array is to be updated. Sig[j] represents the location
+  // of the j-th argument, either a register or a stack slot.
+
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots. Passed an array
+  // of ideal registers called "sig" and a "length" count. Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE. Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+  calling_convention %{
+    // No difference between ingoing/outgoing just pass false.
+    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
+  %}
+
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots. Passed an array
+  // of ideal registers called "sig" and a "length" count. Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE. Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+  c_calling_convention %{
+    // This is obviously always outgoing.
+    // C argument must be in register AND stack slot.
+    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
+  %}
+
+  // Location of native (C/C++) and interpreter return values. This
+  // is specified to be the same as Java.
+  // (NOTE(review): the O0:O1/I0:I1 remark below is inherited from the
+  // sparc port; those are sparc register names.) In the 32-bit VM, long
+  // values are actually returned from native calls in O0:O1 and
+  // returned to the interpreter in I0:I1. The copying to and from
+  // the register pairs is done by the appropriate call and epilog
+  // opcodes. This simplifies the register allocator.
+  //
+  // Use register pair for c return value.
+  c_return_value %{
+    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values");
+    // Lo/hi halves of the return register, indexed by ideal register type.
+    static int typeToRegLo[Op_RegL+1] = { 0, 0, Z_R2_num, Z_R2_num, Z_R2_num, Z_F0_num, Z_F0_num, Z_R2_num };
+    static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, Z_R2_H_num, OptoReg::Bad, Z_F0_H_num, Z_R2_H_num };
+    return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
+  %}
+
+  // Use register pair for return value.
+  // Location of compiled Java return values. Same as C.
+  return_value %{
+    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values");
+    // Lo/hi halves of the return register, indexed by ideal register type.
+    static int typeToRegLo[Op_RegL+1] = { 0, 0, Z_R2_num, Z_R2_num, Z_R2_num, Z_F0_num, Z_F0_num, Z_R2_num };
+    static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, Z_R2_H_num, OptoReg::Bad, Z_F0_H_num, Z_R2_H_num };
+    return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
+  %}
+%}
+
+
+//----------ATTRIBUTES---------------------------------------------------------
+
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(1);          // Required cost attribute
+
+//----------Instruction Attributes---------------------------------------------
+
+// Cost attribute, required.
+ins_attrib ins_cost(DEFAULT_COST);
+
+// Is this instruction a non-matching short branch variant of some
+// long branch? Not required.
+ins_attrib ins_short_branch(0);
+
+// Indicates this is a trap based check node and final control-flow fixup
+// must generate a proper fall through.
+ins_attrib ins_is_TrapBasedCheckNode(true);
+
+// Attribute of instruction to tell how many constants the instruction will generate.
+// (optional attribute). Default: 0.
+ins_attrib ins_num_consts(0);
+
+// Required alignment attribute (must be a power of 2)
+// specifies the alignment that some part of the instruction (not
+// necessarily the start) requires. If > 1, a compute_padding()
+// function must be provided for the instruction.
+//
+// WARNING: Don't use size(FIXED_SIZE) or size(VARIABLE_SIZE) in
+// instructions which depend on the proper alignment, because the
+// desired alignment isn't guaranteed for the call to "emit()" during
+// the size computation.
+ins_attrib ins_alignment(1);
+
+// Enforce/prohibit rematerializations.
+// - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
+//   then rematerialization of that instruction is prohibited and the
+//   instruction's value will be spilled if necessary.
+// - If an instruction is attributed with 'ins_should_rematerialize(true)'
+//   then rematerialization is enforced and the instruction's value will
+//   never get spilled. A copy of the instruction will be inserted if
+//   necessary.
+//   Note: this may result in rematerializations in front of every use.
+// (optional attribute)
+ins_attrib ins_cannot_rematerialize(false);
+ins_attrib ins_should_rematerialize(false);
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct
+// parsing in the ADLC because operands constitute user defined types
+// which are used in instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+// Immediate Operands
+// Please note:
+// Formats are generated automatically for constants and base registers.
+
+//----------------------------------------------
+// SIGNED (shorter than INT) immediate operands
+//----------------------------------------------
+
+// Byte Immediate: constant 'int -1'
+// Also matches any constant whose low byte is all ones, i.e. the
+// zero-extended representation of a byte value of -1.
+operand immB_minus1() %{
+  //         sign-ext constant      zero-ext constant
+  predicate((n->get_int() == -1) || ((n->get_int()&0x000000ff) == 0x000000ff));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Byte Immediate: constant, but not 'int 0' nor 'int -1'.
+// Complements immB_minus1: the low byte must not be all ones.
+operand immB_n0m1() %{
+  //                             sign-ext constant     zero-ext constant
+  predicate(n->get_int() != 0 && n->get_int() != -1 && (n->get_int()&0x000000ff) != 0x000000ff);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Short Immediate: constant 'int -1'
+// Also matches any constant whose low halfword is all ones, i.e. the
+// zero-extended representation of a short value of -1.
+operand immS_minus1() %{
+  //         sign-ext constant      zero-ext constant
+  predicate((n->get_int() == -1) || ((n->get_int()&0x0000ffff) == 0x0000ffff));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Short Immediate: constant, but not 'int 0' nor 'int -1'.
+// Complements immS_minus1: the low halfword must not be all ones.
+operand immS_n0m1() %{
+  //                             sign-ext constant     zero-ext constant
+  predicate(n->get_int() != 0 && n->get_int() != -1 && (n->get_int()&0x0000ffff) != 0x0000ffff);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//-----------------------------------------
+//  SIGNED INT immediate operands
+//-----------------------------------------
+
+// Integer Immediate: 32-bit (no predicate: matches any int constant)
+operand immI() %{
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Int Immediate: 20-bit (signed, fits the long-displacement field)
+operand immI20() %{
+  predicate(Immediate::is_simm20(n->get_int()));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: 16-bit (signed)
+operand immI16() %{
+  predicate(Immediate::is_simm16(n->get_int()));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: 8-bit (signed)
+operand immI8() %{
+  predicate(Immediate::is_simm8(n->get_int()));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: constant 'int 0'
+operand immI_0() %{
+  predicate(n->get_int() == 0);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: constant 'int -1'
+operand immI_minus1() %{
+  predicate(n->get_int() == -1);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: constant, but not 'int 0' nor 'int -1'.
+operand immI_n0m1() %{
+  predicate(n->get_int() != 0 && n->get_int() != -1);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//-------------------------------------------
+// UNSIGNED INT immediate operands
+//-------------------------------------------
+
+// Unsigned Integer Immediate: 32-bit
+// (no predicate: any int constant, interpreted as unsigned)
+operand uimmI() %{
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 16-bit
+operand uimmI16() %{
+  predicate(Immediate::is_uimm16(n->get_int()));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 12-bit
+operand uimmI12() %{
+  predicate(Immediate::is_uimm12(n->get_int()));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 8-bit
+operand uimmI8() %{
+  predicate(Immediate::is_uimm8(n->get_int()));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 6-bit
+operand uimmI6() %{
+  predicate(Immediate::is_uimm(n->get_int(), 6));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 5-bit
+operand uimmI5() %{
+  predicate(Immediate::is_uimm(n->get_int(), 5));
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Length for SS instructions, given in DWs,
+//   possible range [1..512], i.e. [8..4096] Bytes
+//   used     range [1..256], i.e. [8..2048] Bytes
+//   operand type long (matches ConL, see get_long() below)
+// Encoded as (length - 1), which must fit an 8-bit unsigned immediate.
+operand SSlenDW() %{
+  predicate(Immediate::is_uimm8(n->get_long()-1));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//------------------------------------------
+// (UN)SIGNED INT specific values
+//------------------------------------------
+
+// Integer Immediate: the value 1
+operand immI_1() %{
+  predicate(n->get_int() == 1);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 16.
+operand immI_16() %{
+  predicate(n->get_int() == 16);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 24.
+operand immI_24() %{
+  predicate(n->get_int() == 24);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 255
+operand immI_255() %{
+  predicate(n->get_int() == 255);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the values 32-63 (inclusive)
+operand immI_32_63() %{
+  predicate(n->get_int() >= 32 && n->get_int() <= 63);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: LL-part, extended by 1s.
+// Upper halfword is all ones; low halfword is arbitrary.
+operand uimmI_LL1() %{
+  predicate((n->get_int() & 0xFFFF0000) == 0xFFFF0000);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: LH-part, extended by 1s.
+// Low halfword is all ones; upper halfword is arbitrary.
+operand uimmI_LH1() %{
+  predicate((n->get_int() & 0xFFFF) == 0xFFFF);
+  match(ConI);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//------------------------------------------
+// SIGNED LONG immediate operands
+//------------------------------------------
+
+// Long Immediate: 64-bit (no predicate: matches any long constant)
+operand immL() %{
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: 32-bit
+operand immL32() %{
+  predicate(Immediate::is_simm32(n->get_long()));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: 20-bit
+operand immL20() %{
+  predicate(Immediate::is_simm20(n->get_long()));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: 16-bit
+operand immL16() %{
+  predicate(Immediate::is_simm16(n->get_long()));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: 8-bit
+operand immL8() %{
+  predicate(Immediate::is_simm8(n->get_long()));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//--------------------------------------------
+// UNSIGNED LONG immediate operands
+//--------------------------------------------
+
+// Unsigned Long Immediate: 32-bit
+operand uimmL32() %{
+  predicate(Immediate::is_uimm32(n->get_long()));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: 16-bit
+operand uimmL16() %{
+  predicate(Immediate::is_uimm16(n->get_long()));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: 12-bit
+operand uimmL12() %{
+  predicate(Immediate::is_uimm12(n->get_long()));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: 8-bit
+operand uimmL8() %{
+  predicate(Immediate::is_uimm8(n->get_long()));
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//-------------------------------------------
+// (UN)SIGNED LONG specific values
+//-------------------------------------------
+
+// Long Immediate: the value FF (low byte mask)
+operand immL_FF() %{
+  predicate(n->get_long() == 0xFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: the value FFFF (low halfword mask)
+operand immL_FFFF() %{
+  predicate(n->get_long() == 0xFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: the value FFFFFFFF (low word mask)
+operand immL_FFFFFFFF() %{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: the value 0
+operand immL_0() %{
+  predicate(n->get_long() == 0L);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: LL-part, extended by 1s.
+// All bits except the low halfword (bits 0..15) are ones.
+operand uimmL_LL1() %{
+  predicate((n->get_long() & 0xFFFFFFFFFFFF0000L) == 0xFFFFFFFFFFFF0000L);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: LH-part, extended by 1s.
+// All bits except halfword 1 (bits 16..31) are ones.
+operand uimmL_LH1() %{
+  predicate((n->get_long() & 0xFFFFFFFF0000FFFFL) == 0xFFFFFFFF0000FFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: HL-part, extended by 1s.
+// All bits except halfword 2 (bits 32..47) are ones.
+operand uimmL_HL1() %{
+  predicate((n->get_long() & 0xFFFF0000FFFFFFFFL) == 0xFFFF0000FFFFFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: HH-part, extended by 1s.
+// All bits except halfword 3 (bits 48..63) are ones.
+operand uimmL_HH1() %{
+  predicate((n->get_long() & 0xFFFFFFFFFFFFL) == 0xFFFFFFFFFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask
+// NOTE(review): the predicate is identical to immL_FFFFFFFF above.
+operand immL_32bits() %{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//--------------------------------------
+//  POINTER immediate operands
+//--------------------------------------
+
+// Pointer Immediate: 64-bit (no predicate: any pointer constant)
+operand immP() %{
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 32-bit (fits an unsigned 32-bit value)
+operand immP32() %{
+  predicate(Immediate::is_uimm32(n->get_ptr()));
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 16-bit (fits an unsigned 16-bit value)
+operand immP16() %{
+  predicate(Immediate::is_uimm16(n->get_ptr()));
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 8-bit (fits an unsigned 8-bit value)
+operand immP8() %{
+  predicate(Immediate::is_uimm8(n->get_ptr()));
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//-----------------------------------
+// POINTER specific values
+//-----------------------------------
+
+// Pointer Immediate: NULL
+operand immP0() %{
+  predicate(n->get_ptr() == 0);
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//---------------------------------------------
+// NARROW POINTER immediate operands
+//---------------------------------------------
+
+// Narrow Pointer Immediate (compressed oop constant)
+operand immN() %{
+  match(ConN);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow Klass Pointer Immediate (compressed klass constant)
+operand immNKlass() %{
+  match(ConNKlass);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow Pointer Immediate: 8-bit
+operand immN8() %{
+  predicate(Immediate::is_uimm8(n->get_narrowcon()));
+  match(ConN);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow NULL Pointer Immediate
+operand immN0() %{
+  predicate(n->get_narrowcon() == 0);
+  match(ConN);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// FLOAT and DOUBLE immediate operands
+
+// Double Immediate (no predicate: any double constant)
+operand immD() %{
+  match(ConD);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate: +-0
+// The floating point compare 'getd() == 0' is true for both +0.0
+// and -0.0, so this operand matches either sign of zero.
+operand immDpm0() %{
+  predicate(n->getd() == 0);
+  match(ConD);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate: +0
+// Compares the raw bit pattern, so only +0.0 matches (the -0.0
+// bit pattern is nonzero).
+operand immDp0() %{
+  predicate(jlong_cast(n->getd()) == 0);
+  match(ConD);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate (no predicate: any float constant)
+operand immF() %{
+  match(ConF);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate: +-0
+// Matches either sign of zero (floating point compare).
+operand immFpm0() %{
+  predicate(n->getf() == 0);
+  match(ConF);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate: +0
+// Compares the raw bit pattern, so only +0.0f matches.
+operand immFp0() %{
+  predicate(jint_cast(n->getf()) == 0);
+  match(ConF);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// End of Immediate Operands
+
+// Integer Register Operands
+// Integer Register (most general int register operand; the match()
+// list enumerates the more constrained variants it subsumes)
+operand iRegI() %{
+  constraint(ALLOC_IN_RC(z_int_reg));
+  match(RegI);
+  match(noArg_iRegI);
+  match(rarg1RegI);
+  match(rarg2RegI);
+  match(rarg3RegI);
+  match(rarg4RegI);
+  match(rarg5RegI);
+  match(noOdd_iRegI);
+  match(revenRegI);
+  match(roddRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Int register which is not a C argument register.
+operand noArg_iRegI() %{
+  constraint(ALLOC_IN_RC(z_no_arg_int_reg));
+  match(RegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegI and roddRegI constitute an even-odd pair.
+operand revenRegI() %{
+  constraint(ALLOC_IN_RC(z_rarg3_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegI and roddRegI constitute an even-odd pair.
+operand roddRegI() %{
+  constraint(ALLOC_IN_RC(z_rarg4_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg1RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg1_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg2RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg2_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg3RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg3_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg4RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg4_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg5RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg5_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Int register restricted to even register numbers.
+operand noOdd_iRegI() %{
+  constraint(ALLOC_IN_RC(z_no_odd_int_reg));
+  match(RegI);
+  match(revenRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer Register (most general pointer register operand; the
+// match() list enumerates the more constrained variants it subsumes)
+operand iRegP() %{
+  constraint(ALLOC_IN_RC(z_ptr_reg));
+  match(RegP);
+  match(noArg_iRegP);
+  match(rarg1RegP);
+  match(rarg2RegP);
+  match(rarg3RegP);
+  match(rarg4RegP);
+  match(rarg5RegP);
+  match(revenRegP);
+  match(roddRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Thread operand (pinned to the thread register Z_THREAD).
+operand threadRegP() %{
+  constraint(ALLOC_IN_RC(z_thread_ptr_reg));
+  match(RegP);
+  format %{ "Z_THREAD" %}
+  interface(REG_INTER);
+%}
+
+// Pointer register which is not a C argument register.
+operand noArg_iRegP() %{
+  constraint(ALLOC_IN_RC(z_no_arg_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg1RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg1_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg2RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg2_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg3RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg4RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg5RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg5_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer register usable as the base of a memory operand.
+operand memoryRegP() %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(RegP);
+  match(iRegP);
+  match(threadRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegP and roddRegP constitute an even-odd pair.
+operand revenRegP() %{
+  constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegP and roddRegP constitute an even-odd pair.
+operand roddRegP() %{
+  constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand lock_ptr_RegP() %{
+  constraint(ALLOC_IN_RC(z_lock_ptr_reg));
+  match(RegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rscratch2RegP() %{
+  constraint(ALLOC_IN_RC(z_rscratch2_bits64_reg));
+  match(RegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Narrow (compressed) oop register operand.
+operand iRegN() %{
+  constraint(ALLOC_IN_RC(z_int_reg));
+  match(RegN);
+  match(noArg_iRegN);
+  match(rarg1RegN);
+  match(rarg2RegN);
+  match(rarg3RegN);
+  match(rarg4RegN);
+  match(rarg5RegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Narrow oop register which is not a C argument register.
+operand noArg_iRegN() %{
+  constraint(ALLOC_IN_RC(z_no_arg_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg1RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg1_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg2RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg2_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg3RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg3_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg4RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg4_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// NOTE(review): uses z_rarg5_ptrN_reg while the sibling rargXRegN
+// operands above use z_rargX_int_reg classes — verify this asymmetry
+// is intended.
+operand rarg5RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg5_ptrN_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long Register (most general long register operand)
+operand iRegL() %{
+  constraint(ALLOC_IN_RC(z_long_reg));
+  match(RegL);
+  match(revenRegL);
+  match(roddRegL);
+  match(rarg1RegL);
+  match(rarg5RegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegL and roddRegL constitute an even-odd pair.
+operand revenRegL() %{
+  constraint(ALLOC_IN_RC(z_rarg3_long_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegL and roddRegL constitute an even-odd pair.
+operand roddRegL() %{
+  constraint(ALLOC_IN_RC(z_rarg4_long_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg1RegL() %{
+  constraint(ALLOC_IN_RC(z_rarg1_long_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg5RegL() %{
+  constraint(ALLOC_IN_RC(z_rarg5_long_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Flag Registers
+operand flagsReg() %{
+  constraint(ALLOC_IN_RC(z_condition_reg));
+  match(RegFlags);
+  format %{ "CR" %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Flag Registers for rules with result tuples
+operand TD_flagsReg() %{
+  constraint(ALLOC_IN_RC(z_condition_reg));
+  match(RegFlags);
+  format %{ "CR" %}
+  interface(REG_TUPLE_DEST_INTER);
+%}
+
+// Double precision float register.
+operand regD() %{
+  constraint(ALLOC_IN_RC(z_dbl_reg));
+  match(RegD);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Double precision scratch register (Z_FSCRATCH register class).
+operand rscratchRegD() %{
+  constraint(ALLOC_IN_RC(z_rscratch1_dbl_reg));
+  match(RegD);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Single precision float register.
+operand regF() %{
+  constraint(ALLOC_IN_RC(z_flt_reg));
+  match(RegF);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Single precision scratch register.
+operand rscratchRegF() %{
+  constraint(ALLOC_IN_RC(z_rscratch1_flt_reg));
+  match(RegF);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Method Register
+operand inline_cache_regP(iRegP reg) %{
+  constraint(ALLOC_IN_RC(z_r9_regP)); // inline_cache_reg
+  match(reg);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand compiler_method_oop_regP(iRegP reg) %{
+  constraint(ALLOC_IN_RC(z_r1_RegP)); // compiler_method_oop_reg
+  match(reg);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand interpreter_method_oop_regP(iRegP reg) %{
+  constraint(ALLOC_IN_RC(z_r9_regP)); // interpreter_method_oop_reg
+  match(reg);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Operands to remove register moves in unscaled mode.
+// Match read/write registers with an EncodeP node if neither shift nor add are required.
+// The predicate requires a pinned-to-nothing node (in(0) == NULL), i.e.
+// one whose control input does not restrict where it may be matched.
+operand iRegP2N(iRegP reg) %{
+  predicate(Universe::narrow_oop_shift() == 0 && _leaf->as_EncodeP()->in(0) == NULL);
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(EncodeP reg);
+  format %{ "$reg" %}
+  interface(REG_INTER)
+%}
+
+// Match a DecodeN directly in a register when base and shift are both
+// zero, so decoding is the identity.
+operand iRegN2P(iRegN reg) %{
+  predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 &&
+            _leaf->as_DecodeN()->in(0) == NULL);
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(DecodeN reg);
+  format %{ "$reg" %}
+  interface(REG_INTER)
+%}
+
+
+//----------Complex Operands---------------------------------------------------
+
+// Indirect Memory Reference (base register only, zero displacement)
+operand indirect(memoryRegP base) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(base);
+  op_cost(1);
+  format %{ "#0[,$base]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect with Offset (long, 20-bit signed displacement)
+operand indOffset20(memoryRegP base, immL20 offset) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP base offset);
+  op_cost(1);
+  format %{ "$offset[,$base]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// As indOffset20, but the base is a narrow oop decoded on the fly
+// (only when the decode is free, see narrow_oop_use_complex_address).
+operand indOffset20Narrow(iRegN base, immL20 offset) %{
+  predicate(Matcher::narrow_oop_use_complex_address());
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (DecodeN base) offset);
+  op_cost(1);
+  format %{ "$offset[,$base]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Offset (short, 12-bit unsigned displacement)
+operand indOffset12(memoryRegP base, uimmL12 offset) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP base offset);
+  op_cost(1);
+  format %{ "$offset[[,$base]]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// As indOffset12, but with a narrow oop base (see above).
+operand indOffset12Narrow(iRegN base, uimmL12 offset) %{
+  predicate(Matcher::narrow_oop_use_complex_address());
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (DecodeN base) offset);
+  op_cost(1);
+  format %{ "$offset[[,$base]]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Register Index (base + index, zero displacement)
+operand indIndex(memoryRegP base, iRegL index) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP base index);
+  op_cost(1);
+  format %{ "#0[($index,$base)]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect with Offset (long) and index
+operand indOffset20index(memoryRegP base, immL20 offset, iRegL index) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (AddP base index) offset);
+  op_cost(1);
+  format %{ "$offset[($index,$base)]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// As indOffset20index, but with a narrow oop base decoded on the fly.
+operand indOffset20indexNarrow(iRegN base, immL20 offset, iRegL index) %{
+  predicate(Matcher::narrow_oop_use_complex_address());
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (AddP (DecodeN base) index) offset);
+  op_cost(1);
+  format %{ "$offset[($index,$base)]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Offset (short) and index
+operand indOffset12index(memoryRegP base, uimmL12 offset, iRegL index) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (AddP base index) offset);
+  op_cost(1);
+  format %{ "$offset[[($index,$base)]]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// As indOffset12index, but with a narrow oop base decoded on the fly.
+operand indOffset12indexNarrow(iRegN base, uimmL12 offset, iRegL index) %{
+  predicate(Matcher::narrow_oop_use_complex_address());
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (AddP (DecodeN base) index) offset);
+  op_cost(1);
+  format %{ "$offset[[($index,$base)]]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+//----------Special Memory Operands--------------------------------------------
+
+// Stack Slot Operand
+// This operand is used for loading and storing temporary values on
+// the stack where a match requires a value to flow through memory.
+// All variants address off the stack pointer (base 0xf == Z_SP) with
+// the slot number supplied as the displacement.
+operand stackSlotI(sRegI reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);
+  format %{ "[$reg(stackSlotI)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf);   // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg);  // stack offset
+  %}
+%}
+
+operand stackSlotP(sRegP reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);
+  format %{ "[$reg(stackSlotP)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf);   // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotF(sRegF reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);
+  format %{ "[$reg(stackSlotF)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf);   // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotD(sRegD reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);
+  //match(RegD);
+  format %{ "[$reg(stackSlotD)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf);   // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotL(sRegL reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);  //match(RegL);
+  format %{ "[$reg(stackSlotL)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf);   // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+// Operands for expressing Control Flow
+// NOTE: Label is a predefined operand which should not be redefined in
+// the AD file. It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op  - This is the operation of the comparison, and is limited to
+//                  the following set of codes:
+//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below.
+
+// INT cmpOps for CompareAndBranch and CompareAndTrap instructions should not
+// have mask bit #3 set.
+operand cmpOpT() %{
+  match(Bool);
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x8);         // Assembler::bcondEqual
+    not_equal(0x6);     // Assembler::bcondNotEqual
+    less(0x4);          // Assembler::bcondLow
+    greater_equal(0xa); // Assembler::bcondNotLow
+    less_equal(0xc);    // Assembler::bcondNotHigh
+    greater(0x2);       // Assembler::bcondHigh
+    overflow(0x1);      // Assembler::bcondOverflow
+    no_overflow(0xe);   // Assembler::bcondNotOverflow
+  %}
+%}
+
+// When used for floating point comparisons: unordered is treated as less.
+operand cmpOpF() %{
+  match(Bool);
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x8);
+    not_equal(0x7);     // Includes 'unordered'.
+    less(0x5);          // Includes 'unordered'.
+    greater_equal(0xa);
+    less_equal(0xd);    // Includes 'unordered'.
+    greater(0x2);
+    overflow(0x0);      // Not meaningful on z/Architecture.
+    no_overflow(0x0);   // leave unchanged (zero) therefore
+  %}
+%}
+
+// "Regular" cmpOp for int comparisons, includes bit #3 (overflow).
+operand cmpOp() %{
+  match(Bool);
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x8);
+    not_equal(0x7);     // Includes 'unordered'.
+    less(0x5);          // Includes 'unordered'.
+    greater_equal(0xa);
+    less_equal(0xd);    // Includes 'unordered'.
+    greater(0x2);
+    overflow(0x1);      // Assembler::bcondOverflow
+    no_overflow(0xe);   // Assembler::bcondNotOverflow
+  %}
+%}
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify
+// separate instructions for every form of operand when the
+// instruction accepts multiple operand types with the same basic
+// encoding and format.  The classic case of this is memory operands.
+// Indirect is not included since its use is limited to Compare & Swap
+
+// Most general memory operand, allows base, index, and long displacement.
+opclass memory(indirect, indIndex, indOffset20, indOffset20Narrow, indOffset20index, indOffset20indexNarrow);
+opclass memoryRXY(indirect, indIndex, indOffset20, indOffset20Narrow, indOffset20index, indOffset20indexNarrow);
+
+// General memory operand, allows base, index, and short displacement.
+opclass memoryRX(indirect, indIndex, indOffset12, indOffset12Narrow, indOffset12index, indOffset12indexNarrow);
+
+// Memory operand, allows only base and long displacement.
+opclass memoryRSY(indirect, indOffset20, indOffset20Narrow);
+
+// Memory operand, allows only base and short displacement.
+opclass memoryRS(indirect, indOffset12, indOffset12Narrow);
+
+// Operand classes to match encode and decode.
+opclass iRegN_P2N(iRegN);
+opclass iRegP_N2P(iRegP);
+
+
+//----------PIPELINE-----------------------------------------------------------
+pipeline %{
+
+//----------ATTRIBUTES---------------------------------------------------------
+attributes %{
+  // z/Architecture instructions are of length 2, 4, or 6 bytes.
+  variable_size_instructions;
+  instruction_unit_size = 2;
+
+  // Meaningless on z/Architecture.
+  max_instructions_per_bundle = 1;
+
+  // The z/Architecture processor fetches 64 bytes...
+  instruction_fetch_unit_size = 64;
+
+  // ...in one line.
+  instruction_fetch_units = 1
+%}
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine.
+resources(
+   Z_BR,     // branch unit
+   Z_CR,     // condition unit
+   Z_FX1,    // integer arithmetic unit 1
+   Z_FX2,    // integer arithmetic unit 2
+   Z_LDST1,  // load/store unit 1
+   Z_LDST2,  // load/store unit 2
+   Z_FP1,    // float arithmetic unit 1
+   Z_FP2,    // float arithmetic unit 2
+   Z_LDST = Z_LDST1 | Z_LDST2,  // either load/store unit
+   Z_FX   = Z_FX1 | Z_FX2,      // either integer unit
+   Z_FP   = Z_FP1 | Z_FP2       // either float unit
+  );
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline.
+pipe_desc(
+   // TODO: adapt
+   Z_IF,  // instruction fetch
+   Z_IC,
+   Z_D0,  // decode
+   Z_D1,  // decode
+   Z_D2,  // decode
+   Z_D3,  // decode
+   Z_Xfer1,
+   Z_GD,  // group definition
+   Z_MP,  // map
+   Z_ISS, // issue
+   Z_RF,  // resource fetch
+   Z_EX1, // execute (all units)
+   Z_EX2, // execute (FP, LDST)
+   Z_EX3, // execute (FP, LDST)
+   Z_EX4, // execute (FP)
+   Z_EX5, // execute (FP)
+   Z_EX6, // execute (FP)
+   Z_WB,  // write back
+   Z_Xfer2,
+   Z_CP
+  );
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
+
+// Providing the `ins_pipe' declarations in the instruction
+// specifications seems to be of little use. So we use
+// `pipe_class_dummy' for all our instructions at present.
+pipe_class pipe_class_dummy() %{
+  single_instruction;
+  fixed_latency(4);
+%}
+
+// SIGTRAP based implicit range checks in compiled code.
+// Currently, no pipe classes are used on z/Architecture.
+pipe_class pipe_class_trap() %{
+  single_instruction;
+%}
+
+// Example pipe class for a register-register fixed point operation.
+pipe_class pipe_class_fx_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  single_instruction;
+  dst  : Z_EX1(write);
+  src1 : Z_RF(read);
+  src2 : Z_RF(read);
+  Z_FX : Z_RF;
+%}
+
+pipe_class pipe_class_ldst(iRegP dst, memory mem) %{
+  single_instruction;
+  mem : Z_RF(read);
+  dst : Z_WB(write);
+  Z_LDST : Z_RF;
+%}
+
+define %{
+  MachNop = pipe_class_dummy;
+%}
+
+%}
+
+//----------INSTRUCTIONS-------------------------------------------------------
+
+//---------- Chain stack slots between similar types --------
+// Spill/reload rules: plain loads and stores that move a value between a
+// register and a stack slot of the same basic type. The register allocator
+// uses these for spill code.
+
+// Load integer from stack slot.
+instruct stkI_to_regI(iRegI dst, stackSlotI src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "L       $dst,$src\t # stk reload int" %}
+  opcode(L_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store integer to stack slot.
+instruct regI_to_stkI(stackSlotI dst, iRegI src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "ST      $src,$dst\t # stk spill int" %}
+  opcode(ST_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst)); // rs=rt
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load long from stack slot.
+instruct stkL_to_regL(iRegL dst, stackSlotL src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LG      $dst,$src\t # stk reload long" %}
+  opcode(LG_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store long to stack slot.
+instruct regL_to_stkL(stackSlotL dst, iRegL src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "STG     $src,$dst\t # stk spill long" %}
+  opcode(STG_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst)); // rs=rt
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load pointer from stack slot, 64-bit encoding.
+instruct stkP_to_regP(iRegP dst, stackSlotP src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LG      $dst,$src\t # stk reload ptr" %}
+  opcode(LG_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store pointer to stack slot.
+instruct regP_to_stkP(stackSlotP dst, iRegP src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "STG     $src,$dst\t # stk spill ptr" %}
+  opcode(STG_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst)); // rs=rt
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  Float types
+
+// Load float value from stack slot.
+instruct stkF_to_regF(regF dst, stackSlotF src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "LE(Y)   $dst,$src\t # stk reload float" %}
+  opcode(LE_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store float value to stack slot.
+instruct regF_to_stkF(stackSlotF dst, regF src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STE(Y)  $src,$dst\t # stk spill float" %}
+  opcode(STE_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load double value from stack slot.
+instruct stkD_to_regD(regD dst, stackSlotD src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LD(Y)   $dst,$src\t # stk reload double" %}
+  opcode(LD_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store double value to stack slot.
+// NOTE(review): size(4) assumes the short-displacement STD form always
+// suffices for spill slots — confirm stack frames never need STDY here.
+instruct regD_to_stkD(stackSlotD dst, regD src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STD(Y)  $src,$dst\t # stk spill double" %}
+  opcode(STD_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Load/Store/Move Instructions---------------------------------------
+
+//----------Load Instructions--------------------------------------------------
+
+//------------------
+//  MEMORY
+//------------------
+// Rules below pass two opcodes to z_form_rt_mem_opt: by the "X(Y)" format
+// strings, the first is the long-displacement (Y/RXY) variant and the second
+// the short form — presumably selected by displacement size; see the
+// z_form_rt_mem_opt encode class.
+
+//  BYTE
+// Load Byte (8bit signed)
+instruct loadB(iRegI dst, memory mem) %{
+  match(Set dst (LoadB mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LB      $dst, $mem\t # sign-extend byte to int" %}
+  opcode(LB_ZOPC, LB_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Byte (8bit signed)
+instruct loadB2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadB mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LGB     $dst, $mem\t # sign-extend byte to long" %}
+  opcode(LGB_ZOPC, LGB_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into an int reg.
+instruct loadUB(iRegI dst, memory mem) %{
+  match(Set dst (LoadUB mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LLGC    $dst,$mem\t # zero-extend byte to int" %}
+  opcode(LLGC_ZOPC, LLGC_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into a Long Register.
+instruct loadUB2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadUB mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LLGC    $dst,$mem\t # zero-extend byte to long" %}
+  opcode(LLGC_ZOPC, LLGC_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// CHAR/SHORT
+
+// Load Short (16bit signed)
+instruct loadS(iRegI dst, memory mem) %{
+  match(Set dst (LoadS mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "LH(Y)   $dst,$mem\t # sign-extend short to int" %}
+  opcode(LHY_ZOPC, LH_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Short (16bit signed)
+instruct loadS2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadS mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LGH     $dst,$mem\t # sign-extend short to long" %}
+  opcode(LGH_ZOPC, LGH_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Char (16bit Unsigned)
+instruct loadUS(iRegI dst, memory mem) %{
+  match(Set dst (LoadUS mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LLGH    $dst,$mem\t # zero-extend short to int" %}
+  opcode(LLGH_ZOPC, LLGH_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
+instruct loadUS2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadUS mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LLGH    $dst,$mem\t # zero-extend short to long" %}
+  opcode(LLGH_ZOPC, LLGH_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// INT
+
+// Load Integer
+instruct loadI(iRegI dst, memory mem) %{
+  match(Set dst (LoadI mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "L(Y)    $dst,$mem\t #" %}
+  opcode(LY_ZOPC, L_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load and convert to long.
+instruct loadI2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadI mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LGF     $dst,$mem\t #" %}
+  opcode(LGF_ZOPC, LGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Integer into a Long Register
+// Matches the (AndL (ConvI2L ...) 0xFFFFFFFF) idiom the ideal graph uses
+// for a zero-extending int load.
+instruct loadUI2L(iRegL dst, memory mem, immL_FFFFFFFF mask) %{
+  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LLGF    $dst,$mem\t # zero-extend int to long" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// range = array length (=jint)
+// Load Range
+instruct loadRange(iRegI dst, memory mem) %{
+  match(Set dst (LoadRange mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "L(Y)    $dst,$mem\t # range" %}
+  opcode(LY_ZOPC, L_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// LONG
+
+// Load Long - aligned
+instruct loadL(iRegL dst, memory mem) %{
+  match(Set dst (LoadL mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LG      $dst,$mem\t # long" %}
+  opcode(LG_ZOPC, LG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Long - UNaligned
+// Same encoding as loadL; unaligned access is tolerated by the instruction.
+instruct loadL_unaligned(iRegL dst, memory mem) %{
+  match(Set dst (LoadL_unaligned mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LG      $dst,$mem\t # unaligned long" %}
+  opcode(LG_ZOPC, LG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+// PTR
+
+// Load Pointer
+instruct loadP(iRegP dst, memory mem) %{
+  match(Set dst (LoadP mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LG      $dst,$mem\t # ptr" %}
+  opcode(LG_ZOPC, LG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// LoadP + CastP2X
+// Folds the pointer-to-integer cast into the load; same LG encoding.
+instruct castP2X_loadP(iRegL dst, memory mem) %{
+  match(Set dst (CastP2X (LoadP mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LG      $dst,$mem\t # ptr + p2x" %}
+  opcode(LG_ZOPC, LG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Klass Pointer
+instruct loadKlass(iRegP dst, memory mem) %{
+  match(Set dst (LoadKlass mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LG      $dst,$mem\t # klass ptr" %}
+  opcode(LG_ZOPC, LG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Materialize the constant-table (TOC) base pc-relatively.
+// No match rule: used only via expand rules / ADLC-internal selection.
+instruct loadTOC(iRegL dst) %{
+  effect(DEF dst);
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  // TODO: check why this attribute causes many unnecessary rematerializations.
+  //
+  // The graphs I saw just had high register pressure. Further the
+  // register TOC is loaded to is overwritten by the constant short
+  // after. Here something as round robin register allocation might
+  // help. But rematerializing seems not to hurt, jack even seems to
+  // improve slightly.
+  //
+  // Without this flag we get spill-split recycle sanity check
+  // failures in
+  // spec.benchmarks._228_jack.NfaState::GenerateCode. This happens in
+  // a block with three loadConP_dynTOC nodes and a tlsLoadP. The
+  // tlsLoadP has a huge amount of outs and forces the TOC down to the
+  // stack. Later tlsLoadP is rematerialized, leaving the register
+  // allocator with TOC on the stack and a badly placed reload.
+  ins_should_rematerialize(true);
+  format %{ "LARL    $dst, &constant_pool\t; load dynTOC" %}
+  ins_encode %{ __ load_toc($dst$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// FLOAT
+
+// Load Float
+instruct loadF(regF dst, memory mem) %{
+  match(Set dst (LoadF mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "LE(Y)    $dst,$mem" %}
+  opcode(LEY_ZOPC, LE_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// DOUBLE
+
+// Load Double
+instruct loadD(regD dst, memory mem) %{
+  match(Set dst (LoadD mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "LD(Y)    $dst,$mem" %}
+  opcode(LDY_ZOPC, LD_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Double - UNaligned
+// Same encoding as loadD; unaligned access is tolerated by the instruction.
+instruct loadD_unaligned(regD dst, memory mem) %{
+  match(Set dst (LoadD_unaligned mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "LD(Y)    $dst,$mem" %}
+  opcode(LDY_ZOPC, LD_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------------------
+//  IMMEDIATES
+//----------------------
+
+// Load arbitrary 32-bit int constant (sign-extended to 64 bit by LGFI).
+instruct loadConI(iRegI dst, immI src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "LGFI     $dst,$src\t # (int)" %}
+  ins_encode %{ __ z_lgfi($dst$$Register, $src$$constant); %}  // Sign-extend to 64 bit, it's at no cost.
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load 16-bit signed int constant; shorter encoding than loadConI.
+instruct loadConI16(iRegI dst, immI16 src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LGHI     $dst,$src\t # (int)" %}
+  ins_encode %{ __ z_lghi($dst$$Register, $src$$constant); %}  // Sign-extend to 64 bit, it's at no cost.
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load int zero: XOR the register with itself. XGR sets the condition
+// code, hence the KILL cr effect.
+instruct loadConI_0(iRegI dst, immI_0 src, flagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "loadConI $dst,$src\t # (int) XGR because ZERO is loaded" %}
+  opcode(XGR_ZOPC);
+  ins_encode(z_rreform(dst, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load 16-bit unsigned int constant into the low halfword (LLILL zeroes
+// the rest of the register).
+instruct loadConUI16(iRegI dst, uimmI16 src) %{
+  match(Set dst src);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LLILL    $dst,$src" %}
+  opcode(LLILL_ZOPC);
+  ins_encode(z_riform_unsigned(dst, src) );
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load long constant from TOC with pcrelative address.
+// Bails out (records out-of-memory) if the constant table is exhausted.
+instruct loadConL_pcrelTOC(iRegL dst, immL src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST_LO);
+  size(6);
+  format %{ "LGRL    $dst,[pcrelTOC]\t # load long $src from table" %}
+  ins_encode %{
+    address long_address = __ long_constant($src$$constant);
+    if (long_address == NULL) {
+      Compile::current()->env()->record_out_of_memory_failure();
+      return;
+    }
+    __ load_long_pcrelative($dst$$Register, long_address);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load long constant that fits in 32 bits (sign-extended by LGFI).
+instruct loadConL32(iRegL dst, immL32 src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "LGFI     $dst,$src\t # (long)" %}
+  ins_encode %{ __ z_lgfi($dst$$Register, $src$$constant); %}  // Sign-extend to 64 bit, it's at no cost.
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load long constant that fits in 16 bits; shortest immediate form.
+instruct loadConL16(iRegL dst, immL16 src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LGHI     $dst,$src\t # (long)" %}
+  ins_encode %{ __ z_lghi($dst$$Register, $src$$constant); %}  // Sign-extend to 64 bit, it's at no cost.
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load long zero via self-XOR; clobbers the condition code (KILL cr).
+instruct loadConL_0(iRegL dst, immL_0 src, flagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_LOW);
+  format %{ "LoadConL    $dst,$src\t # (long) XGR because ZERO is loaded" %}
+  opcode(XGR_ZOPC);
+  ins_encode(z_rreform(dst, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load ptr constant from TOC with pc relative address.
+// Special handling for oop constants required.
+// Three cases: oop (needs oop relocation), metadata (needs metadata
+// relocation), and plain non-oop pointers. Each path records an
+// out-of-memory failure if the constant table is exhausted.
+instruct loadConP_pcrelTOC(iRegP dst, immP src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST_LO);
+  size(6);
+  format %{ "LGRL    $dst,[pcrelTOC]\t # load ptr $src from table" %}
+  ins_encode %{
+    relocInfo::relocType constant_reloc = $src->constant_reloc();
+    if (constant_reloc == relocInfo::oop_type) {
+      AddressLiteral a = __ allocate_oop_address((jobject)$src$$constant);
+      bool success = __ load_oop_from_toc($dst$$Register, a);
+      if (!success) {
+        Compile::current()->env()->record_out_of_memory_failure();
+        return;
+      }
+    } else if (constant_reloc == relocInfo::metadata_type) {
+      AddressLiteral a = __ constant_metadata_address((Metadata *)$src$$constant);
+      address const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
+      if (const_toc_addr == NULL) {
+        Compile::current()->env()->record_out_of_memory_failure();
+        return;
+      }
+      __ load_long_pcrelative($dst$$Register, const_toc_addr);
+    } else {          // Non-oop pointers, e.g. card mark base, heap top.
+      address long_address = __ long_constant((jlong)$src$$constant);
+      if (long_address == NULL) {
+        Compile::current()->env()->record_out_of_memory_failure();
+        return;
+      }
+      __ load_long_pcrelative($dst$$Register, long_address);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// We don't use immP16 to avoid problems with oops.
+// Load NULL pointer via self-XOR; clobbers the condition code.
+instruct loadConP0(iRegP dst, immP0 src, flagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+  size(4);
+  format %{ "XGR     $dst,$dst\t # NULL ptr" %}
+  opcode(XGR_ZOPC);
+  ins_encode(z_rreform(dst, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Load Float Constant Instructions-------------------------------------------------
+
+// We may not specify this instruction via an `expand' rule. If we do,
+// code selection will forget that this instruction needs a floating
+// point constant inserted into the code buffer. So `Shorten_branches'
+// will fail.
+instruct loadConF_dynTOC(regF dst, immF src, flagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  // If this instruction rematerializes, it prolongs the live range
+  // of the toc node, causing illegal graphs.
+  ins_cannot_rematerialize(true);
+  format %{ "LE(Y)    $dst,$constantoffset[,$constanttablebase]\t # load FLOAT $src from table" %}
+  ins_encode %{
+    __ load_float_largeoffset($dst$$FloatRegister, $constantoffset($src), $constanttablebase, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// We may not specify this instruction via an `expand' rule. If we do,
+// code selection will forget that this instruction needs a floating
+// point constant inserted into the code buffer. So `Shorten_branches'
+// will fail.
+instruct loadConD_dynTOC(regD dst, immD src, flagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  // If this instruction rematerializes, it prolongs the live range
+  // of the toc node, causing illegal graphs.
+  ins_cannot_rematerialize(true);
+  format %{ "LD(Y)    $dst,$constantoffset[,$constanttablebase]\t # load DOUBLE $src from table" %}
+  ins_encode %{
+    __ load_double_largeoffset($dst$$FloatRegister, $constantoffset($src), $constanttablebase, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Special case: Load Const 0.0F
+
+// There's a special instr to clear a FP register.
+// NOTE(review): the second encoder operand is Z_F0; LZER only writes $dst,
+// so that field is presumably a don't-care — confirm against z_rreform.
+instruct loadConF0(regF dst, immFp0 src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LZER     $dst,$src\t # clear to zero" %}
+  opcode(LZER_ZOPC);
+  ins_encode(z_rreform(dst, Z_F0));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// There's a special instr to clear a FP register.
+instruct loadConD0(regD dst, immDp0 src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LZDR     $dst,$src\t # clear to zero" %}
+  opcode(LZDR_ZOPC);
+  ins_encode(z_rreform(dst, Z_F0));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Store Instructions-------------------------------------------------
+
+// BYTE
+
+// Store Byte
+instruct storeB(memory mem, iRegI src) %{
+  match(Set mem (StoreB mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "STC(Y)  $src,$mem\t # byte" %}
+  opcode(STCY_ZOPC, STC_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Card-mark store: writes a zero byte. The stored value is always 0, so
+// the source operand's content is never read; Z_R0_scratch is loaded
+// with zero explicitly when an index register is present.
+instruct storeCM(memory mem, immI_0 src) %{
+  match(Set mem (StoreCM mem src));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "STC(Y)  $src,$mem\t # CMS card-mark byte (must be 0!)" %}
+  ins_encode %{
+    guarantee($mem$$index$$Register != Z_R0, "content will not be used.");
+    if ($mem$$index$$Register != noreg) {
+      // Can't use clear_mem --> load const zero and store character.
+      __ load_const_optimized(Z_R0_scratch, (long)0);
+      if (Immediate::is_uimm12($mem$$disp)) {
+        __ z_stc(Z_R0_scratch, $mem$$Address);
+      } else {
+        __ z_stcy(Z_R0_scratch, $mem$$Address);
+      }
+    } else {
+      __ clear_mem(Address($mem$$Address), 1);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// CHAR/SHORT
+
+// Store Char/Short
+instruct storeC(memory mem, iRegI src) %{
+  match(Set mem (StoreC mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "STH(Y)  $src,$mem\t # short" %}
+  opcode(STHY_ZOPC, STH_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// INT
+
+// Store Integer
+instruct storeI(memory mem, iRegI src) %{
+  match(Set mem (StoreI mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "ST(Y)   $src,$mem\t # int" %}
+  opcode(STY_ZOPC, ST_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// LONG
+
+// Store Long
+instruct storeL(memory mem, iRegL src) %{
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "STG     $src,$mem\t # long" %}
+  opcode(STG_ZOPC, STG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// PTR
+
+// Store Pointer
+instruct storeP(memory dst, memoryRegP src) %{
+  match(Set dst (StoreP dst src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "STG     $src,$dst\t # ptr" %}
+  opcode(STG_ZOPC, STG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// FLOAT
+
+// Store Float
+instruct storeF(memory mem, regF src) %{
+  match(Set mem (StoreF mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "STE(Y)   $src,$mem\t # float" %}
+  opcode(STEY_ZOPC, STE_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// DOUBLE
+
+// Store Double
+instruct storeD(memory mem, regD src) %{
+  match(Set mem (StoreD mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "STD(Y)   $src,$mem\t # double" %}
+  opcode(STDY_ZOPC, STD_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Prefetch instructions. Must be safe to execute with invalid address (cannot fault).
+
+// Should support match rule for PrefetchAllocation.
+// Still needed after 8068977 for PrefetchAllocate.
+// PFD code 2 = prefetch for store; guarded by has_Prefetch (z10+).
+instruct prefetchAlloc(memory mem) %{
+  match(PrefetchAllocation mem);
+  predicate(VM_Version::has_Prefetch());
+  ins_cost(DEFAULT_COST);
+  format %{ "PREFETCH 2, $mem\t # Prefetch allocation, z10 only" %}
+  ins_encode %{ __ z_pfd(0x02, $mem$$Address); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Memory init instructions------------------------------------------
+// Store-immediate-to-memory rules: avoid materializing small constants in
+// a register before storing them.
+
+// Move Immediate to 1-byte memory.
+// MVI takes a 12-bit displacement; MVIY covers the long-displacement case.
+instruct memInitB(memoryRSY mem, immI8 src) %{
+  match(Set mem (StoreB mem src));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MVI     $mem,$src\t # direct mem init 1" %}
+  ins_encode %{
+    if (Immediate::is_uimm12((long)$mem$$disp)) {
+      __ z_mvi($mem$$Address, $src$$constant);
+    } else {
+      __ z_mviy($mem$$Address, $src$$constant);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Move Immediate to 2-byte memory.
+instruct memInitC(memoryRS mem, immI16 src) %{
+  match(Set mem (StoreC mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "MVHHI   $mem,$src\t # direct mem init 2" %}
+  opcode(MVHHI_ZOPC);
+  ins_encode(z_silform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Move Immediate to 4-byte memory.
+// Note: the immediate is limited to 16 bits (immI16); MVHI sign-extends.
+instruct memInitI(memoryRS mem, immI16 src) %{
+  match(Set mem (StoreI mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "MVHI    $mem,$src\t # direct mem init 4" %}
+  opcode(MVHI_ZOPC);
+  ins_encode(z_silform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+// Move Immediate to 8-byte memory.
+instruct memInitL(memoryRS mem, immL16 src) %{
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "MVGHI   $mem,$src\t # direct mem init 8" %}
+  opcode(MVGHI_ZOPC);
+  ins_encode(z_silform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Move Immediate to 8-byte memory.
+instruct memInitP(memoryRS mem, immP16 src) %{
+  match(Set mem (StoreP mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "MVGHI   $mem,$src\t # direct mem init 8" %}
+  opcode(MVGHI_ZOPC);
+  ins_encode(z_silform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Instructions for compressed pointers (cOop and NKlass)-------------
+
+// See cOop encoding classes for elaborate comment.
+
+// Moved here because it is needed in expand rules for encode.
+// Long negation.
+// LCGR sets the condition code, hence the KILL cr effect.
+instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
+  match(Set dst (SubL zero src));
+  effect(KILL cr);
+  size(4);
+  format %{ "NEG     $dst, $src\t # long" %}
+  ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Compressed Pointer
+
+// Load narrow oop (32-bit, zero-extended by LLGF).
+instruct loadN(iRegN dst, memory mem) %{
+  match(Set dst (LoadN mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LoadN  $dst,$mem\t# (cOop)" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load narrow Klass Pointer
+instruct loadNKlass(iRegN dst, memory mem) %{
+  match(Set dst (LoadNKlass mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LoadNKlass $dst,$mem\t# (klass cOop)" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load constant Compressed Pointer
+
+// Load narrow-oop constant; needs an oop relocation entry.
+instruct loadConN(iRegN dst, immN src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "loadConN    $dst,$src\t # (cOop)" %}
+  ins_encode %{
+    AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
+    __ relocate(cOop.rspec(), 1);
+    __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load narrow-oop NULL via self-XOR; clobbers the condition code.
+instruct loadConN0(iRegN dst, immN0 src, flagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "loadConN    $dst,$src\t # (cOop) XGR because ZERO is loaded" %}
+  opcode(XGR_ZOPC);
+  ins_encode(z_rreform(dst, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load narrow-klass constant; needs a metadata relocation entry.
+instruct loadConNKlass(iRegN dst, immNKlass src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
+  ins_encode %{
+    AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
+    __ relocate(NKlass.rspec(), 1);
+    __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load and Decode Compressed Pointer
+// optimized variants for Unscaled cOops
+
+// NOTE(review): predicate starts with 'false &&', so this rule is never
+// selected — apparently disabled deliberately; confirm before enabling.
+instruct decodeLoadN(iRegP dst, memory mem) %{
+  match(Set dst (DecodeN (LoadN mem)));
+  predicate(false && (Universe::narrow_oop_base()==NULL)&&(Universe::narrow_oop_shift()==0));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "DecodeLoadN  $dst,$mem\t# (cOop Load+Decode)" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// NOTE(review): disabled via 'false &&' like decodeLoadN above.
+instruct decodeLoadNKlass(iRegP dst, memory mem) %{
+  match(Set dst (DecodeNKlass (LoadNKlass mem)));
+  predicate(false && (Universe::narrow_klass_base()==NULL)&&(Universe::narrow_klass_shift()==0));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "DecodeLoadNKlass  $dst,$mem\t# (load/decode NKlass)" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Decode a narrow-klass constant by loading the decoded Klass* directly.
+instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
+  match(Set dst (DecodeNKlass src));
+  ins_cost(3 * DEFAULT_COST);
+  size(12);
+  format %{ "DecodeLoadConNKlass  $dst,$src\t # decode(cKlass)" %}
+  ins_encode %{
+    AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
+    __ relocate(NKlass.rspec(), 1);
+    __ load_const($dst$$Register, (Klass*)NKlass.value());
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Decode Compressed Pointer
+
+// General decoder
+// Used when there is no heap base, or when the base-peeling expansion
+// (decodeN_Ex) is disabled. The 'true' argument requests a NULL check.
+instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
+  match(Set dst (DecodeN src));
+  effect(KILL cr);
+  predicate(Universe::narrow_oop_base() == NULL || !ExpandLoadingBaseDecode);
+  ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "decodeN  $dst,$src\t# (decode cOop)" %}
+  ins_encode %{  __ oop_decoder($dst$$Register, $src$$Register, true); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// General Klass decoder
+instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
+  match(Set dst (DecodeNKlass src));
+  effect(KILL cr);
+  ins_cost(3 * DEFAULT_COST);
+  format %{ "decode_klass $dst,$src" %}
+  ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// General decoder
+// Variant for values known to be non-NULL (NotNull/Constant ptr type):
+// the 'false' argument skips the NULL check in oop_decoder.
+instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
+  match(Set dst (DecodeN src));
+  effect(KILL cr);
+  predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
+             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
+            (Universe::narrow_oop_base()== NULL || !ExpandLoadingBaseDecode_NN));
+  ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "decodeN  $dst,$src\t# (decode cOop NN)" %}
+  ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+  // Materialize the narrow-oop heap base. predicate(false): only reachable
+  // through the expand rules below, never matched directly.
+  instruct loadBase(iRegL dst, immL baseImm) %{
+    effect(DEF dst, USE baseImm);
+    predicate(false);
+    format %{ "llihl    $dst=$baseImm \t// load heap base" %}
+    ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
+    ins_pipe(pipe_class_dummy);
+  %}
+
+  // Decoder for heapbased mode peeling off loading the base.
+  instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
+    match(Set dst (DecodeN src base));
+    // Note: Effect TEMP dst was used with the intention to get
+    // different regs for dst and base, but this has caused ADLC to
+    // generate wrong code. Oop_decoder generates additional lgr when
+    // dst==base.
+    effect(KILL cr);
+    predicate(false);
+    // TODO: s390 port size(VARIABLE_SIZE);
+    format %{ "decodeN  $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
+    ins_encode %{
+      __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
+                     (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
+    %}
+    ins_pipe(pipe_class_dummy);
+  %}
+
+  // Decoder for heapbased mode peeling off loading the base.
+  // Non-NULL variant: 'false' argument skips the NULL check in oop_decoder.
+  instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
+    match(Set dst (DecodeN src base));
+    effect(KILL cr);
+    predicate(false);
+    // TODO: s390 port size(VARIABLE_SIZE);
+    format %{ "decodeN  $dst = ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
+    ins_encode %{
+      __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
+                     (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
+    %}
+    ins_pipe(pipe_class_dummy);
+  %}
+
+// Decoder for heapbased mode peeling off loading the base.
+// Expands into loadBase + decodeN_base so the base load can be hoisted
+// and shared by the register allocator.
+instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
+  match(Set dst (DecodeN src));
+  predicate(Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode);
+  ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  expand %{
+    immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
+    iRegL base;
+    loadBase(base, baseImm);
+    decodeN_base(dst, src, base, cr);
+  %}
+%}
+
+// Decoder for heapbased mode peeling off loading the base.
+instruct decodeN_NN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
+  match(Set dst (DecodeN src));
+  predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
+             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
+            Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode_NN);
+  ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  expand %{
+    immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
+    iRegL base;
+    loadBase(base, baseImm);
+    decodeN_NN_base(dst, src, base, cr);
+  %}
+%}
+
+//  Encode Compressed Pointer
+
+// General encoder
+// Compresses a full oop (iRegP) into its narrow form (iRegN). The last
+// oop_encoder argument, all_outs_are_Stores(this), presumably lets the
+// encoder relax null handling when every user is a store -- TODO confirm
+// against the oop_encoder implementation.
+instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
+  match(Set dst (EncodeP src));
+  effect(KILL cr);
+  // Matches only when the oop may be null, and either no base/disjoint
+  // base is in use or base-load expansion is disabled (else encodeP_Ex).
+  predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
+            (Universe::narrow_oop_base() == 0 ||
+             Universe::narrow_oop_base_disjoint() ||
+             !ExpandLoadingBaseEncode));
+  ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "encodeP  $dst,$src\t# (encode cOop)" %}
+  ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// General class encoder
+// Compresses a Klass pointer; no predicate, so this is the only rule for
+// EncodePKlass.
+instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
+  match(Set dst (EncodePKlass src));
+  effect(KILL cr);
+  format %{ "encode_klass $dst,$src" %}
+  ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Non-null variant: oop proven NotNull, so oop_encoder is called with
+// maybeNULL == false.
+instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
+  match(Set dst (EncodeP src));
+  effect(KILL cr);
+  predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
+            (Universe::narrow_oop_base() == 0 ||
+             Universe::narrow_oop_base_disjoint() ||
+             !ExpandLoadingBaseEncode_NN));
+  ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "encodeP  $dst,$src\t# (encode cOop)" %}
+  ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+  // Encoder for heapbased mode peeling off loading the base.
+  // Only used via expand (predicate(false)); receives the (negated) base
+  // in a register from the expand rule below.
+  instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
+    match(Set dst (EncodeP src (Binary base dst)));
+    effect(TEMP_DEF dst);
+    predicate(false);
+    ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
+    // TODO: s390 port size(VARIABLE_SIZE);
+    format %{ "encodeP  $dst = ($src>>3) +$base + pow2_offset\t# (encode cOop)" %}
+    ins_encode %{
+      // Negated pow2 offset of the shifted base; compensates for the base
+      // having been negated by the expand rule.
+      jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
+        (((uint64_t)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift());
+      __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
+    %}
+    ins_pipe(pipe_class_dummy);
+  %}
+
+  // Encoder for heapbased mode peeling off loading the base.
+  // Non-null variant; pow2_offset arrives as an immediate operand from
+  // the expand rule. Only used via expand (predicate(false)).
+  instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
+    match(Set dst (EncodeP src base));
+    effect(USE pow2_offset);
+    predicate(false);
+    ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
+    // TODO: s390 port size(VARIABLE_SIZE);
+    format %{ "encodeP  $dst = ($src>>3) +$base + $pow2_offset\t# (encode cOop)" %}
+    ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
+    ins_pipe(pipe_class_dummy);
+  %}
+
+// Encoder for heapbased mode peeling off loading the base.
+// Expands EncodeP into loadBase + negate + encodeP_base so the base
+// computation can be hoisted.
+instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
+  match(Set dst (EncodeP src));
+  effect(KILL cr);
+  predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
+            (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode));
+  ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  expand %{
+    // Note: base is pre-shifted here ( >> narrow_oop_shift), unlike in
+    // encodeP_NN_Ex below.
+    immL baseImm %{ ((jlong)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift() %}
+    immL_0 zero %{ (0) %}
+    flagsReg ccr;
+    iRegL base;
+    iRegL negBase;
+    loadBase(base, baseImm);
+    negL_reg_reg(negBase, zero, base, ccr);
+    encodeP_base(dst, src, negBase);
+  %}
+%}
+
+// Encoder for heapbased mode peeling off loading the base.
+// Non-null counterpart of encodeP_Ex.
+instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{
+  match(Set dst (EncodeP src));
+  effect(KILL cr);
+  predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
+            (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode_NN));
+  ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  expand %{
+    immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
+    immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)Universe::narrow_oop_base())) %}
+    immL_0 zero %{ 0 %}
+    flagsReg ccr;
+    iRegL base;
+    iRegL negBase;
+    loadBase(base, baseImm);
+    negL_reg_reg(negBase, zero, base, ccr);
+    encodeP_NN_base(dst, src, negBase, pow2_offset);
+  %}
+%}
+
+//  Store Compressed Pointer
+
+// Store Compressed Pointer
+// 32-bit store of a narrow oop; opcode pair (STY, ST) lets the
+// encoder pick the long- or short-displacement form.
+instruct storeN(memory mem, iRegN_P2N src) %{
+  match(Set mem (StoreN mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "ST      $src,$mem\t# (cOop)" %}
+  opcode(STY_ZOPC, ST_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store Compressed Klass pointer
+instruct storeNKlass(memory mem, iRegN src) %{
+  match(Set mem (StoreNKlass mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "ST      $src,$mem\t# (cKlass)" %}
+  opcode(STY_ZOPC, ST_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare Compressed Pointers
+
+// Unsigned 32-bit register-register compare (CLR).
+instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
+  match(Set cr (CmpN src1 src2));
+  ins_cost(DEFAULT_COST);
+  size(2);
+  format %{ "CLR     $src1,$src2\t# (cOop)" %}
+  opcode(CLR_ZOPC);
+  ins_encode(z_rrform(src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare against a narrow-oop constant; a relocation entry is recorded
+// so the embedded immediate can be patched.
+instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
+  match(Set cr (CmpN src1 src2));
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "CLFI    $src1,$src2\t# (cOop) compare immediate narrow" %}
+  ins_encode %{
+    AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
+    __ relocate(cOop.rspec(), 1);
+    __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Same as above, but for a narrow Klass constant (metadata relocation).
+instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
+  match(Set cr (CmpN src1 src2));
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "CLFI    $src1,$src2\t# (NKlass) compare immediate narrow" %}
+  ins_encode %{
+    AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
+    __ relocate(NKlass.rspec(), 1);
+    __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare against narrow-oop zero. Note: the encoding is LTR $src1,$src1
+// (load-and-test of src1 with itself); $src2 appears only in the format
+// string, where it prints as 0.
+instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
+  match(Set cr (CmpN src1 src2));
+  ins_cost(DEFAULT_COST);
+  size(2);
+  format %{ "LTR     $src1,$src2\t# (cOop) LTR because comparing against zero" %}
+  opcode(LTR_ZOPC);
+  ins_encode(z_rrform(src1, src1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------MemBar Instructions-----------------------------------------------
+
+// Memory barrier flavors
+
+// Acquire barrier: emits z_acquire(). size(0) indicates no code is
+// expected from it -- presumably a no-op on z/Architecture's strong
+// memory model; TODO confirm against MacroAssembler::z_acquire.
+instruct membar_acquire() %{
+  match(MemBarAcquire);
+  match(LoadFence);
+  ins_cost(4*MEMORY_REF_COST);
+  size(0);
+  format %{ "MEMBAR-acquire" %}
+  ins_encode %{ __ z_acquire(); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Empty: the CAS in the preceding FastLock already provides the
+// required ordering (see format string).
+instruct membar_acquire_lock() %{
+  match(MemBarAcquireLock);
+  ins_cost(0);
+  size(0);
+  format %{ "MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Release barrier: emits z_release(); size(0) as for acquire above.
+instruct membar_release() %{
+  match(MemBarRelease);
+  match(StoreFence);
+  ins_cost(4 * MEMORY_REF_COST);
+  size(0);
+  format %{ "MEMBAR-release" %}
+  ins_encode %{ __ z_release(); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Empty: ordering is provided by the CAS in the succeeding FastUnlock.
+instruct membar_release_lock() %{
+  match(MemBarReleaseLock);
+  ins_cost(0);
+  size(0);
+  format %{ "MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Full fence: emits z_fence(), 2 bytes of code.
+instruct membar_volatile() %{
+  match(MemBarVolatile);
+  ins_cost(4 * MEMORY_REF_COST);
+  size(2);
+  format %{ "MEMBAR-volatile" %}
+  ins_encode %{ __ z_fence(); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Elide the fence when the matcher proves a store-load barrier already
+// follows (Matcher::post_store_load_barrier).
+instruct unnecessary_membar_volatile() %{
+  match(MemBarVolatile);
+  predicate(Matcher::post_store_load_barrier(n));
+  ins_cost(0);
+  size(0);
+  format %{ "# MEMBAR-volatile (empty)" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compiler-only ordering constraint; emits nothing.
+instruct membar_CPUOrder() %{
+  match(MemBarCPUOrder);
+  ins_cost(0);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "MEMBAR-CPUOrder (empty)" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store-store barrier; empty encoding.
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(0);
+  size(0);
+  format %{ "MEMBAR-storestore (empty)" %}
+  ins_encode();
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Register Move Instructions-----------------------------------------
+// Rounding is a no-op on z/Architecture (results are already in normal
+// IEEE format), so both Round* rules emit nothing.
+instruct roundDouble_nop(regD dst) %{
+  match(Set dst (RoundDouble dst));
+  ins_cost(0);
+  // TODO: s390 port size(FIXED_SIZE);
+  // z/Architecture results are already "rounded" (i.e., normal-format IEEE).
+  ins_encode();
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct roundFloat_nop(regF dst) %{
+  match(Set dst (RoundFloat dst));
+  ins_cost(0);
+  // TODO: s390 port size(FIXED_SIZE);
+  // z/Architecture results are already "rounded" (i.e., normal-format IEEE).
+  ins_encode();
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Cast Long to Pointer for unsafe natives.
+// lgr_if_needed emits the register move only when dst != src.
+instruct castX2P(iRegP dst, iRegL src) %{
+  match(Set dst (CastX2P src));
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "LGR     $dst,$src\t # CastX2P" %}
+  ins_encode %{ __ lgr_if_needed($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Cast Pointer to Long for unsafe natives.
+instruct castP2X(iRegL dst, iRegP_N2P src) %{
+  match(Set dst (CastP2X src));
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "LGR     $dst,$src\t # CastP2X" %}
+  ins_encode %{ __ lgr_if_needed($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store a double register into a stack slot (chain rule).
+instruct stfSSD(stackSlotD stkSlot, regD src) %{
+  // %%%% TODO: Tell the coalescer that this kind of node is a copy!
+  match(Set stkSlot src);   // chain rule
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ " STD   $src,$stkSlot\t # stk" %}
+  opcode(STD_ZOPC);
+  ins_encode(z_form_rt_mem(src, stkSlot));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store a float register into a stack slot (chain rule).
+instruct stfSSF(stackSlotF stkSlot, regF src) %{
+  // %%%% TODO: Tell the coalescer that this kind of node is a copy!
+  match(Set stkSlot src);   // chain rule
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "STE   $src,$stkSlot\t # stk" %}
+  opcode(STE_ZOPC);
+  ins_encode(z_form_rt_mem(src, stkSlot));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Conditional Move---------------------------------------------------
+// Integer/pointer variants share the z_enc_cmov_reg / z_enc_cmov_imm
+// encoding classes (defined elsewhere in this file).
+
+instruct cmovN_reg(cmpOp cmp, flagsReg cr, iRegN dst, iRegN_P2N src) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveN,$cmp   $dst,$src" %}
+  ins_encode(z_enc_cmov_reg(cmp,dst,src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovN_imm(cmpOp cmp, flagsReg cr, iRegN dst, immN0 src) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveN,$cmp   $dst,$src" %}
+  ins_encode(z_enc_cmov_imm(cmp,dst,src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovI_reg(cmpOp cmp, flagsReg cr, iRegI dst, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveI,$cmp   $dst,$src" %}
+  ins_encode(z_enc_cmov_reg(cmp,dst,src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovI_imm(cmpOp cmp, flagsReg cr, iRegI dst, immI16 src) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveI,$cmp   $dst,$src" %}
+  ins_encode(z_enc_cmov_imm(cmp,dst,src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovP_reg(cmpOp cmp, flagsReg cr, iRegP dst, iRegP_N2P src) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveP,$cmp    $dst,$src" %}
+  ins_encode(z_enc_cmov_reg(cmp,dst,src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovP_imm(cmpOp cmp, flagsReg cr, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveP,$cmp  $dst,$src" %}
+  ins_encode(z_enc_cmov_imm(cmp,dst,src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Float conditional move: branch over a load-register using the inverted
+// condition; nothing is emitted when dst and src are the same register.
+instruct cmovF_reg(cmpOpF cmp, flagsReg cr, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveF,$cmp   $dst,$src" %}
+  ins_encode %{
+    // Don't emit code if operands are identical (same register).
+    if ($dst$$FloatRegister != $src$$FloatRegister) {
+      Label done;
+      __ z_brc(Assembler::inverse_float_condition((Assembler::branch_condition)$cmp$$cmpcode), done);
+      __ z_ler($dst$$FloatRegister, $src$$FloatRegister);
+      __ bind(done);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Double variant of cmovF_reg (z_ldr instead of z_ler).
+instruct cmovD_reg(cmpOpF cmp, flagsReg cr, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveD,$cmp   $dst,$src" %}
+  ins_encode %{
+    // Don't emit code if operands are identical (same register).
+    if ($dst$$FloatRegister != $src$$FloatRegister) {
+      Label done;
+      __ z_brc(Assembler::inverse_float_condition((Assembler::branch_condition)$cmp$$cmpcode), done);
+      __ z_ldr($dst$$FloatRegister, $src$$FloatRegister);
+      __ bind(done);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovL_reg(cmpOp cmp, flagsReg cr, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveL,$cmp  $dst,$src" %}
+  ins_encode(z_enc_cmov_reg(cmp,dst,src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovL_imm(cmpOp cmp, flagsReg cr, iRegL dst, immL16 src) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary dst src)));
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CMoveL,$cmp  $dst,$src" %}
+  ins_encode(z_enc_cmov_imm(cmp,dst,src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------OS and Locking Instructions----------------------------------------
+
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this guy.
+// Zero-size and rematerializable: the thread register is always live, so
+// "loading" it costs nothing.
+instruct tlsLoadP(threadRegP dst) %{
+  match(Set dst (ThreadLocal));
+  ins_cost(0);
+  size(0);
+  ins_should_rematerialize(true);
+  format %{ "# $dst=ThreadLocal" %}
+  ins_encode(/* empty */);
+  ins_pipe(pipe_class_dummy);
+%}
+
+// The Cast/CheckCast nodes below only narrow the compiler's type
+// information; they emit no code.
+instruct checkCastPP(iRegP dst) %{
+  match(Set dst (CheckCastPP dst));
+  size(0);
+  format %{ "# checkcastPP of $dst" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct castPP(iRegP dst) %{
+  match(Set dst (CastPP dst));
+  size(0);
+  format %{ "# castPP of $dst" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct castII(iRegI dst) %{
+  match(Set dst (CastII dst));
+  size(0);
+  format %{ "# castII of $dst" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Conditional_store--------------------------------------------------
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// Sets flags (EQ) on success.
+
+// Implement LoadPLocked. Must be ordered against changes of the memory location
+// by storePConditional.
+// Don't know whether this is ever used.
+instruct loadPLocked(iRegP dst, memory mem) %{
+  match(Set dst (LoadPLocked mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LG      $dst,$mem\t # LoadPLocked" %}
+  opcode(LG_ZOPC, LG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// As compareAndSwapP, but return flag register instead of boolean value in
+// int register.
+// This instruction is matched if UseTLAB is off. Needed to pass
+// option tests.  Mem_ptr must be a memory operand, else this node
+// does not get Flag_needs_anti_dependence_check set by adlc. If this
+// is not set this node can be rematerialized which leads to errors.
+// oldval is pinned to rarg5 and destroyed by the CAS (KILL oldval).
+instruct storePConditional(indirect mem_ptr, rarg5RegP oldval, iRegP_N2P newval, flagsReg cr) %{
+  match(Set cr (StorePConditional mem_ptr (Binary oldval newval)));
+  effect(KILL oldval);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "storePConditional $oldval,$newval,$mem_ptr" %}
+  ins_encode(z_enc_casL(oldval, newval, mem_ptr));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// As compareAndSwapL, but return flag register instead of boolean value in
+// int register.
+// Used by sun/misc/AtomicLongCSImpl.java. Mem_ptr must be a memory
+// operand, else this node does not get
+// Flag_needs_anti_dependence_check set by adlc. If this is not set
+// this node can be rematerialized which leads to errors.
+// oldval is pinned to rarg5 and destroyed by the CAS (KILL oldval).
+instruct storeLConditional(indirect mem_ptr, rarg5RegL oldval, iRegL newval, flagsReg cr) %{
+  match(Set cr (StoreLConditional mem_ptr (Binary oldval newval)));
+  effect(KILL oldval);
+  // TODO: s390 port size(FIXED_SIZE);
+  // Print the correct node name (was "storePConditional", a copy-paste
+  // left-over from the rule above).
+  format %{ "storeLConditional $oldval,$newval,$mem_ptr" %}
+  ins_encode(z_enc_casL(oldval, newval, mem_ptr));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// No flag versions for CompareAndSwap{P,I,L,N} because matcher can't match them.
+
+// All four rules below: CAS via z_enc_casI/z_enc_casL, then
+// z_enc_cctobool materializes the condition code as a boolean in res.
+// oldval is pinned to rarg5 and clobbered (USE_KILL).
+instruct compareAndSwapI_bool(iRegP mem_ptr, rarg5RegI oldval, iRegI newval, iRegI res, flagsReg cr) %{
+  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
+  effect(USE mem_ptr, USE_KILL oldval, KILL cr);
+  size(16);
+  format %{ "$res = CompareAndSwapI $oldval,$newval,$mem_ptr" %}
+  ins_encode(z_enc_casI(oldval, newval, mem_ptr),
+             z_enc_cctobool(res));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct compareAndSwapL_bool(iRegP mem_ptr, rarg5RegL oldval, iRegL newval, iRegI res, flagsReg cr) %{
+  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
+  effect(USE mem_ptr, USE_KILL oldval, KILL cr);
+  size(18);
+  format %{ "$res = CompareAndSwapL $oldval,$newval,$mem_ptr" %}
+  ins_encode(z_enc_casL(oldval, newval, mem_ptr),
+             z_enc_cctobool(res));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, flagsReg cr) %{
+  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+  effect(USE mem_ptr, USE_KILL oldval, KILL cr);
+  size(18);
+  format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %}
+  ins_encode(z_enc_casL(oldval, newval, mem_ptr),
+             z_enc_cctobool(res));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Narrow-oop CAS uses the 32-bit form (z_enc_casI).
+instruct compareAndSwapN_bool(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, flagsReg cr) %{
+  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
+  effect(USE mem_ptr, USE_KILL oldval, KILL cr);
+  size(16);
+  format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %}
+  ins_encode(z_enc_casI(oldval, newval, mem_ptr),
+             z_enc_cctobool(res));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Atomic operations on memory (GetAndSet*, GetAndAdd*)---------------
+
+// Exploit: direct memory arithmetic
+// Prereqs: - instructions available
+//          - instructions guarantee atomicity
+//          - immediate operand to be added
+//          - immediate operand is small enough (8-bit signed).
+//          - result of instruction is not used
+instruct addI_mem_imm8_atomic_no_res(memoryRSY mem, Universe dummy, immI8 src, flagsReg cr) %{
+  match(Set dummy (GetAndAddI mem src));
+  effect(KILL cr);
+  predicate(VM_Version::has_AtomicMemWithImmALUOps() && n->as_LoadStore()->result_not_used());
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "ASI     [$mem],$src\t # GetAndAddI (atomic)" %}
+  opcode(ASI_ZOPC);
+  ins_encode(z_siyform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Fallback: direct memory arithmetic not available
+// Disadvantages: - CS-Loop required, very expensive.
+//                - more code generated (26 to xx bytes vs. 6 bytes)
+// Classic compare-and-swap retry loop; dst ends up holding the OLD value
+// (GetAndAdd semantics).
+instruct addI_mem_imm16_atomic(memoryRSY mem, iRegI dst, immI16 src, iRegI tmp, flagsReg cr) %{
+  match(Set dst (GetAndAddI mem src));
+  effect(KILL cr, TEMP_DEF dst, TEMP tmp);
+  ins_cost(MEMORY_REF_COST+100*DEFAULT_COST);
+  format %{ "BEGIN ATOMIC {\n\t"
+            "  LGF     $dst,[$mem]\n\t"
+            "  AHIK    $tmp,$dst,$src\n\t"
+            "  CSY     $dst,$tmp,$mem\n\t"
+            "  retry if failed\n\t"
+            "} END ATOMIC"
+         %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rtmp = $tmp$$Register;
+    int      Isrc = $src$$constant;
+    Label    retry;
+
+    // Iterate until update with incremented value succeeds.
+    __ z_lgf(Rdst, $mem$$Address);    // current contents
+    __ bind(retry);
+      // Calculate incremented value.
+      if (VM_Version::has_DistinctOpnds()) {
+        __ z_ahik(Rtmp, Rdst, Isrc);
+      } else {
+        __ z_lr(Rtmp, Rdst);
+        __ z_ahi(Rtmp, Isrc);
+      }
+      // Swap into memory location.
+      __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+    __ z_brne(retry);                      // Yikes, concurrent update, need to retry.
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Same CS-loop as above, but for a 32-bit immediate (AFI instead of
+// AHIK/AHI, no distinct-operands fast path).
+instruct addI_mem_imm32_atomic(memoryRSY mem, iRegI dst, immI src, iRegI tmp, flagsReg cr) %{
+  match(Set dst (GetAndAddI mem src));
+  effect(KILL cr, TEMP_DEF dst, TEMP tmp);
+  ins_cost(MEMORY_REF_COST+200*DEFAULT_COST);
+  format %{ "BEGIN ATOMIC {\n\t"
+            "  LGF     $dst,[$mem]\n\t"
+            "  LGR     $tmp,$dst\n\t"
+            "  AFI     $tmp,$src\n\t"
+            "  CSY     $dst,$tmp,$mem\n\t"
+            "  retry if failed\n\t"
+            "} END ATOMIC"
+         %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rtmp = $tmp$$Register;
+    int      Isrc = $src$$constant;
+    Label    retry;
+
+    // Iterate until update with incremented value succeeds.
+    __ z_lgf(Rdst, $mem$$Address);    // current contents
+    __ bind(retry);
+      // Calculate incremented value.
+      __ z_lr(Rtmp, Rdst);
+      __ z_afi(Rtmp, Isrc);
+      // Swap into memory location.
+      __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+    __ z_brne(retry);                      // Yikes, concurrent update, need to retry.
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register addend variant of the CS loop.
+instruct addI_mem_reg_atomic(memoryRSY mem, iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
+  match(Set dst (GetAndAddI mem src));
+  effect(KILL cr, TEMP_DEF dst, TEMP tmp);
+  ins_cost(MEMORY_REF_COST+100*DEFAULT_COST);
+  format %{ "BEGIN ATOMIC {\n\t"
+            "  LGF     $dst,[$mem]\n\t"
+            "  ARK     $tmp,$dst,$src\n\t"
+            "  CSY     $dst,$tmp,$mem\n\t"
+            "  retry if failed\n\t"
+            "} END ATOMIC"
+         %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rdst = $dst$$Register;
+    Register Rtmp = $tmp$$Register;
+    Label    retry;
+
+    // Iterate until update with incremented value succeeds.
+    __ z_lgf(Rdst, $mem$$Address);  // current contents
+    __ bind(retry);
+      // Calculate incremented value.
+      if (VM_Version::has_DistinctOpnds()) {
+        __ z_ark(Rtmp, Rdst, Rsrc);
+      } else {
+        __ z_lr(Rtmp, Rdst);
+        __ z_ar(Rtmp, Rsrc);
+      }
+      __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+    __ z_brne(retry);                      // Yikes, concurrent update, need to retry.
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+// Exploit: direct memory arithmetic
+// Prereqs: - instructions available
+//          - instructions guarantee atomicity
+//          - immediate operand to be added
+//          - immediate operand is small enough (8-bit signed).
+//          - result of instruction is not used
+// 64-bit counterpart of addI_mem_imm8_atomic_no_res (AGSI).
+instruct addL_mem_imm8_atomic_no_res(memoryRSY mem, Universe dummy, immL8 src, flagsReg cr) %{
+  match(Set dummy (GetAndAddL mem src));
+  effect(KILL cr);
+  predicate(VM_Version::has_AtomicMemWithImmALUOps() && n->as_LoadStore()->result_not_used());
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "AGSI    [$mem],$src\t # GetAndAddL (atomic)" %}
+  opcode(AGSI_ZOPC);
+  ins_encode(z_siyform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Fallback: direct memory arithmetic not available
+// Disadvantages: - CS-Loop required, very expensive.
+//                - more code generated (26 to xx bytes vs. 6 bytes)
+// 64-bit CSG retry loop; dst receives the OLD value (GetAndAdd semantics).
+instruct addL_mem_imm16_atomic(memoryRSY mem, iRegL dst, immL16 src, iRegL tmp, flagsReg cr) %{
+  match(Set dst (GetAndAddL mem src));
+  effect(KILL cr, TEMP_DEF dst, TEMP tmp);
+  ins_cost(MEMORY_REF_COST+100*DEFAULT_COST);
+  format %{ "BEGIN ATOMIC {\n\t"
+            "  LG      $dst,[$mem]\n\t"
+            "  AGHIK   $tmp,$dst,$src\n\t"
+            "  CSG     $dst,$tmp,$mem\n\t"
+            "  retry if failed\n\t"
+            "} END ATOMIC"
+         %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rtmp = $tmp$$Register;
+    int      Isrc = $src$$constant;
+    Label    retry;
+
+    // Iterate until update with incremented value succeeds.
+    __ z_lg(Rdst, $mem$$Address);  // current contents
+    __ bind(retry);
+      // Calculate incremented value.
+      if (VM_Version::has_DistinctOpnds()) {
+        __ z_aghik(Rtmp, Rdst, Isrc);
+      } else {
+        __ z_lgr(Rtmp, Rdst);
+        __ z_aghi(Rtmp, Isrc);
+      }
+      __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+    __ z_brne(retry);                      // Yikes, concurrent update, need to retry.
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// 32-bit immediate variant (AGFI, no distinct-operands fast path).
+instruct addL_mem_imm32_atomic(memoryRSY mem, iRegL dst, immL32 src, iRegL tmp, flagsReg cr) %{
+  match(Set dst (GetAndAddL mem src));
+  effect(KILL cr, TEMP_DEF dst, TEMP tmp);
+  ins_cost(MEMORY_REF_COST+100*DEFAULT_COST);
+  format %{ "BEGIN ATOMIC {\n\t"
+            "  LG      $dst,[$mem]\n\t"
+            "  LGR     $tmp,$dst\n\t"
+            "  AGFI    $tmp,$src\n\t"
+            "  CSG     $dst,$tmp,$mem\n\t"
+            "  retry if failed\n\t"
+            "} END ATOMIC"
+         %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rtmp = $tmp$$Register;
+    int      Isrc = $src$$constant;
+    Label    retry;
+
+    // Iterate until update with incremented value succeeds.
+    __ z_lg(Rdst, $mem$$Address);  // current contents
+    __ bind(retry);
+      // Calculate incremented value.
+      __ z_lgr(Rtmp, Rdst);
+      __ z_agfi(Rtmp, Isrc);
+      __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+    __ z_brne(retry);                      // Yikes, concurrent update, need to retry.
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register addend variant of the 64-bit CSG loop.
+instruct addL_mem_reg_atomic(memoryRSY mem, iRegL dst, iRegL src, iRegL tmp, flagsReg cr) %{
+  match(Set dst (GetAndAddL mem src));
+  effect(KILL cr, TEMP_DEF dst, TEMP tmp);
+  ins_cost(MEMORY_REF_COST+100*DEFAULT_COST);
+  format %{ "BEGIN ATOMIC {\n\t"
+            "  LG      $dst,[$mem]\n\t"
+            "  AGRK    $tmp,$dst,$src\n\t"
+            "  CSG     $dst,$tmp,$mem\n\t"
+            "  retry if failed\n\t"
+            "} END ATOMIC"
+         %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rdst = $dst$$Register;
+    Register Rtmp = $tmp$$Register;
+    Label    retry;
+
+    // Iterate until update with incremented value succeeds.
+    __ z_lg(Rdst, $mem$$Address);  // current contents
+    __ bind(retry);
+      // Calculate incremented value.
+      if (VM_Version::has_DistinctOpnds()) {
+        __ z_agrk(Rtmp, Rdst, Rsrc);
+      } else {
+        __ z_lgr(Rtmp, Rdst);
+        __ z_agr(Rtmp, Rsrc);
+      }
+      __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+    __ z_brne(retry);                      // Yikes, concurrent update, need to retry.
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Increment value in memory, save old value in dst.
+// Single-instruction LAA (load-and-add), available when
+// has_LoadAndALUAtomicV1() (z196 interlocked-access facility).
+instruct addI_mem_reg_atomic_z196(memoryRSY mem, iRegI dst, iRegI src) %{
+  match(Set dst (GetAndAddI mem src));
+  predicate(VM_Version::has_LoadAndALUAtomicV1());
+  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+  size(6);
+  format %{ "LAA     $dst,$src,[$mem]" %}
+  ins_encode %{ __ z_laa($dst$$Register, $src$$Register, $mem$$Address); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Increment value in memory, save old value in dst.
+// 64-bit form (LAAG).
+instruct addL_mem_reg_atomic_z196(memoryRSY mem, iRegL dst, iRegL src) %{
+  match(Set dst (GetAndAddL mem src));
+  predicate(VM_Version::has_LoadAndALUAtomicV1());
+  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+  size(6);
+  format %{ "LAAG    $dst,$src,[$mem]" %}
+  ins_encode %{ __ z_laag($dst$$Register, $src$$Register, $mem$$Address); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+// GetAndSet rules: the exchange is done by the z_enc_SwapI/z_enc_SwapL
+// encoding classes (defined elsewhere in this file), using tmp as
+// scratch; dst is both the new value in and the old value out (USE_DEF
+// via the match rule).
+instruct xchgI_reg_mem(memoryRSY mem, iRegI dst, iRegI tmp, flagsReg cr) %{
+  match(Set dst (GetAndSetI mem dst));
+  effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+  format %{ "XCHGI   $dst,[$mem]\t # EXCHANGE (int, atomic), temp $tmp" %}
+  ins_encode(z_enc_SwapI(mem, dst, tmp));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct xchgL_reg_mem(memoryRSY mem, iRegL dst, iRegL tmp, flagsReg cr) %{
+  match(Set dst (GetAndSetL mem dst));
+  effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+  format %{ "XCHGL   $dst,[$mem]\t # EXCHANGE (long, atomic), temp $tmp" %}
+  ins_encode(z_enc_SwapL(mem, dst, tmp));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Narrow oop exchange uses the 32-bit swap encoding.
+instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{
+  match(Set dst (GetAndSetN mem dst));
+  effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+  format %{ "XCHGN   $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %}
+  ins_encode(z_enc_SwapI(mem, dst, tmp));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Full oop exchange uses the 64-bit swap encoding.
+instruct xchgP_reg_mem(memoryRSY mem, iRegP dst, iRegL tmp, flagsReg cr) %{
+  match(Set dst (GetAndSetP mem dst));
+  effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+  format %{ "XCHGP   $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %}
+  ins_encode(z_enc_SwapL(mem, dst, tmp));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Arithmetic Instructions--------------------------------------------
+
+// The rules are sorted by right operand type and operand length. Please keep
+// it that way.
+// Left operand type is always reg. Left operand len is I, L, P
+// Right operand type is reg, imm, mem. Right operand len is S, I, L, P
+// Special instruction formats, e.g. multi-operand, are inserted at the end.
+
+// ADD
+
+// REG = REG + REG
+
+// Register Addition
+// 2-operand form: dst += src (AR clobbers the condition code, hence KILL cr).
+instruct addI_reg_reg_CISC(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (AddI dst src));
+  effect(KILL cr);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AR      $dst,$src\t # int  CISC ALU" %}
+  opcode(AR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+// 3-operand form: dst = src1 + src2. Requires the distinct-operands facility.
+instruct addI_reg_reg_RISC(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (AddI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "ARK     $dst,$src1,$src2\t # int  RISC ALU" %}
+  opcode(ARK_ZOPC);
+  ins_encode(z_rrfform(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + IMM
+
+// Avoid use of LA(Y) for general ALU operation.
+// Immediate Addition
+// dst += signed 16-bit immediate.
+instruct addI_reg_imm16_CISC(iRegI dst, immI16 con, flagsReg cr) %{
+  match(Set dst (AddI dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AHI     $dst,$con\t # int  CISC ALU" %}
+  opcode(AHI_ZOPC);
+  ins_encode(z_riform_signed(dst, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+// Immediate Addition
+// 3-operand immediate form: dst = src + con (distinct-operands facility).
+instruct addI_reg_imm16_RISC(iRegI dst, iRegI src, immI16 con, flagsReg cr) %{
+  match(Set dst (AddI src con));
+  effect(KILL cr);
+  predicate( VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AHIK    $dst,$src,$con\t # int  RISC ALU" %}
+  opcode(AHIK_ZOPC);
+  ins_encode(z_rieform_d(dst, src, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Addition
+// Full 32-bit immediate; costlier, used when the constant exceeds 16 bits.
+instruct addI_reg_imm32(iRegI dst, immI src, flagsReg cr) %{
+  match(Set dst (AddI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "AFI     $dst,$src" %}
+  opcode(AFI_ZOPC);
+  ins_encode(z_rilform_signed(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Addition
+// LA computes dst = src + con without touching the condition code,
+// hence no KILL cr in the LA/LAY based rules below.
+instruct addI_reg_imm12(iRegI dst, iRegI src, uimmI12 con) %{
+  match(Set dst (AddI src con));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA      $dst,$con(,$src)\t # int d12(,b)" %}
+  opcode(LA_ZOPC);
+  ins_encode(z_rxform_imm_reg(dst, con, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Addition
+instruct addI_reg_imm20(iRegI dst, iRegI src, immI20 con) %{
+  match(Set dst (AddI src con));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "LAY     $dst,$con(,$src)\t # int d20(,b)" %}
+  opcode(LAY_ZOPC);
+  ins_encode(z_rxyform_imm_reg(dst, con, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Fused 3-way add: dst = src1 + src2 + con, done by one LA (index + base + disp).
+instruct addI_reg_reg_imm12(iRegI dst, iRegI src1, iRegI src2, uimmI12 con) %{
+  match(Set dst (AddI (AddI src1 src2) con));
+  predicate( PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA      $dst,$con($src1,$src2)\t # int d12(x,b)" %}
+  opcode(LA_ZOPC);
+  ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addI_reg_reg_imm20(iRegI dst, iRegI src1, iRegI src2, immI20 con) %{
+  match(Set dst (AddI (AddI src1 src2) con));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "LAY     $dst,$con($src1,$src2)\t # int d20(x,b)" %}
+  opcode(LAY_ZOPC);
+  ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + MEM
+
+// Add from memory; encoder picks A (short disp) or AY (long disp).
+instruct addI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+  match(Set dst (AddI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "A(Y)    $dst, $src\t # int" %}
+  opcode(AY_ZOPC, A_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// MEM = MEM + IMM
+
+// Add Immediate to 4-byte memory operand and result
+// Requires the mem-with-immediate ALU facility (see predicate).
+instruct addI_mem_imm(memoryRSY mem, immI8 src, flagsReg cr) %{
+  match(Set mem (StoreI mem (AddI (LoadI mem) src)));
+  effect(KILL cr);
+  predicate(VM_Version::has_MemWithImmALUOps());
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "ASI     $mem,$src\t # direct mem add 4" %}
+  opcode(ASI_ZOPC);
+  ins_encode(z_siyform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//
+
+// REG = REG + REG
+
+// dst(long) += sign-extended int src, fused into one AGFR.
+instruct addL_reg_regI(iRegL dst, iRegI src, flagsReg cr) %{
+  match(Set dst (AddL dst (ConvI2L src)));
+  effect(KILL cr);
+  size(4);
+  format %{ "AGFR    $dst,$src\t # long<-int CISC ALU" %}
+  opcode(AGFR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_reg_reg_CISC(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (AddL dst src));
+  effect(KILL cr);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AGR     $dst, $src\t # long CISC ALU" %}
+  opcode(AGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+// 3-operand form: dst = src1 + src2. Requires the distinct-operands facility.
+instruct addL_reg_reg_RISC(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
+  match(Set dst (AddL src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "AGRK    $dst,$src1,$src2\t # long RISC ALU" %}
+  opcode(AGRK_ZOPC);
+  ins_encode(z_rrfform(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + IMM
+
+// LA/LAY leave the condition code untouched, hence no KILL cr below.
+instruct addL_reg_imm12(iRegL dst, iRegL src, uimmL12 con) %{
+  match(Set dst (AddL src con));
+  predicate( PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA      $dst,$con(,$src)\t # long d12(,b)" %}
+  opcode(LA_ZOPC);
+  ins_encode(z_rxform_imm_reg(dst, con, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_reg_imm20(iRegL dst, iRegL src, immL20 con) %{
+  match(Set dst (AddL src con));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "LAY     $dst,$con(,$src)\t # long d20(,b)" %}
+  opcode(LAY_ZOPC);
+  ins_encode(z_rxyform_imm_reg(dst, con, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Full 32-bit immediate; costlier, used when the constant exceeds 16 bits.
+instruct addL_reg_imm32(iRegL dst, immL32 con, flagsReg cr) %{
+  match(Set dst (AddL dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "AGFI    $dst,$con\t # long CISC ALU" %}
+  opcode(AGFI_ZOPC);
+  ins_encode(z_rilform_signed(dst, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+instruct addL_reg_imm16_CISC(iRegL dst, immL16 con, flagsReg cr) %{
+  match(Set dst (AddL dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AGHI    $dst,$con\t # long CISC ALU" %}
+  opcode(AGHI_ZOPC);
+  ins_encode(z_riform_signed(dst, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+// 3-operand immediate form (distinct-operands facility).
+instruct addL_reg_imm16_RISC(iRegL dst, iRegL src, immL16 con, flagsReg cr) %{
+  match(Set dst (AddL src con));
+  effect(KILL cr);
+  predicate( VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "AGHIK   $dst,$src,$con\t # long RISC ALU" %}
+  opcode(AGHIK_ZOPC);
+  ins_encode(z_rieform_d(dst, src, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + MEM
+
+// Add a sign-extended int loaded from memory to a long (AGF).
+instruct addL_Reg_memI(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (AddL dst (ConvI2L (LoadI src))));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "AGF     $dst, $src\t # long/int" %}
+  opcode(AGF_ZOPC, AGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (AddL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "AG      $dst, $src\t # long" %}
+  opcode(AG_ZOPC, AG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Fused 3-way add: dst = src1 + src2 + con via one LA (index + base + disp).
+instruct addL_reg_reg_imm12(iRegL dst, iRegL src1, iRegL src2, uimmL12 con) %{
+  match(Set dst (AddL (AddL src1 src2) con));
+  predicate( PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA     $dst,$con($src1,$src2)\t # long d12(x,b)" %}
+  opcode(LA_ZOPC);
+  ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_reg_reg_imm20(iRegL dst, iRegL src1, iRegL src2, immL20 con) %{
+  match(Set dst (AddL (AddL src1 src2) con));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "LAY    $dst,$con($src1,$src2)\t # long d20(x,b)" %}
+  opcode(LAY_ZOPC);
+  ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// MEM = MEM + IMM
+
+// Add Immediate to 8-byte memory operand and result.
+// Requires the mem-with-immediate ALU facility (see predicate).
+instruct addL_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{
+  match(Set mem (StoreL mem (AddL (LoadL mem) src)));
+  effect(KILL cr);
+  predicate(VM_Version::has_MemWithImmALUOps());
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "AGSI    $mem,$src\t # direct mem add 8" %}
+  opcode(AGSI_ZOPC);
+  ins_encode(z_siyform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+// REG = REG + REG
+
+// Ptr Addition
+// Pointer add via LA with zero displacement: dst = src1 + src2.
+// LA leaves the condition code untouched, so no KILL cr is needed.
+instruct addP_reg_reg_LA(iRegP dst, iRegP_N2P src1, iRegL src2) %{
+  match(Set dst (AddP src1 src2));
+  predicate( PreferLAoverADD);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "LA      $dst,#0($src1,$src2)\t # ptr 0(x,b)" %}
+  opcode(LA_ZOPC);
+  ins_encode(z_rxform_imm_reg_reg(dst, 0x0, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Ptr Addition
+// Avoid use of LA(Y) for general ALU operation.
+// 2-operand pointer add: dst += src (ALGR, logical 64-bit add).
+// Fallback when neither LA nor the 3-operand ALGRK form is preferred/available.
+// Fixed typo in the format comment: "CICS" -> "CISC" (cf. the sibling
+// *_CISC rules, e.g. addI_reg_reg_CISC).
+instruct addP_reg_reg_CISC(iRegP dst, iRegL src, flagsReg cr) %{
+  match(Set dst (AddP dst src));
+  effect(KILL cr);
+  predicate(!PreferLAoverADD && !VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "ALGR    $dst,$src\t # ptr CISC ALU" %}
+  opcode(ALGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Ptr Addition
+// Avoid use of LA(Y) for general ALU operation.
+// 3-operand pointer add (distinct-operands facility): dst = src1 + src2.
+instruct addP_reg_reg_RISC(iRegP dst, iRegP_N2P src1, iRegL src2, flagsReg cr) %{
+  match(Set dst (AddP src1 src2));
+  effect(KILL cr);
+  predicate(!PreferLAoverADD && VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "ALGRK   $dst,$src1,$src2\t # ptr RISC ALU" %}
+  opcode(ALGRK_ZOPC);
+  ins_encode(z_rrfform(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + IMM
+
+// LA does not set the condition code, hence no KILL cr in the LA/LAY rules.
+instruct addP_reg_imm12(iRegP dst, iRegP_N2P src, uimmL12 con) %{
+  match(Set dst (AddP src con));
+  predicate( PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA      $dst,$con(,$src)\t # ptr d12(,b)" %}
+  opcode(LA_ZOPC);
+  ins_encode(z_rxform_imm_reg(dst, con, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+instruct addP_reg_imm16_CISC(iRegP dst, immL16 src, flagsReg cr) %{
+  match(Set dst (AddP dst src));
+  effect(KILL cr);
+  predicate(!PreferLAoverADD && !VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AGHI    $dst,$src\t # ptr CISC ALU" %}
+  opcode(AGHI_ZOPC);
+  ins_encode(z_riform_signed(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+// 3-operand immediate form (distinct-operands facility).
+instruct addP_reg_imm16_RISC(iRegP dst, iRegP_N2P src, immL16 con, flagsReg cr) %{
+  match(Set dst (AddP src con));
+  effect(KILL cr);
+  predicate(!PreferLAoverADD && VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "ALGHSIK $dst,$src,$con\t # ptr RISC ALU" %}
+  opcode(ALGHSIK_ZOPC);
+  ins_encode(z_rieform_d(dst, src, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addP_reg_imm20(iRegP dst, memoryRegP src, immL20 con) %{
+  match(Set dst (AddP src con));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "LAY     $dst,$con(,$src)\t # ptr d20(,b)" %}
+  opcode(LAY_ZOPC);
+  ins_encode(z_rxyform_imm_reg(dst, con, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Pointer Immediate Addition
+// Full 32-bit immediate; costlier, used when the constant exceeds 16 bits.
+instruct addP_reg_imm32(iRegP dst, immL32 src, flagsReg cr) %{
+  match(Set dst (AddP dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AGFI    $dst,$src\t # ptr" %}
+  opcode(AGFI_ZOPC);
+  ins_encode(z_rilform_signed(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG1 + REG2 + IMM
+
+// Fused 3-way pointer add: dst = src1 + src2 + con via one LA.
+instruct addP_reg_reg_imm12(iRegP dst, memoryRegP src1, iRegL src2, uimmL12 con) %{
+  match(Set dst (AddP (AddP src1 src2) con));
+  predicate( PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA      $dst,$con($src1,$src2)\t # ptr d12(x,b)" %}
+  opcode(LA_ZOPC);
+  ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Variant with a (possibly) narrow-oop base; only valid when compressed
+// oops decode with zero base and zero shift, so N2P is the identity.
+instruct addP_regN_reg_imm12(iRegP dst, iRegP_N2P src1, iRegL src2, uimmL12 con) %{
+  match(Set dst (AddP (AddP src1 src2) con));
+  predicate( PreferLAoverADD && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA      $dst,$con($src1,$src2)\t # ptr d12(x,b)" %}
+  opcode(LA_ZOPC);
+  ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addP_reg_reg_imm20(iRegP dst, memoryRegP src1, iRegL src2, immL20 con) %{
+  match(Set dst (AddP (AddP src1 src2) con));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LAY     $dst,$con($src1,$src2)\t # ptr d20(x,b)" %}
+  opcode(LAY_ZOPC);
+  ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Narrow-oop base variant, see addP_regN_reg_imm12.
+instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con) %{
+  match(Set dst (AddP (AddP src1 src2) con));
+  predicate( PreferLAoverADD && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LAY     $dst,$con($src1,$src2)\t # ptr d20(x,b)" %}
+  opcode(LAY_ZOPC);
+  ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// MEM = MEM + IMM
+
+// Add Immediate to 8-byte memory operand and result
+// Requires the mem-with-immediate ALU facility (see predicate).
+instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{
+  match(Set mem (StoreP mem (AddP (LoadP mem) src)));
+  effect(KILL cr);
+  predicate(VM_Version::has_MemWithImmALUOps());
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "AGSI    $mem,$src\t # direct mem add 8 (ptr)" %}
+  opcode(AGSI_ZOPC);
+  ins_encode(z_siyform(mem, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// SUB
+
+// Register Subtraction
+// 2-operand form: dst -= src.
+instruct subI_reg_reg_CISC(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (SubI dst src));
+  effect(KILL cr);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SR      $dst,$src\t # int  CISC ALU" %}
+  opcode(SR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// 3-operand form: dst = src1 - src2 (distinct-operands facility).
+instruct subI_reg_reg_RISC(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (SubI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "SRK     $dst,$src1,$src2\t # int  RISC ALU" %}
+  opcode(SRK_ZOPC);
+  ins_encode(z_rrfform(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Subtract from memory; encoder picks S (short disp) or SY (long disp).
+instruct subI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+  match(Set dst (SubI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "S(Y)    $dst, $src\t # int" %}
+  opcode(SY_ZOPC, S_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Negation: 0 - src is a single load-complement (LCR).
+instruct subI_zero_reg(iRegI dst, immI_0 zero, iRegI src, flagsReg cr) %{
+  match(Set dst (SubI zero src));
+  effect(KILL cr);
+  size(2);
+  format %{ "NEG     $dst, $src" %}
+  ins_encode %{ __ z_lcr($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+//
+
+// Long subtraction
+// 2-operand form: dst -= src (64-bit SGR).
+// Format comment corrected from "# int" to "# long": this is the long rule
+// (cf. SubL match and the "# long" comments of the sibling subL mem rules).
+instruct subL_reg_reg_CISC(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (SubL dst src));
+  effect(KILL cr);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SGR     $dst,$src\t # long CISC ALU" %}
+  opcode(SGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+// 3-operand form: dst = src1 - src2 (distinct-operands facility).
+// Format comment corrected from "# int" to "# long" (this is the SubL rule).
+instruct subL_reg_reg_RISC(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
+  match(Set dst (SubL src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "SGRK    $dst,$src1,$src2\t # long RISC ALU" %}
+  opcode(SGRK_ZOPC);
+  ins_encode(z_rrfform(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// dst(long) -= sign-extended int src, fused into one SGFR.
+// Format comment corrected to "# long/int", consistent with the SGF memory
+// rule below (subL_Reg_memI).
+instruct subL_reg_regI_CISC(iRegL dst, iRegI src, flagsReg cr) %{
+  match(Set dst (SubL dst (ConvI2L src)));
+  effect(KILL cr);
+  size(4);
+  format %{ "SGFR    $dst, $src\t # long/int CISC ALU" %}
+  opcode(SGFR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Subtract a sign-extended int loaded from memory from a long (SGF).
+instruct subL_Reg_memI(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (SubL dst (ConvI2L (LoadI src))));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "SGF     $dst, $src\t # long/int" %}
+  opcode(SGF_ZOPC, SGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct subL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (SubL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "SG      $dst, $src\t # long" %}
+  opcode(SG_ZOPC, SG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Moved declaration of negL_reg_reg before encode nodes, where it is used.
+
+//  MUL
+
+// Register Multiplication
+// MSR does not set the condition code, so no KILL cr in the MUL rules.
+instruct mulI_reg_reg(iRegI dst, iRegI src) %{
+  match(Set dst (MulI dst src));
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "MSR     $dst, $src" %}
+  opcode(MSR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Multiplication
+instruct mulI_reg_imm16(iRegI dst, immI16 con) %{
+  match(Set dst (MulI dst con));
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "MHI     $dst,$con" %}
+  opcode(MHI_ZOPC);
+  ins_encode(z_riform_signed(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate (32bit) Multiplication
+instruct mulI_reg_imm32(iRegI dst, immI con) %{
+  match(Set dst (MulI dst con));
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "MSFI    $dst,$con" %}
+  opcode(MSFI_ZOPC);
+  ins_encode(z_rilform_signed(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Multiply by memory operand; encoder picks MS (short disp) or MSY (long disp).
+instruct mulI_Reg_mem(iRegI dst, memory src)%{
+  match(Set dst (MulI dst (LoadI src)));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MS(Y)   $dst, $src\t # int" %}
+  opcode(MSY_ZOPC, MS_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//
+
+// dst(long) *= sign-extended int src, fused into one MSGFR.
+// Format string fixed: operand separator comma was missing ("$dst $src"),
+// making the disassembly comment inconsistent with every sibling rule.
+instruct mulL_reg_regI(iRegL dst, iRegI src) %{
+  match(Set dst (MulL dst (ConvI2L src)));
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "MSGFR   $dst,$src\t # long/int" %}
+  opcode(MSGFR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// 2-operand long multiply: dst *= src.
+// Format string fixed: operand separator comma was missing ("$dst $src").
+instruct mulL_reg_reg(iRegL dst, iRegL src) %{
+  match(Set dst (MulL dst src));
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "MSGR    $dst,$src\t # long" %}
+  opcode(MSGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Multiplication
+// Immediate long multiply with a signed 16-bit constant.
+instruct mulL_reg_imm16(iRegL dst, immL16 src) %{
+  match(Set dst (MulL dst src));
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "MGHI    $dst,$src\t # long" %}
+  opcode(MGHI_ZOPC);
+  ins_encode(z_riform_signed(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate (32bit) Multiplication
+instruct mulL_reg_imm32(iRegL dst, immL32 con) %{
+  match(Set dst (MulL dst con));
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "MSGFI   $dst,$con" %}
+  opcode(MSGFI_ZOPC);
+  ins_encode(z_rilform_signed(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Multiply long by a sign-extended int loaded from memory (MSGF).
+instruct mulL_Reg_memI(iRegL dst, memory src)%{
+  match(Set dst (MulL dst (ConvI2L (LoadI src))));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "MSGF    $dst, $src\t # long" %}
+  opcode(MSGF_ZOPC, MSGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulL_Reg_mem(iRegL dst, memory src)%{
+  match(Set dst (MulL dst (LoadL src)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "MSG     $dst, $src\t # long" %}
+  opcode(MSG_ZOPC, MSG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  DIV
+
+// Integer DIVMOD with Register, both quotient and mod results
+// Integer DIVMOD with Register, both quotient and mod results
+// dst1src1 (odd half of an even/odd pair) holds the dividend and receives
+// the quotient; dst2 (matching even half) receives the remainder.
+// Divisor -1 is special-cased (cf. "treats special case 0x80../-1" below):
+// the quotient is computed as a negation and the remainder cleared, so
+// DSGFR never sees the min_jint / -1 overflow combination.
+instruct divModI_reg_divmod(roddRegI dst1src1, revenRegI dst2, noOdd_iRegI src2, flagsReg cr) %{
+  match(DivModI dst1src1 src2);
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(VM_Version::has_CompareBranch() ? 24 : 26);
+  format %{ "DIVMODI ($dst1src1, $dst2) $src2" %}
+  ins_encode %{
+    Register d1s1 = $dst1src1$$Register;
+    Register d2   = $dst2$$Register;
+    Register s2   = $src2$$Register;
+
+    assert_different_registers(d1s1, s2);
+
+    Label do_div, done_div;
+    // Compare-and-branch fuses CHI+BRNE into one CIJ when available.
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cij(s2, -1, Assembler::bcondNotEqual, do_div);
+    } else {
+      __ z_chi(s2, -1);
+      __ z_brne(do_div);
+    }
+    __ z_lcr(d1s1, d1s1);          // Divisor is -1: quotient = -dividend.
+    __ clear_reg(d2, false, false); // Remainder is 0.
+    __ z_bru(done_div);
+    __ bind(do_div);
+    __ z_lgfr(d1s1, d1s1);         // Sign-extend dividend for the 64-bit divide.
+    __ z_dsgfr(d2, s2);            // d2:d1s1 <- remainder:quotient.
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+// Register Division
+// Register Division
+// dst is the odd half of an even/odd pair; DSGFR writes the quotient there
+// and the remainder into the even half (tmp, KILLed). Divisor -1 is handled
+// by negation to avoid the min_jint / -1 overflow case.
+instruct divI_reg_reg(roddRegI dst, iRegI src1, noOdd_iRegI src2, revenRegI tmp, flagsReg cr) %{
+  match(Set dst (DivI src1 src2));
+  effect(KILL tmp, KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(VM_Version::has_CompareBranch() ? 20 : 22);
+  format %{ "DIV_checked $dst, $src1,$src2\t # treats special case 0x80../-1" %}
+  ins_encode %{
+    Register a = $src1$$Register;
+    Register b = $src2$$Register;
+    Register t = $dst$$Register;
+
+    assert_different_registers(t, b);
+
+    Label do_div, done_div;
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cij(b, -1, Assembler::bcondNotEqual, do_div);
+    } else {
+      __ z_chi(b, -1);
+      __ z_brne(do_div);
+    }
+    __ z_lcr(t, a);  // Divisor is -1: quotient = -dividend.
+    __ z_bru(done_div);
+    __ bind(do_div);
+    __ z_lgfr(t, a); // Sign-extend dividend.
+    __ z_dsgfr(t->predecessor()/* t is odd part of a register pair. */, b);
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Division
+// Immediate Division
+// The constant divisor is materialized in R0; the -1 check is done at
+// compile time, so no runtime branch is needed.
+instruct divI_reg_imm16(roddRegI dst, iRegI src1, immI16 src2, revenRegI tmp, flagsReg cr) %{
+  match(Set dst (DivI src1 src2));
+  effect(KILL tmp, KILL cr);  // R0 is killed, too.
+  ins_cost(2 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "DIV_const  $dst,$src1,$src2" %}
+  ins_encode %{
+    // No sign extension of Rdividend needed here.
+    if ($src2$$constant != -1) {
+      __ z_lghi(Z_R0_scratch, $src2$$constant);
+      __ z_lgfr($dst$$Register, $src1$$Register);
+      __ z_dsgfr($dst$$Register->predecessor()/* Dst is odd part of a register pair. */, Z_R0_scratch);
+    } else {
+      __ z_lcr($dst$$Register, $src1$$Register); // Divide by -1 is a negation.
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long DIVMOD with Register, both quotient and mod results
+// Long DIVMOD with Register, both quotient and mod results
+// dst1src1 (odd half) holds the dividend and receives the quotient;
+// dst2 (even half) receives the remainder. Divisor -1 is special-cased
+// to avoid the min_jlong / -1 overflow combination in DSGR.
+instruct divModL_reg_divmod(roddRegL dst1src1, revenRegL dst2, iRegL src2, flagsReg cr) %{
+  match(DivModL dst1src1 src2);
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(VM_Version::has_CompareBranch() ? 22 : 24);
+  format %{ "DIVMODL ($dst1src1, $dst2) $src2" %}
+  ins_encode %{
+    Register d1s1 = $dst1src1$$Register;
+    Register d2   = $dst2$$Register;
+    Register s2   = $src2$$Register;
+
+    Label do_div, done_div;
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cgij(s2, -1, Assembler::bcondNotEqual, do_div);
+    } else {
+      __ z_cghi(s2, -1);
+      __ z_brne(do_div);
+    }
+    __ z_lcgr(d1s1, d1s1); // Divisor is -1: quotient = -dividend.
+    // indicate unused result
+    (void) __ clear_reg(d2, true, false); // Remainder is 0.
+    __ z_bru(done_div);
+    __ bind(do_div);
+    __ z_dsgr(d2, s2); // d2:d1s1 <- remainder:quotient.
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Long Division
+// Register Long Division
+// Negates the dividend up front: if the divisor turns out to be -1 the
+// negated value already IS the quotient and we branch straight to done;
+// otherwise a second LCGR restores the sign before the real divide.
+instruct divL_reg_reg(roddRegL dst, iRegL src, revenRegL tmp, flagsReg cr) %{
+  match(Set dst (DivL dst src));
+  effect(KILL tmp, KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(VM_Version::has_CompareBranch() ? 18 : 20);
+  format %{ "DIVG_checked  $dst, $src\t # long, treats special case 0x80../-1" %}
+  ins_encode %{
+    Register b = $src$$Register;
+    Register t = $dst$$Register;
+
+    Label done_div;
+    __ z_lcgr(t, t);    // Does no harm. divisor is in other register.
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cgij(b, -1, Assembler::bcondEqual, done_div);
+    } else {
+      __ z_cghi(b, -1);
+      __ z_bre(done_div);
+    }
+    __ z_lcgr(t, t);    // Restore sign.
+    __ z_dsgr(t->predecessor()/* t is odd part of a register pair. */, b);
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Long Division
+// Immediate Long Division
+// Constant divisor goes to R0; the -1 case is resolved at compile time.
+instruct divL_reg_imm16(roddRegL dst, iRegL src1, immL16 src2, revenRegL tmp, flagsReg cr) %{
+  match(Set dst (DivL src1 src2));
+  effect(KILL tmp, KILL cr);  // R0 is killed, too.
+  ins_cost(2 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "DIVG_const  $dst,$src1,$src2\t # long" %}
+  ins_encode %{
+    if ($src2$$constant != -1) {
+      __ z_lghi(Z_R0_scratch, $src2$$constant);
+      __ lgr_if_needed($dst$$Register, $src1$$Register); // Skip move if already in place.
+      __ z_dsgr($dst$$Register->predecessor()/* Dst is odd part of a register pair. */, Z_R0_scratch);
+    } else {
+      __ z_lcgr($dst$$Register, $src1$$Register); // Divide by -1 is a negation.
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// REM
+
+// Integer Remainder
+// Register Remainder
+// Integer Remainder
+// Register Remainder
+// dst is the even half of an even/odd pair; DSGFR leaves the remainder
+// there and the quotient in the odd half (tmp). Divisor -1 yields
+// remainder 0 without dividing (avoids the min_jint / -1 overflow case).
+instruct modI_reg_reg(revenRegI dst, iRegI src1, noOdd_iRegI src2, roddRegI tmp, flagsReg cr) %{
+  match(Set dst (ModI src1 src2));
+  effect(KILL tmp, KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MOD_checked   $dst,$src1,$src2" %}
+  ins_encode %{
+    Register a = $src1$$Register;
+    Register b = $src2$$Register;
+    Register t = $dst$$Register;
+    assert_different_registers(t->successor(), b);
+
+    Label do_div, done_div;
+
+    // Fast shape: when t overlaps neither operand we can pre-clear it and
+    // need only a single branch over the divide.
+    if ((t->encoding() != b->encoding()) && (t->encoding() != a->encoding())) {
+      (void) __ clear_reg(t, true, false);  // Does no harm. Operands are in other regs.
+      if (VM_Version::has_CompareBranch()) {
+        __ z_cij(b, -1, Assembler::bcondEqual, done_div);
+      } else {
+        __ z_chi(b, -1);
+        __ z_bre(done_div);
+      }
+      __ z_lgfr(t->successor(), a); // Sign-extend dividend into the odd half.
+      __ z_dsgfr(t/* t is even part of a register pair. */, b);
+    } else {
+      // t aliases an operand: must branch before clobbering anything.
+      if (VM_Version::has_CompareBranch()) {
+        __ z_cij(b, -1, Assembler::bcondNotEqual, do_div);
+      } else {
+        __ z_chi(b, -1);
+        __ z_brne(do_div);
+      }
+      __ clear_reg(t, true, false); // Divisor is -1: remainder is 0.
+      __ z_bru(done_div);
+      __ bind(do_div);
+      __ z_lgfr(t->successor(), a);
+      __ z_dsgfr(t/* t is even part of a register pair. */, b);
+    }
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Remainder
+// Immediate Remainder
+// Constant divisor is materialized in R0; divisor -1 resolves to
+// remainder 0 at compile time. dst is the even half of a pair; the
+// odd half (tmp) receives the quotient and is KILLed.
+// Format string fixed: "$src1" was written "src1", so the operand was
+// printed literally instead of being substituted.
+instruct modI_reg_imm16(revenRegI dst, iRegI src1, immI16 src2, roddRegI tmp, flagsReg cr) %{
+  match(Set dst (ModI src1 src2));
+  effect(KILL tmp, KILL cr); // R0 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MOD_const  $dst,$src1,$src2" %}
+  ins_encode %{
+    assert_different_registers($dst$$Register, $src1$$Register);
+    assert_different_registers($dst$$Register->successor(), $src1$$Register);
+    int divisor = $src2$$constant;
+
+    if (divisor != -1) {
+      __ z_lghi(Z_R0_scratch, divisor);
+      __ z_lgfr($dst$$Register->successor(), $src1$$Register); // Sign-extend dividend.
+      __ z_dsgfr($dst$$Register/* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
+    } else {
+      __ clear_reg($dst$$Register, true, false); // Remainder of x / -1 is 0.
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Long Remainder
+// Register Long Remainder
+// (dst, src1) form an even/odd pair: src1 holds the dividend, DSGR leaves
+// the remainder in dst (even) and the quotient in src1 (odd, KILLed).
+// Divisor -1 yields remainder 0 without dividing.
+instruct modL_reg_reg(revenRegL dst, roddRegL src1, iRegL src2, flagsReg cr) %{
+  match(Set dst (ModL src1 src2));
+  effect(KILL src1, KILL cr); // R0 is killed, too.
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MODG_checked   $dst,$src1,$src2" %}
+  ins_encode %{
+    Register a = $src1$$Register;
+    Register b = $src2$$Register;
+    Register t = $dst$$Register;
+    assert(t->successor() == a, "(t,a) is an even-odd pair" );
+
+    Label do_div, done_div;
+    // Fast shape: when t does not alias the divisor we can pre-clear it
+    // and need only a single branch over the divide.
+    if (t->encoding() != b->encoding()) {
+      (void) __ clear_reg(t, true, false); // Does no harm. Dividend is in successor.
+      if (VM_Version::has_CompareBranch()) {
+        __ z_cgij(b, -1, Assembler::bcondEqual, done_div);
+      } else {
+        __ z_cghi(b, -1);
+        __ z_bre(done_div);
+      }
+      __ z_dsgr(t, b);
+    } else {
+      // t aliases b: must branch before clobbering anything.
+      if (VM_Version::has_CompareBranch()) {
+        __ z_cgij(b, -1, Assembler::bcondNotEqual, do_div);
+      } else {
+        __ z_cghi(b, -1);
+        __ z_brne(do_div);
+      }
+      __ clear_reg(t, true, false); // Divisor is -1: remainder is 0.
+      __ z_bru(done_div);
+      __ bind(do_div);
+      __ z_dsgr(t, b);
+    }
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Long Remainder
+// Register Long Remainder
+// Constant divisor goes to R0; divisor -1 resolves to remainder 0 at
+// compile time. dst is the even half of a pair; the odd half (tmp)
+// receives the quotient and is KILLed.
+// Format string fixed: "$src1" was written "src1", so the operand was
+// printed literally instead of being substituted.
+instruct modL_reg_imm16(revenRegL dst, iRegL src1, immL16 src2, roddRegL tmp, flagsReg cr) %{
+  match(Set dst (ModL src1 src2));
+  effect(KILL tmp, KILL cr); // R0 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MODG_const  $dst,$src1,$src2\t # long" %}
+  ins_encode %{
+    int divisor = $src2$$constant;
+    if (divisor != -1) {
+      __ z_lghi(Z_R0_scratch, divisor);
+      __ z_lgr($dst$$Register->successor(), $src1$$Register); // Dividend into the odd half.
+      __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch);  // Instruction kills tmp.
+    } else {
+      __ clear_reg($dst$$Register, true, false); // Remainder of x / -1 is 0.
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// SHIFT
+
+// Shift left logical
+
+// Register Shift Left variable
+// Register Shift Left variable
+// The shift count is copied to R1 and masked to 0..31 (NILL) before the
+// 64-bit SLLG is used; only the low 32 bits of dst are meaningful.
+instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
+  match(Set dst (LShiftI src nbits));
+  effect(KILL cr); // R1 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  size(14);
+  format %{ "SLL     $dst,$src,[$nbits] & 31\t# use RISC-like SLLG also for int" %}
+  ins_encode %{
+    __ z_lgr(Z_R1_scratch, $nbits$$Register);
+    __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1); // Mask count to 0..31.
+    __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Immediate
+// Constant shift count is masked in ideal graph already.
+instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
+  match(Set dst (LShiftI src nbits));
+  size(6);
+  format %{ "SLL     $dst,$src,$nbits\t# use RISC-like SLLG also for int" %}
+  ins_encode %{
+    int Nbit = $nbits$$constant;
+    __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Immediate by 1bit
+instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
+  match(Set dst (LShiftI src nbits));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA      $dst,#0($src,$src)\t # SLL by 1 (int)" %}
+  ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long
+instruct sllL_reg_reg(iRegL dst, iRegL src1, iRegI nbits) %{
+  match(Set dst (LShiftL src1 nbits));
+  size(6);
+  format %{ "SLLG    $dst,$src1,[$nbits]" %}
+  opcode(SLLG_ZOPC);
+  ins_encode(z_rsyform_reg_reg(dst, src1, nbits));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long Immediate
+instruct sllL_reg_imm(iRegL dst, iRegL src1, immI nbits) %{
+  match(Set dst (LShiftL src1 nbits));
+  size(6);
+  format %{ "SLLG    $dst,$src1,$nbits" %}
+  opcode(SLLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src1, nbits));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long Immediate by 1bit
+instruct sllL_reg_imm_1(iRegL dst, iRegL src1, immI_1 nbits) %{
+  match(Set dst (LShiftL src1 nbits));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA      $dst,#0($src1,$src1)\t # SLLG by 1 (long)" %}
+  ins_encode %{ __ z_la($dst$$Register, 0, $src1$$Register, $src1$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Shift right arithmetic
+
+// Register Arithmetic Shift Right
+// Two-address form: dst is both first input and result.
+// SRA sets the condition code, hence the KILL cr effect.
+instruct sraI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (RShiftI dst src));
+  effect(KILL cr); // R1 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  size(12);
+  format %{ "SRA     $dst,[$src] & 31" %}
+  ins_encode %{
+    __ z_lgr(Z_R1_scratch, $src$$Register);         // Copy count; don't clobber $src.
+    __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);  // Mask count to 0..31 (Java semantics).
+    __ z_sra($dst$$Register, 0, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Immediate
+// Constant shift count is masked in ideal graph already.
+instruct sraI_reg_imm(iRegI dst, immI src, flagsReg cr) %{
+  match(Set dst (RShiftI dst src));
+  effect(KILL cr);
+  size(4);
+  format %{ "SRA     $dst,$src" %}
+  ins_encode %{
+    int Nbit = $src$$constant;
+    __ z_sra($dst$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Long
+instruct sraL_reg_reg(iRegL dst, iRegL src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (RShiftL src1 src2));
+  effect(KILL cr);
+  size(6);
+  format %{ "SRAG    $dst,$src1,[$src2]" %}
+  opcode(SRAG_ZOPC);
+  ins_encode(z_rsyform_reg_reg(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Long Immediate
+instruct sraL_reg_imm(iRegL dst, iRegL src1, immI src2, flagsReg cr) %{
+  match(Set dst (RShiftL src1 src2));
+  effect(KILL cr);
+  size(6);
+  format %{ "SRAG    $dst,$src1,$src2" %}
+  opcode(SRAG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  Shift right logical
+
+// Register Shift Right
+// Two-address form: dst is both first input and result.
+instruct srlI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (URShiftI dst src));
+  effect(KILL cr); // R1 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  size(12);
+  format %{ "SRL     $dst,[$src] & 31" %}
+  ins_encode %{
+    __ z_lgr(Z_R1_scratch, $src$$Register);         // Copy count; don't clobber $src.
+    __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);  // Mask count to 0..31 (Java semantics).
+    __ z_srl($dst$$Register, 0, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate
+// Constant shift count is masked in ideal graph already.
+// Note: SRL does not change the CC, so no flags effect is needed here.
+instruct srlI_reg_imm(iRegI dst, immI src) %{
+  match(Set dst (URShiftI dst src));
+  size(4);
+  format %{ "SRL     $dst,$src" %}
+  ins_encode %{
+    int Nbit = $src$$constant;
+    __ z_srl($dst$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Long
+instruct srlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+  match(Set dst (URShiftL src1 src2));
+  size(6);
+  format %{ "SRLG    $dst,$src1,[$src2]" %}
+  opcode(SRLG_ZOPC);
+  ins_encode(z_rsyform_reg_reg(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Long Immediate
+instruct srlL_reg_imm(iRegL dst, iRegL src1, immI src2) %{
+  match(Set dst (URShiftL src1 src2));
+  size(6);
+  format %{ "SRLG    $dst,$src1,$src2" %}
+  opcode(SRLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate with a CastP2X
+// Folds the pointer-to-long cast into the shift; SRLG reads the full
+// 64-bit register, so no separate cast instruction is needed.
+instruct srlP_reg_imm(iRegL dst, iRegP_N2P src1, immI src2) %{
+  match(Set dst (URShiftL (CastP2X src1) src2));
+  size(6);
+  format %{ "SRLG    $dst,$src1,$src2\t # Cast ptr $src1 to long and shift" %}
+  opcode(SRLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Rotate Instructions------------------------------------------------
+
+// Rotate left 32bit.
+// Matches (x << lshift) | (x >>> rshift). In the predicate, n->in(1)/in(2)
+// are the two Or inputs and their in(2) the shift counts; the rule applies
+// only when lshift + rshift == 0 (mod 32), i.e. the pair forms a rotate.
+instruct rotlI_reg_immI8(iRegI dst, iRegI src, immI8 lshift, immI8 rshift) %{
+  match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
+  size(6);
+  format %{ "RLL     $dst,$src,$lshift\t # ROTL32" %}
+  opcode(RLL_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, lshift));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate left 64bit. Same pattern as above, mod 64.
+instruct rotlL_reg_immI8(iRegL dst, iRegL src, immI8 lshift, immI8 rshift) %{
+  match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
+  size(6);
+  format %{ "RLLG    $dst,$src,$lshift\t # ROTL64" %}
+  opcode(RLLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, lshift));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate right 32bit.
+// RLL only rotates LEFT. A right rotate by $rshift equals a left rotate by
+// $lshift (the predicate guarantees lshift + rshift == 0 mod 32), so the
+// left-rotate amount $lshift must be encoded — the previous encoding of
+// $rshift rotated left by the wrong amount.
+instruct rotrI_reg_immI8(iRegI dst, iRegI src, immI8 rshift, immI8 lshift) %{
+  match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "RLL     $dst,$src,$lshift\t # ROTR32" %}
+  opcode(RLL_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, lshift));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate right 64bit.
+// Same fix as the 32-bit rotate-right above: RLLG only rotates left, so
+// encode the equivalent left-rotate amount $lshift (== 64 - rshift mod 64,
+// guaranteed by the predicate) instead of $rshift.
+instruct rotrL_reg_immI8(iRegL dst, iRegL src, immI8 rshift, immI8 lshift) %{
+  match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "RLLG    $dst,$src,$lshift\t # ROTR64" %}
+  opcode(RLLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, lshift));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Overflow Math Instructions-----------------------------------------
+
+// These rules produce only the condition code: the add/sub is performed
+// into the scratch register R0 and the numeric result discarded, leaving
+// both source operands unmodified.
+
+instruct overflowAddI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AR      $op1,$op2\t # overflow check int" %}
+  ins_encode %{
+    __ z_lr(Z_R0_scratch, $op1$$Register);
+    __ z_ar(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowAddI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "AR      $op1,$op2\t # overflow check int" %}
+  ins_encode %{
+    // Addition commutes, so the constant can be materialized in R0 first.
+    __ load_const_optimized(Z_R0_scratch, $op2$$constant);
+    __ z_ar(Z_R0_scratch, $op1$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowAddL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+  match(Set cr (OverflowAddL op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AGR     $op1,$op2\t # overflow check long" %}
+  ins_encode %{
+    __ z_lgr(Z_R0_scratch, $op1$$Register);
+    __ z_agr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowAddL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
+  match(Set cr (OverflowAddL op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "AGR     $op1,$op2\t # overflow check long" %}
+  ins_encode %{
+    __ load_const_optimized(Z_R0_scratch, $op2$$constant);
+    __ z_agr(Z_R0_scratch, $op1$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowSubI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+  match(Set cr (OverflowSubI op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SR      $op1,$op2\t # overflow check int" %}
+  ins_encode %{
+    __ z_lr(Z_R0_scratch, $op1$$Register);
+    __ z_sr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowSubI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+  match(Set cr (OverflowSubI op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "SR      $op1,$op2\t # overflow check int" %}
+  ins_encode %{
+    // Subtraction does not commute: constant goes to R1, minuend to R0.
+    __ load_const_optimized(Z_R1_scratch, $op2$$constant);
+    __ z_lr(Z_R0_scratch, $op1$$Register);
+    __ z_sr(Z_R0_scratch, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowSubL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+  match(Set cr (OverflowSubL op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SGR     $op1,$op2\t # overflow check long" %}
+  ins_encode %{
+    __ z_lgr(Z_R0_scratch, $op1$$Register);
+    __ z_sgr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
+  match(Set cr (OverflowSubL op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "SGR     $op1,$op2\t # overflow check long" %}
+  ins_encode %{
+    __ load_const_optimized(Z_R1_scratch, $op2$$constant);
+    __ z_lgr(Z_R0_scratch, $op1$$Register);
+    __ z_sgr(Z_R0_scratch, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Negation overflow check: 0 - op2 (overflows only for op2 == min_jint).
+instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
+  match(Set cr (OverflowSubI zero op2));
+  effect(DEF cr, USE op2);
+  format %{ "NEG    $op2\t# overflow check int" %}
+  ins_encode %{
+    __ clear_reg(Z_R0_scratch, false, false);
+    __ z_sr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Negation overflow check: 0 - op2 (overflows only for op2 == min_jlong).
+instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
+  match(Set cr (OverflowSubL zero op2));
+  effect(DEF cr, USE op2);
+  format %{ "NEGG    $op2\t# overflow check long" %}
+  ins_encode %{
+    __ clear_reg(Z_R0_scratch, true, false);
+    __ z_sgr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// No intrinsics for multiplication, since there is no easy way
+// to check for overflow.
+
+
+//----------Floating Point Arithmetic Instructions-----------------------------
+//
+// All RR forms are destructive two-address instructions (dst is also the
+// first source). Add/sub set the condition code, hence the KILL cr effects;
+// the RX forms fold the memory operand directly into the arithmetic.
+
+//  ADD
+
+//  Add float single precision
+instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (AddF dst src));
+  effect(KILL cr);
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "AEBR     $dst,$src" %}
+  opcode(AEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addF_reg_mem(regF dst, memoryRX src, flagsReg cr)%{
+  match(Set dst (AddF dst (LoadF src)));
+  effect(KILL cr);
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "AEB      $dst,$src\t # floatMemory" %}
+  opcode(AEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Add float double precision
+instruct addD_reg_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (AddD dst src));
+  effect(KILL cr);
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "ADBR     $dst,$src" %}
+  opcode(ADBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct addD_reg_mem(regD dst, memoryRX src, flagsReg cr)%{
+  match(Set dst (AddD dst (LoadD src)));
+  effect(KILL cr);
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "ADB      $dst,$src\t # doubleMemory" %}
+  opcode(ADB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// SUB
+
+// Sub float single precision
+instruct subF_reg_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (SubF dst src));
+  effect(KILL cr);
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "SEBR     $dst,$src" %}
+  opcode(SEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct subF_reg_mem(regF dst, memoryRX src, flagsReg cr)%{
+  match(Set dst (SubF dst (LoadF src)));
+  effect(KILL cr);
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "SEB      $dst,$src\t # floatMemory" %}
+  opcode(SEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  Sub float double precision
+instruct subD_reg_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (SubD dst src));
+  effect(KILL cr);
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "SDBR     $dst,$src" %}
+  opcode(SDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct subD_reg_mem(regD dst, memoryRX src, flagsReg cr)%{
+  match(Set dst (SubD dst (LoadD src)));
+  effect(KILL cr);
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "SDB      $dst,$src\t # doubleMemory" %}
+  opcode(SDB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// MUL
+
+// Multiply and divide leave the condition code unchanged, so unlike the
+// add/sub rules above these need no flags effect.
+
+// Mul float single precision
+instruct mulF_reg_reg(regF dst, regF src) %{
+  match(Set dst (MulF dst src));
+  // CC unchanged by MUL.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "MEEBR    $dst,$src" %}
+  opcode(MEEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulF_reg_mem(regF dst, memoryRX src)%{
+  match(Set dst (MulF dst (LoadF src)));
+  // CC unchanged by MUL.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "MEEB     $dst,$src\t # floatMemory" %}
+  opcode(MEEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  Mul float double precision
+instruct mulD_reg_reg(regD dst, regD src) %{
+  match(Set dst (MulD dst src));
+  // CC unchanged by MUL.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "MDBR     $dst,$src" %}
+  opcode(MDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulD_reg_mem(regD dst, memoryRX src)%{
+  match(Set dst (MulD dst (LoadD src)));
+  // CC unchanged by MUL.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "MDB      $dst,$src\t # doubleMemory" %}
+  opcode(MDB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  DIV
+
+//  Div float single precision
+instruct divF_reg_reg(regF dst, regF src) %{
+  match(Set dst (DivF dst src));
+  // CC unchanged by DIV.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "DEBR     $dst,$src" %}
+  opcode(DEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct divF_reg_mem(regF dst, memoryRX src)%{
+  match(Set dst (DivF dst (LoadF src)));
+  // CC unchanged by DIV.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "DEB      $dst,$src\t # floatMemory" %}
+  opcode(DEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  Div float double precision
+instruct divD_reg_reg(regD dst, regD src) %{
+  match(Set dst (DivD dst src));
+  // CC unchanged by DIV.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "DDBR     $dst,$src" %}
+  opcode(DDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct divD_reg_mem(regD dst, memoryRX src)%{
+  match(Set dst (DivD dst (LoadD src)));
+  // CC unchanged by DIV.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "DDB      $dst,$src\t # doubleMemory" %}
+  opcode(DDB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ABS
+
+// Absolute float single precision
+// LPEBR (load positive) sets the condition code, hence KILL cr.
+instruct absF_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (AbsF src));
+  effect(KILL cr);
+  size(4);
+  format %{ "LPEBR    $dst,$src\t float" %}
+  opcode(LPEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Absolute float double precision
+instruct absD_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (AbsD src));
+  effect(KILL cr);
+  size(4);
+  format %{ "LPDBR    $dst,$src\t double" %}
+  opcode(LPDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  NEG(ABS)
+
+// Negative absolute float single precision
+// LNEBR (load negative) computes -|src| in a single instruction.
+instruct nabsF_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (NegF (AbsF src)));
+  effect(KILL cr);
+  size(4);
+  format %{ "LNEBR    $dst,$src\t float" %}
+  opcode(LNEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Negative absolute float double precision
+instruct nabsD_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (NegD (AbsD src)));
+  effect(KILL cr);
+  size(4);
+  format %{ "LNDBR    $dst,$src\t double" %}
+  opcode(LNDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// NEG
+
+// LCEBR/LCDBR (load complement) flips the sign bit.
+instruct negF_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (NegF src));
+  effect(KILL cr);
+  size(4);
+  format %{ "NegF     $dst,$src\t float" %}
+  ins_encode %{ __ z_lcebr($dst$$FloatRegister, $src$$FloatRegister); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct negD_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (NegD src));
+  effect(KILL cr);
+  size(4);
+  format %{ "NegD     $dst,$src\t double" %}
+  ins_encode %{ __ z_lcdbr($dst$$FloatRegister, $src$$FloatRegister); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// SQRT
+
+// Sqrt float precision
+// Single-precision sqrt reaches the matcher as ConvD2F(SqrtD(ConvF2D x));
+// SQEBR computes it directly in single precision, which yields the same
+// correctly-rounded result.
+instruct sqrtF_reg(regF dst, regF src) %{
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+  // CC remains unchanged.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "SQEBR    $dst,$src" %}
+  opcode(SQEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sqrt double precision
+instruct sqrtD_reg(regD dst, regD src) %{
+  match(Set dst (SqrtD src));
+  // CC remains unchanged.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "SQDBR    $dst,$src" %}
+  opcode(SQDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// NOTE(review): unlike the other *_mem rules in this file (e.g. addF_reg_mem),
+// the two memory forms below take a memoryRX operand without a LoadF/LoadD
+// node in the match rule — confirm this is intended and actually matches.
+instruct sqrtF_mem(regF dst, memoryRX src) %{
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+  // CC remains unchanged.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "SQEB     $dst,$src\t # floatMemory" %}
+  opcode(SQEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct sqrtD_mem(regD dst, memoryRX src) %{
+  match(Set dst (SqrtD src));
+  // CC remains unchanged.
+  ins_cost(ALU_MEMORY_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SQDB     $dst,$src\t # doubleMemory" %}
+  opcode(SQDB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Logical Instructions-----------------------------------------------
+
+// Register And
+// Two-address form: dst is both first source and result. NR sets the CC.
+instruct andI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_LOW);
+  size(2);
+  format %{ "NR      $dst,$src\t # int" %}
+  opcode(NR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// And directly with a memory operand.
+instruct andI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+  match(Set dst (AndI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "N(Y)    $dst, $src\t # int" %}
+  opcode(NY_ZOPC, N_ZOPC);  // Long- vs. short-displacement form, picked by the encoder.
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate And
+instruct andI_reg_uimm32(iRegI dst, uimmI src, flagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "NILF    $dst,$src" %}
+  opcode(NILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// NILH ands only bits 16..31 of the low word with the halfword taken from
+// the constant; the uimmI_LH1 operand type restricts the constant so this
+// suffices (see the operand definition).
+instruct andI_reg_uimmI_LH1(iRegI dst, uimmI_LH1 src, flagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NILH    $dst,$src" %}
+  ins_encode %{ __ z_nilh($dst$$Register, ($src$$constant >> 16) & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// As above, for the low halfword (bits 0..15 of the constant).
+instruct andI_reg_uimmI_LL1(iRegI dst, uimmI_LL1 src, flagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NILL    $dst,$src" %}
+  ins_encode %{ __ z_nill($dst$$Register, $src$$constant & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register And Long
+instruct andL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NGR     $dst,$src\t # long" %}
+  opcode(NGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct andL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (AndL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "NG      $dst, $src\t # long" %}
+  opcode(NG_ZOPC, NG_ZOPC);  // Same opcode in both slots: no short-displacement variant of NG.
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// The four halfword variants below each and one 16-bit slice of the 64-bit
+// register (LL/LH/HL/HH), taken from the matching slice of the constant.
+instruct andL_reg_uimmL_LL1(iRegL dst, uimmL_LL1 src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NILL    $dst,$src\t # long" %}
+  ins_encode %{ __ z_nill($dst$$Register, $src$$constant & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct andL_reg_uimmL_LH1(iRegL dst, uimmL_LH1 src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NILH    $dst,$src\t # long" %}
+  ins_encode %{ __ z_nilh($dst$$Register, ($src$$constant >> 16) & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct andL_reg_uimmL_HL1(iRegL dst, uimmL_HL1 src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NIHL    $dst,$src\t # long" %}
+  ins_encode %{ __ z_nihl($dst$$Register, ($src$$constant >> 32) & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct andL_reg_uimmL_HH1(iRegL dst, uimmL_HH1 src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NIHH    $dst,$src\t # long" %}
+  ins_encode %{ __ z_nihh($dst$$Register, ($src$$constant >> 48) & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  OR
+
+// Or Instructions
+// Register Or
+// Two-address form: dst is both first source and result. OR sets the CC.
+instruct orI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (OrI dst src));
+  effect(KILL cr);
+  size(2);
+  format %{ "OR      $dst,$src" %}
+  opcode(OR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Or directly with a memory operand.
+instruct orI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+  match(Set dst (OrI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "O(Y)    $dst, $src\t # int" %}
+  opcode(OY_ZOPC, O_ZOPC);  // Long- vs. short-displacement form, picked by the encoder.
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Or
+// OILL ors the low halfword only; sufficient since uimmI16 fits in 16 bits.
+instruct orI_reg_uimm16(iRegI dst, uimmI16 con, flagsReg cr) %{
+  match(Set dst (OrI dst con));
+  effect(KILL cr);
+  size(4);
+  format %{ "OILL    $dst,$con" %}
+  opcode(OILL_ZOPC);
+  ins_encode(z_riform_unsigned(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct orI_reg_uimm32(iRegI dst, uimmI con, flagsReg cr) %{
+  match(Set dst (OrI dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "OILF    $dst,$con" %}
+  opcode(OILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Or Long
+instruct orL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (OrL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "OGR      $dst,$src\t # long" %}
+  opcode(OGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct orL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (OrL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "OG      $dst, $src\t # long" %}
+  opcode(OG_ZOPC, OG_ZOPC);  // Same opcode in both slots: no short-displacement variant of OG.
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Or long
+// OILL ors only the low halfword; high bits of a long stay untouched, which
+// is correct for OR with a constant whose upper bits are zero (uimmL16).
+instruct orL_reg_uimm16(iRegL dst, uimmL16 con, flagsReg cr) %{
+  match(Set dst (OrL dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "OILL    $dst,$con\t # long" %}
+  opcode(OILL_ZOPC);
+  ins_encode(z_riform_unsigned(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Or long with a 32-bit unsigned immediate.
+// Fixed: the rule was declared with the int flavor (iRegI dst / OrI) although
+// its name, the uimmL32 constant, and the "# long" format are all long —
+// a type mismatch that made the rule dead. Now mirrors the sibling
+// xorL_reg_uimm32 rule. OILF ors only the low 32 bits, which is correct
+// because the upper 32 bits of a uimmL32 constant are zero.
+instruct orL_reg_uimm32(iRegL dst, uimmL32 con, flagsReg cr) %{
+  match(Set dst (OrL dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "OILF    $dst,$con\t # long" %}
+  opcode(OILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// XOR
+
+// Register Xor
+// Two-address form: dst is both first source and result. XR sets the CC.
+instruct xorI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (XorI dst src));
+  effect(KILL cr);
+  size(2);
+  format %{ "XR      $dst,$src" %}
+  opcode(XR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Xor directly with a memory operand.
+instruct xorI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+  match(Set dst (XorI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "X(Y)    $dst, $src\t # int" %}
+  opcode(XY_ZOPC, X_ZOPC);  // Long- vs. short-displacement form, picked by the encoder.
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Xor
+instruct xorI_reg_uimm32(iRegI dst, uimmI src, flagsReg cr) %{
+  match(Set dst (XorI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "XILF    $dst,$src" %}
+  opcode(XILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Xor Long
+instruct xorL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (XorL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "XGR     $dst,$src\t # long" %}
+  opcode(XGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct xorL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (XorL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "XG      $dst, $src\t # long" %}
+  opcode(XG_ZOPC, XG_ZOPC);  // Same opcode in both slots: no short-displacement variant of XG.
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Xor Long
+// XILF xors only the low 32 bits; correct since the upper 32 bits of a
+// uimmL32 constant are zero (x ^ 0 == x).
+instruct xorL_reg_uimm32(iRegL dst, uimmL32 con, flagsReg cr) %{
+  match(Set dst (XorL dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "XILF    $dst,$con\t # long" %}
+  opcode(XILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Convert to Boolean-------------------------------------------------
+
+// Convert integer to boolean: dst = (src != 0) ? 1 : 0.
+instruct convI2B(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (Conv2B src));
+  effect(KILL cr);
+  ins_cost(3 * DEFAULT_COST);
+  size(6);
+  format %{ "convI2B $dst,$src" %}
+  ins_encode %{
+    __ z_lnr($dst$$Register, $src$$Register);  // Rdst := -|Rsrc|, i.e. Rdst == 0 <=> Rsrc == 0
+    __ z_srl($dst$$Register, 31);              // Rdst := sign(Rdst), i.e. 1 iff Rsrc != 0
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Convert pointer to boolean: dst = (src != NULL) ? 1 : 0.
+instruct convP2B(iRegI dst, iRegP_N2P src, flagsReg cr) %{
+  match(Set dst (Conv2B src));
+  effect(KILL cr);
+  ins_cost(3 * DEFAULT_COST);
+  size(10);
+  format %{ "convP2B $dst,$src" %}
+  ins_encode %{
+    __ z_lngr($dst$$Register, $src$$Register);     // Rdst := -|Rsrc| i.e. Rdst == 0 <=> Rsrc == 0
+    __ z_srlg($dst$$Register, $dst$$Register, 63); // Rdst := sign(Rdst), i.e. 1 iff Rsrc != NULL
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// dst = (dst < src) ? -1 : 0.
+instruct cmpLTMask_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (CmpLTMask dst src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST);
+  size(18);
+  format %{ "Set $dst CmpLTMask $dst,$src" %}
+  ins_encode %{
+    // Avoid signed 32 bit overflow: Do sign extend and sub 64 bit.
+    __ z_lgfr(Z_R0_scratch, $src$$Register);
+    __ z_lgfr($dst$$Register, $dst$$Register);
+    __ z_sgr($dst$$Register, Z_R0_scratch);
+    __ z_srag($dst$$Register, $dst$$Register, 63);  // Smear the sign bit over the whole register.
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Special case: compare against zero needs no subtraction, just smear
+// dst's own sign bit.
+instruct cmpLTMask_reg_zero(iRegI dst, immI_0 zero, flagsReg cr) %{
+  match(Set dst (CmpLTMask dst zero));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "Set $dst CmpLTMask $dst,$zero" %}
+  ins_encode %{ __ z_sra($dst$$Register, 31); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Arithmetic Conversion Instructions---------------------------------
+// The conversions operations are all Alpha sorted. Please keep it that way!
+
+// Double -> float. LEDBR (load rounded) narrows the operand to float precision.
+instruct convD2F_reg(regF dst, regD src) %{
+  match(Set dst (ConvD2F src));
+  // CC remains unchanged.
+  size(4);
+  format %{ "LEDBR   $dst,$src" %}
+  opcode(LEDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Float/double -> int/long conversions. The JVM requires a NaN input to
+// convert to 0. The result register is pre-cleared, then src is compared
+// with itself: that compare yields the 'unordered' CC exactly when src is
+// NaN, in which case the convert-to-fixed instruction is skipped.
+instruct convF2I_reg(iRegI dst, regF src, flagsReg cr) %{
+  match(Set dst (ConvF2I src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(16);
+  format %{ "convF2I  $dst,$src" %}
+  ins_encode %{
+    Label done;
+    __ clear_reg($dst$$Register, false, false);  // Initialize with result for unordered: 0.
+    __ z_cebr($src$$FloatRegister, $src$$FloatRegister);   // NaN check: CC is 'unordered' iff src is NaN.
+    __ z_brno(done);                             // Result is zero if unordered argument.
+    __ z_cfebr($dst$$Register, $src$$FloatRegister, Assembler::to_zero);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct convD2I_reg(iRegI dst, regD src, flagsReg cr) %{
+  match(Set dst (ConvD2I src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(16);
+  format %{ "convD2I  $dst,$src" %}
+  ins_encode %{
+    Label done;
+    __ clear_reg($dst$$Register, false, false);  // Initialize with result for unordered: 0.
+    __ z_cdbr($src$$FloatRegister, $src$$FloatRegister);   // NaN check: CC is 'unordered' iff src is NaN.
+    __ z_brno(done);                             // Result is zero if unordered argument.
+    __ z_cfdbr($dst$$Register, $src$$FloatRegister, Assembler::to_zero);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct convF2L_reg(iRegL dst, regF src, flagsReg cr) %{
+  match(Set dst (ConvF2L src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(16);
+  format %{ "convF2L  $dst,$src" %}
+  ins_encode %{
+    Label done;
+    __ clear_reg($dst$$Register, true, false);  // Initialize with result for unordered: 0.
+    __ z_cebr($src$$FloatRegister, $src$$FloatRegister);   // NaN check: CC is 'unordered' iff src is NaN.
+    __ z_brno(done);                             // Result is zero if unordered argument.
+    __ z_cgebr($dst$$Register, $src$$FloatRegister, Assembler::to_zero);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct convD2L_reg(iRegL dst, regD src, flagsReg cr) %{
+  match(Set dst (ConvD2L src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(16);
+  format %{ "convD2L  $dst,$src" %}
+  ins_encode %{
+    Label done;
+    __ clear_reg($dst$$Register, true, false);  // Initialize with result for unordered: 0.
+    __ z_cdbr($src$$FloatRegister, $src$$FloatRegister);   // NaN check: CC is 'unordered' iff src is NaN.
+    __ z_brno(done);                             // Result is zero if unordered argument.
+    __ z_cgdbr($dst$$Register, $src$$FloatRegister, Assembler::to_zero);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Float -> double. LDEB(R) widens without loss; no rounding needed.
+instruct convF2D_reg(regD dst, regF src) %{
+  match(Set dst (ConvF2D src));
+  // CC remains unchanged.
+  size(4);
+  format %{ "LDEBR   $dst,$src" %}
+  opcode(LDEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Memory form: widen a float directly from memory into a double register.
+instruct convF2D_mem(regD dst, memoryRX src) %{
+  match(Set dst (ConvF2D src));
+  // CC remains unchanged.
+  size(6);
+  format %{ "LDEB    $dst,$src" %}
+  opcode(LDEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Int -> double, converted directly in the FPU (CDFBR), no memory round trip.
+instruct convI2D_reg(regD dst, iRegI src) %{
+  match(Set dst (ConvI2D src));
+  // CC remains unchanged.
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CDFBR   $dst,$src" %}
+  opcode(CDFBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Optimization that saves up to two memory operations for each conversion.
+instruct convI2F_ireg(regF dst, iRegI src) %{
+  match(Set dst (ConvI2F src));
+  // CC remains unchanged.
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CEFBR   $dst,$src\t # convert int to float" %}
+  opcode(CEFBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sign-extend int to long.
+instruct convI2L_reg(iRegL dst, iRegI src) %{
+  match(Set dst (ConvI2L src));
+  size(4);
+  format %{ "LGFR    $dst,$src\t # int->long" %}
+  opcode(LGFR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend convert int to long.
+// The AndL with the 32-bit mask turns the sign extension into a zero extension,
+// which maps to a single LLGFR.
+instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask) %{
+  match(Set dst (AndL (ConvI2L src) mask));
+  size(4);
+  format %{ "LLGFR   $dst, $src \t # zero-extend int to long" %}
+  ins_encode %{ __ z_llgfr($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend convert int to long, fused with the memory load.
+instruct convI2L_mem_zex(iRegL dst, memory src, immL_32bits mask) %{
+  match(Set dst (AndL (ConvI2L (LoadI src)) mask));
+  // Encoded via z_form_rt_mem_opt, so size can vary.
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "LLGF    $dst, $src \t # zero-extend int to long" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend long
+instruct zeroExtend_long(iRegL dst, iRegL src, immL_32bits mask) %{
+  match(Set dst (AndL src mask));
+  size(4);
+  format %{ "LLGFR   $dst, $src \t # zero-extend long to long" %}
+  ins_encode %{ __ z_llgfr($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// (x << 16) >> 16 is a sign-extending short->int narrowing: one LHR.
+instruct rShiftI16_lShiftI16_reg(iRegI dst, iRegI src, immI_16 amount) %{
+  match(Set dst (RShiftI (LShiftI src amount) amount));
+  size(4);
+  format %{ "LHR     $dst,$src\t short->int" %}
+  opcode(LHR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// (x << 24) >> 24 is a sign-extending byte->int narrowing: one LBR.
+instruct rShiftI24_lShiftI24_reg(iRegI dst, iRegI src, immI_24 amount) %{
+  match(Set dst (RShiftI (LShiftI src amount) amount));
+  size(4);
+  format %{ "LBR     $dst,$src\t byte->int" %}
+  opcode(LBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Bitwise reinterpretation moves (no value conversion) via a stack slot:
+// load the raw float bits from the stack into an int register.
+instruct MoveF2I_stack_reg(iRegI dst, stackSlotF src) %{
+  match(Set dst (MoveF2I src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "L       $dst,$src\t # MoveF2I" %}
+  opcode(L_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// javax.imageio.stream.ImageInputStreamImpl.toFloats([B[FII)
+// Load raw int bits from the stack into a float register.
+instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+  match(Set dst (MoveI2F src));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LE      $dst,$src\t # MoveI2F" %}
+  opcode(LE_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Bitwise reinterpretation: load the raw 8 bytes of a double stack slot
+// into a long register. LG is a load, so the target register comes first;
+// the format previously printed "$src,$dst", which did not match the
+// encoding z_form_rt_mem(dst, src) or the sibling Move* instructs.
+instruct MoveD2L_stack_reg(iRegL dst, stackSlotD src) %{
+  match(Set dst (MoveD2L src));
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "LG      $dst,$src\t # MoveD2L" %}
+  opcode(LG_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load raw long bits from the stack into a double register.
+instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+  match(Set dst (MoveL2D src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "LD      $dst,$src\t # MoveL2D" %}
+  opcode(LD_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store raw int bits to a float stack slot (reg -> stack direction).
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+  match(Set dst (MoveI2F src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "ST      $src,$dst\t # MoveI2F" %}
+  opcode(ST_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store raw double bits to a long stack slot.
+instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+  match(Set dst (MoveD2L src));
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STD     $src,$dst\t # MoveD2L" %}
+  opcode(STD_ZOPC);
+  ins_encode(z_form_rt_mem(src,dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store raw long bits to a double stack slot.
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+  match(Set dst (MoveL2D src));
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "STG     $src,$dst\t # MoveL2D" %}
+  opcode(STG_ZOPC);
+  ins_encode(z_form_rt_mem(src,dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long -> float, converted directly in the FPU.
+instruct convL2F_reg(regF dst, iRegL src) %{
+  match(Set dst (ConvL2F src));
+  // CC remains unchanged.
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CEGBR   $dst,$src" %}
+  opcode(CEGBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long -> double, converted directly in the FPU.
+instruct convL2D_reg(regD dst, iRegL src) %{
+  match(Set dst (ConvL2D src));
+  // CC remains unchanged.
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CDGBR   $dst,$src" %}
+  opcode(CDGBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long -> int: only a register move (elided when dst == src).
+instruct convL2I_reg(iRegI dst, iRegL src) %{
+  match(Set dst (ConvL2I src));
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "LR      $dst,$src\t # long->int (if needed)" %}
+  ins_encode %{ __ lr_if_needed($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate
+// Fuses (long >> [32..63]) with the L2I narrowing into one SRAG.
+instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt, flagsReg cr) %{
+  match(Set dst (ConvL2I (RShiftL src cnt)));
+  effect(KILL cr);
+  size(6);
+  format %{ "SRAG    $dst,$src,$cnt" %}
+  opcode(SRAG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, cnt));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------TRAP based zero checks and range checks----------------------------
+
+// SIGTRAP based implicit range checks in compiled code.
+// A range check in the ideal world has one of the following shapes:
+//   - (If le (CmpU length index)), (IfTrue  throw exception)
+//   - (If lt (CmpU index length)), (IfFalse throw exception)
+//
+// Match range check 'If le (CmpU length index)'
+// Note: for the 'le' shape the taken branch is the exception path, so the
+// predicate tests PROB_UNLIKELY(prob) (i.e. the probability of NOT taking
+// the branch) against PROB_ALWAYS.
+instruct rangeCheck_iReg_uimmI16(cmpOpT cmp, iRegI length, uimmI16 index, label labl) %{
+  match(If cmp (CmpU length index));
+  effect(USE labl);
+  predicate(TrapBasedRangeChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
+            PROB_UNLIKELY(_leaf->as_If ()->_prob) >= PROB_ALWAYS &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  ins_cost(1);
+  // TODO: s390 port size(FIXED_SIZE);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "RangeCheck len=$length cmp=$cmp idx=$index => trap $labl" %}
+  ins_encode %{ __ z_clfit($length$$Register, $index$$constant, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+// Match range check 'If lt (CmpU index length)'
+instruct rangeCheck_iReg_iReg(cmpOpT cmp, iRegI index, iRegI length, label labl, flagsReg cr) %{
+  match(If cmp (CmpU index length));
+  effect(USE labl, KILL cr);
+  predicate(TrapBasedRangeChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
+            _leaf->as_If ()->_prob >= PROB_ALWAYS &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  ins_cost(1);
+  // TODO: s390 port size(FIXED_SIZE);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "RangeCheck idx=$index cmp=$cmp len=$length => trap $labl" %}
+  ins_encode %{ __ z_clrt($index$$Register, $length$$Register, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+// Match range check 'If lt (CmpU index length)'
+instruct rangeCheck_uimmI16_iReg(cmpOpT cmp, iRegI index, uimmI16 length, label labl) %{
+  match(If cmp (CmpU index length));
+  effect(USE labl);
+  predicate(TrapBasedRangeChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
+            _leaf->as_If ()->_prob >= PROB_ALWAYS &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  ins_cost(1);
+  // TODO: s390 port size(FIXED_SIZE);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "RangeCheck idx=$index cmp=$cmp len= $length => trap $labl" %}
+  ins_encode %{ __ z_clfit($index$$Register, $length$$constant, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+// Implicit zero checks (more implicit null checks).
+instruct zeroCheckP_iReg_imm0(cmpOpT cmp, iRegP_N2P value, immP0 zero, label labl) %{
+  match(If cmp (CmpP value zero));
+  effect(USE labl);
+  predicate(TrapBasedNullChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
+            _leaf->as_If ()->_prob >= PROB_LIKELY_MAG(4) &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  size(6);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "ZeroCheckP value=$value cmp=$cmp zero=$zero => trap $labl" %}
+  ins_encode %{ __ z_cgit($value$$Register, 0, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+// Implicit zero checks (more implicit null checks).
+// Narrow-oop variant: 32-bit compare-and-trap on the compressed value.
+instruct zeroCheckN_iReg_imm0(cmpOpT cmp, iRegN_P2N value, immN0 zero, label labl) %{
+  match(If cmp (CmpN value zero));
+  effect(USE labl);
+  predicate(TrapBasedNullChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
+            _leaf->as_If ()->_prob >= PROB_LIKELY_MAG(4) &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  size(6);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "ZeroCheckN value=$value cmp=$cmp zero=$zero => trap $labl" %}
+  ins_encode %{ __ z_cit($value$$Register, 0, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+//----------Compare instructions-----------------------------------------------
+
+// INT signed
+
+// Compare Integers
+instruct compI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+  match(Set cr (CmpI op1 op2));
+  size(2);
+  format %{ "CR      $op1,$op2" %}
+  opcode(CR_ZOPC);
+  ins_encode(z_rrform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare int with a full 32-bit immediate.
+instruct compI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+  match(Set cr (CmpI op1 op2));
+  size(6);
+  format %{ "CFI     $op1,$op2" %}
+  opcode(CFI_ZOPC);
+  ins_encode(z_rilform_signed(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Shorter encoding for 16-bit immediates.
+instruct compI_reg_imm16(flagsReg cr, iRegI op1, immI16 op2) %{
+  match(Set cr (CmpI op1 op2));
+  size(4);
+  format %{ "CHI     $op1,$op2" %}
+  opcode(CHI_ZOPC);
+  ins_encode(z_riform_signed(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare against zero via load-and-test of the register with itself.
+instruct compI_reg_imm0(flagsReg cr, iRegI op1, immI_0 zero) %{
+  match(Set cr (CmpI op1 zero));
+  ins_cost(DEFAULT_COST_LOW);
+  size(2);
+  format %{ "LTR     $op1,$op1" %}
+  opcode(LTR_ZOPC);
+  ins_encode(z_rrform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare int register with int in memory (load fused into the compare).
+instruct compI_reg_mem(flagsReg cr, iRegI op1, memory op2)%{
+  match(Set cr (CmpI op1 (LoadI op2)));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "C(Y)    $op1, $op2\t # int" %}
+  opcode(CY_ZOPC, C_ZOPC);
+  ins_encode(z_form_rt_mem_opt(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// INT unsigned
+
+instruct compU_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+  match(Set cr (CmpU op1 op2));
+  size(2);
+  format %{ "CLR     $op1,$op2\t # unsigned" %}
+  opcode(CLR_ZOPC);
+  ins_encode(z_rrform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct compU_reg_uimm(flagsReg cr, iRegI op1, uimmI op2) %{
+  match(Set cr (CmpU op1 op2));
+  size(6);
+  format %{ "CLFI    $op1,$op2\t # unsigned" %}
+  opcode(CLFI_ZOPC);
+  ins_encode(z_rilform_unsigned(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Unsigned compare with zero: equal/not-equal is all that matters, so the
+// signed load-and-test works here as well.
+instruct compU_reg_imm0(flagsReg cr, iRegI op1, immI_0 zero) %{
+  match(Set cr (CmpU op1 zero));
+  ins_cost(DEFAULT_COST_LOW);
+  size(2);
+  format %{ "LTR     $op1,$op1\t # unsigned" %}
+  opcode(LTR_ZOPC);
+  ins_encode(z_rrform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct compU_reg_mem(flagsReg cr, iRegI op1, memory op2)%{
+  match(Set cr (CmpU op1 (LoadI op2)));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CL(Y)   $op1, $op2\t # unsigned" %}
+  opcode(CLY_ZOPC, CL_ZOPC);
+  ins_encode(z_form_rt_mem_opt(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// LONG signed
+
+instruct compL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+  match(Set cr (CmpL op1 op2));
+  size(4);
+  format %{ "CGR     $op1,$op2\t # long" %}
+  opcode(CGR_ZOPC);
+  ins_encode(z_rreform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long vs int: CGFR sign-extends op2, so the ConvI2L is folded away.
+instruct compL_reg_regI(flagsReg cr, iRegL op1, iRegI op2) %{
+  match(Set cr (CmpL op1 (ConvI2L op2)));
+  size(4);
+  format %{ "CGFR    $op1,$op2\t # long/int" %}
+  opcode(CGFR_ZOPC);
+  ins_encode(z_rreform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct compL_reg_imm32(flagsReg cr, iRegL op1, immL32 con) %{
+  match(Set cr (CmpL op1 con));
+  size(6);
+  format %{ "CGFI    $op1,$con" %}
+  opcode(CGFI_ZOPC);
+  ins_encode(z_rilform_signed(op1, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct compL_reg_imm16(flagsReg cr, iRegL op1, immL16 con) %{
+  match(Set cr (CmpL op1 con));
+  size(4);
+  format %{ "CGHI    $op1,$con" %}
+  opcode(CGHI_ZOPC);
+  ins_encode(z_riform_signed(op1, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare long against zero via 64-bit load-and-test.
+instruct compL_reg_imm0(flagsReg cr, iRegL op1, immL_0 con) %{
+  match(Set cr (CmpL op1 con));
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LTGR    $op1,$op1" %}
+  opcode(LTGR_ZOPC);
+  ins_encode(z_rreform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// (int->long) compared against zero: LTGFR sign-extends and tests at once.
+instruct compL_conv_reg_imm0(flagsReg cr, iRegI op1, immL_0 con) %{
+  match(Set cr (CmpL (ConvI2L op1) con));
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LTGFR    $op1,$op1" %}
+  opcode(LTGFR_ZOPC);
+  ins_encode(z_rreform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct compL_reg_mem(iRegL dst, memory src, flagsReg cr)%{
+  match(Set cr (CmpL dst (LoadL src)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "CG      $dst, $src\t # long" %}
+  opcode(CG_ZOPC, CG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long vs int loaded from memory: CGF sign-extends the memory operand.
+instruct compL_reg_memI(iRegL dst, memory src, flagsReg cr)%{
+  match(Set cr (CmpL dst (ConvI2L (LoadI src))));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "CGF     $dst, $src\t # long/int" %}
+  opcode(CGF_ZOPC, CGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//  LONG unsigned
+
+//  PTR unsigned
+// Pointers compare unsigned (CLGR/CLG).
+
+instruct compP_reg_reg(flagsReg cr, iRegP_N2P op1, iRegP_N2P op2) %{
+  match(Set cr (CmpP op1 op2));
+  size(4);
+  format %{ "CLGR    $op1,$op2\t # ptr" %}
+  opcode(CLGR_ZOPC);
+  ins_encode(z_rreform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Null check: only equality with zero matters, so load-and-test suffices.
+instruct compP_reg_imm0(flagsReg cr, iRegP_N2P op1, immP0 op2) %{
+  match(Set cr (CmpP op1 op2));
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LTGR    $op1, $op1\t # ptr" %}
+  opcode(LTGR_ZOPC);
+  ins_encode(z_rreform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Don't use LTGFR which performs sign extend.
+// With zero base and zero shift the compressed oop is null iff the
+// uncompressed oop is null, so the 32-bit test on the narrow value suffices.
+instruct compP_decode_reg_imm0(flagsReg cr, iRegN op1, immP0 op2) %{
+  match(Set cr (CmpP (DecodeN op1) op2));
+  predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);
+  ins_cost(DEFAULT_COST_LOW);
+  size(2);
+  format %{ "LTR    $op1, $op1\t # ptr" %}
+  opcode(LTR_ZOPC);
+  ins_encode(z_rrform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct compP_reg_mem(iRegP dst, memory src, flagsReg cr)%{
+  match(Set cr (CmpP dst (LoadP src)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "CLG     $dst, $src\t # ptr" %}
+  opcode(CLG_ZOPC, CLG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Max and Min--------------------------------------------------------
+
+// Min Register with Register (branch-free via load-on-condition, z196+).
+instruct z196_minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+
+    if (Rsrc1 == Rsrc2) {
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) {   // Rdst preset with src1.
+      __ z_cr(Rsrc1, Rsrc2);      // Move src2 only if src1 is NotLow.
+      __ z_locr(Rdst, Rsrc2, Assembler::bcondNotLow);
+    } else if (Rdst == Rsrc2) {   // Rdst preset with src2.
+      __ z_cr(Rsrc2, Rsrc1);      // Move src1 only if src2 is NotLow.
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondNotLow);
+    } else {
+      // Rdst is disjoint from operands, move in either case.
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_locr(Rdst, Rsrc2, Assembler::bcondNotLow);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondLow);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min Register with Register.
+// z10 variant: uses fused compare-and-branch (CRJ) instead of separate
+// compare + branch.
+instruct z10_minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI (z10 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+    Label done;
+
+    if (Rsrc1 == Rsrc2) {
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) {
+      __ z_crj(Rsrc1, Rsrc2, Assembler::bcondLow, done);
+      __ z_lgfr(Rdst, Rsrc2);
+    } else if (Rdst == Rsrc2) {
+      __ z_crj(Rsrc2, Rsrc1, Assembler::bcondLow, done);
+      __ z_lgfr(Rdst, Rsrc1);
+    } else {
+      __ z_lgfr(Rdst, Rsrc1);
+      __ z_crj(Rsrc1, Rsrc2, Assembler::bcondLow, done);
+      __ z_lgfr(Rdst, Rsrc2);
+    }
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Fallback for pre-z10 hardware: plain compare + conditional branch.
+instruct minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(!VM_Version::has_CompareBranch());
+  ins_cost(3 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+    Label done;
+
+    if (Rsrc1 == Rsrc2) {
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) {
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_brl(done);
+      __ z_lgfr(Rdst, Rsrc2);
+    } else if (Rdst == Rsrc2) {
+      __ z_cr(Rsrc2, Rsrc1);
+      __ z_brl(done);
+      __ z_lgfr(Rdst, Rsrc1);
+    } else {
+      __ z_lgfr(Rdst, Rsrc1);
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_brl(done);
+      __ z_lgfr(Rdst, Rsrc2);
+    }
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min with 32-bit immediate, branch-free (z196+).
+instruct z196_minI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const32 (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    int      Isrc2 = $src2$$constant;
+
+    if (Rdst == Rsrc1) {
+      __ load_const_optimized(Z_R0_scratch, Isrc2);
+      __ z_cfi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotLow);
+    } else {
+      __ load_const_optimized(Rdst, Isrc2);
+      __ z_cfi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondLow);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min with 32-bit immediate, compare + branch fallback.
+instruct minI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const32" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_cfi($src1$$Register, $src2$$constant);
+    __ z_brl(done);
+    __ z_lgfi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min with 16-bit immediate, branch-free (z196+).
+instruct z196_minI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const16 (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    int      Isrc2 = $src2$$constant;
+
+    if (Rdst == Rsrc1) {
+      __ load_const_optimized(Z_R0_scratch, Isrc2);
+      __ z_chi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotLow);
+    } else {
+      __ load_const_optimized(Rdst, Isrc2);
+      __ z_chi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondLow);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min with 16-bit immediate, compare + branch fallback.
+instruct minI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const16" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_chi($src1$$Register, $src2$$constant);
+    __ z_brl(done);
+    __ z_lghi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min with 8-bit immediate via fused compare-immediate-and-branch (z10+).
+instruct z10_minI_reg_imm8(iRegI dst, iRegI src1, immI8 src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const8 (z10 only)" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_cij($src1$$Register, $src2$$constant, Assembler::bcondLow, done);
+    __ z_lghi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Max Register with Register (branch-free via load-on-condition, z196+).
+instruct z196_maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MaxI $dst $src1,$src2\t MaxI (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+
+    if (Rsrc1 == Rsrc2) {
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) { // Rdst preset with src1.
+      __ z_cr(Rsrc1, Rsrc2);    // Move src2 only if src1 is NotHigh.
+      __ z_locr(Rdst, Rsrc2, Assembler::bcondNotHigh);
+    } else if (Rdst == Rsrc2) { // Rdst preset with src2.
+      __ z_cr(Rsrc2, Rsrc1);    // Move src1 only if src2 is NotHigh.
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondNotHigh);
+    } else {                    // Rdst is disjoint from operands, move in either case.
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_locr(Rdst, Rsrc2, Assembler::bcondNotHigh);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Max Register with Register
+// z10 variant: uses fused compare-and-branch (CRJ).
+instruct z10_maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MaxI $dst $src1,$src2\t MaxI (z10 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+    Label done;
+
+    if (Rsrc1 == Rsrc2) {
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) {
+      __ z_crj(Rsrc1, Rsrc2, Assembler::bcondHigh, done);
+      __ z_lgfr(Rdst, Rsrc2);
+    } else if (Rdst == Rsrc2) {
+      __ z_crj(Rsrc2, Rsrc1, Assembler::bcondHigh, done);
+      __ z_lgfr(Rdst, Rsrc1);
+    } else {
+      __ z_lgfr(Rdst, Rsrc1);
+      __ z_crj(Rsrc1, Rsrc2, Assembler::bcondHigh, done);
+      __ z_lgfr(Rdst, Rsrc2);
+    }
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Fallback for pre-z10 hardware: plain compare + conditional branch.
+instruct maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  predicate(!VM_Version::has_CompareBranch());
+  ins_cost(3 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MaxI $dst $src1,$src2\t MaxI" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+    Label done;
+
+    if (Rsrc1 == Rsrc2) {
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) {
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_brh(done);
+      __ z_lgfr(Rdst, Rsrc2);
+    } else if (Rdst == Rsrc2) {
+      __ z_cr(Rsrc2, Rsrc1);
+      __ z_brh(done);
+      __ z_lgfr(Rdst, Rsrc1);
+    } else {
+      __ z_lgfr(Rdst, Rsrc1);
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_brh(done);
+      __ z_lgfr(Rdst, Rsrc2);
+    }
+
+    __ bind(done);
+  %}
+
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Max with 32-bit immediate, branch-free (z196+).
+instruct z196_maxI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MaxI $dst $src1,$src2\t MaxI const32 (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    int      Isrc2 = $src2$$constant;
+
+    if (Rdst == Rsrc1) {
+      __ load_const_optimized(Z_R0_scratch, Isrc2);
+      __ z_cfi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotHigh);
+    } else {
+      __ load_const_optimized(Rdst, Isrc2);
+      __ z_cfi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Max with 32-bit immediate, compare + branch fallback.
+instruct maxI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MaxI $dst $src1,$src2\t MaxI const32" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_cfi($src1$$Register, $src2$$constant);
+    __ z_brh(done);
+    __ z_lgfi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Max with 16-bit immediate, branch-free (z196+).
+instruct z196_maxI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MaxI $dst $src1,$src2\t MaxI const16 (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    int      Isrc2 = $src2$$constant;
+    if (Rdst == Rsrc1) {
+      __ load_const_optimized(Z_R0_scratch, Isrc2);
+      __ z_chi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotHigh);
+    } else {
+      __ load_const_optimized(Rdst, Isrc2);
+      __ z_chi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Max with 16-bit immediate, compare + branch fallback.
+instruct maxI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MaxI $dst $src1,$src2\t MaxI const16" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_chi($src1$$Register, $src2$$constant);
+    __ z_brh(done);
+    __ z_lghi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Max with 8-bit immediate via fused compare-immediate-and-branch (z10+).
+// Format tagged "(z10 only)" for consistency with z10_minI_reg_imm8 and the
+// other z10/z196-gated variants.
+instruct z10_maxI_reg_imm8(iRegI dst, iRegI src1, immI8 src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MaxI $dst $src1,$src2\t MaxI const8 (z10 only)" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_cij($src1$$Register, $src2$$constant, Assembler::bcondHigh, done);
+    __ z_lghi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Abs---------------------------------------------------------------
+
+// abs(int) via LPR (load positive).
+instruct absI_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (AbsI src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_LOW);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LPR     $dst, $src" %}
+  opcode(LPR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// 0 - abs(int) via LNR (load negative) in a single instruction.
+instruct negabsI_reg(iRegI dst, iRegI src, immI_0 zero, flagsReg cr) %{
+  match(Set dst (SubI zero (AbsI src)));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_LOW);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LNR     $dst, $src" %}
+  opcode(LNR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Float Compares----------------------------------------------------
+
+// Compare floating, generate condition code.
+instruct cmpF_cc(flagsReg cr, regF src1, regF src2) %{
+  match(Set cr (CmpF src1 src2));
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "FCMPcc   $src1,$src2\t # float" %}
+  ins_encode %{ __ z_cebr($src1$$FloatRegister, $src2$$FloatRegister); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpD_cc(flagsReg cr, regD src1, regD src2) %{
+  match(Set cr (CmpD src1 src2));
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "FCMPcc   $src1,$src2 \t # double" %}
+  ins_encode %{ __ z_cdbr($src1$$FloatRegister, $src2$$FloatRegister); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare float register with float in memory (load fused into the compare).
+instruct cmpF_cc_mem(flagsReg cr, regF src1, memoryRX src2) %{
+  match(Set cr (CmpF src1 (LoadF src2)));
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "FCMPcc_mem $src1,$src2\t # floatMemory" %}
+  opcode(CEB_ZOPC);
+  ins_encode(z_form_rt_memFP(src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpD_cc_mem(flagsReg cr, regD src1, memoryRX src2) %{
+  match(Set cr (CmpD src1 (LoadD src2)));
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "DCMPcc_mem $src1,$src2\t # doubleMemory" %}
+  opcode(CDB_ZOPC);
+  ins_encode(z_form_rt_memFP(src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare floating, generate condition code
+// Compare against +/-0.0 via load-and-test of the register itself.
+instruct cmpF0_cc(flagsReg cr, regF src1, immFpm0 src2) %{
+  match(Set cr (CmpF src1 src2));
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "LTEBR    $src1,$src1\t # float" %}
+  opcode(LTEBR_ZOPC);
+  ins_encode(z_rreform(src1, src1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpD0_cc(flagsReg cr, regD src1, immDpm0 src2) %{
+  match(Set cr (CmpD src1 src2));
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "LTDBR    $src1,$src1 \t # double" %}
+  opcode(LTDBR_ZOPC);
+  ins_encode(z_rreform(src1, src1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare floating, generate -1,0,1
+instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsReg cr) %{
+  match(Set dst (CmpF3 src1 src2));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST * 5 + BRANCH_COST);
+  size(24);
+  format %{ "CmpF3    $dst,$src1,$src2" %}
+  ins_encode %{
+    // compare registers
+    __ z_cebr($src1$$FloatRegister, $src2$$FloatRegister);
+    // Convert condition code into -1,0,1, where
+    // -1 means unordered or less
+    //  0 means equal
+    //  1 means greater.
+    if (VM_Version::has_LoadStoreConditional()) {
+      // Branch-free variant: LOCGR conditionally overwrites dst based on
+      // the CC produced by the CEBR above. NOTE(review): this relies on
+      // the intervening LGHIs not changing the condition code.
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one, 1);
+      __ z_lghi( $dst$$Register, 0);
+      __ z_locgr($dst$$Register, one,       Assembler::bcondHigh);
+      __ z_locgr($dst$$Register, minus_one, Assembler::bcondLowOrNotOrdered);
+    } else {
+      // Branching variant; clear_reg is called with set_cc == false so the
+      // CC of the compare survives into the conditional branches.
+      Label done;
+      __ clear_reg($dst$$Register, true, false);
+      __ z_bre(done);
+      __ z_lhi($dst$$Register, 1);
+      __ z_brh(done);
+      __ z_lhi($dst$$Register, -1);
+      __ bind(done);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare double, generate -1,0,1 (same scheme as cmpF_reg above,
+// using CDBR for the double compare).
+instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsReg cr) %{
+  match(Set dst (CmpD3 src1 src2));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST * 5 + BRANCH_COST);
+  size(24);
+  format %{ "CmpD3    $dst,$src1,$src2" %}
+  ins_encode %{
+    // compare registers
+    __ z_cdbr($src1$$FloatRegister, $src2$$FloatRegister);
+    // Convert condition code into -1,0,1, where
+    // -1 means unordered or less
+    //  0 means equal
+    //  1 means greater.
+    if (VM_Version::has_LoadStoreConditional()) {
+      // Branch-free variant: LOCGR conditionally overwrites dst based on
+      // the CC produced by the CDBR above. NOTE(review): this relies on
+      // the intervening LGHIs not changing the condition code.
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one, 1);
+      __ z_lghi( $dst$$Register, 0);
+      __ z_locgr($dst$$Register, one,       Assembler::bcondHigh);
+      __ z_locgr($dst$$Register, minus_one, Assembler::bcondLowOrNotOrdered);
+    } else {
+      Label done;
+      // indicate unused result
+      (void) __ clear_reg($dst$$Register, true, false);
+      __ z_bre(done);
+      __ z_lhi($dst$$Register, 1);
+      __ z_brh(done);
+      __ z_lhi($dst$$Register, -1);
+      __ bind(done);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Branches---------------------------------------------------------
+// Jump
+
+// Direct Branch, short form (BRU, 16-bit relative offset).
+instruct branch(label labl) %{
+  match(Goto);
+  effect(USE labl);
+  ins_cost(BRANCH_COST);
+  size(4);
+  format %{ "BRU     $labl" %}
+  ins_encode(z_enc_bru(labl));
+  ins_pipe(pipe_class_dummy);
+  // If set to 1 this indicates that the current instruction is a
+  // short variant of a long branch. This avoids using this
+  // instruction in first-pass matching. It will then only be used in
+  // the `Shorten_branches' pass.
+  ins_short_branch(1);
+%}
+
+// Direct Branch, far form (BRUL, 32-bit relative offset).
+instruct branchFar(label labl) %{
+  match(Goto);
+  effect(USE labl);
+  ins_cost(BRANCH_COST);
+  size(6);
+  format %{ "BRUL   $labl" %}
+  ins_encode(z_enc_brul(labl));
+  ins_pipe(pipe_class_dummy);
+  // This is not a short variant of a branch, but the long variant.
+  ins_short_branch(0);
+%}
+
+// Conditional Near Branch
+instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
+  // Same match rule as `branchConFar'.
+  match(If cmp cr);
+  effect(USE lbl);
+  ins_cost(BRANCH_COST);
+  size(4);
+  format %{ "branch_con_short,$cmp   $cr, $lbl" %}
+  ins_encode(z_enc_branch_con_short(cmp, lbl));
+  ins_pipe(pipe_class_dummy);
+  // If set to 1 this indicates that the current instruction is a
+  // short variant of a long branch. This avoids using this
+  // instruction in first-pass matching. It will then only be used in
+  // the `Shorten_branches' pass.
+  ins_short_branch(1);
+%}
+
+// This is for cases when the z/Architecture conditional branch instruction
+// does not reach far enough. So we emit a far branch here, which is
+// more expensive.
+//
+// Conditional Far Branch
+instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
+  // Same match rule as `branchCon'.
+  match(If cmp cr);
+  effect(USE cr, USE lbl);
+  // Make more expensive to prefer compare_and_branch over separate instructions.
+  ins_cost(2 * BRANCH_COST);
+  size(6);
+  format %{ "branch_con_far,$cmp   $cr, $lbl" %}
+  ins_encode(z_enc_branch_con_far(cmp, lbl));
+  ins_pipe(pipe_class_dummy);
+  // This is not a short variant of a branch, but the long variant.
+  ins_short_branch(0);
+%}
+
+// Conditional branch closing a counted loop, short form.
+instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
+  match(CountedLoopEnd cmp cr);
+  effect(USE labl);
+  ins_cost(BRANCH_COST);
+  size(4);
+  format %{ "branch_con_short,$cmp   $labl\t # counted loop end" %}
+  ins_encode(z_enc_branch_con_short(cmp, labl));
+  ins_pipe(pipe_class_dummy);
+  // If set to 1 this indicates that the current instruction is a
+  // short variant of a long branch. This avoids using this
+  // instruction in first-pass matching. It will then only be used in
+  // the `Shorten_branches' pass.
+  ins_short_branch(1);
+%}
+
+// Conditional branch closing a counted loop, far form.
+instruct branchLoopEndFar(cmpOp cmp, flagsReg cr, label labl) %{
+  match(CountedLoopEnd cmp cr);
+  effect(USE labl);
+  ins_cost(BRANCH_COST);
+  size(6);
+  format %{ "branch_con_far,$cmp   $labl\t # counted loop end" %}
+  ins_encode(z_enc_branch_con_far(cmp, labl));
+  ins_pipe(pipe_class_dummy);
+  // This is not a short variant of a branch, but the long variant.
+  ins_short_branch(0);
+%}
+
+//----------Compare and Branch (short distance)------------------------------
+
+// Fused compare-and-branch rules (CRJ/CIJ family) with a 16-bit relative
+// branch offset. Every rule here is marked ins_short_branch(1) and has a
+// matching far variant below for the Shorten_branches pass to substitute
+// when the offset does not reach. All rules require the compare-and-branch
+// facility (VM_Version::has_CompareBranch).
+
+// INT REG operands for loop counter processing.
+instruct testAndBranchLoopEnd_Reg(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(CountedLoopEnd boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "test_and_branch_loop_end,$boolnode  $src1,$src2,$labl\t # counted loop end SHORT" %}
+  opcode(CRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// INT REG operands.
+instruct cmpb_RegI(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CRJ,$boolnode  $src1,$src2,$labl\t # SHORT" %}
+  opcode(CRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// Unsigned INT REG operands (CLRJ = compare logical).
+instruct cmpbU_RegI(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpU src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLRJ,$boolnode  $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// LONG REG operands
+instruct cmpb_RegL(cmpOpT boolnode, iRegL src1, iRegL src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpL src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CGRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+//  PTR REG operands
+
+// Separate rules for regular and narrow oops.  ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+instruct cmpb_RegPP(cmpOpT boolnode, iRegP src1, iRegP src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// Narrow oops can be compared without decoding (pointer equality
+// carries over to the compressed form).
+instruct cmpb_RegNN(cmpOpT boolnode, iRegN src1, iRegN src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// INT REG/IMM operands for loop counter processing
+instruct testAndBranchLoopEnd_Imm(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+  match(CountedLoopEnd boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "test_and_branch_loop_end,$boolnode  $src1,$src2,$labl\t # counted loop end SHORT" %}
+  opcode(CIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// INT REG/IMM operands
+instruct cmpb_RegI_imm(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CIJ,$boolnode  $src1,$src2,$labl\t # SHORT" %}
+  opcode(CIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// Unsigned INT REG/IMM operands (CLIJ; immediate must be unsigned 8 bit).
+instruct cmpbU_RegI_imm(cmpOpT boolnode, iRegI src1, uimmI8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpU src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// LONG REG/IMM operands
+instruct cmpb_RegL_imm(cmpOpT boolnode, iRegL src1, immL8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpL src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CGIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// PTR REG-imm operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+instruct cmpb_RegP_immP(cmpOpT boolnode, iRegP src1, immP8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+// Compare against zero only, do not mix N and P oops (encode/decode required).
+instruct cmpb_RegN_immP0(cmpOpT boolnode, iRegN src1, immP0 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+instruct cmpb_RegN_imm(cmpOpT boolnode, iRegN src1, immN8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);
+%}
+
+
+//----------Compare and Branch (far distance)------------------------------
+
+// Far variants of the rules above: emitted as a separate compare
+// instruction followed by BRCL (32-bit relative branch), see the
+// two-opcode form opcode(<compare>, BRCL_ZOPC). These are the
+// ins_short_branch(0) sisters substituted by Shorten_branches when the
+// 16-bit offset of the fused form does not reach.
+
+// INT REG operands for loop counter processing
+instruct testAndBranchLoopEnd_RegFar(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(CountedLoopEnd boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "test_and_branch_loop_end,$boolnode  $src1,$src2,$labl\t # counted loop end FAR" %}
+  opcode(CR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// INT REG operands
+instruct cmpb_RegI_Far(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CRJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// Unsigned INT REG operands
+instruct cmpbU_RegI_Far(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpU src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLRJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// LONG REG operands
+instruct cmpb_RegL_Far(cmpOpT boolnode, iRegL src1, iRegL src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpL src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CGRJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CGR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// PTR REG operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+instruct cmpb_RegPP_Far(cmpOpT boolnode, iRegP src1, iRegP src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGRJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+instruct cmpb_RegNN_Far(cmpOpT boolnode, iRegN src1, iRegN src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGRJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// INT REG/IMM operands for loop counter processing
+instruct testAndBranchLoopEnd_ImmFar(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+  match(CountedLoopEnd boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "test_and_branch_loop_end,$boolnode  $src1,$src2,$labl\t # counted loop end FAR" %}
+  opcode(CHI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// INT REG/IMM operands
+instruct cmpb_RegI_imm_Far(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CIJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CHI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// Unsigned INT REG/IMM operands
+instruct cmpbU_RegI_imm_Far(cmpOpT boolnode, iRegI src1, uimmI8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpU src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLIJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLFI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// LONG REG/IMM operands
+instruct cmpb_RegL_imm_Far(cmpOpT boolnode, iRegL src1, immL8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpL src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CGIJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CGHI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// PTR REG-imm operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+instruct cmpb_RegP_immP_Far(cmpOpT boolnode, iRegP src1, immP8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGFI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// Compare against zero only, do not mix N and P oops (encode/decode required).
+instruct cmpb_RegN_immP0_Far(cmpOpT boolnode, iRegN src1, immP0 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGFI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+instruct cmpb_RegN_immN_Far(cmpOpT boolnode, iRegN src1, immN8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode   $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGFI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);
+%}
+
+// ============================================================================
+// Long Compare
+
+// Due to a shortcoming in the ADLC, it mixes up expressions like:
+// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
+// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
+// are collapsed internally in the ADLC's dfa-gen code. The match for
+// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
+// foo match ends up with the wrong leaf. One fix is to not match both
+// reg-reg and reg-zero forms of long-compare. This is unfortunate because
+// both forms beat the trinary form of long-compare and both are very useful
+// on platforms which have few registers.
+
+// Manifest a CmpL3 result in an integer register. Very painful.
+// This is the test to avoid.
+instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr) %{
+  match(Set dst (CmpL3 src1 src2));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST * 5 + BRANCH_COST);
+  size(24);
+  format %{ "CmpL3 $dst,$src1,$src2" %}
+  ins_encode %{
+    Label done;
+    // compare registers
+    __ z_cgr($src1$$Register, $src2$$Register);
+    // Convert condition code into -1,0,1, where
+    // -1 means less
+    //  0 means equal
+    //  1 means greater.
+    if (VM_Version::has_LoadStoreConditional()) {
+      // Branch-free variant: LOCGR conditionally overwrites dst based on
+      // the CC produced by the CGR above. NOTE(review): this relies on
+      // the intervening LGHIs not changing the condition code.
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one, 1);
+      __ z_lghi( $dst$$Register, 0);
+      __ z_locgr($dst$$Register, one,       Assembler::bcondHigh);
+      __ z_locgr($dst$$Register, minus_one, Assembler::bcondLow);
+    } else {
+      __ clear_reg($dst$$Register, true, false);
+      __ z_bre(done);
+      __ z_lhi($dst$$Register, 1);
+      __ z_brh(done);
+      __ z_lhi($dst$$Register, -1);
+    }
+    // 'done' is only branched to on the non-LSC path; binding it
+    // unconditionally here is harmless.
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// Safepoint Instruction
+
+// Never selected (predicate false): placeholder rule that would emit an
+// "unimplemented" marker if it ever matched. The real rule is
+// safePoint_poll below.
+instruct safePoint() %{
+  match(SafePoint);
+  predicate(false);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "UNIMPLEMENTED Safepoint_ " %}
+  ins_encode(enc_unimplemented());
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Safepoint poll: a load from the polling page; the VM makes the page
+// inaccessible to stop threads at a safepoint.
+instruct safePoint_poll(iRegP poll, flagsReg cr) %{
+  match(SafePoint poll);
+  effect(USE poll, KILL cr); // R0 is killed, too.
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "TM      #0[,$poll],#111\t # Safepoint: poll for GC" %}
+  ins_encode %{
+    // Mark the code position where the load from the safepoint
+    // polling page was emitted as relocInfo::poll_type.
+    __ relocate(relocInfo::poll_type);
+    __ load_from_polling_page($poll$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+
+// Call Instructions
+
+// Call Java Static Instruction
+instruct CallStaticJavaDirect_dynTOC(method meth) %{
+  match(CallStaticJava);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CALL,static dynTOC $meth; ==> " %}
+  ins_encode( z_enc_java_static_call(meth) );
+  ins_pipe(pipe_class_dummy);
+  ins_alignment(2);
+%}
+
+// Call Java Dynamic Instruction
+instruct CallDynamicJavaDirect_dynTOC(method meth) %{
+  match(CallDynamicJava);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CALL,dynamic dynTOC $meth; ==> " %}
+  ins_encode(z_enc_java_dynamic_call(meth));
+  ins_pipe(pipe_class_dummy);
+  ins_alignment(2);
+%}
+
+// Call Runtime Instruction
+instruct CallRuntimeDirect(method meth) %{
+  match(CallRuntime);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  // One constant-table entry is consumed by the call target address.
+  ins_num_consts(1);
+  ins_alignment(2);
+  format %{ "CALL,runtime" %}
+  ins_encode( z_enc_java_to_runtime_call(meth) );
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Call runtime without safepoint - same as CallRuntime
+instruct CallLeafDirect(method meth) %{
+  match(CallLeaf);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  ins_num_consts(1);
+  ins_alignment(2);
+  format %{ "CALL,runtime leaf $meth" %}
+  ins_encode( z_enc_java_to_runtime_call(meth) );
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Call runtime without safepoint - same as CallLeaf
+instruct CallLeafNoFPDirect(method meth) %{
+  match(CallLeafNoFP);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  ins_num_consts(1);
+  format %{ "CALL,runtime leaf nofp $meth" %}
+  ins_encode( z_enc_java_to_runtime_call(meth) );
+  ins_pipe(pipe_class_dummy);
+  ins_alignment(2);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
+  match(TailCall jump_target method_oop);
+  ins_cost(CALL_COST);
+  size(2);
+  format %{ "Jmp     $jump_target\t# $method_oop holds method oop" %}
+  ins_encode %{ __ z_br($jump_target$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Return Instruction: branch to the return address in the link register.
+instruct Ret() %{
+  match(Return);
+  size(2);
+  format %{ "BR(Z_R14) // branch to link register" %}
+  ins_encode %{ __ z_br(Z_R14); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Tail Jump; remove the return address; jump to target.
+// TailCall above leaves the return address around.
+// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
+// The exception oop (ex_oop) must be in the first argument register
+// (rarg1RegP) at the jump.
+instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{
+  match(TailJump jump_target ex_oop);
+  ins_cost(CALL_COST);
+  size(8);
+  format %{ "TailJump $jump_target" %}
+  ins_encode %{
+    // Reload the return pc from the ABI frame into Z_ARG2 ("issuing pc")
+    // before jumping.
+    __ z_lg(Z_ARG2/* issuing pc */, _z_abi(return_pc), Z_SP);
+    __ z_br($jump_target$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+instruct CreateException(rarg1RegP ex_oop) %{
+  match(Set ex_oop (CreateEx));
+  ins_cost(0);
+  size(0);
+  format %{ "# exception oop; no code emitted" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Rethrow exception: The exception oop will come in the first
+// argument position. Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException() %{
+  match(Rethrow);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "Jmp    rethrow_stub" %}
+  ins_encode %{
+    cbuf.set_insts_mark();
+    __ load_const_optimized(Z_R1_scratch, (address)OptoRuntime::rethrow_stub());
+    __ z_br(Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Die now: emit an illegal trap instruction for unreachable code.
+instruct ShouldNotReachHere() %{
+  match(Halt);
+  ins_cost(CALL_COST);
+  size(2);
+  format %{ "ILLTRAP; ShouldNotReachHere" %}
+  ins_encode %{ __ z_illtrap(); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
+// array for an instance of the superklass. Set a hidden internal cache on a
+// hit (cache is checked with exposed code in gen_subtype_check()). Return
+// not zero for a miss or zero for a hit. The encoding ALSO sets flags.
+instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, flagsReg pcc,
+                             rarg4RegP scratch1, rarg5RegP scratch2) %{
+  match(Set index (PartialSubtypeCheck sub super));
+  effect(KILL pcc, KILL scratch1, KILL scratch2);
+  ins_cost(10 * DEFAULT_COST);
+  size(12);
+  format %{ "  CALL   PartialSubtypeCheck\n" %}
+  ins_encode %{
+    // Call the shared partial_subtype_check stub; its address is loaded
+    // into Z_ARG4 and called via BASR (return address in Z_R14).
+    AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check());
+    __ load_const_optimized(Z_ARG4, stub_address);
+    __ z_basr(Z_R14, Z_ARG4);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Variant matching the result being compared against zero: only the
+// condition code (pcc) is the architectural result; index is clobbered.
+instruct partialSubtypeCheck_vs_zero(flagsReg pcc, rarg2RegP sub, rarg3RegP super, immP0 zero,
+                                     rarg1RegP index, rarg4RegP scratch1, rarg5RegP scratch2) %{
+  match(Set pcc (CmpI (PartialSubtypeCheck sub super) zero));
+  effect(KILL scratch1, KILL scratch2, KILL index);
+  ins_cost(10 * DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CALL   PartialSubtypeCheck_vs_zero\n" %}
+  ins_encode %{
+    AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check());
+    __ load_const_optimized(Z_ARG4, stub_address);
+    __ z_basr(Z_R14, Z_ARG4);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// inlined locking and unlocking
+
+instruct cmpFastLock(flagsReg pcc, iRegP_N2P oop, iRegP_N2P box, iRegP tmp1, iRegP tmp2) %{
+  match(Set pcc (FastLock oop box));
+  effect(TEMP tmp1, TEMP tmp2);
+  ins_cost(100);
+  // TODO: s390 port size(VARIABLE_SIZE); // Uses load_const_optimized.
+  format %{ "FASTLOCK  $oop, $box; KILL Z_ARG4, Z_ARG5" %}
+  ins_encode %{ __ compiler_fast_lock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register,
+                                             UseBiasedLocking && !UseOptoBiasInlining); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpFastUnlock(flagsReg pcc, iRegP_N2P oop, iRegP_N2P box, iRegP tmp1, iRegP tmp2) %{
+  match(Set pcc (FastUnlock oop box));
+  effect(TEMP tmp1, TEMP tmp2);
+  ins_cost(100);
+  // TODO: s390 port size(FIXED_SIZE);  // emitted code depends on UseBiasedLocking being on/off.
+  format %{ "FASTUNLOCK  $oop, $box; KILL Z_ARG4, Z_ARG5" %}
+  ins_encode %{ __ compiler_fast_unlock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register,
+                                               UseBiasedLocking && !UseOptoBiasInlining); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ClearArray with a small compile-time constant length (SSlenDW operand):
+// emitted as straight-line code by Clear_Array_Const.
+instruct inlineCallClearArrayConst(SSlenDW cnt, iRegP_N2P base, Universe dummy, flagsReg cr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(KILL cr);
+  ins_cost(100);
+  // TODO: s390 port size(VARIABLE_SIZE);       // Variable in size due to varying #instructions.
+  format %{ "ClearArrayConst $cnt,$base" %}
+  ins_encode %{ __ Clear_Array_Const($cnt$$constant, $base$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ClearArray with a large compile-time constant length. Needs an even/odd
+// register pair (revenRegL/roddRegL) for the block-clearing sequence.
+instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
+  ins_cost(200);
+  // TODO: s390 port size(VARIABLE_SIZE);       // Variable in size due to optimized constant loader.
+  format %{ "ClearArrayConstBig $cnt,$base" %}
+  ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $srcA$$Register, $srcL$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ClearArray with a runtime (register) length; most expensive variant.
+instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  // TODO: s390 port size(FIXED_SIZE);  // z/Architecture: emitted code depends on PreferLAoverADD being on/off.
+  format %{ "ClearArrayVar $cnt,$base" %}
+  ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $srcA$$Register, $srcL$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// CompactStrings
+
+// String equals
+// StrEquals for Latin1 (byte[]) strings; delegates to the array_equals
+// macro with is_array=false and byte-element comparison.
+instruct string_equalsL(iRegP str1, iRegP str2, iRegI cnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+  format %{ "String Equals byte[] $str1,$str2,$cnt -> $result" %}
+  ins_encode %{
+    __ array_equals(false, $str1$$Register, $str2$$Register,
+                    $cnt$$Register, $oddReg$$Register, $evenReg$$Register,
+                    $result$$Register, true /* byte */);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// StrEquals for UTF-16 strings (and the legacy 'none' encoding used when
+// CompactStrings is off); char-element comparison.
+instruct string_equalsU(iRegP str1, iRegP str2, iRegI cnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(300);
+  format %{ "String Equals char[] $str1,$str2,$cnt -> $result" %}
+  ins_encode %{
+    __ array_equals(false, $str1$$Register, $str2$$Register,
+                    $cnt$$Register, $oddReg$$Register, $evenReg$$Register,
+                    $result$$Register, false /* byte */);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// StrEquals with a small constant count (uimmI8): compare with a single
+// CLC, then materialize a 0/1 result. Uses LOCR (load-on-condition) when
+// available, otherwise a branch around a clear.
+// NOTE(review): cnt is used directly as the CLC byte length for both LL and
+// UU encodings — presumably cnt is a byte count for compact-string StrEquals;
+// confirm against the intrinsic's callers.
+instruct string_equals_imm(iRegP str1, iRegP str2, uimmI8 cnt, iRegI result, flagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(KILL cr); // R0 is killed, too.
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL || ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
+  ins_cost(100);
+  format %{ "String Equals byte[] $str1,$str2,$cnt -> $result" %}
+  ins_encode %{
+    const int cnt_imm = $cnt$$constant;
+    // CLC compares cnt_imm bytes (length operand is cnt-1); skipped for cnt == 0,
+    // in which case the strings are trivially equal and result stays 1.
+    if (cnt_imm) { __ z_clc(0, cnt_imm - 1, $str1$$Register, 0, $str2$$Register); }
+    __ z_lhi($result$$Register, 1);
+    if (cnt_imm) {
+      if (VM_Version::has_LoadStoreConditional()) {
+        // Branchless: overwrite the preloaded 1 with 0 if CLC found a mismatch.
+        __ z_lhi(Z_R0_scratch, 0);
+        __ z_locr($result$$Register, Z_R0_scratch, Assembler::bcondNotEqual);
+      } else {
+        Label Lskip;
+        __ z_bre(Lskip);
+        __ clear_reg($result$$Register);
+        __ bind(Lskip);
+      }
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// StrEquals with a small constant count for the legacy 'none' encoding
+// (char[] elements): the CLC length is cnt chars << 1 bytes.
+instruct string_equalsC_imm(iRegP str1, iRegP str2, immI8 cnt, iRegI result, flagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(KILL cr); // R0 is killed, too.
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(100);
+  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
+  ins_encode %{
+    const int cnt_imm = $cnt$$constant; // positive immI8 (7 bits used)
+    // Compare 2*cnt bytes; skipped for cnt == 0 (trivially equal, result stays 1).
+    if (cnt_imm) { __ z_clc(0, (cnt_imm << 1) - 1, $str1$$Register, 0, $str2$$Register); }
+    __ z_lhi($result$$Register, 1);
+    if (cnt_imm) {
+      if (VM_Version::has_LoadStoreConditional()) {
+        // Branchless variant via load-on-condition.
+        __ z_lhi(Z_R0_scratch, 0);
+        __ z_locr($result$$Register, Z_R0_scratch, Assembler::bcondNotEqual);
+      } else {
+        Label Lskip;
+        __ z_bre(Lskip);
+        __ clear_reg($result$$Register);
+        __ bind(Lskip);
+      }
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Array equals
+// Arrays.equals for byte arrays. Length is read from the array headers by
+// the macro (is_array=true), hence the noreg count argument.
+instruct array_equalsB(iRegP ary1, iRegP ary2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (AryEq ary1 ary2));
+  effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result" %}
+  ins_encode %{
+    __ array_equals(true, $ary1$$Register, $ary2$$Register,
+                    noreg, $oddReg$$Register, $evenReg$$Register,
+                    $result$$Register, true /* byte */);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Arrays.equals for char arrays; same as array_equalsB but with
+// char-element comparison.
+instruct array_equalsC(iRegP ary1, iRegP ary2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (AryEq ary1 ary2));
+  effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result" %}
+  ins_encode %{
+    __ array_equals(true, $ary1$$Register, $ary2$$Register,
+                    noreg, $oddReg$$Register, $evenReg$$Register,
+                    $result$$Register, false /* byte */);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String CompareTo
+// String.compareTo, both strings Latin1 (byte[]). The counts must live in
+// the fixed argument registers expected by string_compare and are destroyed
+// by it (USE_KILL).
+instruct string_compareL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $oddReg$$Register, $evenReg$$Register,
+                      $result$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.compareTo, both strings UTF-16 (also covers the legacy 'none'
+// encoding when CompactStrings is off).
+instruct string_compareU(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrCompNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(300);
+  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $oddReg$$Register, $evenReg$$Register,
+                      $result$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.compareTo, Latin1 vs. UTF-16 (mixed encodings).
+instruct string_compareLU(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
+  ins_cost(300);
+  format %{ "String Compare byte[],char[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $oddReg$$Register, $evenReg$$Register,
+                      $result$$Register, StrIntrinsicNode::LU);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.compareTo, UTF-16 vs. Latin1. Note that the operands are swapped
+// (str2/cnt2 passed first) so the macro sees the UL case in LU order; the
+// UL mode parameter tells string_compare to adjust the result sign.
+instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(300);
+  format %{ "String Compare char[],byte[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  ins_encode %{
+    __ string_compare($str2$$Register, $str1$$Register,
+                      $cnt2$$Register, $cnt1$$Register,
+                      $oddReg$$Register, $evenReg$$Register,
+                      $result$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String IndexOfChar
+// String.indexOf(char) with the character in a register (UTF-16 haystack).
+instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  predicate(CompactStrings);
+  match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
+  effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+  ins_cost(200);
+  format %{ "String IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
+  ins_encode %{
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           $ch$$Register, 0 /* unused, ch is in register */,
+                           $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.indexOf with a single-character constant needle, UTF-16 haystack
+// (UU and the legacy 'none' encoding). The needle is a compile-time constant
+// array; its first char is extracted here and passed to string_indexof_char
+// as an immediate.
+instruct indexOf_imm1_U(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(200);
+  format %{ "String IndexOf U [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+  ins_encode %{
+    immPOper *needleOper = (immPOper *)$needle;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
+    jchar chr;
+#ifdef VM_LITTLE_ENDIAN
+    Unimplemented();
+#else
+    // Big-endian: assemble the UTF-16 char from its two bytes.
+    chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
+#endif
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           noreg, chr,
+                           $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.indexOf with a single-character constant needle, Latin1 haystack.
+instruct indexOf_imm1_L(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(200);
+  format %{ "String IndexOf L [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+  ins_encode %{
+    immPOper *needleOper = (immPOper *)$needle;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
+    // NOTE(review): as_byte() is signed, so bytes >= 0x80 sign-extend through
+    // the (jchar) cast — presumably string_indexof_char truncates to a byte
+    // when is_byte is true; confirm.
+    jchar chr = (jchar)needle_values->element_value(0).as_byte();
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           noreg, chr,
+                           $oddReg$$Register, $evenReg$$Register, true /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.indexOf with a single-character constant needle, UTF-16 haystack
+// with Latin1 needle (UL): the needle byte is widened to a jchar and the
+// haystack is searched as chars (is_byte=false).
+instruct indexOf_imm1_UL(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(200);
+  format %{ "String IndexOf UL [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+  ins_encode %{
+    immPOper *needleOper = (immPOper *)$needle;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
+    // NOTE(review): as_byte() is signed; needle bytes >= 0x80 sign-extend
+    // into the upper jchar bits here — confirm intended for UL.
+    jchar chr = (jchar)needle_values->element_value(0).as_byte();
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           noreg, chr,
+                           $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String IndexOf
+// String.indexOf with a constant needle length (<= 16 bits), both UTF-16.
+instruct indexOf_imm_U(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(250);
+  format %{ "String IndexOf U [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, noreg, $needlecntImm$$constant,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.indexOf with a constant needle length, both Latin1.
+instruct indexOf_imm_L(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(250);
+  format %{ "String IndexOf L [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, noreg, $needlecntImm$$constant,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.indexOf with a constant needle length, UTF-16 haystack / Latin1 needle.
+instruct indexOf_imm_UL(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(250);
+  format %{ "String IndexOf UL [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, noreg, $needlecntImm$$constant,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.indexOf with a runtime needle length, both UTF-16 (or legacy 'none').
+// The immediate needle count of 0 tells string_indexof to use the register.
+instruct indexOf_U(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(300);
+  format %{ "String IndexOf U [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, $needlecnt$$Register, 0,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.indexOf with a runtime needle length, both Latin1.
+instruct indexOf_L(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+  format %{ "String IndexOf L [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, $needlecnt$$Register, 0,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String.indexOf with a runtime needle length, UTF-16 haystack / Latin1 needle.
+instruct indexOf_UL(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(300);
+  format %{ "String IndexOf UL [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, $needlecnt$$Register, 0,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// char[] to byte[] compression
+// char[] to byte[] compression (StrCompressedCopy). result reports how many
+// chars were compressed (semantics defined by the string_compress macro).
+instruct string_compress(iRegP src, rarg5RegP dst, iRegI result, roddRegI len, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP_DEF result, USE_KILL dst, USE_KILL len, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  format %{ "String Compress $src->$dst($len) -> $result" %}
+  ins_encode %{
+    __ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
+                       $evenReg$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// byte[] to char[] inflation. trot implementation is shorter, but slower than the unrolled icm(h) loop.
+//instruct string_inflate_trot(Universe dummy, iRegP src, revenRegP dst, roddRegI len, iRegI tmp, flagsReg cr) %{
+//  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+//  effect(USE_KILL dst, USE_KILL len, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+//  predicate(VM_Version::has_ETF2Enhancements());
+//  ins_cost(300);
+//  format %{ "String Inflate (trot) $dst,$src($len)" %}
+//  ins_encode %{
+//    __ string_inflate_trot($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register);
+//  %}
+//  ins_pipe(pipe_class_dummy);
+//%}
+
+// byte[] to char[] inflation
+// byte[] to char[] inflation (StrInflatedCopy); no value result, only the
+// side effect of the copy (hence the Universe dummy operand).
+instruct string_inflate(Universe dummy, rarg5RegP src, iRegP dst, roddRegI len, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(USE_KILL src, USE_KILL len, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  format %{ "String Inflate $src->$dst($len)" %}
+  ins_encode %{
+    __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $evenReg$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// StringCoding.java intrinsics
+// StringCoding.hasNegatives intrinsic: scan a byte array for any byte with
+// the sign bit set.
+instruct has_negatives(rarg5RegP ary1, iRegI len, iRegI result, roddRegI oddReg, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+  match(Set result (HasNegatives ary1 len))
+  effect(TEMP_DEF result, USE_KILL ary1, TEMP oddReg, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  format %{ "has negatives byte[] $ary1($len) -> $result" %}
+  ins_encode %{
+    __ has_negatives($result$$Register, $ary1$$Register, $len$$Register,
+                     $oddReg$$Register, $evenReg$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// encode char[] to byte[] in ISO_8859_1
+// EncodeISOArray: encode char[] to byte[] in ISO-8859-1. Reuses the
+// string_compress macro via its overload that takes an extra temp (tmp2).
+instruct encode_iso_array(rarg5RegP src, iRegP dst, iRegI result, roddRegI len, revenRegI evenReg, iRegI tmp, iRegI tmp2, flagsReg cr) %{
+  match(Set result (EncodeISOArray src (Binary dst len)));
+  effect(TEMP_DEF result, USE_KILL src, USE_KILL len, TEMP evenReg, TEMP tmp, TEMP tmp2, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  format %{ "Encode array $src->$dst($len) -> $result" %}
+  ins_encode %{
+    __ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
+                       $evenReg$$Register, $tmp$$Register, $tmp2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch (root_instr_name [preceding_instruction]*);
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+//  [, ...]);
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace (instr_name([instruction_number.operand_name]*));
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser. An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == EAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(eRegI dst, eRegI src) %{
+//   match(Set dst (CopyI src));
+// %}
+//
+// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+//   match(Set dst (AddI dst src));
+//   effect(KILL cr);
+// %}
+//
+// // Change (inc mov) to lea
+// peephole %{
+//   // increment preceded by register-register move
+//   peepmatch (incI_eReg movI);
+//   // require that the destination register of the increment
+//   // match the destination register of the move
+//   peepconstraint (0.dst == 1.dst);
+//   // construct a replacement instruction that sets
+//   // the destination to (move's source register + one)
+//   peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// Implementation no longer uses movX instructions since
+// machine-independent system no longer uses CopyX nodes.
+//
+// peephole %{
+//   peepmatch (incI_eReg movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+//   peepmatch (decI_eReg movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+//   peepmatch (addI_eReg_imm movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+//   peepmatch (addP_eReg_imm movP);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaP_eReg_immI(0.dst 1.src 0.src));
+// %}
+
+
+//  This peephole rule does not work, probably because ADLC can't handle two effects:
+//  Effect 1 is defining 0.op1 and effect 2 is setting CC
+// condense a load from memory and subsequent test for zero
+// into a single, more efficient ICM instruction.
+// peephole %{
+//   peepmatch (compI_iReg_imm0 loadI);
+//   peepconstraint (1.dst == 0.op1);
+//   peepreplace (loadtest15_iReg_mem(0.op1 0.op1 1.mem));
+// %}
+
+// // Change load of spilled value to only a spill
+// instruct storeI(memory mem, eRegI src) %{
+//   match(Set mem (StoreI mem src));
+// %}
+//
+// instruct loadI(eRegI dst, memory mem) %{
+//   match(Set dst (LoadI mem));
+// %}
+//
+// Peephole: a load of a value just stored to the same location is redundant;
+// keep only the store.
+peephole %{
+  peepmatch (loadI storeI);
+  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
+  peepreplace (storeI(1.mem 1.mem 1.src));
+%}
+
+// Peephole: same store/re-load elimination for longs.
+peephole %{
+  peepmatch (loadL storeL);
+  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
+  peepreplace (storeL(1.mem 1.mem 1.src));
+%}
+
+// Peephole: store/re-load elimination for pointers.
+// NOTE(review): unlike the loadI/loadL rules above, this one constrains
+// "1.dst == 0.mem" and replaces with storeP(1.dst ...) rather than using
+// the store's memory operand (1.mem) — verify against the operand names of
+// the storeP instruct; this looks inconsistent with the int/long rules.
+peephole %{
+  peepmatch (loadP storeP);
+  peepconstraint (1.src == 0.dst, 1.dst == 0.mem);
+  peepreplace (storeP(1.dst 1.dst 1.src));
+%}
+
+//----------SUPERWORD RULES---------------------------------------------------
+
+//  Expand rules for special cases
+
+// Expand-only helper (no match rule, predicate false): spill a float
+// register to a stack slot with STE.
+instruct expand_storeF(stackSlotF mem, regF src) %{
+  // No match rule, false predicate, for expand only.
+  effect(DEF mem, USE src);
+  predicate(false);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "STE      $src,$mem\t # replicate(float2stack)" %}
+  opcode(STE_ZOPC, STE_ZOPC);
+  ins_encode(z_form_rt_mem(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Expand-only helper: zero-extending 32-bit load from a stack slot into a
+// 64-bit register (LLGF).
+instruct expand_LoadLogical_I2L(iRegL dst, stackSlotF mem) %{
+  // No match rule, false predicate, for expand only.
+  effect(DEF dst, USE mem);
+  predicate(false);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LLGF     $dst,$mem\t # replicate(stack2reg(unsigned))" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int to packed int values (8 Bytes)
+// Expand-only helper: duplicate the low 32 bits of src into both halves of
+// dst (shift the value into the upper half, then OR in the lower half).
+instruct expand_Repl2I_reg(iRegL dst, iRegL src) %{
+  // Dummy match rule, false predicate, for expand only.
+  match(Set dst (ConvI2L src));
+  predicate(false);
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "REPLIC2F $dst,$src\t # replicate(pack2F)" %}
+  ins_encode %{
+    if ($dst$$Register == $src$$Register) {
+      // In-place: build the shifted copy in the scratch register first.
+      __ z_sllg(Z_R0_scratch, $src$$Register, 64-32);
+      __ z_ogr($dst$$Register, Z_R0_scratch);
+    }  else {
+      __ z_sllg($dst$$Register, $src$$Register, 64-32);
+      __ z_ogr( $dst$$Register, $src$$Register);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replication
+
+// Exploit rotate_then_insert, if available
+// Replicate scalar byte to packed byte values (8 Bytes).
+// ReplicateB from a register: spread one byte across all 8 byte lanes by
+// successive rotate-then-insert doublings (1 -> 2 -> 4 -> 8 bytes).
+instruct Repl8B_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{
+  match(Set dst (ReplicateB src));
+  effect(KILL cr);
+  predicate((n->as_Vector()->length() == 8));
+  format %{ "REPLIC8B $dst,$src\t # pack8B" %}
+  ins_encode %{
+    if ($dst$$Register != $src$$Register) {
+      __ z_lgr($dst$$Register, $src$$Register);
+    }
+    __ rotate_then_insert($dst$$Register, $dst$$Register, 48, 55,  8, false);
+    __ rotate_then_insert($dst$$Register, $dst$$Register, 32, 47, 16, false);
+    __ rotate_then_insert($dst$$Register, $dst$$Register,  0, 31, 32, false);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar byte to packed byte values (8 Bytes).
+// ReplicateB from an immediate that is neither 0 nor -1 (those are handled
+// by the cheaper rules below): build the 32-bit pattern at compile time and
+// load it into both register halves.
+instruct Repl8B_imm(iRegL dst, immB_n0m1 src) %{
+  match(Set dst (ReplicateB src));
+  predicate(n->as_Vector()->length() == 8);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC8B $dst,$src\t # pack8B imm" %}
+  ins_encode %{
+    int64_t  Isrc8 = $src$$constant & 0x000000ff;
+    int64_t Isrc16 =  Isrc8 <<  8 |  Isrc8;
+    int64_t Isrc32 = Isrc16 << 16 | Isrc16;
+    assert(Isrc8 != 0x000000ff && Isrc8 != 0, "should be handled by other match rules.");
+
+    __ z_llilf($dst$$Register, Isrc32);  // low 32 bits, upper half zeroed
+    __ z_iihf($dst$$Register, Isrc32);   // insert into high 32 bits
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar byte to packed byte values (8 Bytes).
+// ReplicateB of zero: a single LA-zero clears the whole register.
+instruct Repl8B_imm0(iRegL dst, immI_0 src) %{
+  match(Set dst (ReplicateB src));
+  predicate(n->as_Vector()->length() == 8);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC8B $dst,$src\t # pack8B imm0" %}
+  ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar byte to packed byte values (8 Bytes).
+// ReplicateB of -1: LGHI -1 sign-extends to all-ones in one instruction.
+instruct Repl8B_immm1(iRegL dst, immB_minus1 src) %{
+  match(Set dst (ReplicateB src));
+  predicate(n->as_Vector()->length() == 8);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC8B $dst,$src\t # pack8B immm1" %}
+  ins_encode %{ __ z_lghi($dst$$Register, -1); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Exploit rotate_then_insert, if available
+// Replicate scalar short to packed short values (8 Bytes).
+// ReplicateS from a register: spread one halfword across all 4 lanes via
+// rotate-then-insert doublings (2 -> 4 -> 8 bytes).
+instruct Repl4S_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{
+  match(Set dst (ReplicateS src));
+  effect(KILL cr);
+  predicate((n->as_Vector()->length() == 4));
+  format %{ "REPLIC4S $dst,$src\t # pack4S" %}
+  ins_encode %{
+    if ($dst$$Register != $src$$Register) {
+      __ z_lgr($dst$$Register, $src$$Register);
+    }
+    __ rotate_then_insert($dst$$Register, $dst$$Register, 32, 47, 16, false);
+    __ rotate_then_insert($dst$$Register, $dst$$Register,  0, 31, 32, false);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar short to packed short values (8 Bytes).
+// ReplicateS from an immediate that is neither 0 nor -1: build the 32-bit
+// pattern at compile time and load it into both register halves.
+instruct Repl4S_imm(iRegL dst, immS_n0m1 src) %{
+  match(Set dst (ReplicateS src));
+  predicate(n->as_Vector()->length() == 4);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC4S $dst,$src\t # pack4S imm" %}
+  ins_encode %{
+    int64_t Isrc16 = $src$$constant & 0x0000ffff;
+    int64_t Isrc32 = Isrc16 << 16 | Isrc16;
+    assert(Isrc16 != 0x0000ffff && Isrc16 != 0, "Repl4S_imm: (src == " INT64_FORMAT
+           ") should be handled by other match rules.", $src$$constant);
+
+    __ z_llilf($dst$$Register, Isrc32);  // low 32 bits, upper half zeroed
+    __ z_iihf($dst$$Register, Isrc32);   // insert into high 32 bits
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar short to packed short values (8 Bytes).
+// ReplicateS of zero: clear the register in one instruction.
+instruct Repl4S_imm0(iRegL dst, immI_0 src) %{
+  match(Set dst (ReplicateS src));
+  predicate(n->as_Vector()->length() == 4);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC4S $dst,$src\t # pack4S imm0" %}
+  ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar short to packed short values (8 Bytes).
+// ReplicateS of -1: LGHI -1 yields all-ones.
+instruct Repl4S_immm1(iRegL dst, immS_minus1 src) %{
+  match(Set dst (ReplicateS src));
+  predicate(n->as_Vector()->length() == 4);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC4S $dst,$src\t # pack4S immm1" %}
+  ins_encode %{ __ z_lghi($dst$$Register, -1); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Exploit rotate_then_insert, if available.
+// Replicate scalar int to packed int values (8 Bytes).
+instruct Repl2I_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{
+  match(Set dst (ReplicateI src));
+  effect(KILL cr);
+  predicate((n->as_Vector()->length() == 2));
+  format %{ "REPLIC2I $dst,$src\t # pack2I" %}
+  ins_encode %{
+    if ($dst$$Register != $src$$Register) {
+      __ z_lgr($dst$$Register, $src$$Register);
+    }
+    // Duplicate the low word into the high word.
+    __ rotate_then_insert($dst$$Register, $dst$$Register, 0, 31, 32, false);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int to packed int values (8 Bytes).
+// Immediate variant; constants 0 and -1 are handled by the cheaper rules below.
+instruct Repl2I_imm(iRegL dst, immI_n0m1 src) %{
+  match(Set dst (ReplicateI src));
+  predicate(n->as_Vector()->length() == 2);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC2I $dst,$src\t # pack2I imm" %}
+  ins_encode %{
+    int64_t Isrc32 = $src$$constant;
+    assert(Isrc32 != -1 && Isrc32 != 0, "should be handled by other match rules.");
+
+    // Load the 32-bit constant into both register halves.
+    __ z_llilf($dst$$Register, Isrc32);
+    __ z_iihf($dst$$Register, Isrc32);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int to packed int values (8 Bytes).
+// Special case zero: one instruction, no condition code clobber (no KILL cr).
+instruct Repl2I_imm0(iRegL dst, immI_0 src) %{
+  match(Set dst (ReplicateI src));
+  predicate(n->as_Vector()->length() == 2);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC2I $dst,$src\t # pack2I imm0" %}
+  ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int to packed int values (8 Bytes).
+// Special case -1: sign-extending LGHI sets all 64 bits in one instruction.
+instruct Repl2I_immm1(iRegL dst, immI_minus1 src) %{
+  match(Set dst (ReplicateI src));
+  predicate(n->as_Vector()->length() == 2);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC2I $dst,$src\t # pack2I immm1" %}
+  ins_encode %{ __ z_lghi($dst$$Register, -1); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+//
+
+// Replicate scalar float to packed float values in GREG (8 Bytes).
+// Fallback for hardware without FP support enhancements: route the float
+// through a stack slot, reload it zero-extended into a GREG, then duplicate
+// the word (see the expand sub-rules).
+instruct Repl2F_reg_indirect(iRegL dst, regF src, flagsReg cr) %{
+  match(Set dst (ReplicateF src));
+  effect(KILL cr);
+  predicate(!VM_Version::has_FPSupportEnhancements() && n->as_Vector()->length() == 2);
+  format %{ "REPLIC2F $dst,$src\t # pack2F indirect" %}
+  expand %{
+    stackSlotF tmp;
+    iRegL      tmp2;
+    expand_storeF(tmp, src);
+    expand_LoadLogical_I2L(tmp2, tmp);
+    expand_Repl2I_reg(dst, tmp2);
+  %}
+%}
+
+// Replicate scalar float to packed float values in GREG (8 Bytes).
+// Direct variant: LGDR moves the float register bits straight into a GREG.
+instruct Repl2F_reg_direct(iRegL dst, regF src, flagsReg cr) %{
+  match(Set dst (ReplicateF src));
+  effect(KILL cr);
+  predicate(VM_Version::has_FPSupportEnhancements() && n->as_Vector()->length() == 2);
+  format %{ "REPLIC2F $dst,$src\t # pack2F direct" %}
+  ins_encode %{
+    assert(VM_Version::has_FPSupportEnhancements(), "encoder should never be called on old H/W");
+    __ z_lgdr($dst$$Register, $src$$FloatRegister);
+
+    __ z_srlg(Z_R0_scratch, $dst$$Register, 32);  // Floats are left-justified in 64bit reg.
+    __ z_iilf($dst$$Register, 0);                 // Save a "result not ready" stall.
+    __ z_ogr($dst$$Register, Z_R0_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar float immediate to packed float values in GREG (8 Bytes).
+instruct Repl2F_imm(iRegL dst, immF src) %{
+  match(Set dst (ReplicateF src));
+  predicate(n->as_Vector()->length() == 2);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC2F $dst,$src\t # pack2F imm" %}
+  ins_encode %{
+    // Reinterpret the float constant's bits as an int via a union,
+    // then load the bit pattern into both register halves.
+    union {
+      int   Isrc32;
+      float Fsrc32;
+    };
+    Fsrc32 = $src$$constant;
+    __ z_llilf($dst$$Register, Isrc32);
+    __ z_iihf($dst$$Register, Isrc32);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar float immediate zeroes to packed float values in GREG (8 Bytes).
+// Do this only for 'real' zeroes, especially don't lose the sign of negative zeroes.
+instruct Repl2F_imm0(iRegL dst, immFp0 src) %{
+  match(Set dst (ReplicateF src));
+  predicate(n->as_Vector()->length() == 2);
+  ins_should_rematerialize(true);
+  format %{ "REPLIC2F $dst,$src\t # pack2F imm0" %}
+  ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store
+
+// Store Aligned Packed Byte register to memory (8 Bytes).
+// Any 8-byte vector lives in a single GREG, so a plain 64-bit store suffices.
+instruct storeA8B(memory mem, iRegL src) %{
+  match(Set mem (StoreVector mem src));
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "STG     $src,$mem\t # ST(packed8B)" %}
+  opcode(STG_ZOPC, STG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load
+
+// Load an 8-byte vector from memory into a GREG with a plain 64-bit load.
+instruct loadV8(iRegL dst, memory mem) %{
+  match(Set dst (LoadVector mem));
+  predicate(n->as_LoadVector()->memory_size() == 8);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "LG      $dst,$mem\t # L(packed8B)" %}
+  opcode(LG_ZOPC, LG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------POPULATION COUNT RULES--------------------------------------------
+
+// Byte reverse
+
+// Byte-reverse a 32-bit value with a single LRVR instruction.
+instruct bytes_reverse_int(iRegI dst, iRegI src) %{
+  match(Set dst (ReverseBytesI src));
+  predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "LRVR    $dst,$src\t# byte reverse int" %}
+  opcode(LRVR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Byte-reverse a 64-bit value with a single LRVGR instruction.
+instruct bytes_reverse_long(iRegL dst, iRegL src) %{
+  match(Set dst (ReverseBytesL src));
+  predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LRVGR   $dst,$src\t# byte reverse long" %}
+  opcode(LRVGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Leading zeroes
+
+// The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
+// returns the bit position of the leftmost 1 in the 64bit source register.
+// As the bits are numbered from left to right (0..63), the returned
+// position index is equivalent to the number of leading zeroes.
+// If no 1-bit is found (i.e. the register contains zero), the instruction
+// returns position 64. That's exactly what we need.
+
+// dst must be the even register of an even/odd pair: FLOGR writes into a
+// register pair, clobbering the odd partner (hence KILL tmp).
+instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
+  match(Set dst (CountLeadingZerosI src));
+  effect(KILL tmp, KILL cr);
+  predicate(UseCountLeadingZerosInstruction);  // See Matcher::match_rule_supported
+  ins_cost(3 * DEFAULT_COST);
+  size(14);
+  format %{ "SLLG    $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
+            "IILH    $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
+            "FLOGR   $dst,$dst"
+         %}
+  ins_encode %{
+    // Performance experiments indicate that "FLOGR" is using some kind of
+    // iteration to find the leftmost "1" bit.
+    //
+    // The prior implementation zero-extended the 32-bit argument to 64 bit,
+    // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
+    // We could gain measurable speedup in micro benchmark:
+    //
+    //               leading   trailing
+    //   z10:   int     2.04       1.68
+    //         long     1.00       1.02
+    //   z196:  int     0.99       1.23
+    //         long     1.00       1.11
+    //
+    // By shifting the argument into the high-word instead of zero-extending it.
+    // The add'l branch on condition (taken for a zero argument, very infrequent,
+    // good prediction) is well compensated for by the savings.
+    //
+    // We leave the previous implementation in for some time in the future when
+    // the "FLOGR" instruction may become less iterative.
+
+    // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
+    __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
+    __ z_iilh($dst$$Register, 0x8000);   // Insert "stop bit" to force result 32 for zero src.
+    __ z_flogr($dst$$Register, $dst$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// 64-bit variant: FLOGR delivers the leading-zero count directly.
+instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
+  match(Set dst (CountLeadingZerosL src));
+  effect(KILL tmp, KILL cr);
+  predicate(UseCountLeadingZerosInstruction);  // See Matcher::match_rule_supported
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "FLOGR   $dst,$src \t# count leading zeros (long)\n\t" %}
+  ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// trailing zeroes
+
+// We transform the trailing zeroes problem to a leading zeroes problem
+// such that we can use the FLOGR instruction to our advantage.
+
+// With
+//   tmp1 = src - 1
+// we flip all trailing zeroes to ones and the rightmost one to zero.
+// All other bits remain unchanged.
+// With the complement
+//   tmp2 = ~src
+// we get all ones in the trailing zeroes positions. Thus,
+//   tmp3 = tmp1 & tmp2
+// yields ones in the trailing zeroes positions and zeroes elsewhere.
+// Now we can apply FLOGR and get 64-(trailing zeroes).
+instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
+  match(Set dst (CountTrailingZerosI src));
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+  predicate(UseCountLeadingZerosInstruction);  // See Matcher::match_rule_supported
+  ins_cost(8 * DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
+  format %{ "LLGFR   $dst,$src  \t# clear upper 32 bits (we are dealing with int)\n\t"
+            "LCGFR   $tmp,$src  \t# load 2's complement (32->64 bit)\n\t"
+            "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
+            "AGHI    $tmp,-1    \t# tmp2 = -src-1 = ~src\n\t"
+            "NGR     $dst,$tmp  \t# tmp3 = tmp1&tmp2\n\t"
+            "FLOGR   $dst,$dst  \t# count trailing zeros (int)\n\t"
+            "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
+            "LCR     $dst,$dst  \t# res = -tmp4"
+         %}
+  ins_encode %{
+    // NOTE(review): the format string above documents an older code sequence;
+    // the instructions actually emitted below use the (src & -src) approach.
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    // Rtmp only needed for zero-argument shortcut. With kill effect in
+    // match rule Rsrc = roddReg would be possible, saving one register.
+    Register Rtmp = $tmp$$Register;
+
+    assert_different_registers(Rdst, Rsrc, Rtmp);
+
+    // Algorithm:
+    // - Isolate the least significant (rightmost) set bit using (src & (-src)).
+    //   All other bits in the result are zero.
+    // - Find the "leftmost one" bit position in the single-bit result from previous step.
+    // - 63-("leftmost one" bit position) gives the # of trailing zeros.
+
+    // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
+    Label done;
+    __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
+    __ z_lcgfr(Rtmp, Rsrc);
+    __ z_bre(done);                    // Taken very infrequently, good prediction, no BHT entry.
+
+    __ z_nr(Rtmp, Rsrc);               // (src) & (-src) leaves nothing but least significant bit.
+    __ z_ahi(Rtmp,  -1);               // Subtract one to fill all trailing zero positions with ones.
+                                       // Use 32bit op to prevent borrow propagation (case Rdst = 0x80000000)
+                                       // into upper half of reg. Not relevant with sllg below.
+    __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
+    __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
+                                       // Depends on CC set by ahi above.
+                                       // Taken very infrequently, good prediction, no BHT entry.
+                                       // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
+                                       // after SLLG Rdst == 0(64bit)).
+    __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
+    __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
+    __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Count trailing zeroes of a 64-bit operand.
+// Algorithm: isolate the rightmost 1 bit via (src & -src), subtract 1 to get
+// a mask covering the trailing-zero positions, then let FLOGR find the
+// leftmost 1 and convert 64-pos into the trailing-zero count.
+// dst must be the even register of an even/odd pair; FLOGR clobbers the odd
+// partner, hence the KILL tmp effect.
+instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
+  match(Set dst (CountTrailingZerosL src));
+  effect(TEMP_DEF dst, KILL tmp, KILL cr);
+  predicate(UseCountLeadingZerosInstruction);  // See Matcher::match_rule_supported
+  ins_cost(8 * DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
+  format %{ "LCGR    $dst,$src  \t# tmp1 = -src\n\t"
+            "NGR     $dst,$src  \t# tmp2 = src & -src (isolate rightmost 1)\n\t"
+            "AGHI    $dst,-1    \t# tmp3 = tmp2-1\n\t"
+            "FLOGR   $dst,$dst  \t# count trailing zeros (long), kill $tmp\n\t"
+            "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
+            "LCR     $dst,$dst  \t# res = -tmp4"
+         %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
+
+    // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
+    __ z_lcgr(Rdst, Rsrc);
+    __ z_ngr(Rdst, Rsrc);
+    __ add2reg(Rdst,   -1);
+    __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
+    __ add2reg(Rdst,  -64);
+    __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+// bit count
+
+// Population count of a 32-bit operand.
+// POPCNT delivers per-byte bit counts; the shift/add ladder below folds the
++// four byte counts of the low word into a single sum in the low byte.
+instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
+  match(Set dst (PopCountI src));
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+  predicate(UsePopCountInstruction && VM_Version::has_PopCount());
+  ins_cost(DEFAULT_COST);
+  size(24);
+  format %{ "POPCNT  $dst,$src\t# pop count int" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+
+    // Prefer compile-time assertion over run-time SIGILL.
+    assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
+    assert_different_registers(Rdst, Rtmp);
+
+    // Version 2: shows 10%(z196) improvement over original.
+    __ z_popcnt(Rdst, Rsrc);
+    __ z_srlg(Rtmp, Rdst, 16); // calc  byte4+byte6 and byte5+byte7
+    __ z_alr(Rdst, Rtmp);      //   into byte6 and byte7
+    __ z_srlg(Rtmp, Rdst,  8); // calc (byte4+byte6) + (byte5+byte7)
+    __ z_alr(Rdst, Rtmp);      //   into byte7
+    __ z_llgcr(Rdst, Rdst);    // zero-extend sum
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Population count of a 64-bit operand.
+// POPCNT delivers per-byte bit counts; the add-high-halves plus shift/add
+// ladder below folds all eight byte counts into the top byte, which the
+// final shift moves down into the result.
+instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
+  match(Set dst (PopCountL src));
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+  predicate(UsePopCountInstruction && VM_Version::has_PopCount());
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "POPCNT  $dst,$src\t# pop count long" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+
+    // Prefer compile-time assertion over run-time SIGILL.
+    assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
+    assert_different_registers(Rdst, Rtmp);
+
+    // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
+    __ z_popcnt(Rdst, Rsrc);
+    __ z_ahhlr(Rdst, Rdst, Rdst);
+    __ z_sllg(Rtmp, Rdst, 16);
+    __ z_algr(Rdst, Rtmp);
+    __ z_sllg(Rtmp, Rdst,  8);
+    __ z_algr(Rdst, Rtmp);
+    __ z_srlg(Rdst, Rdst, 56);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+
+// ============================================================================
+// TYPE PROFILING RULES
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/sharedRuntime_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,3552 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/debugInfoRec.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "registerSaver_s390.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_s390.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "opto/ad.hpp"
+#include "opto/runtime.hpp"
+#endif
+
+#ifdef PRODUCT
+#define __ masm->
+#else
+#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
+#endif
+
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
+
+#define RegisterSaver_LiveIntReg(regname) \
+  { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }
+
+#define RegisterSaver_LiveFloatReg(regname) \
+  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
+
+// Registers which are not saved/restored, but which still get a frame slot.
+// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
+#define RegisterSaver_ExcludedIntReg(regname) \
+  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
+
+// Registers which are not saved/restored, but which still get a frame slot.
+// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
+#define RegisterSaver_ExcludedFloatReg(regname) \
+  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
+
+// Register set 'all_registers': float regs F0, F2..F15 (F1 is scratch) and
+// int regs R2..R13 (R0/R1 are scratch, R14/R15 are return pc / SP).
+static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
+  // Live registers which get spilled to the stack. Register positions
+  // in this array correspond directly to the stack layout.
+  //
+  // live float registers:
+  //
+  RegisterSaver_LiveFloatReg(Z_F0 ),
+  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
+  RegisterSaver_LiveFloatReg(Z_F2 ),
+  RegisterSaver_LiveFloatReg(Z_F3 ),
+  RegisterSaver_LiveFloatReg(Z_F4 ),
+  RegisterSaver_LiveFloatReg(Z_F5 ),
+  RegisterSaver_LiveFloatReg(Z_F6 ),
+  RegisterSaver_LiveFloatReg(Z_F7 ),
+  RegisterSaver_LiveFloatReg(Z_F8 ),
+  RegisterSaver_LiveFloatReg(Z_F9 ),
+  RegisterSaver_LiveFloatReg(Z_F10),
+  RegisterSaver_LiveFloatReg(Z_F11),
+  RegisterSaver_LiveFloatReg(Z_F12),
+  RegisterSaver_LiveFloatReg(Z_F13),
+  RegisterSaver_LiveFloatReg(Z_F14),
+  RegisterSaver_LiveFloatReg(Z_F15),
+  //
+  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
+  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
+  RegisterSaver_LiveIntReg(Z_R2 ),
+  RegisterSaver_LiveIntReg(Z_R3 ),
+  RegisterSaver_LiveIntReg(Z_R4 ),
+  RegisterSaver_LiveIntReg(Z_R5 ),
+  RegisterSaver_LiveIntReg(Z_R6 ),
+  RegisterSaver_LiveIntReg(Z_R7 ),
+  RegisterSaver_LiveIntReg(Z_R8 ),
+  RegisterSaver_LiveIntReg(Z_R9 ),
+  RegisterSaver_LiveIntReg(Z_R10),
+  RegisterSaver_LiveIntReg(Z_R11),
+  RegisterSaver_LiveIntReg(Z_R12),
+  RegisterSaver_LiveIntReg(Z_R13),
+  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
+  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
+};
+
+// Register set 'all_integer_registers': only R2..R13 are saved; all float
+// entries are excluded but keep their slots so the frame size matches
+// RegisterSaver_LiveRegs.
+static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
+  // Live registers which get spilled to the stack. Register positions
+  // in this array correspond directly to the stack layout.
+  //
+  // live float registers: All excluded, but still they get a stack slot to get same frame size.
+  //
+  RegisterSaver_ExcludedFloatReg(Z_F0 ),
+  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
+  RegisterSaver_ExcludedFloatReg(Z_F2 ),
+  RegisterSaver_ExcludedFloatReg(Z_F3 ),
+  RegisterSaver_ExcludedFloatReg(Z_F4 ),
+  RegisterSaver_ExcludedFloatReg(Z_F5 ),
+  RegisterSaver_ExcludedFloatReg(Z_F6 ),
+  RegisterSaver_ExcludedFloatReg(Z_F7 ),
+  RegisterSaver_ExcludedFloatReg(Z_F8 ),
+  RegisterSaver_ExcludedFloatReg(Z_F9 ),
+  RegisterSaver_ExcludedFloatReg(Z_F10),
+  RegisterSaver_ExcludedFloatReg(Z_F11),
+  RegisterSaver_ExcludedFloatReg(Z_F12),
+  RegisterSaver_ExcludedFloatReg(Z_F13),
+  RegisterSaver_ExcludedFloatReg(Z_F14),
+  RegisterSaver_ExcludedFloatReg(Z_F15),
+  //
+  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
+  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
+  RegisterSaver_LiveIntReg(Z_R2 ),
+  RegisterSaver_LiveIntReg(Z_R3 ),
+  RegisterSaver_LiveIntReg(Z_R4 ),
+  RegisterSaver_LiveIntReg(Z_R5 ),
+  RegisterSaver_LiveIntReg(Z_R6 ),
+  RegisterSaver_LiveIntReg(Z_R7 ),
+  RegisterSaver_LiveIntReg(Z_R8 ),
+  RegisterSaver_LiveIntReg(Z_R9 ),
+  RegisterSaver_LiveIntReg(Z_R10),
+  RegisterSaver_LiveIntReg(Z_R11),
+  RegisterSaver_LiveIntReg(Z_R12),
+  RegisterSaver_LiveIntReg(Z_R13),
+  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
+  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
+};
+
+// Register set 'all_registers_except_r2': like RegisterSaver_LiveRegs, but
+// Z_R2 is not saved/restored (its slot is still reserved so the frame size
+// stays identical).
+static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
+  // Live registers which get spilled to the stack. Register positions
+  // in this array correspond directly to the stack layout.
+  //
+  // live float registers:
+  //
+  RegisterSaver_LiveFloatReg(Z_F0 ),
+  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
+  RegisterSaver_LiveFloatReg(Z_F2 ),
+  RegisterSaver_LiveFloatReg(Z_F3 ),
+  RegisterSaver_LiveFloatReg(Z_F4 ),
+  RegisterSaver_LiveFloatReg(Z_F5 ),
+  RegisterSaver_LiveFloatReg(Z_F6 ),
+  RegisterSaver_LiveFloatReg(Z_F7 ),
+  RegisterSaver_LiveFloatReg(Z_F8 ),
+  RegisterSaver_LiveFloatReg(Z_F9 ),
+  RegisterSaver_LiveFloatReg(Z_F10),
+  RegisterSaver_LiveFloatReg(Z_F11),
+  RegisterSaver_LiveFloatReg(Z_F12),
+  RegisterSaver_LiveFloatReg(Z_F13),
+  RegisterSaver_LiveFloatReg(Z_F14),
+  RegisterSaver_LiveFloatReg(Z_F15),
+  //
+  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
+  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
+  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
+  RegisterSaver_LiveIntReg(Z_R3 ),
+  RegisterSaver_LiveIntReg(Z_R4 ),
+  RegisterSaver_LiveIntReg(Z_R5 ),
+  RegisterSaver_LiveIntReg(Z_R6 ),
+  RegisterSaver_LiveIntReg(Z_R7 ),
+  RegisterSaver_LiveIntReg(Z_R8 ),
+  RegisterSaver_LiveIntReg(Z_R9 ),
+  RegisterSaver_LiveIntReg(Z_R10),
+  RegisterSaver_LiveIntReg(Z_R11),
+  RegisterSaver_LiveIntReg(Z_R12),
+  RegisterSaver_LiveIntReg(Z_R13),
+  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
+  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
+};
+
+// Live argument registers which get spilled to the stack.
+// Register set 'arg_registers': the four float and five int argument registers.
+static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
+  RegisterSaver_LiveFloatReg(Z_FARG1),
+  RegisterSaver_LiveFloatReg(Z_FARG2),
+  RegisterSaver_LiveFloatReg(Z_FARG3),
+  RegisterSaver_LiveFloatReg(Z_FARG4),
+  RegisterSaver_LiveIntReg(Z_ARG1),
+  RegisterSaver_LiveIntReg(Z_ARG2),
+  RegisterSaver_LiveIntReg(Z_ARG3),
+  RegisterSaver_LiveIntReg(Z_ARG4),
+  RegisterSaver_LiveIntReg(Z_ARG5)
+};
+
+// Register set 'all_volatile_registers': only the volatile (caller-saved)
+// registers F0, F2..F7 and R2..R5 are saved.
+static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
+  // Live registers which get spilled to the stack. Register positions
+  // in this array correspond directly to the stack layout.
+  //
+  // live float registers:
+  //
+  RegisterSaver_LiveFloatReg(Z_F0 ),
+  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
+  RegisterSaver_LiveFloatReg(Z_F2 ),
+  RegisterSaver_LiveFloatReg(Z_F3 ),
+  RegisterSaver_LiveFloatReg(Z_F4 ),
+  RegisterSaver_LiveFloatReg(Z_F5 ),
+  RegisterSaver_LiveFloatReg(Z_F6 ),
+  RegisterSaver_LiveFloatReg(Z_F7 ),
+  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
+  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
+  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
+  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
+  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
+  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
+  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
+  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
+  //
+  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
+  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
+  RegisterSaver_LiveIntReg(Z_R2 ),
+  RegisterSaver_LiveIntReg(Z_R3 ),
+  RegisterSaver_LiveIntReg(Z_R4 ),
+  RegisterSaver_LiveIntReg(Z_R5 ),
+  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
+  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
+  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
+  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
+  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
+  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
+  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
+  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
+  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
+  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
+};
+
+// Size in bytes of the register save area for the given register set.
+// Derived from the corresponding live-register descriptor table: every
+// table entry (live or excluded) occupies one reg_size slot.
+int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
+  size_t table_bytes = 0;
+  switch (reg_set) {
+    case all_registers:           table_bytes = sizeof(RegisterSaver_LiveRegs);          break;
+    case all_registers_except_r2: table_bytes = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
+    case all_integer_registers:   table_bytes = sizeof(RegisterSaver_LiveIntRegs);       break;
+    case all_volatile_registers:  table_bytes = sizeof(RegisterSaver_LiveVolatileRegs);  break;
+    case arg_registers:           table_bytes = sizeof(RegisterSaver_LiveArgRegs);       break;
+    default: ShouldNotReachHere();
+  }
+  const int num_entries = (int)(table_bytes / sizeof(RegisterSaver::LiveRegType));
+  return num_entries * reg_size;
+}
+
+
+// Total size of a register-save frame: the save area for the given register
+// set plus the z/ABI stack linkage area (z_abi_160) at the bottom.
+int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
+  return live_reg_save_size(reg_set) + frame::z_abi_160_size;
+}
+
+
+// return_pc: Specify the register that should be stored as the return pc in the current frame.
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
+  // Record volatile registers as callee-save values in an OopMap so
+  // their save locations will be propagated to the caller frame's
+  // RegisterMap during StackFrameStream construction (needed for
+  // deoptimization; see compiledVFrame::create_stack_value).
+
+  // Calculate frame size.
+  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
+  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
+  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
+
+  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
+  OopMap* map = new OopMap(frame_size_in_slots, 0);
+
+  int regstosave_num = 0;
+  const RegisterSaver::LiveRegType* live_regs = NULL;
+
+  switch (reg_set) {
+    case all_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
+      live_regs      = RegisterSaver_LiveRegs;
+      break;
+    case all_registers_except_r2:
+      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
+      live_regs      = RegisterSaver_LiveRegsWithoutR2;
+      break;
+    case all_integer_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
+      live_regs      = RegisterSaver_LiveIntRegs;
+      break;
+    case all_volatile_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
+      live_regs      = RegisterSaver_LiveVolatileRegs;
+      break;
+    case arg_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
+      live_regs      = RegisterSaver_LiveArgRegs;
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  // Save return pc in old frame.
+  __ save_return_pc(return_pc);
+
+  // Push a new frame (includes stack linkage).
+  __ push_frame(frame_size_in_bytes);
+
+  // Register save area in new frame starts above z_abi_160 area.
+  int offset = register_save_offset;
+
+  Register first = noreg;
+  Register last  = noreg;
+  int      first_offset = -1;
+  bool     float_spilled = false;
+
+  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
+    int reg_num  = live_regs[i].reg_num;
+    int reg_type = live_regs[i].reg_type;
+
+    switch (reg_type) {
+      case RegisterSaver::int_reg: {
+        Register reg = as_Register(reg_num);
+        if (last != reg->predecessor()) {
+          if (first != noreg) {
+            __ z_stmg(first, last, first_offset, Z_SP);
+          }
+          first = reg;
+          first_offset = offset;
+          DEBUG_ONLY(float_spilled = false);
+        }
+        last = reg;
+        assert(last != Z_R0, "r0 would require special treatment");
+        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
+        break;
+      }
+
+      case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
+        continue; // Continue with next loop iteration.
+
+      case RegisterSaver::float_reg: {
+        FloatRegister freg = as_FloatRegister(reg_num);
+        __ z_std(freg, offset, Z_SP);
+        DEBUG_ONLY(float_spilled = true);
+        break;
+      }
+
+      default:
+        ShouldNotReachHere();
+        break;
+    }
+
+    // Second set_callee_saved is really a waste but we'll keep things as they were for now
+    map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
+    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
+  }
+  assert(first != noreg, "Should spill at least one int reg.");
+  __ z_stmg(first, last, first_offset, Z_SP);
+
+  // And we're done.
+  return map;
+}
+
+
+// Generate the OopMap (again, regs where saved before).
+OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
+  // Calculate frame size.
+  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
+  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
+  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
+
+  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
+  OopMap* map = new OopMap(frame_size_in_slots, 0);
+
+  int regstosave_num = 0;
+  const RegisterSaver::LiveRegType* live_regs = NULL;
+
+  switch (reg_set) {
+    case all_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
+      live_regs      = RegisterSaver_LiveRegs;
+      break;
+    case all_registers_except_r2:
+      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
+      live_regs      = RegisterSaver_LiveRegsWithoutR2;
+      break;
+    case all_integer_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
+      live_regs      = RegisterSaver_LiveIntRegs;
+      break;
+    case all_volatile_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
+      live_regs      = RegisterSaver_LiveVolatileRegs;
+      break;
+    case arg_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
+      live_regs      = RegisterSaver_LiveArgRegs;
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  // Register save area in new frame starts above z_abi_160 area.
+  int offset = register_save_offset;
+  for (int i = 0; i < regstosave_num; i++) {
+    if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
+      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
+      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
+    }
+    offset += reg_size;
+  }
+  return map;
+}
+
+
+// Pop the current frame and restore all the registers that we saved.
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
+  int offset;
+  const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
+
+  Register first = noreg;
+  Register last = noreg;
+  int      first_offset = -1;
+  bool     float_spilled = false;
+
+  int regstosave_num = 0;
+  const RegisterSaver::LiveRegType* live_regs = NULL;
+
+  switch (reg_set) {
+    case all_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
+      live_regs      = RegisterSaver_LiveRegs;
+      break;
+    case all_registers_except_r2:
+      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
+      live_regs      = RegisterSaver_LiveRegsWithoutR2;
+      break;
+    case all_integer_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
+      live_regs      = RegisterSaver_LiveIntRegs;
+      break;
+    case all_volatile_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
+      live_regs      = RegisterSaver_LiveVolatileRegs;
+      break;
+    case arg_registers:
+      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
+      live_regs      = RegisterSaver_LiveArgRegs;
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  // Restore all registers (ints and floats).
+
+  // Register save area in new frame starts above z_abi_160 area.
+  offset = register_save_offset;
+
+  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
+    int reg_num  = live_regs[i].reg_num;
+    int reg_type = live_regs[i].reg_type;
+
+    switch (reg_type) {
+      case RegisterSaver::excluded_reg:
+        continue; // Continue with next loop iteration.
+
+      case RegisterSaver::int_reg: {
+        Register reg = as_Register(reg_num);
+        if (last != reg->predecessor()) {
+          if (first != noreg) {
+            __ z_lmg(first, last, first_offset, Z_SP);
+          }
+          first = reg;
+          first_offset = offset;
+          DEBUG_ONLY(float_spilled = false);
+        }
+        last = reg;
+        assert(last != Z_R0, "r0 would require special treatment");
+        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
+        break;
+      }
+
+      case RegisterSaver::float_reg: {
+        FloatRegister freg = as_FloatRegister(reg_num);
+        __ z_ld(freg, offset, Z_SP);
+        DEBUG_ONLY(float_spilled = true);
+        break;
+      }
+
+      default:
+        ShouldNotReachHere();
+    }
+  }
+  assert(first != noreg, "Should spill at least one int reg.");
+  __ z_lmg(first, last, first_offset, Z_SP);
+
+  // Pop the frame.
+  __ pop_frame();
+
+  // Restore the flags.
+  __ restore_return_pc();
+}
+
+
+// Pop the current frame and restore the registers that might be holding a result.
+void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
+  int i;
+  int offset;
+  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
+                                   sizeof(RegisterSaver::LiveRegType);
+  const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
+
+  // Restore all result registers (ints and floats).
+  offset = register_save_offset;
+  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
+    int reg_num = RegisterSaver_LiveRegs[i].reg_num;
+    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
+    switch (reg_type) {
+      case RegisterSaver::excluded_reg:
+        continue; // Continue with next loop iteration.
+      case RegisterSaver::int_reg: {
+        if (as_Register(reg_num) == Z_RET) { // int result_reg
+          __ z_lg(as_Register(reg_num), offset, Z_SP);
+        }
+        break;
+      }
+      case RegisterSaver::float_reg: {
+        if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
+          __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
+        }
+        break;
+      }
+      default:
+        ShouldNotReachHere();
+    }
+  }
+}
+
+#if INCLUDE_CDS
// Worst-case size in bytes of the code emitted by generate_trampoline():
// a load_const of the destination plus the branch.
size_t SharedRuntime::trampoline_size() {
  // + 2: size of the 'z_br' issued by generate_trampoline() below
  // (presumably an RR-format, 2-byte instruction — confirm against assembler).
  return MacroAssembler::load_const_size() + 2;
}
+
// Emit a trampoline that loads 'destination' into the scratch register and
// branches to it. Must stay within trampoline_size() bytes.
void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
  // Think about using pc-relative branch.
  __ load_const(Z_R1_scratch, destination);
  __ z_br(Z_R1_scratch);
}
+#endif
+
+// ---------------------------------------------------------------------------
+void SharedRuntime::save_native_result(MacroAssembler * masm,
+                                       BasicType ret_type,
+                                       int frame_slots) {
+  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
+
+  switch (ret_type) {
+    case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
+    case T_BYTE:
+    case T_CHAR:
+    case T_SHORT:
+    case T_INT:
+      __ reg2mem_opt(Z_RET, memaddr, false);
+      break;
+    case T_OBJECT:   // Save pointer types as long.
+    case T_ARRAY:
+    case T_ADDRESS:
+    case T_VOID:
+    case T_LONG:
+      __ reg2mem_opt(Z_RET, memaddr);
+      break;
+    case T_FLOAT:
+      __ freg2mem_opt(Z_FRET, memaddr, false);
+      break;
+    case T_DOUBLE:
+      __ freg2mem_opt(Z_FRET, memaddr);
+      break;
+  }
+}
+
+void SharedRuntime::restore_native_result(MacroAssembler *masm,
+                                          BasicType       ret_type,
+                                          int             frame_slots) {
+  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
+
+  switch (ret_type) {
+    case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
+    case T_BYTE:
+    case T_CHAR:
+    case T_SHORT:
+    case T_INT:
+      __ mem2reg_opt(Z_RET, memaddr, false);
+      break;
+    case T_OBJECT:   // Restore pointer types as long.
+    case T_ARRAY:
+    case T_ADDRESS:
+    case T_VOID:
+    case T_LONG:
+      __ mem2reg_opt(Z_RET, memaddr);
+      break;
+    case T_FLOAT:
+      __ mem2freg_opt(Z_FRET, memaddr, false);
+      break;
+    case T_DOUBLE:
+      __ mem2freg_opt(Z_FRET, memaddr);
+      break;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go. Values in the VMRegPair regs array refer to 4-byte
+// quantities. Values less than VMRegImpl::stack0 are registers, those above
+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
+// as framesizes are fixed.
+// VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. Registers
+// up to RegisterImpl::number_of_registers are the 64-bit integer registers.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words, which are
+// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
+// units regardless of build.
+
+// The Java calling convention is a "shifted" version of the C ABI.
+// By skipping the first C ABI register we can call non-static jni methods
+// with small numbers of arguments without having to shuffle the arguments
+// at all. Since we control the java ABI we ought to at least get some
+// advantage out of it.
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+  // c2c calling conventions for compiled-compiled calls.
+
+  // An int/float occupies 1 slot here.
+  const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats.
+  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
+
+  const VMReg z_iarg_reg[5] = {
+    Z_R2->as_VMReg(),
+    Z_R3->as_VMReg(),
+    Z_R4->as_VMReg(),
+    Z_R5->as_VMReg(),
+    Z_R6->as_VMReg()
+  };
+  const VMReg z_farg_reg[4] = {
+    Z_F0->as_VMReg(),
+    Z_F2->as_VMReg(),
+    Z_F4->as_VMReg(),
+    Z_F6->as_VMReg()
+  };
+  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
+  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
+
+  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
+  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
+
+  int i;
+  int stk = 0;
+  int ireg = 0;
+  int freg = 0;
+
+  for (int i = 0; i < total_args_passed; ++i) {
+    switch (sig_bt[i]) {
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+      case T_INT:
+        if (ireg < z_num_iarg_registers) {
+          // Put int/ptr in register.
+          regs[i].set1(z_iarg_reg[ireg]);
+          ++ireg;
+        } else {
+          // Put int/ptr on stack.
+          regs[i].set1(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_intfloat;
+        }
+        break;
+      case T_LONG:
+        assert(sig_bt[i+1] == T_VOID, "expecting half");
+        if (ireg < z_num_iarg_registers) {
+          // Put long in register.
+          regs[i].set2(z_iarg_reg[ireg]);
+          ++ireg;
+        } else {
+          // Put long on stack and align to 2 slots.
+          if (stk & 0x1) { ++stk; }
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_OBJECT:
+      case T_ARRAY:
+      case T_ADDRESS:
+        if (ireg < z_num_iarg_registers) {
+          // Put ptr in register.
+          regs[i].set2(z_iarg_reg[ireg]);
+          ++ireg;
+        } else {
+          // Put ptr on stack and align to 2 slots, because
+          // "64-bit pointers record oop-ishness on 2 aligned adjacent
+          // registers." (see OopFlow::build_oop_map).
+          if (stk & 0x1) { ++stk; }
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_FLOAT:
+        if (freg < z_num_farg_registers) {
+          // Put float in register.
+          regs[i].set1(z_farg_reg[freg]);
+          ++freg;
+        } else {
+          // Put float on stack.
+          regs[i].set1(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_intfloat;
+        }
+        break;
+      case T_DOUBLE:
+        assert(sig_bt[i+1] == T_VOID, "expecting half");
+        if (freg < z_num_farg_registers) {
+          // Put double in register.
+          regs[i].set2(z_farg_reg[freg]);
+          ++freg;
+        } else {
+          // Put double on stack and align to 2 slots.
+          if (stk & 0x1) { ++stk; }
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_VOID:
+        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+        // Do not count halves.
+        regs[i].set_bad();
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+  return round_to(stk, 2);
+}
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+                                        VMRegPair *regs,
+                                        VMRegPair *regs2,
+                                        int total_args_passed) {
+  assert(regs2 == NULL, "second VMRegPair array not used on this platform");
+
+  // Calling conventions for C runtime calls and calls to JNI native methods.
+  const VMReg z_iarg_reg[5] = {
+    Z_R2->as_VMReg(),
+    Z_R3->as_VMReg(),
+    Z_R4->as_VMReg(),
+    Z_R5->as_VMReg(),
+    Z_R6->as_VMReg()
+  };
+  const VMReg z_farg_reg[4] = {
+    Z_F0->as_VMReg(),
+    Z_F2->as_VMReg(),
+    Z_F4->as_VMReg(),
+    Z_F6->as_VMReg()
+  };
+  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
+  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
+
+  // Check calling conventions consistency.
+  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
+  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
+
+  // Avoid passing C arguments in the wrong stack slots.
+
+  // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
+  // 2 such slots, like 64 bit values do.
+  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
+  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
+
+  int i;
+  // Leave room for C-compatible ABI
+  int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
+  int freg = 0;
+  int ireg = 0;
+
+  // We put the first 5 arguments into registers and the rest on the
+  // stack. Float arguments are already in their argument registers
+  // due to c2c calling conventions (see calling_convention).
+  for (int i = 0; i < total_args_passed; ++i) {
+    switch (sig_bt[i]) {
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+      case T_INT:
+        // Fall through, handle as long.
+      case T_LONG:
+      case T_OBJECT:
+      case T_ARRAY:
+      case T_ADDRESS:
+      case T_METADATA:
+        // Oops are already boxed if required (JNI).
+        if (ireg < z_num_iarg_registers) {
+          regs[i].set2(z_iarg_reg[ireg]);
+          ++ireg;
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_FLOAT:
+        if (freg < z_num_farg_registers) {
+          regs[i].set1(z_farg_reg[freg]);
+          ++freg;
+        } else {
+          regs[i].set1(VMRegImpl::stack2reg(stk+1));
+          stk +=  inc_stk_for_intfloat;
+        }
+        break;
+      case T_DOUBLE:
+        assert(sig_bt[i+1] == T_VOID, "expecting half");
+        if (freg < z_num_farg_registers) {
+          regs[i].set2(z_farg_reg[freg]);
+          ++freg;
+        } else {
+          // Put double on stack.
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_VOID:
+        // Do not count halves.
+        regs[i].set_bad();
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+  return round_to(stk, 2);
+}
+
+////////////////////////////////////////////////////////////////////////
+//
+//  Argument shufflers
+//
+////////////////////////////////////////////////////////////////////////
+
//----------------------------------------------------------------------
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
//----------------------------------------------------------------------
// Convert a VMReg stack location into an absolute slot index within the
// caller-visible frame by adding the ABI out-preserve bias.
static int reg2slot(VMReg r) {
  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}
+
// Same as reg2slot(), scaled to a byte offset from the stack pointer.
static int reg2offset(VMReg r) {
  return reg2slot(r) * VMRegImpl::stack_slot_size;
}
+
+static void verify_oop_args(MacroAssembler *masm,
+                            int total_args_passed,
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs) {
+  if (!VerifyOops) { return; }
+
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
+      VMReg r = regs[i].first();
+      assert(r->is_valid(), "bad oop arg");
+
+      if (r->is_stack()) {
+        __ z_lg(Z_R0_scratch,
+                Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
+        __ verify_oop(Z_R0_scratch);
+      } else {
+        __ verify_oop(r->as_Register());
+      }
+    }
+  }
+}
+
// Generate the compiled entry for a method-handle intrinsic: locate the
// receiver and/or the trailing MemberName argument in the incoming
// registers/stack and hand off to MethodHandles::generate_method_handle_dispatch.
static void gen_special_dispatch(MacroAssembler *masm,
                                 int total_args_passed,
                                 vmIntrinsics::ID special_dispatch,
                                 const BasicType *sig_bt,
                                 const VMRegPair *regs) {
  verify_oop_args(masm, total_args_passed, sig_bt, regs);

  // Now write the args into the outgoing interpreter space.
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);

  if (ref_kind != 0) {
    // linkTo* intrinsics carry an appended MemberName as the last argument.
    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
    member_reg = Z_R9;                       // Known to be free at this point.
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else {
    // Only invokeBasic has no ref_kind; it always has a receiver (the MH itself).
    guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
    has_receiver = true;
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");

    VMReg r = regs[member_arg_pos].first();
    assert(r->is_valid(), "bad member arg");

    if (r->is_stack()) {
      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(total_args_passed > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");

    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      assert(false, "receiver always in a register");
      receiver_reg = Z_R13;  // Known to be free at this point.
      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
                                                 receiver_reg, member_reg,
                                                 /*for_compiler_entry:*/ true);
}
+
+////////////////////////////////////////////////////////////////////////
+//
+//  Argument shufflers
+//
+////////////////////////////////////////////////////////////////////////
+
// Is the size of a vector size (in bytes) bigger than a size saved by default?
// 8 bytes registers are saved by default on z/Architecture.
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8 on this platform.
  assert(size <= 8, "%d bytes vectors are not supported", size);
  // Given the assert above, this is always false in valid builds:
  // no vector on this platform is wider than the default 8-byte save size.
  return size > 8;
}
+
//----------------------------------------------------------------------
// An oop arg. Must pass a handle not the oop itself
//----------------------------------------------------------------------
// Creates a handle (address of the oop's save slot) for an oop argument,
// records the oop's location in 'map', and places the handle at 'dst'.
// For the receiver, its save-slot offset is returned via *receiver_offset.
// A NULL oop is passed as a NULL handle (not a pointer to a NULL slot).
static void object_move(MacroAssembler *masm,
                        OopMap *map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int *receiver_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");

  // Must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack, put handle on stack or in register
    // If handle will be on the stack, use temp reg to calculate it.
    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    Label    skip;
    int      slot_in_older_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    // The oop stays in the caller's frame; record that slot in the map.
    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));

    // Handle = address of the caller-frame slot holding the oop.
    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
    // Test the oop itself for NULL (load_and_test sets the condition code).
    __ load_and_test_long(Z_R0, Address(rHandle));
    __ z_brne(skip);
    // Use a NULL handle if oop is NULL.
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do. rHandle uses the correct register
  } else {
    // Oop is passed in an input register. We must flush it to the stack.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    // Save slot chosen by the oop's argument-register number plus the base offset.
    int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
    NearLabel skip;

    if (is_receiver) {
      *receiver_offset = oop_slot_offset;
    }
    map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Flush Oop to stack, calculate handle.
    __ z_stg(rOop, oop_slot_offset, Z_SP);
    __ add2reg(rHandle, oop_slot_offset, Z_SP);

    // If Oop == NULL, use a NULL handle.
    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do here, since rHandle = dst.first()->as_Register in this case.
  }
}
+
+//----------------------------------------------------------------------
+// A float arg. May have to do float reg to int reg conversion
+//----------------------------------------------------------------------
+static void float_move(MacroAssembler *masm,
+                       VMRegPair src,
+                       VMRegPair dst,
+                       int framesize_in_slots,
+                       int workspace_slot_offset) {
+  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
+  int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
+
+  // We do not accept an argument in a VMRegPair to be spread over two slots,
+  // no matter what physical location (reg or stack) the slots may have.
+  // We just check for the unaccepted slot to be invalid.
+  assert(!src.second()->is_valid(), "float in arg spread over two slots");
+  assert(!dst.second()->is_valid(), "float out arg spread over two slots");
+
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack -> stack. The easiest of the bunch.
+      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
+               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
+    } else {
+      // stack to reg
+      Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
+      if (dst.first()->is_Register()) {
+        __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
+      } else {
+        __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
+      }
+    }
+  } else if (src.first()->is_Register()) {
+    if (dst.first()->is_stack()) {
+      // gpr -> stack
+      __ reg2mem_opt(src.first()->as_Register(),
+                     Address(Z_SP, reg2offset(dst.first()), false ));
+    } else {
+      if (dst.first()->is_Register()) {
+        // gpr -> gpr
+        __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
+                              src.first()->as_Register(), T_INT);
+      } else {
+        if (VM_Version::has_FPSupportEnhancements()) {
+          // gpr -> fpr. Exploit z10 capability of direct transfer.
+          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
+        } else {
+          // gpr -> fpr. Use work space on stack to transfer data.
+          Address   stackaddr(Z_SP, workspace_offset);
+
+          __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
+          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
+        }
+      }
+    }
+  } else {
+    if (dst.first()->is_stack()) {
+      // fpr -> stack
+      __ freg2mem_opt(src.first()->as_FloatRegister(),
+                      Address(Z_SP, reg2offset(dst.first())), false);
+    } else {
+      if (dst.first()->is_Register()) {
+        if (VM_Version::has_FPSupportEnhancements()) {
+          // fpr -> gpr.
+          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
+        } else {
+          // fpr -> gpr. Use work space on stack to transfer data.
+          Address   stackaddr(Z_SP, workspace_offset);
+
+          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
+          __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
+        }
+      } else {
+        // fpr -> fpr
+        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
+                               src.first()->as_FloatRegister(), T_FLOAT);
+      }
+    }
+  }
+}
+
//----------------------------------------------------------------------
// A double arg. May have to do double reg to long reg conversion
//----------------------------------------------------------------------
// Moves a 64-bit double between any combination of stack slot, GPR and FPR.
// 'workspace_slot_offset' names a scratch slot used for GPR<->FPR transfer
// on machines without direct-transfer support (pre-z10).
static void double_move(MacroAssembler *masm,
                        VMRegPair src,
                        VMRegPair dst,
                        int framesize_in_slots,
                        int workspace_slot_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;

  // Since src is always a java calling convention we know that the
  // src pair is always either all registers or all stack (and aligned?)

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
    } else {
      // stack to reg
      Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);

      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
                              src.first()->as_Register(), T_LONG);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);
          __ reg2mem_opt(src.first()->as_Register(), stackaddr);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr. Exploit z10 capability of direct transfer.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
        }
      } else {
        // fpr -> fpr
        // In theory these overlap but the ordering is such that this is likely a nop.
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
                               src.first()->as_FloatRegister(), T_DOUBLE);
      }
    }
  }
}
+
+//----------------------------------------------------------------------
+// A long arg.
+//----------------------------------------------------------------------
+static void long_move(MacroAssembler *masm,
+                      VMRegPair src,
+                      VMRegPair dst,
+                      int framesize_in_slots) {
+  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
+
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack -> stack. The easiest of the bunch.
+      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
+               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
+    } else {
+      // stack to reg
+      assert(dst.first()->is_Register(), "long dst value must be in GPR");
+      __ mem2reg_opt(dst.first()->as_Register(),
+                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
+    }
+  } else {
+    // reg to reg
+    assert(src.first()->is_Register(), "long src value must be in GPR");
+    if (dst.first()->is_stack()) {
+      // reg -> stack
+      __ reg2mem_opt(src.first()->as_Register(),
+                     Address(Z_SP, reg2offset(dst.first())));
+    } else {
+      // reg -> reg
+      assert(dst.first()->is_Register(), "long dst value must be in GPR");
+      __ move_reg_if_needed(dst.first()->as_Register(),
+                            T_LONG, src.first()->as_Register(), T_LONG);
+    }
+  }
+}
+
+
+//----------------------------------------------------------------------
+// A int-like arg.
+//----------------------------------------------------------------------
+// On z/Architecture we will store integer like items to the stack as 64 bit
+// items, according to the z/Architecture ABI, even though Java would only store
+// 32 bits for a parameter.
+// We do sign extension for all base types. That is ok since the only
+// unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
+// Sign extension 32->64 bit will thus not affect the value.
+//----------------------------------------------------------------------
+static void move32_64(MacroAssembler *masm,
+                      VMRegPair src,
+                      VMRegPair dst,
+                      int framesize_in_slots) {
+  // Incoming java args live in the caller's frame: bias stack-resident
+  // sources by the size of our own frame to reach them.
+  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
+
+  if (src.first()->is_stack()) {
+    Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
+    if (dst.first()->is_stack()) {
+      // stack -> stack. MVC not possible due to sign extension.
+      Address firstaddr(Z_SP, reg2offset(dst.first()));
+      __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
+      __ reg2mem_opt(Z_R0_scratch, firstaddr);
+    } else {
+      // stack -> reg, sign extended
+      __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
+    }
+  } else {
+    if (dst.first()->is_stack()) {
+      // reg -> stack, sign extended
+      Address firstaddr(Z_SP, reg2offset(dst.first()));
+      // NOTE(review): the sign extension is done in place and thus clobbers
+      // the high word of the SOURCE register. That is fine as long as each
+      // inbound arg is moved exactly once — verify for any new caller.
+      __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
+      __ reg2mem_opt(src.first()->as_Register(), firstaddr);
+    } else {
+      // reg -> reg, sign extended
+      __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
+
+// Save (map != NULL) or restore (map == NULL) the native-call register
+// arguments into/from the save area starting at stack slot arg_save_area.
+// When saving, any T_ARRAY slot is additionally recorded as an oop in 'map'
+// so the blocked-for-GC runtime call can relocate it.
+// Both passes must walk 'slot' identically so save and restore agree on
+// each argument's offset.
+static void save_or_restore_arguments(MacroAssembler *masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap *map,
+                                      VMRegPair *in_regs,
+                                      BasicType *in_sig_bt) {
+
+  // If map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  int slot = arg_save_area;
+  // Handle double words first.
+  for (int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int offset = slot * VMRegImpl::stack_slot_size;
+      slot += VMRegImpl::slots_per_word;
+      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
+      const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
+      Address   stackaddr(Z_SP, offset);
+      if (map != NULL) {
+        __ freg2mem_opt(freg, stackaddr);
+      } else {
+        __ mem2freg_opt(freg, stackaddr);
+      }
+    } else if (in_regs[i].first()->is_Register() &&
+               (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
+      int offset = slot * VMRegImpl::stack_slot_size;
+      const Register   reg = in_regs[i].first()->as_Register();
+      if (map != NULL) {
+        __ z_stg(reg, offset, Z_SP);
+        if (in_sig_bt[i] == T_ARRAY) {
+          // Record the saved oop; uses the pre-increment slot value.
+          map->set_oop(VMRegImpl::stack2reg(slot));
+        }
+      } else {
+        __ z_lg(reg, offset, Z_SP);
+      }
+      // FIX: advance the save-area cursor on BOTH passes. Previously the
+      // increment lived only in the restore branch, so the save pass stored
+      // every long/array register to the same slot (and registered the same
+      // oop-map slot repeatedly) while restore read from advancing slots.
+      slot += VMRegImpl::slots_per_word;
+      assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
+    }
+  }
+
+  // Save or restore single word registers.
+  for (int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_FloatRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int offset = slot * VMRegImpl::stack_slot_size;
+        slot++;
+        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
+        const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
+        Address   stackaddr(Z_SP, offset);
+        if (map != NULL) {
+          __ freg2mem_opt(freg, stackaddr, false);
+        } else {
+          __ mem2freg_opt(freg, stackaddr, false);
+        }
+      }
+    } else if (in_regs[i].first()->is_stack() &&
+               in_sig_bt[i] == T_ARRAY && map != NULL) {
+      // Stack-passed oop in the caller's frame: just describe it in the map.
+      int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+      map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+    }
+  }
+}
+
+// Check GCLocker::needs_gc and enter the runtime if it's true. This
+// keeps a new JNI critical region from starting until a GC has been
+// forced. Save down any oops in registers and describe them in an OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler   *masm,
+                                                const int stack_slots,
+                                                const int total_in_args,
+                                                const int arg_save_area,
+                                                OopMapSet *oop_maps,
+                                                VMRegPair *in_regs,
+                                                BasicType *in_sig_bt) {
+  __ block_comment("check GCLocker::needs_gc");
+  Label cont;
+
+  // Check GCLocker::_needs_gc flag.
+  __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
+  __ z_cli(0, Z_R1_scratch, 0);  // Compare the flag byte against zero.
+  __ z_bre(cont);                // Flag clear -> no pending GC, fast path.
+
+  // Save down any values that are live in registers and call into the
+  // runtime to halt for a GC.
+  OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  // map != NULL: store pass — spill register args and record oops in 'map'.
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+  // NOTE(review): the_pc is unused below; the gc map is keyed by __ offset()
+  // instead — confirm both denote the same code position.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(Z_SP, noreg);
+
+  __ block_comment("block_for_jni_critical");
+  __ z_lgr(Z_ARG1, Z_thread);  // Sole argument: the current thread.
+
+  address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
+  __ call_c(entry_point);
+  oop_maps->add_gc_map(__ offset(), map);
+
+  __ reset_last_Java_frame();
+
+  // Reload all the register arguments.
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+
+    // Destroy argument registers.
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        // Don't set CC.
+        __ clear_reg(in_regs[i].first()->as_Register(), true, false);
+      } else {
+        if (in_regs[i].first()->is_FloatRegister()) {
+          FloatRegister fr = in_regs[i].first()->as_FloatRegister();
+          __ z_lcdbr(fr, fr);  // Flip the sign: visibly clobber without trapping.
+        }
+      }
+    }
+
+    // Restore; surviving argument values prove the save/restore round trip.
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+}
+
+static void move_ptr(MacroAssembler *masm,
+                     VMRegPair src,
+                     VMRegPair dst,
+                     int framesize_in_slots) {
+  // A stack-resident source lives in the caller's frame; bias its offset
+  // by the size of the frame we pushed on top.
+  const int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
+
+  const bool src_on_stack = src.first()->is_stack();
+  const bool dst_on_stack = dst.first()->is_stack();
+
+  if (src_on_stack && dst_on_stack) {
+    // stack -> stack: bounce the pointer through the scratch register.
+    __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
+    __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
+  } else if (src_on_stack) {
+    // stack -> reg
+    __ mem2reg_opt(dst.first()->as_Register(),
+                   Address(Z_SP, reg2offset(src.first()) + frame_offset));
+  } else if (dst_on_stack) {
+    // reg -> stack (destination slot is in our own frame: no bias).
+    __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
+  } else {
+    // reg -> reg; emits nothing when source and destination coincide.
+    __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
+  }
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler *masm,
+                                   VMRegPair reg,
+                                   BasicType in_elem_type,
+                                   VMRegPair body_arg,
+                                   VMRegPair length_arg,
+                                   int framesize_in_slots) {
+  Register tmp_reg = Z_tmp_2;
+  Register tmp2_reg = Z_tmp_1;
+
+  // The temps must not alias the outgoing C argument locations written below.
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+
+  // Pass the length, ptr pair.
+  NearLabel set_out_args;
+  VMRegPair tmp, tmp2;
+
+  tmp.set_ptr(tmp_reg->as_VMReg());
+  tmp2.set_ptr(tmp2_reg->as_VMReg());
+  if (reg.first()->is_stack()) {
+    // Load the arg up from the stack.
+    move_ptr(masm, reg, tmp, framesize_in_slots);
+    reg = tmp;
+  }
+
+  const Register first = reg.first()->as_Register();  // Array oop, or NULL.
+
+  // Pre-zero both outputs so the NULL case passes (0, 0).
+  // Don't set CC, indicate unused result.
+  (void) __ clear_reg(tmp2_reg, true, false);
+  if (tmp_reg != first) {
+    __ clear_reg(tmp_reg, true, false);  // Don't set CC.
+  }
+  // NULL array -> skip ahead, leaving both outputs zero.
+  __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
+  // Non-null: length from the arrayOop header, body ptr = oop + base offset.
+  __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
+  __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);
+
+  __ bind(set_out_args);
+  move_ptr(masm, tmp, body_arg, framesize_in_slots);      // body pointer
+  move32_64(masm, tmp2, length_arg, framesize_in_slots);  // length (sign-extended)
+}
+
+//----------------------------------------------------------------------
+// Wrap a JNI call.
+//----------------------------------------------------------------------
+#undef USE_RESIZE_FRAME
+nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
+                                                const methodHandle& method,
+                                                int compile_id,
+                                                BasicType *in_sig_bt,
+                                                VMRegPair *in_regs,
+                                                BasicType ret_type) {
+#ifdef COMPILER2
+  int total_in_args = method->size_of_parameters();
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t) __ pc();
+    int vep_offset = ((intptr_t) __ pc()) - start;
+
+    gen_special_dispatch(masm, total_in_args,
+                         method->intrinsic_id(), in_sig_bt, in_regs);
+
+    int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
+
+    __ flush();
+
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
+
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet *) NULL);
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////
+  //
+  //  Precalculations before generating any code
+  //
+  ///////////////////////////////////////////////////////////////////////
+
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
+
+  //---------------------------------------------------------------------
+  // We have received a description of where all the java args are located
+  // on entry to the wrapper. We need to convert these args to where
+  // the jni function will expect them. To figure out where they go
+  // we convert the java signature to a C signature by inserting
+  // the hidden arguments as arg[0] and possibly arg[1] (static method).
+  //
+  // The first hidden argument arg[0] is a pointer to the JNI environment.
+  // It is generated for every call.
+  // The second argument arg[1] to the JNI call, which is hidden for static
+  // methods, is the boxed lock object. For static calls, the lock object
+  // is the static method itself. The oop is constructed here. for instance
+  // calls, the lock is performed on the object itself, the pointer of
+  // which is passed as the first visible argument.
+  //---------------------------------------------------------------------
+
+  // Additionally, on z/Architecture we must convert integers
+  // to longs in the C signature. We do this in advance in order to have
+  // no trouble with indexes into the bt-arrays.
+  // So convert the signature and registers now, and adjust the total number
+  // of in-arguments accordingly.
+  bool method_is_static = method->is_static();
+  int  total_c_args     = total_in_args;
+
+  if (!is_critical_native) {
+    int n_hidden_args = method_is_static ? 2 : 1;
+    total_c_args += n_hidden_args;
+  } else {
+    // No JNIEnv*, no this*, but unpacked arrays (base+length).
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        total_c_args ++;
+      }
+    }
+  }
+
+  BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
+  VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
+
+  // Create the signature for the C call:
+  //   1) add the JNIEnv*
+  //   2) add the class if the method is static
+  //   3) copy the rest of the incoming signature (shifted by the number of
+  //      hidden arguments)
+
+  int argc = 0;
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args; i++) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    int o = 0;
+    for (int i = 0; i < total_in_args; i++, o++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as tuples (int, elem*).
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[o]  = T_BYTE; break;
+            case 'C': in_elem_bt[o]  = T_CHAR; break;
+            case 'D': in_elem_bt[o]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[o]  = T_FLOAT; break;
+            case 'I': in_elem_bt[o]  = T_INT; break;
+            case 'J': in_elem_bt[o]  = T_LONG; break;
+            case 'S': in_elem_bt[o]  = T_SHORT; break;
+            case 'Z': in_elem_bt[o]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        in_elem_bt[o] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
+    assert(total_in_args == o, "must match");
+
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as tuples (int, elem*).
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+      }
+    }
+  }
+
+  ///////////////////////////////////////////////////////////////////////
+  // Now figure out where the args must be stored and how much stack space
+  // they require (neglecting out_preserve_stack_slots but providing space
+  // for storing the first five register arguments).
+  // It's weird, see int_stk_helper.
+  ///////////////////////////////////////////////////////////////////////
+
+  //---------------------------------------------------------------------
+  // Compute framesize for the wrapper.
+  //
+  // - We need to handlize all oops passed in registers.
+  // - We must create space for them here that is disjoint from the save area.
+  // - We always just allocate 5 words for storing down these object.
+  //   This allows us to simply record the base and use the Ireg number to
+  //   decide which slot to use.
+  // - Note that the reg number used to index the stack slot is the inbound
+  //   number, not the outbound number.
+  // - We must shuffle args to match the native convention,
+  //   and to include var-args space.
+  //---------------------------------------------------------------------
+
+  //---------------------------------------------------------------------
+  // Calculate the total number of stack slots we will need:
+  // - 1) abi requirements
+  // - 2) outgoing args
+  // - 3) space for inbound oop handle area
+  // - 4) space for handlizing a klass if static method
+  // - 5) space for a lock if synchronized method
+  // - 6) workspace (save rtn value, int<->float reg moves, ...)
+  // - 7) filler slots for alignment
+  //---------------------------------------------------------------------
+  // Here is how the space we have allocated will look like.
+  // Since we use resize_frame, we do not create a new stack frame,
+  // but just extend the one we got with our own data area.
+  //
+  // If an offset or pointer name points to a separator line, it is
+  // assumed that addressing with offset 0 selects storage starting
+  // at the first byte above the separator line.
+  //
+  //
+  //     ...                   ...
+  //      | caller's frame      |
+  // FP-> |---------------------|
+  //      | filler slots, if any|
+  //     7| #slots == mult of 2 |
+  //      |---------------------|
+  //      | work space          |
+  //     6| 2 slots = 8 bytes   |
+  //      |---------------------|
+  //     5| lock box (if sync)  |
+  //      |---------------------| <- lock_slot_offset
+  //     4| klass (if static)   |
+  //      |---------------------| <- klass_slot_offset
+  //     3| oopHandle area      |
+  //      | (save area for      |
+  //      |  critical natives)  |
+  //      |                     |
+  //      |                     |
+  //      |---------------------| <- oop_handle_offset
+  //     2| outbound memory     |
+  //     ...                   ...
+  //      | based arguments     |
+  //      |---------------------|
+  //      | vararg              |
+  //     ...                   ...
+  //      | area                |
+  //      |---------------------| <- out_arg_slot_offset
+  //     1| out_preserved_slots |
+  //     ...                   ...
+  //      | (z_abi spec)        |
+  // SP-> |---------------------| <- FP_slot_offset (back chain)
+  //     ...                   ...
+  //
+  //---------------------------------------------------------------------
+
+  // *_slot_offset indicates offset from SP in #stack slots
+  // *_offset      indicates offset from SP in #bytes
+
+  int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
+                    SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
+
+  // Now the space for the inbound oop handle area.
+  int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:
+          // Fall through.
+          case T_ARRAY:
+          case T_LONG: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else {
+        if (in_regs[i].first()->is_FloatRegister()) {
+          switch (in_sig_bt[i]) {
+            case T_FLOAT:  single_slots++; break;
+            case T_DOUBLE: double_slots++; break;
+            default:  ShouldNotReachHere();
+          }
+        }
+      }
+    }  // for
+    total_save_slots = double_slots * 2 + round_to(single_slots, 2); // Round to even.
+  }
+
+  int oop_handle_slot_offset = stack_slots;
+  stack_slots += total_save_slots;                                        // 3)
+
+  int klass_slot_offset = 0;
+  int klass_offset      = -1;
+  if (method_is_static && !is_critical_native) {                          // 4)
+    klass_slot_offset  = stack_slots;
+    klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
+    stack_slots       += VMRegImpl::slots_per_word;
+  }
+
+  int lock_slot_offset = 0;
+  int lock_offset      = -1;
+  if (method->is_synchronized()) {                                        // 5)
+    lock_slot_offset   = stack_slots;
+    lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
+    stack_slots       += VMRegImpl::slots_per_word;
+  }
+
+  int workspace_slot_offset= stack_slots;                                 // 6)
+  stack_slots         += 2;
+
+  // Now compute actual number of stack words we need.
+  // Round to align stack properly.
+  stack_slots = round_to(stack_slots,                                     // 7)
+                         frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
+  int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
+
+
+  ///////////////////////////////////////////////////////////////////////
+  // Now we can start generating code
+  ///////////////////////////////////////////////////////////////////////
+
+  unsigned int wrapper_CodeStart  = __ offset();
+  unsigned int wrapper_UEPStart;
+  unsigned int wrapper_VEPStart;
+  unsigned int wrapper_FrameDone;
+  unsigned int wrapper_CRegsSet;
+  Label     handle_pending_exception;
+  Label     ic_miss;
+
+  //---------------------------------------------------------------------
+  // Unverified entry point (UEP)
+  //---------------------------------------------------------------------
+  wrapper_UEPStart = __ offset();
+
+  // check ic: object class <-> cached class
+  if (!method_is_static) __ nmethod_UEP(ic_miss);
+  // Fill with nops (alignment of verified entry point).
+  __ align(CodeEntryAlignment);
+
+  //---------------------------------------------------------------------
+  // Verified entry point (VEP)
+  //---------------------------------------------------------------------
+  wrapper_VEPStart = __ offset();
+
+  __ save_return_pc();
+  __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
+#ifndef USE_RESIZE_FRAME
+  __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
+#else
+  __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
+                                                          // Just resize the existing one.
+#endif
+
+  wrapper_FrameDone = __ offset();
+
+  __ verify_thread();
+
+  // Native nmethod wrappers never take possession of the oop arguments.
+  // So the caller will gc the arguments.
+  // The only thing we need an oopMap for is if the call is static.
+  //
+  // An OopMap for lock (and class if static), and one for the VM call itself
+  OopMapSet  *oop_maps        = new OopMapSet();
+  OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
+                                       oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
+  }
+
+
+  //////////////////////////////////////////////////////////////////////
+  //
+  // The Grand Shuffle
+  //
+  //////////////////////////////////////////////////////////////////////
+  //
+  // We immediately shuffle the arguments so that for any vm call we have
+  // to make from here on out (sync slow path, jvmti, etc.) we will have
+  // captured the oops from our caller and have a valid oopMap for them.
+  //
+  //--------------------------------------------------------------------
+  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
+  // (derived from JavaThread* which is in Z_thread) and, if static,
+  // the class mirror instead of a receiver. This pretty much guarantees that
+  // register layout will not match. We ignore these extra arguments during
+  // the shuffle. The shuffle is described by the two calling convention
+  // vectors we have in our possession. We simply walk the java vector to
+  // get the source locations and the c vector to get the destinations.
+  //
+  // This is a trick. We double the stack slots so we can claim
+  // the oops in the caller's frame. Since we are sure to have
+  // more args than the caller doubling is enough to make
+  // sure we can capture all the incoming oop args from the caller.
+  //--------------------------------------------------------------------
+
+  // Record sp-based slot for receiver on stack for non-static methods.
+  int receiver_offset = -1;
+
+  //--------------------------------------------------------------------
+  // We move the arguments backwards because the floating point registers
+  // destination will always be to a register with a greater or equal
+  // register number or the stack.
+  //   jix is the index of the incoming Java arguments.
+  //   cix is the index of the outgoing C arguments.
+  //--------------------------------------------------------------------
+
+#ifdef ASSERT
+  bool reg_destroyed[RegisterImpl::number_of_registers];
+  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+  for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
+    reg_destroyed[r] = false;
+  }
+  for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
+    freg_destroyed[f] = false;
+  }
+#endif // ASSERT
+
+  for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
+#ifdef ASSERT
+    if (in_regs[jix].first()->is_Register()) {
+      assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
+    } else {
+      if (in_regs[jix].first()->is_FloatRegister()) {
+        assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
+      }
+    }
+    if (out_regs[cix].first()->is_Register()) {
+      reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
+    } else {
+      if (out_regs[cix].first()->is_FloatRegister()) {
+        freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
+      }
+    }
+#endif // ASSERT
+
+    switch (in_sig_bt[jix]) {
+      // Due to casting, small integers should only occur in pairs with type T_LONG.
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+      case T_INT:
+        // Move int and do sign extension.
+        move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
+        break;
+
+      case T_LONG :
+        long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
+        break;
+
+      case T_ARRAY:
+        if (is_critical_native) {
+          int body_arg = cix;
+          cix -= 2; // Point to length arg.
+          unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
+          break;
+        }
+        // else fallthrough
+      case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
+        object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
+                    ((jix == 0) && (!method_is_static)),
+                    &receiver_offset);
+        break;
+      case T_VOID:
+        break;
+
+      case T_FLOAT:
+        float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
+        break;
+
+      case T_DOUBLE:
+        assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
+        double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
+        break;
+
+      case T_ADDRESS:
+        assert(false, "found T_ADDRESS in java args");
+        break;
+
+      default:
+        ShouldNotReachHere();
+    }
+  }
+
+  //--------------------------------------------------------------------
+  // Pre-load a static method's oop into ARG2.
+  // Used both by locking code and the normal JNI call code.
+  //--------------------------------------------------------------------
+  if (method_is_static && !is_critical_native) {
+    __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
+
+    // Now handlize the static class mirror in ARG2. It's known not-null.
+    __ z_stg(Z_ARG2, klass_offset, Z_SP);
+    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
+    __ add2reg(Z_ARG2, klass_offset, Z_SP);
+  }
+
+  // Get JNIEnv* which is first argument to native.
+  if (!is_critical_native) {
+    __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
+  }
+
+  //////////////////////////////////////////////////////////////////////
+  // We have all of the arguments setup at this point.
+  // We MUST NOT touch any outgoing regs from this point on.
+  // So if we must call out we must push a new frame.
+  //////////////////////////////////////////////////////////////////////
+
+
+  // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
+  // Both values represent the same position.
+  __ get_PC(Z_R10);                // PC into register
+  wrapper_CRegsSet = __ offset();  // and into into variable.
+
+  // Z_R10 now has the pc loaded that we will use when we finally call to native.
+
+  // We use the same pc/oopMap repeatedly when we call out.
+  oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
+
+  // Lock a synchronized method.
+
+  if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
+
+    // ATTENTION: args and Z_R10 must be preserved.
+    Register r_oop  = Z_R11;
+    Register r_box  = Z_R12;
+    Register r_tmp1 = Z_R13;
+    Register r_tmp2 = Z_R7;
+    Label done;
+
+    // Load the oop for the object or class. R_carg2_classorobject contains
+    // either the handlized oop from the incoming arguments or the handlized
+    // class mirror (if the method is static).
+    __ z_lg(r_oop, 0, Z_ARG2);
+
+    lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
+    // Get the lock box slot's address.
+    __ add2reg(r_box, lock_offset, Z_SP);
+
+#ifdef ASSERT
+    if (UseBiasedLocking)
+      // Making the box point to itself will make it clear it went unused
+      // but also be obviously invalid.
+      __ z_stg(r_box, 0, r_box);
+#endif // ASSERT
+
+    // Try fastpath for locking.
+    // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!)
+    __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
+    __ z_bre(done);
+
+    //-------------------------------------------------------------------------
+    // None of the above fast optimizations worked so we have to get into the
+    // slow case of monitor enter. Inline a special case of call_VM that
+    // disallows any pending_exception.
+    //-------------------------------------------------------------------------
+
+    Register oldSP = Z_R11;
+
+    __ z_lgr(oldSP, Z_SP);
+
+    RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
+
+    // Prepare arguments for call.
+    __ z_lg(Z_ARG1, 0, Z_ARG2); // Ynboxed class mirror or unboxed object.
+    __ add2reg(Z_ARG2, lock_offset, oldSP);
+    __ z_lgr(Z_ARG3, Z_thread);
+
+    __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
+
+    // Do the call.
+    __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
+    __ call(Z_R1_scratch);
+
+    __ reset_last_Java_frame();
+
+    RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
+#ifdef ASSERT
+    { Label L;
+      __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
+      __ z_bre(L);
+      __ stop("no pending exception allowed on exit from IR::monitorenter");
+      __ bind(L);
+    }
+#endif
+    __ bind(done);
+  } // lock for synchronized methods
+
+
+  //////////////////////////////////////////////////////////////////////
+  // Finally just about ready to make the JNI call.
+  //////////////////////////////////////////////////////////////////////
+
+  // Use that pc we placed in Z_R10 a while back as the current frame anchor.
+  __ set_last_Java_frame(Z_SP, Z_R10);
+
+  // Transition from _thread_in_Java to _thread_in_native.
+  __ set_thread_state(_thread_in_native);
+
+
+  //////////////////////////////////////////////////////////////////////
+  // This is the JNI call.
+  //////////////////////////////////////////////////////////////////////
+
+  __ call_c(native_func);
+
+
+  //////////////////////////////////////////////////////////////////////
+  // We have survived the call once we reach here.
+  //////////////////////////////////////////////////////////////////////
+
+
+  //--------------------------------------------------------------------
+  // Unpack native results.
+  //--------------------------------------------------------------------
+  // For int-types, we do any needed sign-extension required.
+  // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
+  // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
+  // blocking or unlocking.
+  // An OOP result (handle) is done specially in the slow-path code.
+  //--------------------------------------------------------------------
+  switch (ret_type) {
+    case T_VOID:    break;         // Nothing to do!
+    case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
+    case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
+    case T_LONG:    break;         // Got it where we want it (unless slow-path)
+    case T_OBJECT:  break;         // Really a handle.
+                                   // Cannot de-handlize until after reclaiming jvm_lock.
+    case T_ARRAY:   break;
+
+    case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
+      __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
+      __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
+      break;
+    case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
+    case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
+    case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
+    case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
+
+    default:
+      ShouldNotReachHere();
+      break;
+  }
+
+
+  // Switch thread to "native transition" state before reading the synchronization state.
+  // This additional state is necessary because reading and testing the synchronization
+  // state is not atomic w.r.t. GC, as this scenario demonstrates:
+  //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
+  //   - VM thread changes sync state to synchronizing and suspends threads for GC.
+  //   - Thread A is resumed to finish this native method, but doesn't block here since it
+  //     didn't see any synchronization in progress, and escapes.
+
+  // Transition from _thread_in_native to _thread_in_native_trans.
+  __ set_thread_state(_thread_in_native_trans);
+
+  // Safepoint synchronization
+  //--------------------------------------------------------------------
+  // Must we block?
+  //--------------------------------------------------------------------
+  // Block, if necessary, before resuming in _thread_in_Java state.
+  // In order for GC to work, don't clear the last_Java_sp until after blocking.
+  //--------------------------------------------------------------------
+  Label after_transition;
+  {
+    Label no_block, sync;
+
+    save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
+
+    if (os::is_MP()) {
+      if (UseMembar) {
+        // Force this write out before the read below.
+        __ z_fence();
+      } else {
+        // Write serialization page so VM thread can do a pseudo remote membar.
+        // We use the current thread pointer to calculate a thread specific
+        // offset to write to within the page. This minimizes bus traffic
+        // due to cache line collision.
+        __ serialize_memory(Z_thread, Z_R1, Z_R2);
+      }
+    }
+    __ generate_safepoint_check(sync, Z_R1, true);
+
+    __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
+    __ z_bre(no_block);
+
+    // Block. Save any potential method result value before the operation and
+    // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
+    // lets us share the oopMap we used when we went native rather than create
+    // a distinct one for this pc.
+    //
+    __ bind(sync);
+    __ z_acquire();
+
+    address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
+                                             : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
+
+    __ call_VM_leaf(entry_point, Z_thread);
+
+    if (is_critical_native) {
+      restore_native_result(masm, ret_type, workspace_slot_offset);
+      __ z_bru(after_transition); // No thread state transition here.
+    }
+    __ bind(no_block);
+    restore_native_result(masm, ret_type, workspace_slot_offset);
+  }
+
+  //--------------------------------------------------------------------
+  // Thread state is thread_in_native_trans. Any safepoint blocking has
+  // already happened so we can now change state to _thread_in_Java.
+  //--------------------------------------------------------------------
+  // Transition from _thread_in_native_trans to _thread_in_Java.
+  __ set_thread_state(_thread_in_Java);
+  __ bind(after_transition);
+
+
+  //--------------------------------------------------------------------
+  // Reguard any pages if necessary.
+  // Protect native result from being destroyed.
+  //--------------------------------------------------------------------
+
+  Label no_reguard;
+
+  __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
+           JavaThread::stack_guard_yellow_reserved_disabled);
+
+  __ z_bre(no_reguard);
+
+  save_native_result(masm, ret_type, workspace_slot_offset);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
+  restore_native_result(masm, ret_type, workspace_slot_offset);
+
+  __ bind(no_reguard);
+
+
+  // Synchronized methods (slow path only)
+  // No pending exceptions for now.
+  //--------------------------------------------------------------------
+  // Handle possibly pending exception (will unlock if necessary).
+  // Native result is, if any is live, in Z_FRES or Z_RES.
+  //--------------------------------------------------------------------
+  // Unlock
+  //--------------------------------------------------------------------
+  if (method->is_synchronized()) {
+    const Register r_oop        = Z_R11;
+    const Register r_box        = Z_R12;
+    const Register r_tmp1       = Z_R13;
+    const Register r_tmp2       = Z_R7;
+    Label done;
+
+    // Get unboxed oop of class mirror or object ...
+    int   offset = method_is_static ? klass_offset : receiver_offset;
+
+    assert(offset != -1, "");
+    __ z_lg(r_oop, offset, Z_SP);
+
+    // ... and address of lock object box.
+    __ add2reg(r_box, lock_offset, Z_SP);
+
+    // Try fastpath for unlocking.
+    __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
+    __ z_bre(done);
+
+    // Slow path for unlocking.
+    // Save and restore any potential method result value around the unlocking operation.
+    const Register R_exc = Z_R11;
+
+    save_native_result(masm, ret_type, workspace_slot_offset);
+
+    // Must save pending exception around the slow-path VM call. Since it's a
+    // leaf call, the pending exception (if any) can be kept in a register.
+    __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
+    assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
+
+    // Must clear pending-exception before re-entering the VM. Since this is
+    // a leaf call, pending-exception-oop can be safely kept in a register.
+    __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
+
+    // Inline a special case of call_VM that disallows any pending_exception.
+
+    // Get locked oop from the handle we passed to jni.
+    __ z_lg(Z_ARG1, offset, Z_SP);
+    __ add2reg(Z_ARG2, lock_offset, Z_SP);
+    __ z_lgr(Z_ARG3, Z_thread);
+
+    __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
+
+    __ call(Z_R1_scratch);
+
+#ifdef ASSERT
+    {
+      Label L;
+      __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
+      __ z_bre(L);
+      __ stop("no pending exception allowed on exit from IR::monitorexit");
+      __ bind(L);
+    }
+#endif
+
+    // Check_forward_pending_exception jump to forward_exception if any pending
+    // exception is set. The forward_exception routine expects to see the
+    // exception in pending_exception and not in a register. Kind of clumsy,
+    // since all folks who branch to forward_exception must have tested
+    // pending_exception first and hence have it in a register already.
+    __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
+    restore_native_result(masm, ret_type, workspace_slot_offset);
+    __ z_bru(done);
+    __ z_illtrap(0x66);
+
+    __ bind(done);
+  }
+
+
+  //--------------------------------------------------------------------
+  // Clear "last Java frame" SP and PC.
+  //--------------------------------------------------------------------
+  __ verify_thread(); // Z_thread must be correct.
+
+  __ reset_last_Java_frame();
+
+  // Unpack oop result
+  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+    NearLabel L;
+    __ compare64_and_branch(Z_RET, (RegisterOrConstant)0L, Assembler::bcondEqual, L);
+    __ z_lg(Z_RET, 0, Z_RET);
+    __ bind(L);
+    __ verify_oop(Z_RET);
+  }
+
+  if (CheckJNICalls) {
+    // clear_pending_jni_exception_check
+    __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
+  }
+
+  // Reset handle block.
+  if (!is_critical_native) {
+    __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
+    __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
+
+    // Check for pending exceptions.
+    __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
+    __ z_brne(handle_pending_exception);
+  }
+
+
+  //////////////////////////////////////////////////////////////////////
+  // Return
+  //////////////////////////////////////////////////////////////////////
+
+
+#ifndef USE_RESIZE_FRAME
+  __ pop_frame();                     // Pop wrapper frame.
+#else
+  __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
+#endif
+  __ restore_return_pc();             // This is the way back to the caller.
+  __ z_br(Z_R14);
+
+
+  //////////////////////////////////////////////////////////////////////
+  // Out-of-line calls to the runtime.
+  //////////////////////////////////////////////////////////////////////
+
+
+  if (!is_critical_native) {
+
+    //---------------------------------------------------------------------
+    // Handler for pending exceptions (out-of-line).
+    //---------------------------------------------------------------------
+    // Since this is a native call, we know the proper exception handler
+    // is the empty function. We just pop this frame and then jump to
+    // forward_exception_entry. Z_R14 will contain the native caller's
+    // return PC.
+    __ bind(handle_pending_exception);
+    __ pop_frame();
+    __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
+    __ restore_return_pc();
+    __ z_br(Z_R1_scratch);
+
+    //---------------------------------------------------------------------
+    // Handler for a cache miss (out-of-line)
+    //---------------------------------------------------------------------
+    __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
+  }
+  __ flush();
+
+
+  //////////////////////////////////////////////////////////////////////
+  // end of code generation
+  //////////////////////////////////////////////////////////////////////
+
+
+  nmethod *nm = nmethod::new_native_nmethod(method,
+                                            compile_id,
+                                            masm->code(),
+                                            (int)(wrapper_VEPStart-wrapper_CodeStart),
+                                            (int)(wrapper_FrameDone-wrapper_CodeStart),
+                                            stack_slots / VMRegImpl::slots_per_word,
+                                            (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
+                                            in_ByteSize(lock_offset),
+                                            oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
+
+  return nm;
+#else
+  ShouldNotReachHere();
+  return NULL;
+#endif // COMPILER2
+}
+
+// Generate the compiled-to-interpreter (c2i) adapter entry.
+//
+// Shuffles the caller's compiled-convention arguments into the
+// interpreter's all-on-stack layout and jumps to the interpreter entry
+// preloaded in 'ientry'. An out-of-line slow path first patches the
+// caller's call site if a compiled target exists (method->code() != NULL).
+// Returns the address of the regular (verified) c2i entry point.
+static address gen_c2i_adapter(MacroAssembler  *masm,
+                               int total_args_passed,
+                               int comp_args_on_stack,
+                               const BasicType *sig_bt,
+                               const VMRegPair *regs,
+                               Label &skip_fixup) {
+  // Before we get into the guts of the C2I adapter, see if we should be here
+  // at all. We've come from compiled code and are attempting to jump to the
+  // interpreter, which means the caller made a static call to get here
+  // (vcalls always get a compiled target if there is one). Check for a
+  // compiled target. If there is one, we need to patch the caller's call.
+
+  // These two defs MUST MATCH code in gen_i2c2i_adapter!
+  const Register ientry = Z_R11;
+  const Register code   = Z_R11;
+
+  address c2i_entrypoint;
+  Label   patch_callsite;
+
+  // Regular (verified) c2i entry point.
+  c2i_entrypoint = __ pc();
+
+  // Call patching needed?
+  __ load_and_test_long(Z_R0_scratch, method_(code));
+  __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
+  __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
+
+  __ bind(skip_fixup);  // Return point from patch_callsite.
+
+  // Since all args are passed on the stack, total_args_passed*wordSize is the
+  // space we need. We need ABI scratch area but we use the caller's since
+  // it has already been allocated.
+
+  const int abi_scratch = frame::z_top_ijava_frame_abi_size;
+  int       extraspace  = round_to(total_args_passed, 2)*wordSize + abi_scratch;
+  Register  sender_SP   = Z_R10;
+
+  // Remember the senderSP so we can pop the interpreter arguments off of the stack.
+  // In addition, frame manager expects initial_caller_sp in Z_R10.
+  __ z_lgr(sender_SP, Z_SP);
+
+  // This should always fit in 14 bit immediate.
+  __ resize_frame(-extraspace, Z_R0_scratch);
+
+  // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
+  // args. This essentially moves the callers ABI scratch area from the top to the
+  // bottom of the arg area.
+
+  // Offset of the next interpreter arg slot to fill; the arg area is
+  // written from its top (highest offset) downwards.
+  int st_off =  extraspace - wordSize;
+
+  // Now write the args into the outgoing interpreter space.
+  for (int i = 0; i < total_args_passed; i++) {
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {
+      // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
+      // We must account for it here.
+      int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+
+      if (!r_2->is_valid()) {
+        __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
+      } else {
+        // longs are given 2 64-bit slots in the interpreter,
+        // but the data is passed in only 1 slot.
+        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+#ifdef ASSERT
+          __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
+#endif
+          st_off -= wordSize;
+        }
+        __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
+      }
+    } else {
+      if (r_1->is_Register()) {
+        if (!r_2->is_valid()) {
+          __ z_st(r_1->as_Register(), st_off, Z_SP);
+        } else {
+          // longs are given 2 64-bit slots in the interpreter, but the
+          // data is passed in only 1 slot.
+          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+#ifdef ASSERT
+            __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
+#endif
+            st_off -= wordSize;
+          }
+          __ z_stg(r_1->as_Register(), st_off, Z_SP);
+        }
+      } else {
+        assert(r_1->is_FloatRegister(), "");
+        if (!r_2->is_valid()) {
+          __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
+        } else {
+          // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
+          // data is passed in only 1 slot.
+          // One of these should get known junk...
+#ifdef ASSERT
+          __ z_lzdr(Z_F1);
+          __ z_std(Z_F1, st_off, Z_SP);
+#endif
+          st_off-=wordSize;
+          __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
+        }
+      }
+    }
+    st_off -= wordSize;
+  }
+
+
+  // Jump to the interpreter just as if interpreter was doing it.
+  __ add2reg(Z_esp, st_off, Z_SP);
+
+  // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
+  __ z_br(ientry);
+
+
+  // Prevent illegal entry to out-of-line code.
+  __ z_illtrap(0x22);
+
+  // Generate out-of-line runtime call to patch caller,
+  // then continue as interpreted.
+
+  // IF you lose the race you go interpreted.
+  // We don't see any possible endless c2i -> i2c -> c2i ...
+  // transitions no matter how rare.
+  __ bind(patch_callsite);
+
+  RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
+  RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
+  __ z_bru(skip_fixup);
+
+  // end of out-of-line code
+
+  return c2i_entrypoint;
+}
+
+// Generate the interpreter-to-compiled (i2c) adapter: shuffle the
+// interpreter's stacked arguments into the compiled calling convention
+// (registers plus outgoing stack slots), then jump to the compiled entry.
+//
+// On entry, the following registers are set
+//
+//    Z_thread  r8  - JavaThread*
+//    Z_method  r9  - callee's method (method to be invoked)
+//    Z_esp     r7  - operand (or expression) stack pointer of caller. one slot above last arg.
+//    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
+//
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
+                                    int total_args_passed,
+                                    int comp_args_on_stack,
+                                    const BasicType *sig_bt,
+                                    const VMRegPair *regs) {
+  const Register ld_ptr= Z_esp;
+
+  // Interpreter args are read from the top (highest offset) downwards.
+  int ld_offset = total_args_passed * wordSize;
+
+  // Cut-out for having no stack args.
+  if (comp_args_on_stack) {
+    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
+    // registers are below. By subtracting stack0, we either get a negative
+    // number (all values in registers) or the maximum stack slot accessed.
+    // Convert VMRegImpl (4 byte) stack slots to words.
+    int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
+    // Round up to minimum stack alignment, in wordSize
+    comp_words_on_stack = round_to(comp_words_on_stack, 2);
+
+    __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
+  }
+
+  // Now generate the shuffle code. Pick up all register args;
+  // args destined for the stack are moved memory-to-memory (MVC) below.
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // Pick up 0, 1 or 2 words from ld_ptr.
+    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
+           "scrambled load targets?");
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_FloatRegister()) {
+      if (!r_2->is_valid()) {
+        __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
+        ld_offset-=wordSize;
+      } else {
+        // Skip the unused interpreter slot.
+        __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
+        ld_offset -= 2 * wordSize;
+      }
+    } else {
+      if (r_1->is_stack()) {
+        // Must do a memory to memory move.
+        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+
+        if (!r_2->is_valid()) {
+          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
+        } else {
+          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
+          // data is passed in only 1 slot.
+          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+            ld_offset -= wordSize;
+          }
+          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
+        }
+      } else {
+        if (!r_2->is_valid()) {
+          // Not sure we need to do this but it shouldn't hurt.
+          if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
+            __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
+          } else {
+            __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
+          }
+        } else {
+          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
+          // data is passed in only 1 slot.
+          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+            ld_offset -= wordSize;
+          }
+          __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
+        }
+      }
+      ld_offset -= wordSize;
+    }
+  }
+
+  // Jump to the compiled code just as if compiled code was doing it.
+  // load target address from method oop:
+  __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
+
+  // Store method oop into thread->callee_target.
+  // 6243940: We might end up in handle_wrong_method if
+  // the callee is deoptimized as we race thru here. If that
+  // happens we don't want to take a safepoint because the
+  // caller frame will look interpreted and arguments are now
+  // "compiled" so it is much better to make this transition
+  // invisible to the stack walking code. Unfortunately, if
+  // we try and find the callee by normal means a safepoint
+  // is possible. So we stash the desired callee in the thread
+  // and the vm will find it there should this case occur.
+  __ z_stg(Z_method, thread_(callee_target));
+
+  __ z_br(Z_R1_scratch);
+}
+
+// Generate both adapter entries for one signature fingerprint:
+// the i2c entry, the unverified c2i entry (with inline-cache check that
+// falls through into the verified entry), and the verified c2i entry.
+// The three entry points are registered as one AdapterHandlerEntry.
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int total_args_passed,
+                                                            int comp_args_on_stack,
+                                                            const BasicType *sig_bt,
+                                                            const VMRegPair *regs,
+                                                            AdapterFingerPrint* fingerprint) {
+  __ align(CodeEntryAlignment);
+  address i2c_entry = __ pc();
+  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+  address c2i_unverified_entry;
+
+  Label skip_fixup;
+  {
+    Label ic_miss;
+    const int klass_offset         = oopDesc::klass_offset_in_bytes();
+    const int holder_klass_offset  = CompiledICHolder::holder_klass_offset();
+    const int holder_method_offset = CompiledICHolder::holder_method_offset();
+
+    // Out-of-line call to ic_miss handler.
+    __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
+
+    // Unverified Entry Point UEP
+    __ align(CodeEntryAlignment);
+    c2i_unverified_entry = __ pc();
+
+    // Check the pointers.
+    // Explicit receiver null check only if implicit checks can't cover it.
+    if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
+      __ z_ltgr(Z_ARG1, Z_ARG1);
+      __ z_bre(ic_miss);
+    }
+    __ verify_oop(Z_ARG1);
+
+    // Check ic: object class <-> cached class
+    // Compress cached class for comparison. That's more efficient.
+    if (UseCompressedClassPointers) {
+      __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
+      __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
+    } else {
+      __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
+    }
+    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
+
+    // This def MUST MATCH code in gen_c2i_adapter!
+    const Register code = Z_R11;
+
+    __ z_lg(Z_method, holder_method_offset, Z_method);
+    __ load_and_test_long(Z_R0, method_(code));
+    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
+
+    // Fallthru to VEP. Duplicate LTG, but saved taken branch.
+  }
+
+  address c2i_entry;
+  c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+}
+
+// Returns the size adjustment (in number of words) that a c2i adapter
+// activation needs during deoptimization.
+//
+// Strictly speaking only compiled frames require the adjustment, but
+// applying it to entry and interpreter frames as well does no harm.
+//
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
+  assert(callee_locals >= callee_parameters,
+          "test and remove; got more parms than locals");
+  // The abi adjustment is handled here rather than in push_skeleton_frames.
+  const int extra_locals = callee_locals - callee_parameters;
+  const int abi_words    = frame::z_parent_ijava_frame_abi_size / BytesPerWord;
+  return extra_locals * Interpreter::stackElementWords + abi_words;
+}
+
+// Number of VMReg stack slots the JIT calling convention preserves
+// for outgoing arguments.
+uint SharedRuntime::out_preserve_stack_slots() {
+  const uint preserve_bytes = frame::z_jit_out_preserve_size;
+  return preserve_bytes / VMRegImpl::stack_slot_size;
+}
+
+//
+// Frame generation for deopt and uncommon trap blobs.
+//
+// Pushes one skeletal interpreter frame:
+// - loads the frame's pc from *pcs_reg and its size from *frame_sizes_reg,
+// - stores that pc as return_pc into the current top frame,
+// - pushes a new frame of the loaded size.
+// frame_size_reg and pc_reg serve as scratch and are invalidated; pc_reg
+// doubles as the fp (old SP) after push_frame.
+static void push_skeleton_frame(MacroAssembler* masm,
+                          /* Unchanged */
+                          Register frame_sizes_reg,
+                          Register pcs_reg,
+                          /* Invalidate */
+                          Register frame_size_reg,
+                          Register pc_reg) {
+  BLOCK_COMMENT("  push_skeleton_frame {");
+   __ z_lg(pc_reg, 0, pcs_reg);
+   __ z_lg(frame_size_reg, 0, frame_sizes_reg);
+   __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
+   Register fp = pc_reg;
+   __ push_frame(frame_size_reg, fp);
+#ifdef ASSERT
+   // The magic is required for successful walking skeletal frames.
+   __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
+   __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
+   // Fill other slots that are supposedly not necessary with eye catchers.
+   __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
+   __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
+   // The sender_sp of the bottom frame is set before pushing it.
+   // The sender_sp of non bottom frames is their caller's top_frame_sp, which
+   // is unknown here. Luckily it is not needed before filling the frame in
+   // layout_activation(), we assert this by setting an eye catcher (see
+   // comments on sender_sp in frame_s390.hpp).
+   __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
+#endif // ASSERT
+  BLOCK_COMMENT("  } push_skeleton_frame");
+}
+
+// Loop through the UnrollBlock info and create new frames.
+// Reads frame count, pcs array and sizes array from the UnrollBlock,
+// extends the caller frame by the caller_adjustment, then pushes one
+// skeletal frame per entry. Afterwards the pc following the last frame
+// is stored as the top frame's return_pc.
+static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
+                            /* read */
+                            Register unroll_block_reg,
+                            /* invalidate */
+                            Register frame_sizes_reg,
+                            Register number_of_frames_reg,
+                            Register pcs_reg,
+                            Register tmp1,
+                            Register tmp2) {
+  BLOCK_COMMENT("push_skeleton_frames {");
+  // _number_of_frames is of type int (deoptimization.hpp).
+  __ z_lgf(number_of_frames_reg,
+           Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+  __ z_lg(pcs_reg,
+          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+  __ z_lg(frame_sizes_reg,
+          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+
+  // stack: (caller_of_deoptee, ...).
+
+  // If caller_of_deoptee is a compiled frame, then we extend it to make
+  // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
+  // See also Deoptimization::last_frame_adjust() above.
+  // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.
+
+  __ z_lgf(Z_R1_scratch,
+           Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
+  __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
+  __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
+  // The oldest skeletal frame requires a valid sender_sp to make it walkable
+  // (it is required to find the original pc of caller_of_deoptee if it is marked
+  // for deoptimization - see nmethod::orig_pc_addr()).
+  __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
+
+  // Now push the new interpreter frames.
+  Label loop, loop_entry;
+
+  // Make sure that there is at least one entry in the array.
+  DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
+  __ asm_assert_ne("array_size must be > 0", 0x205);
+
+  __ z_bru(loop_entry);
+
+  __ bind(loop);
+
+  // Advance to the next pc/size pair before pushing the next frame.
+  __ add2reg(frame_sizes_reg, wordSize);
+  __ add2reg(pcs_reg, wordSize);
+
+  __ bind(loop_entry);
+
+  // Allocate a new frame, fill in the pc.
+  push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
+
+  __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
+  __ z_brne(loop);
+
+  // Set the top frame's return pc.
+  __ add2reg(pcs_reg, wordSize);
+  __ z_lg(Z_R0_scratch, 0, pcs_reg);
+  __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
+  BLOCK_COMMENT("} push_skeleton_frames");
+}
+
+//------------------------------generate_deopt_blob----------------------------
+// Generate the deoptimization blob. It provides four entry points:
+//  - normal entry     (Unpack_deopt):     reached from the nmethod's deopt handler,
+//  - reexecute entry  (Unpack_reexecute): COMPILER1 only,
+//  - exception entry:  exception oop and pc arrive in Z_EXC_OOP and Z_EXC_PC,
+//  - exception-in-TLS: exception oop and pc are already stored in the JavaThread.
+// All paths save the live registers, call Deoptimization::fetch_unroll_info(),
+// pop the deoptee's frame, push skeletal interpreter frames and finally let
+// Deoptimization::unpack_frames() lay out the interpreter state in them.
+void SharedRuntime::generate_deopt_blob() {
+  // Allocate space for the code.
+  ResourceMark rm;
+  // Setup code generation tools.
+  CodeBuffer buffer("deopt_blob", 2048, 1024);
+  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
+  Label exec_mode_initialized;
+  OopMap* map = NULL;
+  OopMapSet *oop_maps = new OopMapSet();
+
+  unsigned int start_off = __ offset();
+
+  // --------------------------------------------------------------------------
+  // Normal entry (non-exception case)
+  //
+  // We have been called from the deopt handler of the deoptee.
+  // Z_R14 points behind the call in the deopt handler. We adjust
+  // it such that it points to the start of the deopt handler.
+  // The return_pc has been stored in the frame of the deoptee and
+  // will replace the address of the deopt_handler in the call
+  // to Deoptimization::fetch_unroll_info below.
+  // The (int) cast is necessary, because -((unsigned int)14)
+  // is an unsigned int.
+  __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler());
+
+  const Register   exec_mode_reg = Z_tmp_1;
+
+  // stack: (deoptee, caller of deoptee, ...)
+
+  // pushes an "unpack" frame
+  // R14 contains the return address pointing into the deoptimized
+  // nmethod that was valid just before the nmethod was deoptimized.
+  // save R14 into the deoptee frame.  the `fetch_unroll_info'
+  // procedure called below will read it from there.
+  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+
+  // note the entry point.
+  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
+  __ z_bru(exec_mode_initialized);
+
+#ifndef COMPILER1
+  int reexecute_offset = 1; // odd offset will produce odd pc, which triggers an hardware trap
+#else
+  // --------------------------------------------------------------------------
+  // Reexecute entry
+  // - Z_R14 = Deopt Handler in nmethod
+
+  int reexecute_offset = __ offset() - start_off;
+
+  // No need to update map as each call to save_live_registers will produce identical oopmap
+  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+
+  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
+  __ z_bru(exec_mode_initialized);
+#endif
+
+
+  // --------------------------------------------------------------------------
+  // Exception entry. We reached here via a branch. Registers on entry:
+  // - Z_EXC_OOP (Z_ARG1) = exception oop
+  // - Z_EXC_PC  (Z_ARG2) = the exception pc.
+
+  int exception_offset = __ offset() - start_off;
+
+  // all registers are dead at this entry point, except for Z_EXC_OOP, and
+  // Z_EXC_PC which contain the exception oop and exception pc
+  // respectively.  Set them in TLS and fall thru to the
+  // unpack_with_exception_in_tls entry point.
+
+  // Store exception oop and pc in thread (location known to GC).
+  // Need this since the call to "fetch_unroll_info()" may safepoint.
+  __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
+  __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
+
+  // fall through
+
+  int exception_in_tls_offset = __ offset() - start_off;
+
+  // new implementation because exception oop is now passed in JavaThread
+
+  // Prolog for exception case
+  // All registers must be preserved because they might be used by LinearScan
+  // Exception oop and throwing PC are passed in JavaThread
+
+  // load throwing pc from JavaThread and use it as the return address of the current frame.
+  __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
+
+  // Save everything in sight.
+  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
+
+  // Now it is safe to overwrite any register
+
+  // Clear the exception pc field in JavaThread
+  __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
+
+  // Deopt during an exception.  Save exec mode for unpack_frames.
+  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
+
+
+#ifdef ASSERT
+  // verify that there is really an exception oop in JavaThread
+  __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
+  __ verify_oop(Z_ARG1);
+
+  // verify that there is no pending exception
+  __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
+                             "must not have pending exception here", __LINE__);
+#endif
+
+  // --------------------------------------------------------------------------
+  // At this point, the live registers are saved and
+  // the exec_mode_reg has been set up correctly.
+  __ bind(exec_mode_initialized);
+
+  // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
+
+  {
+  const Register unroll_block_reg  = Z_tmp_2;
+
+  // we need to set `last_Java_frame' because `fetch_unroll_info' will
+  // call `last_Java_frame()'.  however we can't block and no gc will
+  // occur so we don't need an oopmap. the value of the pc in the
+  // frame is not particularly important.  it just needs to identify the blob.
+
+  // Don't set last_Java_pc anymore here (is implicitly NULL then).
+  // the correct PC is retrieved in pd_last_frame() in that case.
+  __ set_last_Java_frame(/*sp*/Z_SP, noreg);
+  // With EscapeAnalysis turned on, this call may safepoint
+  // despite it's marked as "leaf call"!
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
+  // Set an oopmap for the call site this describes all our saved volatile registers
+  int offs = __ offset();
+  oop_maps->add_gc_map(offs, map);
+
+  __ reset_last_Java_frame();
+  // save the return value.
+  __ z_lgr(unroll_block_reg, Z_RET);
+  // restore the return registers that have been saved
+  // (among other registers) by save_live_registers(...).
+  RegisterSaver::restore_result_registers(masm);
+
+  // reload the exec mode from the UnrollBlock (it might have changed)
+  __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+
+  // In excp_deopt_mode, restore and clear exception oop which we
+  // stored in the thread during exception entry above. The exception
+  // oop will be the return value of this stub.
+  NearLabel skip_restore_excp;
+  __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
+  __ z_lg(Z_RET, thread_(exception_oop));
+  __ clear_mem(thread_(exception_oop), 8);
+  __ bind(skip_restore_excp);
+
+  // remove the "unpack" frame
+  __ pop_frame();
+
+  // stack: (deoptee, caller of deoptee, ...).
+
+  // pop the deoptee's frame
+  __ pop_frame();
+
+  // stack: (caller_of_deoptee, ...).
+
+  // loop through the `UnrollBlock' info and create interpreter frames.
+  push_skeleton_frames(masm, true/*deopt*/,
+                  unroll_block_reg,
+                  Z_tmp_3,
+                  Z_tmp_4,
+                  Z_ARG5,
+                  Z_ARG4,
+                  Z_ARG3);
+
+  // stack: (skeletal interpreter frame, ..., optional skeletal
+  // interpreter frame, caller of deoptee, ...).
+  }
+
+  // push an "unpack" frame taking care of float / int return values.
+  __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
+
+  // stack: (unpack frame, skeletal interpreter frame, ..., optional
+  // skeletal interpreter frame, caller of deoptee, ...).
+
+  // spill live volatile registers since we'll do a call.
+  __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
+  __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
+
+  // let the unpacker layout information in the skeletal frames just allocated.
+  __ get_PC(Z_RET);
+  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
+                  Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
+
+  __ reset_last_Java_frame();
+
+  // restore the volatiles saved above.
+  __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
+  __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
+
+  // pop the "unpack" frame.
+  __ pop_frame();
+  __ restore_return_pc();
+
+  // stack: (top interpreter frame, ..., optional interpreter frame,
+  // caller of deoptee, ...).
+
+  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_esp();
+
+  // return to the interpreter entry point.
+  __ z_br(Z_R14);
+
+  // Make sure all code is generated
+  masm->flush();
+
+  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
+  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+}
+
+
+#ifdef COMPILER2
+//------------------------------generate_uncommon_trap_blob--------------------
+// Generate the blob reached on C2 uncommon traps. It calls
+// Deoptimization::uncommon_trap() to pack the compiled frame into a vframe
+// array, replaces the deoptee frame by skeletal interpreter frames, and lets
+// Deoptimization::unpack_frames() lay out the interpreter state in them.
+void SharedRuntime::generate_uncommon_trap_blob() {
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
+  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
+
+  Register unroll_block_reg = Z_tmp_1;
+  Register klass_index_reg  = Z_ARG2;
+  Register unc_trap_reg     = Z_ARG2;
+
+  // stack: (deoptee, caller_of_deoptee, ...).
+
+  // push a dummy "unpack" frame and call
+  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
+  // vframe array and return the `UnrollBlock' information.
+
+  // save R14 to compiled frame.
+  __ save_return_pc();
+  // push the "unpack_frame".
+  __ push_frame_abi160(0);
+
+  // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
+
+  // set the "unpack" frame as last_Java_frame.
+  // `Deoptimization::uncommon_trap' expects it and considers its
+  // sender frame as the deoptee frame.
+  __ get_PC(Z_R1_scratch);
+  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
+
+  __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
+  __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
+  BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
+
+  __ reset_last_Java_frame();
+
+  // pop the "unpack" frame
+  __ pop_frame();
+
+  // stack: (deoptee, caller_of_deoptee, ...).
+
+  // save the return value.
+  __ z_lgr(unroll_block_reg, Z_RET);
+
+  // pop the deoptee frame.
+  __ pop_frame();
+
+  // stack: (caller_of_deoptee, ...).
+
+#ifdef ASSERT
+  // Check that the UnrollBlock really carries Unpack_uncommon_trap.
+  // CLI/CLIY compare a single byte, so address the least significant
+  // byte of the (big-endian) int field.
+  assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
+  assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
+  const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
+#ifndef VM_LITTLE_ENDIAN
+  + 3
+#endif
+  ;
+  if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
+    __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
+  } else {
+    __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
+  }
+  __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
+#endif
+
+  __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
+
+  // allocate new interpreter frame(s) and possibly resize the caller's frame
+  // (no more adapters !)
+  push_skeleton_frames(masm, false/*deopt*/,
+                  unroll_block_reg,
+                  Z_tmp_2,
+                  Z_tmp_3,
+                  Z_tmp_4,
+                  Z_ARG5,
+                  Z_ARG4);
+
+  // stack: (skeletal interpreter frame, ..., optional skeletal
+  // interpreter frame, (resized) caller of deoptee, ...).
+
+  // push a dummy "unpack" frame taking care of float return values.
+  // call `Deoptimization::unpack_frames' to layout information in the
+  // interpreter frames just created
+
+  // push the "unpack" frame
+  const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
+
+  // stack: (unpack frame, skeletal interpreter frame, ..., optional
+  // skeletal interpreter frame, (resized) caller of deoptee, ...).
+
+  // set the "unpack" frame as last_Java_frame
+  __ get_PC(Z_R1_scratch);
+  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
+
+  // indicate it is the uncommon trap case
+  BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
+  __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
+  // let the unpacker layout information in the skeletal frames just allocated.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
+
+  __ reset_last_Java_frame();
+  // pop the "unpack" frame
+  __ pop_frame();
+  // restore LR from top interpreter frame
+  __ restore_return_pc();
+
+  // stack: (top interpreter frame, ..., optional interpreter frame,
+  // (resized) caller of deoptee, ...).
+
+  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_esp();
+
+  // return to the interpreter entry point
+  __ z_br(Z_R14);
+
+  masm->flush();
+  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
+}
+#endif // COMPILER2
+
+
+//------------------------------generate_handler_blob------
+//
+// Generate a special Compile2Runtime blob that saves all registers,
+// and setup oopmap.
+// call_ptr is the runtime entry to handle the safepoint poll;
+// poll_type distinguishes polls at return (POLL_AT_RETURN) from others.
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
+  assert(StubRoutines::forward_exception_entry() != NULL,
+         "must be generated before");
+
+  ResourceMark rm;
+  OopMapSet *oop_maps = new OopMapSet();
+  OopMap* map;
+
+  // Allocate space for the code. Setup code generation tools.
+  CodeBuffer buffer("handler_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  unsigned int start_off = __ offset();
+
+  bool cause_return = (poll_type == POLL_AT_RETURN);
+  // Make room for return address (or push it again). If we do not poll
+  // at return, the real return address was stashed in the thread's
+  // saved_exception_pc, so reload it into Z_R14.
+  if (!cause_return) {
+    __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
+  }
+
+  // Save registers, fpu state, and flags
+  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+
+  // The following is basically a call_VM. However, we need the precise
+  // address of the call in order to generate an oopmap. Hence, we do all the
+  // work ourselves.
+  __ set_last_Java_frame(Z_SP, noreg);
+
+  // call into the runtime to handle the safepoint poll
+  __ call_VM_leaf(call_ptr, Z_thread);
+
+
+  // Set an oopmap for the call site. This oopmap will map all
+  // oop-registers and debug-info registers as callee-saved. This
+  // will allow deoptimization at this safepoint to find all possible
+  // debug-info recordings, as well as let GC find all oops.
+
+  oop_maps->add_gc_map((int)(__ offset()-start_off), map);
+
+  Label noException;
+
+  __ reset_last_Java_frame();
+
+  __ load_and_test_long(Z_R1, thread_(pending_exception));
+  __ z_bre(noException);
+
+  // Pending exception case, used (sporadically) by
+  // api/java_lang/Thread.State/index#ThreadState et al.
+  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
+
+  // Jump to forward_exception_entry, with the issuing PC in Z_R14
+  // so it looks like the original nmethod called forward_exception_entry.
+  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
+  __ z_br(Z_R1_scratch);
+
+  // No exception case
+  __ bind(noException);
+
+  // Normal exit, restore registers and exit.
+  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
+
+  __ z_br(Z_R14);
+
+  // Make sure all code is generated
+  masm->flush();
+
+  // Fill-out other meta info
+  return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
+}
+
+
+//
+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
+//
+// Generate a stub that calls into vm to find out the proper destination
+// of a Java call. All the argument registers are live at this point
+// but since this is generic code we don't know what they are and the caller
+// must do any gc of the args.
+//
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
+  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+  // allocate space for the code
+  ResourceMark rm;
+
+  CodeBuffer buffer(name, 1000, 512);
+  MacroAssembler* masm                = new MacroAssembler(&buffer);
+
+  OopMapSet *oop_maps = new OopMapSet();
+  OopMap* map = NULL;
+
+  unsigned int start_off = __ offset();
+
+  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+
+  // We must save a PC from within the stub as return PC
+  // C code doesn't store the LR where we expect the PC,
+  // so we would run into trouble upon stack walking.
+  __ get_PC(Z_R1_scratch);
+
+  unsigned int frame_complete = __ offset();
+
+  __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
+
+  __ call_VM_leaf(destination, Z_thread, Z_method);
+
+
+  // Set an oopmap for the call site.
+  // We need this not only for callee-saved registers, but also for volatile
+  // registers that the compiler might be keeping live across a safepoint.
+
+  oop_maps->add_gc_map((int)(frame_complete-start_off), map);
+
+  // clear last_Java_sp
+  __ reset_last_Java_frame();
+
+  // check for pending exceptions
+  Label pending;
+  __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
+  __ z_brne(pending);
+
+  // No pending exception: stash the continuation (returned in Z_R2 by the
+  // VM call) into Z_R1 before all argument registers are restored.
+  __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
+  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
+
+  // get the returned method
+  __ get_vm_result_2(Z_method);
+
+  // We are back to the original state on entry and ready to go.
+  __ z_br(Z_R1_scratch);
+
+  // Pending exception after the safepoint
+
+  __ bind(pending);
+
+  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
+
+  // exception pending => remove activation and forward to exception handler
+
+  __ z_lgr(Z_R2, Z_R0); // pending_exception
+  __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
+  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
+  __ z_br(Z_R1_scratch);
+
+  // -------------
+  // make sure all code is generated
+  masm->flush();
+
+  // return the blob
+  // Note: the frame size is passed in words; live_reg_frame_size() is in
+  // bytes, hence the division by wordSize.
+  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
+                                       oop_maps, true);
+
+}
+
+//------------------------------Montgomery multiplication------------------------
+//
+
+// Subtract 0:b from carry:a. Return carry.
+// a and b are arrays of len 64-bit words (least significant word first);
+// the multi-word value b is subtracted from a in place, and the final
+// borrow is folded into the returned carry.
+static unsigned long
+sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
+  unsigned long i, c = 8 * (unsigned long)(len - 1);  // c = byte offset of last word, reused as scratch
+  __asm__ __volatile__ (
+    "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
+    "LGHI   0, 8               \n" // index increment (for BRXLG)
+    "LGR    1, %[c]            \n" // index limit (for BRXLG)
+    "0:                        \n"
+    "LG     %[c], 0(%[i],%[a]) \n"
+    "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
+    "STG    %[c], 0(%[i],%[a]) \n"
+    "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
+    "SLBGR  %[c], %[c]         \n" // save carry - 1
+    : [i]"=&a"(i), [c]"+r"(c)
+    : [a]"a"(a), [b]"a"(b)
+    : "cc", "memory", "r0", "r1"
+ );
+  return carry + c;  // c is 0 or -1 here (see "save carry - 1" above)
+}
+
+// Multiply (unsigned) Long A by Long B, accumulating the double-
+// length result into the accumulator formed of T0, T1, and T2.
+// I.e. T2:T1:T0 += A[A_ind] * B[B_ind]. MLG delivers the 128-bit
+// product in the register pair r0 (high) / r1 (low).
+inline void MACC(unsigned long A[], long A_ind,
+                 unsigned long B[], long B_ind,
+                 unsigned long &T0, unsigned long &T1, unsigned long &T2) {
+  long A_si = 8 * A_ind,  // byte offsets of the two operand words
+       B_si = 8 * B_ind;
+  __asm__ __volatile__ (
+    "LG     1, 0(%[A_si],%[A]) \n"
+    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
+    "ALGR   %[T0], 1           \n" // T0 += low word (sets carry)
+    "LGHI   1, 0               \n" // r1 = 0
+    "ALCGR  %[T1], 0           \n" // T1 += high word + carry
+    "ALCGR  %[T2], 1           \n" // T2 += carry
+    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
+    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
+    : "cc", "r0", "r1"
+ );
+}
+
+// As above, but add twice the double-length result into the
+// accumulator, i.e. T2:T1:T0 += 2 * A[A_ind] * B[B_ind].
+// Used for the off-diagonal terms of a square, which occur pairwise.
+inline void MACC2(unsigned long A[], long A_ind,
+                  unsigned long B[], long B_ind,
+                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
+  const unsigned long zero = 0;
+  long A_si = 8 * A_ind,  // byte offsets of the two operand words
+       B_si = 8 * B_ind;
+  __asm__ __volatile__ (
+    "LG     1, 0(%[A_si],%[A]) \n"
+    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
+    "ALGR   %[T0], 1           \n" // first accumulation of the product
+    "ALCGR  %[T1], 0           \n"
+    "ALCGR  %[T2], %[zero]     \n"
+    "ALGR   %[T0], 1           \n" // second accumulation of the product
+    "ALCGR  %[T1], 0           \n"
+    "ALCGR  %[T2], %[zero]     \n"
+    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
+    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
+    : "cc", "r0", "r1"
+ );
+}
+
+// Fast Montgomery multiplication. The derivation of the algorithm is
+// in "A Cryptographic Library for the Motorola DSP56000,
+// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
+// Computes m = a * b * R^-1 mod n, with R = 2^(64*len), interleaving the
+// multiplication with the Montgomery reduction. All arrays hold len
+// 64-bit words, least significant word first; inv = -n^-1 mod 2^64.
+static void
+montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
+                    unsigned long m[], unsigned long inv, int len) {
+  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+  int i;
+
+  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+
+  // First pass (columns 0..len-1): accumulate column sums of a*b and m*n
+  // and pick the reduction words m[i].
+  for (i = 0; i < len; i++) {
+    int j;
+    for (j = 0; j < i; j++) {
+      MACC(a, j, b, i-j, t0, t1, t2);
+      MACC(m, j, n, i-j, t0, t1, t2);
+    }
+    MACC(a, i, b, 0, t0, t1, t2);
+    m[i] = t0 * inv;              // chosen so the column sum becomes divisible by 2^64
+    MACC(m, i, n, 0, t0, t1, t2);
+
+    assert(t0 == 0, "broken Montgomery multiply");
+
+    t0 = t1; t1 = t2; t2 = 0;     // shift the accumulator right one word
+  }
+
+  // Second pass (columns len..2*len-1): the column sums are the result words.
+  for (i = len; i < 2 * len; i++) {
+    int j;
+    for (j = i - len + 1; j < len; j++) {
+      MACC(a, j, b, i-j, t0, t1, t2);
+      MACC(m, j, n, i-j, t0, t1, t2);
+    }
+    m[i-len] = t0;
+    t0 = t1; t1 = t2; t2 = 0;
+  }
+
+  // Final conditional subtraction(s) of n bring the result into range.
+  while (t0) {
+    t0 = sub(m, n, t0, len);
+  }
+}
+
+// Fast Montgomery squaring. This uses asymptotically 25% fewer
+// multiplies so it should be up to 25% faster than Montgomery
+// multiplication. However, its loop control is more complex and it
+// may actually run slower on some machines.
+// Off-diagonal products a[j]*a[i-j] occur twice in a square and are
+// accumulated once via MACC2 (which adds the product twice); diagonal
+// products a[j]*a[j] occur once and use MACC.
+static void
+montgomery_square(unsigned long a[], unsigned long n[],
+                  unsigned long m[], unsigned long inv, int len) {
+  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+  int i;
+
+  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+
+  // First pass (columns 0..len-1): pick the reduction words m[i].
+  for (i = 0; i < len; i++) {
+    int j;
+    int end = (i+1)/2;              // half the a*a terms, the rest comes via MACC2
+    for (j = 0; j < end; j++) {
+      MACC2(a, j, a, i-j, t0, t1, t2);
+      MACC(m, j, n, i-j, t0, t1, t2);
+    }
+    if ((i & 1) == 0) {
+      MACC(a, j, a, j, t0, t1, t2); // diagonal term of an even column
+    }
+    for (; j < i; j++) {
+      MACC(m, j, n, i-j, t0, t1, t2);
+    }
+    m[i] = t0 * inv;                // make the column sum divisible by 2^64
+    MACC(m, i, n, 0, t0, t1, t2);
+
+    assert(t0 == 0, "broken Montgomery square");
+
+    t0 = t1; t1 = t2; t2 = 0;       // shift the accumulator right one word
+  }
+
+  // Second pass (columns len..2*len-1): the column sums are the result words.
+  for (i = len; i < 2*len; i++) {
+    int start = i-len+1;
+    int end = start + (len - start)/2;
+    int j;
+    for (j = start; j < end; j++) {
+      MACC2(a, j, a, i-j, t0, t1, t2);
+      MACC(m, j, n, i-j, t0, t1, t2);
+    }
+    if ((i & 1) == 0) {
+      MACC(a, j, a, j, t0, t1, t2); // diagonal term of an even column
+    }
+    for (; j < len; j++) {
+      MACC(m, j, n, i-j, t0, t1, t2);
+    }
+    m[i-len] = t0;
+    t0 = t1; t1 = t2; t2 = 0;
+  }
+
+  // Final conditional subtraction(s) of n bring the result into range.
+  while (t0) {
+    t0 = sub(m, n, t0, len);
+  }
+}
+
+// The threshold at which squaring is advantageous was determined
+// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
+// Value seems to be ok for other platforms, too.
+#define MONTGOMERY_SQUARING_THRESHOLD 64
+
+// Copy len longwords from s to d, word-swapping as we go. The
+// destination array is reversed.
+// Callers below use this to convert between the jint-array form of a
+// big integer and the least-significant-word-first longword arrays
+// expected by the Montgomery routines.
+static void reverse_words(unsigned long *s, unsigned long *d, int len) {
+  d += len;                       // start one past the end; fill d back-to-front
+  while(len-- > 0) {
+    d--;
+    unsigned long s_val = *s;
+    // Swap words in a longword on little endian machines.
+    // s390 is big-endian, so no swap is performed here.
+#ifdef VM_LITTLE_ENDIAN
+     Unimplemented();
+#endif
+    *d = s_val;
+    s++;
+  }
+}
+
+// Runtime entry for the BigInteger montgomeryMultiply intrinsic.
+// a_ints, b_ints, n_ints are jint arrays of length len (must be even);
+// inv = -n^-1 mod 2^64; the result is written to m_ints.
+void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
+                                        jint len, jlong inv,
+                                        jint *m_ints) {
+  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
+  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
+  int longwords = len/2;
+
+  // Make very sure we don't use so much space that the stack might
+  // overflow. 512 jints corresponds to an 16384-bit integer and
+  // will use here a total of 8k bytes of stack space.
+  int total_allocation = longwords * sizeof (unsigned long) * 4;
+  guarantee(total_allocation <= 8192, "must be");
+  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
+
+  // Local scratch arrays
+  unsigned long
+    *a = scratch + 0 * longwords,
+    *b = scratch + 1 * longwords,
+    *n = scratch + 2 * longwords,
+    *m = scratch + 3 * longwords;
+
+  // Bring the inputs into the word order expected by ::montgomery_multiply.
+  reverse_words((unsigned long *)a_ints, a, longwords);
+  reverse_words((unsigned long *)b_ints, b, longwords);
+  reverse_words((unsigned long *)n_ints, n, longwords);
+
+  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
+
+  // Convert the result back to the caller's representation.
+  reverse_words(m, (unsigned long *)m_ints, longwords);
+}
+
+// Runtime entry for the BigInteger montgomerySquare intrinsic.
+// a_ints, n_ints are jint arrays of length len (must be even);
+// inv = -n^-1 mod 2^64; the result is written to m_ints.
+// Falls back to montgomery_multiply(a, a, ...) below the squaring
+// threshold, where plain multiplication is faster.
+void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
+                                      jint len, jlong inv,
+                                      jint *m_ints) {
+  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
+  assert(len % 2 == 0, "array length in montgomery_square must be even");
+  int longwords = len/2;
+
+  // Make very sure we don't use so much space that the stack might
+  // overflow. 512 jints corresponds to an 16384-bit integer and
+  // will use here a total of 6k bytes of stack space.
+  // (The guarantee keeps the same 8k bound as montgomery_multiply.)
+  int total_allocation = longwords * sizeof (unsigned long) * 3;
+  guarantee(total_allocation <= 8192, "must be");
+  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
+
+  // Local scratch arrays
+  unsigned long
+    *a = scratch + 0 * longwords,
+    *n = scratch + 1 * longwords,
+    *m = scratch + 2 * longwords;
+
+  // Bring the inputs into the word order expected by the helpers.
+  reverse_words((unsigned long *)a_ints, a, longwords);
+  reverse_words((unsigned long *)n_ints, n, longwords);
+
+  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
+    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
+  } else {
+    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
+  }
+
+  // Convert the result back to the caller's representation.
+  reverse_words(m, (unsigned long *)m_ints, longwords);
+}
+
+// Spin-wait hint called from busy-wait loops. Intentionally a no-op on
+// s390; returns 0 (i.e. no pause was performed).
+extern "C"
+int SpinPause() {
+  return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,2563 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "registerSaver_s390.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/instanceOop.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+
+// Declaration and definition of StubGenerator (no .hpp file).
+// For a more detailed description of the stub routine structure
+// see the comment in stubRoutines.hpp.
+
+// Emit helper: in non-product builds, running with -XX:+Verbose makes every
+// use of "__" first emit a FILE_AND_LINE block comment, so each generated
+// instruction can be traced back to its source line in the disassembly.
+#ifdef PRODUCT
+#define __ _masm->
+#else
+#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#endif
+
+// BLOCK_COMMENT/BIND only annotate the disassembly when PrintAssembly is on.
+#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
+#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
+
+// -----------------------------------------------------------------------
+// Stub Code definitions
+
+class StubGenerator: public StubCodeGenerator {
+ private:
+
+  //----------------------------------------------------------------------
+  // Call stubs are used to call Java from C.
+
+  //
+  // Arguments:
+  //
+  //   R2        - call wrapper address     : address
+  //   R3        - result                   : intptr_t*
+  //   R4        - result type              : BasicType
+  //   R5        - method                   : method
+  //   R6        - frame mgr entry point    : address
+  //   [SP+160]  - parameter block          : intptr_t*
+  //   [SP+172]  - parameter count in words : int
+  //   [SP+176]  - thread                   : Thread*
+  //
+  address generate_call_stub(address& return_address) {
+    // Set up a new C frame, copy Java arguments, call frame manager
+    // or native_entry, and process result.
+
+    StubCodeMark mark(this, "StubRoutines", "call_stub");
+    address start = __ pc();
+
+    Register r_arg_call_wrapper_addr   = Z_ARG1;
+    Register r_arg_result_addr         = Z_ARG2;
+    Register r_arg_result_type         = Z_ARG3;
+    Register r_arg_method              = Z_ARG4;
+    Register r_arg_entry               = Z_ARG5;
+
+    // offsets to fp
+    #define d_arg_thread 176
+    #define d_arg_argument_addr 160
+    #define d_arg_argument_count 168+4
+
+    Register r_entryframe_fp           = Z_tmp_1;
+    Register r_top_of_arguments_addr   = Z_ARG4;
+    Register r_new_arg_entry = Z_R14;
+
+    // macros for frame offsets
+    #define call_wrapper_address_offset \
+               _z_entry_frame_locals_neg(call_wrapper_address)
+    #define result_address_offset \
+              _z_entry_frame_locals_neg(result_address)
+    #define result_type_offset \
+              _z_entry_frame_locals_neg(result_type)
+    #define arguments_tos_address_offset \
+              _z_entry_frame_locals_neg(arguments_tos_address)
+
+    {
+      //
+      // STACK on entry to call_stub:
+      //
+      //     F1      [C_FRAME]
+      //            ...
+      //
+
+      Register r_argument_addr              = Z_tmp_3;
+      Register r_argumentcopy_addr          = Z_tmp_4;
+      Register r_argument_size_in_bytes     = Z_ARG5;
+      Register r_frame_size                 = Z_R1;
+
+      Label arguments_copied;
+
+      // Save non-volatile registers to ABI of caller frame.
+      BLOCK_COMMENT("save registers, push frame {");
+      __ z_stmg(Z_R6, Z_R14, 16, Z_SP);
+      __ z_std(Z_F8, 96, Z_SP);
+      __ z_std(Z_F9, 104, Z_SP);
+      __ z_std(Z_F10, 112, Z_SP);
+      __ z_std(Z_F11, 120, Z_SP);
+      __ z_std(Z_F12, 128, Z_SP);
+      __ z_std(Z_F13, 136, Z_SP);
+      __ z_std(Z_F14, 144, Z_SP);
+      __ z_std(Z_F15, 152, Z_SP);
+
+      //
+      // Push ENTRY_FRAME including arguments:
+      //
+      //     F0      [TOP_IJAVA_FRAME_ABI]
+      //             [outgoing Java arguments]
+      //             [ENTRY_FRAME_LOCALS]
+      //     F1      [C_FRAME]
+      //             ...
+      //
+
+      // Calculate new frame size and push frame.
+      #define abi_plus_locals_size \
+                (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size)
+      if (abi_plus_locals_size % BytesPerWord == 0) {
+        // Preload constant part of frame size.
+        __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord);
+        // Keep copy of our frame pointer (caller's SP).
+        __ z_lgr(r_entryframe_fp, Z_SP);
+        // Add space required by arguments to frame size.
+        __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP);
+        // Move Z_ARG5 early, it will be used as a local.
+        __ z_lgr(r_new_arg_entry, r_arg_entry);
+        // Convert frame size from words to bytes.
+        __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord);
+        __ push_frame(r_frame_size, r_entryframe_fp,
+                      false/*don't copy SP*/, true /*frame size sign inverted*/);
+      } else {
+        guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)");
+      }
+      BLOCK_COMMENT("} save, push");
+
+      // Load argument registers for call.
+      BLOCK_COMMENT("prepare/copy arguments {");
+      __ z_lgr(Z_method, r_arg_method);
+      __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp);
+
+      // Calculate top_of_arguments_addr which will be tos (not prepushed) later.
+      // Simply use SP + frame::top_ijava_frame_size.
+      __ add2reg(r_top_of_arguments_addr,
+                 frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);
+
+      // Initialize call_stub locals (step 1).
+      // If the four locals are adjacent in the frame, store them with a
+      // single STMG; otherwise fall back to four separate stores.
+      if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) &&
+          (result_address_offset + BytesPerWord == result_type_offset)          &&
+          (result_type_offset + BytesPerWord == arguments_tos_address_offset)) {
+
+        __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr,
+                  call_wrapper_address_offset, r_entryframe_fp);
+      } else {
+        __ z_stg(r_arg_call_wrapper_addr,
+                 call_wrapper_address_offset, r_entryframe_fp);
+        __ z_stg(r_arg_result_addr,
+                 result_address_offset, r_entryframe_fp);
+        __ z_stg(r_arg_result_type,
+                 result_type_offset, r_entryframe_fp);
+        __ z_stg(r_top_of_arguments_addr,
+                 arguments_tos_address_offset, r_entryframe_fp);
+      }
+
+      // Copy Java arguments.
+
+      // Any arguments to copy?
+      __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
+      __ z_bre(arguments_copied);
+
+      // Prepare loop and copy arguments in reverse order.
+      {
+        // Calculate argument size in bytes.
+        __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);
+
+        // Get addr of first incoming Java argument.
+        __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);
+
+        // Let r_argumentcopy_addr point to last outgoing Java argument.
+        __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.
+
+        // Let r_argument_addr point to last incoming Java argument.
+        __ add2reg_with_index(r_argument_addr, -BytesPerWord,
+                              r_argument_size_in_bytes, r_argument_addr);
+
+        // Now loop while Z_R1 > 0 and copy arguments.
+        {
+          Label next_argument;
+          __ bind(next_argument);
+          // Mem-mem move.
+          __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
+          __ add2reg(r_argument_addr,    -BytesPerWord);
+          __ add2reg(r_argumentcopy_addr, BytesPerWord);
+          __ z_brct(Z_R1, next_argument);
+        }
+      }  // End of argument copy loop.
+
+      __ bind(arguments_copied);
+    }
+    BLOCK_COMMENT("} arguments");
+
+    BLOCK_COMMENT("call {");
+    {
+      // Call frame manager or native entry.
+
+      //
+      // Register state on entry to frame manager / native entry:
+      //
+      //   Z_ARG1 = r_top_of_arguments_addr  - intptr_t *sender tos (prepushed)
+      //                                       Lesp = (SP) + copied_arguments_offset - 8
+      //   Z_method                          - method
+      //   Z_thread                          - JavaThread*
+      //
+
+      // Here, the usual SP is the initial_caller_sp.
+      __ z_lgr(Z_R10, Z_SP);
+
+      // Z_esp points to the slot below the last argument.
+      __ z_lgr(Z_esp, r_top_of_arguments_addr);
+
+      //
+      // Stack on entry to frame manager / native entry:
+      //
+      //     F0      [TOP_IJAVA_FRAME_ABI]
+      //             [outgoing Java arguments]
+      //             [ENTRY_FRAME_LOCALS]
+      //     F1      [C_FRAME]
+      //             ...
+      //
+
+      // Do a light-weight C-call here, r_new_arg_entry holds the address
+      // of the interpreter entry point (frame manager or native entry)
+      // and save runtime-value of return_pc in return_address
+      // (call by reference argument).
+      return_address = __ call_stub(r_new_arg_entry);
+    }
+    BLOCK_COMMENT("} call");
+
+    {
+      BLOCK_COMMENT("restore registers {");
+      // Returned from frame manager or native entry.
+      // Now pop frame, process result, and return to caller.
+
+      //
+      // Stack on exit from frame manager / native entry:
+      //
+      //     F0      [ABI]
+      //             ...
+      //             [ENTRY_FRAME_LOCALS]
+      //     F1      [C_FRAME]
+      //             ...
+      //
+      // Just pop the topmost frame ...
+      //
+
+      Label ret_is_object;
+      Label ret_is_long;
+      Label ret_is_float;
+      Label ret_is_double;
+
+      // Restore frame pointer.
+      __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
+      // Pop frame. Done here to minimize stalls.
+      __ z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
+
+      // Reload some volatile registers which we've spilled before the call
+      // to frame manager / native entry.
+      // Access all locals via frame pointer, because we know nothing about
+      // the topmost frame's size.
+      __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
+      __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);
+
+      // Restore non-volatiles.
+      __ z_lmg(Z_R6, Z_R14, 16, Z_SP);
+      __ z_ld(Z_F8, 96, Z_SP);
+      __ z_ld(Z_F9, 104, Z_SP);
+      __ z_ld(Z_F10, 112, Z_SP);
+      __ z_ld(Z_F11, 120, Z_SP);
+      __ z_ld(Z_F12, 128, Z_SP);
+      __ z_ld(Z_F13, 136, Z_SP);
+      __ z_ld(Z_F14, 144, Z_SP);
+      __ z_ld(Z_F15, 152, Z_SP);
+      BLOCK_COMMENT("} restore");
+
+      //
+      // Stack on exit from call_stub:
+      //
+      //     0       [C_FRAME]
+      //             ...
+      //
+      // No call_stub frames left.
+      //
+
+      // All non-volatiles have been restored at this point!!
+
+      //------------------------------------------------------------------------
+      // The following code makes some assumptions on the T_<type> enum values.
+      // The enum is defined in globalDefinitions.hpp.
+      // The validity of the assumptions is tested as far as possible.
+      //   The assigned values should not be shuffled
+      //   T_BOOLEAN==4    - lowest used enum value
+      //   T_NARROWOOP==16 - largest used enum value
+      //------------------------------------------------------------------------
+      BLOCK_COMMENT("process result {");
+      Label firstHandler;
+      int   handlerLen= 8;
+#ifdef ASSERT
+      char  assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
+      __ z_chi(r_arg_result_type, T_BOOLEAN);
+      __ asm_assert_low(assertMsg, 0x0234);
+      __ z_chi(r_arg_result_type, T_NARROWOOP);
+      __ asm_assert_high(assertMsg, 0x0235);
+#endif
+      __ add2reg(r_arg_result_type, -T_BOOLEAN);          // Remove offset.
+      __ z_larl(Z_R1, firstHandler);                      // location of first handler
+      __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
+      // Dispatch: branch to firstHandler + 8*(result_type - T_BOOLEAN).
+      __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);
+
+      // Handler table: one 8-byte entry (store + return) per BasicType,
+      // in enum order. The guarantees re-verify that ordering at build time.
+      __ align(handlerLen);
+      __ bind(firstHandler);
+      // T_BOOLEAN:
+        guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp");
+        __ z_st(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_CHAR:
+        guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_st(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_FLOAT:
+        guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_ste(Z_FRET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_DOUBLE:
+        guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_std(Z_FRET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_BYTE:
+        guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_st(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_SHORT:
+        guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_st(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_INT:
+        guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_st(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_LONG:
+        guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_stg(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_OBJECT:
+        guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_stg(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_ARRAY:
+        guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_stg(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_VOID:
+        guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_stg(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_ADDRESS:
+        guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_stg(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      // T_NARROWOOP:
+        guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp");
+        __ z_st(Z_RET, 0, r_arg_result_addr);
+        __ z_br(Z_R14); // Return to caller.
+        __ align(handlerLen);
+      BLOCK_COMMENT("} process result");
+    }
+    return start;
+  }
+
+  // Return point for a Java call if there's an exception thrown in
+  // Java code. The exception is caught and transformed into a
+  // pending exception stored in JavaThread that can be tested from
+  // within the VM.
+  address generate_catch_exception() {
+    StubCodeMark mark(this, "StubRoutines", "catch_exception");
+
+    address start = __ pc();
+
+    //
+    // Registers alive
+    //
+    //   Z_thread
+    //   Z_ARG1 - address of pending exception
+    //   Z_ARG2 - return address in call stub
+    //
+
+    const Register exception_file = Z_R0;
+    const Register exception_line = Z_R1;
+
+    // Record the (VM source) location at which the exception was caught.
+    __ load_const_optimized(exception_file, (void*)__FILE__);
+    __ load_const_optimized(exception_line, (void*)__LINE__);
+
+    // Make the exception pending in the JavaThread.
+    __ z_stg(Z_ARG1, thread_(pending_exception));
+    // Store into `char *'.
+    __ z_stg(exception_file, thread_(exception_file));
+    // Store into `int'.
+    __ z_st(exception_line, thread_(exception_line));
+
+    // Complete return to VM.
+    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
+
+    // Continue in call stub.
+    __ z_br(Z_ARG2);
+
+    return start;
+  }
+
+  // Continuation point for runtime calls returning with a pending
+  // exception. The pending exception check happened in the runtime
+  // or native call stub. The pending exception in Thread is
+  // converted into a Java-level exception.
+  //
+  // Read:
+  //   Z_R14: pc the runtime library callee wants to return to.
+  //   Since the exception occurred in the callee, the return pc
+  //   from the point of view of Java is the exception pc.
+  //
+  // Invalidate:
+  //   Volatile registers (except below).
+  //
+  // Update:
+  //   Z_ARG1: exception
+  //   (Z_R14 is unchanged and is live out).
+  //
+  address generate_forward_exception() {
+    StubCodeMark mark(this, "StubRoutines", "forward_exception");
+    address start = __ pc();
+
+    #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
+#ifdef ASSERT
+    // Get pending exception oop.
+    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);
+
+    // Make sure that this code is only executed if there is a pending exception.
+    {
+      Label L;
+      __ z_ltgr(Z_ARG1, Z_ARG1);
+      __ z_brne(L);
+      __ stop("StubRoutines::forward exception: no pending exception (1)");
+      __ bind(L);
+    }
+
+    __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
+#endif
+
+    // The return pc of the runtime callee is the exception pc from Java's
+    // point of view; pass it as second argument to the handler lookup.
+    __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
+    __ save_return_pc();
+    __ push_frame_abi160(0);
+    // Find exception handler.
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
+                    Z_thread,
+                    Z_ARG2);
+    // Copy handler's address.
+    __ z_lgr(Z_R1, Z_RET);
+    __ pop_frame();
+    __ restore_return_pc();
+
+    // Set up the arguments for the exception handler:
+    // - Z_ARG1: exception oop
+    // - Z_ARG2: exception pc
+
+    // Load pending exception oop.
+    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);
+
+    // The exception pc is the return address in the caller,
+    // must load it into Z_ARG2
+    __ z_lgr(Z_ARG2, Z_R14);
+
+#ifdef ASSERT
+    // Make sure exception is set.
+    { Label L;
+      __ z_ltgr(Z_ARG1, Z_ARG1);
+      __ z_brne(L);
+      __ stop("StubRoutines::forward exception: no pending exception (2)");
+      __ bind(L);
+    }
+#endif
+    // Clear the pending exception.
+    __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
+    // Jump to exception handler
+    __ z_br(Z_R1 /*handler address*/);
+
+    return start;
+
+    #undef pending_exception_offset
+  }
+
+  // Continuation point for throwing of implicit exceptions that are
+  // not handled in the current activation. Fabricates an exception
+  // oop and initiates normal exception dispatching in this
+  // frame. Only callee-saved registers are preserved (through the
+  // normal RegisterMap handling). If the compiler
+  // needs all registers to be preserved between the fault point and
+  // the exception handler then it must assume responsibility for that
+  // in AbstractCompiler::continuation_for_implicit_null_exception or
+  // continuation_for_implicit_division_by_zero_exception. All other
+  // implicit exceptions (e.g., NullPointerException or
+  // AbstractMethodError on entry) are either at call sites or
+  // otherwise assume that stack unwinding will be initiated, so
+  // caller saved registers were assumed volatile in the compiler.
+
+  // Note that we generate only this stub into a RuntimeStub, because
+  // it needs to be properly traversed and ignored during GC, so we
+  // change the meaning of the "__" macro within this method.
+
+  // Note: the routine set_pc_not_at_call_for_caller in
+  // SharedRuntime.cpp requires that this code be generated into a
+  // RuntimeStub.
+#undef __
+#define __ masm->
+
+  // Generates a RuntimeStub that calls runtime_entry (with Z_thread and up to
+  // two extra arguments) to post an exception, then tail-jumps to the
+  // forward_exception entry. Returns the stub's entry point.
+  address generate_throw_exception(const char* name, address runtime_entry,
+                                   bool restore_saved_exception_pc,
+                                   Register arg1 = noreg, Register arg2 = noreg) {
+    int insts_size = 256;
+    int locs_size  = 0;
+    CodeBuffer      code(name, insts_size, locs_size);
+    MacroAssembler* masm = new MacroAssembler(&code);
+    int framesize_in_bytes;
+    address start = __ pc();
+
+    __ save_return_pc();
+    framesize_in_bytes = __ push_frame_abi160(0);
+
+    address frame_complete_pc = __ pc();
+    if (restore_saved_exception_pc) {
+      __ unimplemented("StubGenerator::throw_exception", 74);
+    }
+
+    // Note that we always have a runtime stub frame on the top of stack at this point.
+    __ get_PC(Z_R1);
+    __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);
+
+    // Do the call.
+    BLOCK_COMMENT("call runtime_entry");
+    __ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2);
+
+    __ reset_last_Java_frame();
+
+#ifdef ASSERT
+    // Make sure that this code is only executed if there is a pending exception.
+    { Label L;
+      __ z_lg(Z_R0,
+                in_bytes(Thread::pending_exception_offset()),
+                Z_thread);
+      __ z_ltgr(Z_R0, Z_R0);
+      __ z_brne(L);
+      __ stop("StubRoutines::throw_exception: no pending exception");
+      __ bind(L);
+    }
+#endif
+
+    __ pop_frame();
+    __ restore_return_pc();
+
+    // Hand off to the common exception-forwarding stub.
+    __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
+    __ z_br(Z_R1);
+
+    RuntimeStub* stub =
+      RuntimeStub::new_runtime_stub(name, &code,
+                                    frame_complete_pc - start,
+                                    framesize_in_bytes/wordSize,
+                                    NULL /*oop_maps*/, false);
+
+    return stub->entry_point();
+  }
+
+#undef __
+#ifdef PRODUCT
+#define __ _masm->
+#else
+#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#endif
+
+  //----------------------------------------------------------------------
+  // The following routine generates a subroutine to throw an asynchronous
+  // UnknownError when an unsafe access gets a fault that could not be
+  // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.)
+  //
+  // Arguments:
+  //   trapping PC: ??
+  //
+  // Results:
+  //   Posts an asynchronous exception, skips the trapping instruction.
+  //
+  address generate_handler_for_unsafe_access() {
+    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
+    {
+      address start = __ pc();
+      // Not implemented on this platform: emits an "unimplemented" trap.
+      __ unimplemented("StubRoutines::handler_for_unsafe_access", 86);
+      return start;
+    }
+  }
+
+  // Support for uint StubRoutine::zarch::partial_subtype_check(Klass
+  // sub, Klass super);
+  //
+  // Arguments:
+  //   ret  : Z_RET, returned
+  //   sub  : Z_ARG2, argument, not changed
+  //   super: Z_ARG3, argument, not changed
+  //
+  //   raddr: Z_R14, blown by call
+  //
+  address generate_partial_subtype_check() {
+    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
+    Label miss;
+
+    address start = __ pc();
+
+    const Register Rsubklass   = Z_ARG2; // subklass
+    const Register Rsuperklass = Z_ARG3; // superklass
+
+    // No args, but tmp registers that are killed.
+    const Register Rlength     = Z_ARG4; // cache array length
+    const Register Rarray_ptr  = Z_ARG5; // Current value from cache array.
+
+    if (UseCompressedOops) {
+      assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
+    }
+
+    // Always take the slow path (see SPARC).
+    __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
+                                     Rarray_ptr, Rlength, NULL, &miss);
+
+    // Match falls through here.
+    __ clear_reg(Z_RET);               // Zero indicates a match. Set EQ flag in CC.
+    __ z_br(Z_R14);
+
+    __ BIND(miss);
+    __ load_const_optimized(Z_RET, 1); // One indicates a miss.
+    __ z_ltgr(Z_RET, Z_RET);           // Set NE flag in CC.
+    __ z_br(Z_R14);
+
+    return start;
+  }
+
+  // Return address of code to be called from code generated by
+  // MacroAssembler::verify_oop.
+  //
+  // Don't generate, rather use C++ code.
+  // Intentionally generates no code: oop verification is done in C++ instead.
+  // Returns 0 so callers can detect that no stub exists.
+  address generate_verify_oop_subroutine() {
+    // Don't generate a StubCodeMark, because no code is generated!
+    // Generating the mark triggers notifying the oprofile jvmti agent
+    // about the dynamic code generation, but the stub without
+    // code (code_size == 0) confuses opjitconv
+    // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
+
+    address start = 0;
+    return start;
+  }
+
+  // Generate pre-write barrier for array.
+  //
+  // Input:
+  //    addr  - register containing starting address
+  //    count - register containing element count
+  //
+  // The input registers are overwritten.
+  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
+
+    BarrierSet* const bs = Universe::heap()->barrier_set();
+    switch (bs->kind()) {
+      case BarrierSet::G1SATBCTLogging:
+        // With G1, don't generate the call if we statically know that the target is uninitialized.
+        if (!dest_uninitialized) {
+          // Is marking active?
+          Label filtered;
+          Register Rtmp1 = Z_R0;
+          const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             SATBMarkQueue::byte_offset_of_active());
+          // The "active" flag may be stored as a 4-byte int or a single byte.
+          if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+            __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
+          } else {
+            guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+            __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
+          }
+          __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
+
+          // Save/restore the argument registers around the runtime call
+          // instead of pushing a full frame.
+          // __ push_frame_abi160(0);
+          (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), addr, count);
+          (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
+          // __ pop_frame();
+
+          __ bind(filtered);
+        }
+        break;
+      case BarrierSet::CardTableForRS:
+      case BarrierSet::CardTableExtension:
+      case BarrierSet::ModRef:
+        // No pre-barrier needed for these barrier sets.
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+
+  // Generate post-write barrier for array.
+  //
+  // Input:
+  //    addr  - register containing starting address
+  //    count - register containing element count
+  //
+  // The input registers are overwritten.
+  void gen_write_ref_array_post_barrier(Register addr, Register count, bool branchToEnd) {
+    BarrierSet* const bs = Universe::heap()->barrier_set();
+    switch (bs->kind()) {
+      case BarrierSet::G1SATBCTLogging:
+        {
+          if (branchToEnd) {
+            // __ push_frame_abi160(0);
+            (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
+            __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
+            (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
+            // __ pop_frame();
+          } else {
+            // Tail call: call c and return to stub caller.
+            address entry_point = CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
+            if (Z_ARG1 != addr) __ z_lgr(Z_ARG1, addr);
+            if (Z_ARG2 != count) __ z_lgr(Z_ARG2, count);
+            __ load_const(Z_R1, entry_point);
+            __ z_br(Z_R1); // Branch without linking, callee will return to stub caller.
+          }
+        }
+        break;
+      case BarrierSet::CardTableForRS:
+      case BarrierSet::CardTableExtension:
+        // These cases formerly known as
+        //   void array_store_check(Register addr, Register count, bool branchToEnd).
+        // Dirty the card table bytes covering [addr, addr+count*BytesPerHeapOop).
+        {
+          NearLabel doXC, done;
+          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+          assert_different_registers(Z_R0, Z_R1, addr, count);
+
+          // Nothing to do if count <= 0.
+          if (branchToEnd) {
+            __ compare64_and_branch(count, (intptr_t) 0, Assembler::bcondNotHigh, done);
+          } else {
+            __ z_ltgr(count, count);
+            __ z_bcr(Assembler::bcondNotPositive, Z_R14);
+          }
+
+          // Note: We can't combine the shifts. We could lose a carry
+          // from calculating the array end address.
+          // count = (count-1)*BytesPerHeapOop + addr
+          // Count holds addr of last oop in array then.
+          __ z_sllg(count, count, LogBytesPerHeapOop);
+          __ add2reg_with_index(count, -BytesPerHeapOop, count, addr);
+
+          // Get base address of card table.
+          __ load_const_optimized(Z_R1, (address)ct->byte_map_base);
+
+          // count = (count>>shift) - (addr>>shift)
+          __ z_srlg(addr,  addr,  CardTableModRefBS::card_shift);
+          __ z_srlg(count, count, CardTableModRefBS::card_shift);
+
+          // Prefetch first elements of card table for update.
+          if (VM_Version::has_Prefetch()) {
+            __ z_pfd(0x02, 0, addr, Z_R1);
+          }
+
+          // Special case: clear just one byte.
+          __ clear_reg(Z_R0, true, false);  // Used for doOneByte.
+          __ z_sgr(count, addr);            // Count = n-1 now, CC used for brc below.
+          __ z_stc(Z_R0, 0, addr, Z_R1);    // Must preserve CC from z_sgr.
+          if (branchToEnd) {
+            __ z_brz(done);
+          } else {
+            __ z_bcr(Assembler::bcondZero, Z_R14);
+          }
+
+          // Ranges of at most 256 bytes are cleared with XC, longer ones with MVCLE.
+          __ z_cghi(count, 255);
+          __ z_brnh(doXC);
+
+          // MVCLE: clear a long area.
+          // Start addr of card table range = base + addr.
+          // # bytes in    card table range = (count + 1)
+          __ add2reg_with_index(Z_R0, 0, Z_R1, addr);
+          __ add2reg(Z_R1, 1, count);
+
+          // dirty hack:
+          // There are just two callers. Both pass
+          // count in Z_ARG3 = Z_R4
+          // addr  in Z_ARG2 = Z_R3
+          // ==> use Z_ARG2 as src len reg = 0
+          //         Z_ARG1 as src addr (ignored)
+          assert(count == Z_ARG3, "count: unexpected register number");
+          assert(addr  == Z_ARG2, "addr:  unexpected register number");
+          __ clear_reg(Z_ARG2, true, false);
+
+          __ MacroAssembler::move_long_ext(Z_R0, Z_ARG1, 0);
+
+          if (branchToEnd) {
+            __ z_bru(done);
+          } else {
+            __ z_bcr(Assembler::bcondAlways, Z_R14);
+          }
+
+          // XC: clear a short area.
+          Label XC_template; // Instr template, never exec directly!
+          __ bind(XC_template);
+          __ z_xc(0, 0, addr, 0, addr);
+
+          __ bind(doXC);
+          // start addr of card table range = base + addr
+          // end   addr of card table range = base + addr + count
+          __ add2reg_with_index(addr, 0, Z_R1, addr);
+
+          // Execute the XC template with the dynamic length in 'count'.
+          if (VM_Version::has_ExecuteExtensions()) {
+            __ z_exrl(count, XC_template);   // Execute XC with var. len.
+          } else {
+            __ z_larl(Z_R1, XC_template);
+            __ z_ex(count, 0, Z_R0, Z_R1);   // Execute XC with var. len.
+          }
+          if (!branchToEnd) {
+            __ z_br(Z_R14);
+          }
+
+          __ bind(done);
+        }
+        break;
+      case BarrierSet::ModRef:
+        if (!branchToEnd) { __ z_br(Z_R14); }
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+
+
+  // This is to test that the count register contains a positive int value.
+  // Required because C2 does not respect int to long conversion for stub calls.
+  // Debug-only: emits code under ASSERT, expands to nothing in product builds.
+  void assert_positive_int(Register count) {
+#ifdef ASSERT
+    __ z_srag(Z_R0, count, 31);  // Just leave the sign (must be zero) in Z_R0.
+    __ asm_assert_eq("missing zero extend", 0xAFFE);
+#endif
+  }
+
+  //  Generate overlap test for array copy stubs.
+  //  Branches to the "normal" copy stub (entry address passed in
+  //  disjoint_copy_target) when no destructive overlap is detected.
+  //  Falls through otherwise, so the caller of array_overlap_test can
+  //  emit the overlap-capable copy code.
+  //
+  //  Input:
+  //    Z_ARG1    - from
+  //    Z_ARG2    - to
+  //    Z_ARG3    - element count
+  void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
+    // to <= from: copying upward can never destroy not-yet-read source bytes.
+    __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
+                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);
+
+    // Compute first byte after "from" range into Z_R1.
+    Register byte_count = Z_ARG3;
+    if (log2_elem_size > 0) {
+      __ z_sllg(Z_R1, Z_ARG3, log2_elem_size);  // Scale element count to byte count.
+      byte_count = Z_R1;
+    }
+    __ add2reg_with_index(Z_R1, 0, byte_count, Z_ARG1);
+
+    // to >= from + size: the ranges do not overlap at all.
+    __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh,
+                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);
+
+    // Destructive overlap: let caller generate code for that.
+  }
+
+  //  Generate stub for disjoint array copy. If "aligned" is true, the
+  //  "from" and "to" addresses are assumed to be heapword aligned.
+  //
+  //  Arguments for generated stub:
+  //      from:  Z_ARG1
+  //      to:    Z_ARG2
+  //      count: Z_ARG3 treated as signed
+  //
+  //  branchToEnd: if true, all exit paths branch to the local "done" label;
+  //               if false, they return directly via Z_R14.
+  //  restoreArgs: if true, Z_ARG1..Z_ARG3 are reconstructed to their entry
+  //               values before leaving (needed when the caller emits a
+  //               post barrier that consumes them).
+  void generate_disjoint_copy(bool aligned, int element_size,
+                              bool branchToEnd,
+                              bool restoreArgs) {
+    // This is the zarch specific stub generator for general array copy tasks.
+    // It has the following prereqs and features:
+    //
+    // - No destructive overlap allowed (else unpredictable results).
+    // - Destructive overlap does not exist if the leftmost byte of the target
+    //   does not coincide with any of the source bytes (except the leftmost).
+    //
+    //   Register usage upon entry:
+    //      Z_ARG1 == Z_R2 :   address of source array
+    //      Z_ARG2 == Z_R3 :   address of target array
+    //      Z_ARG3 == Z_R4 :   length of operands (# of elements on entry)
+    //
+    // Register usage within the generator:
+    // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len).
+    //                 Used as pair register operand in complex moves, scratch registers anyway.
+    // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg).
+    //                  Same as R0/R1, but no scratch register.
+    // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine,
+    //                          but they might get temporarily overwritten.
+
+    Register  save_reg    = Z_ARG4;   // (= Z_R5), holds original target operand address for restore.
+
+    {
+      Register   llen_reg = Z_R1;     // Holds left operand len (odd reg).
+      Register  laddr_reg = Z_R0;     // Holds left operand addr (even reg), overlaps with data_reg.
+      Register   rlen_reg = Z_R5;     // Holds right operand len (odd reg), overlaps with save_reg.
+      Register  raddr_reg = Z_R4;     // Holds right operand addr (even reg), overlaps with len_reg.
+
+      Register   data_reg = Z_R0;     // Holds copied data chunk in alignment process and copy loop.
+      Register    len_reg = Z_ARG3;   // Holds operand len (#elements at entry, #bytes shortly after).
+      Register    dst_reg = Z_ARG2;   // Holds left (target)  operand addr.
+      Register    src_reg = Z_ARG1;   // Holds right (source) operand addr.
+
+      Label     doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate;
+      Label     doMVCUnrolled;
+      NearLabel doMVC,  doMVCgeneral, done;
+      Label     MVC_template;
+      address   pcMVCblock_b, pcMVCblock_e;
+
+      // Flags recording which copy modes get code generated below.
+      bool      usedMVCLE       = true;
+      bool      usedMVCLOOP     = true;
+      bool      usedMVCUnrolled = false;
+      bool      usedMVC         = false;
+      bool      usedMVCgeneral  = false;
+
+      int       stride;
+      Register  stride_reg;
+      Register  ix_reg;
+
+      assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2");
+      unsigned int log2_size = exact_log2(element_size);
+
+      switch (element_size) {
+        case 1:  BLOCK_COMMENT("ARRAYCOPY DISJOINT byte  {"); break;
+        case 2:  BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break;
+        case 4:  BLOCK_COMMENT("ARRAYCOPY DISJOINT int   {"); break;
+        case 8:  BLOCK_COMMENT("ARRAYCOPY DISJOINT long  {"); break;
+        default: BLOCK_COMMENT("ARRAYCOPY DISJOINT       {"); break;
+      }
+
+      assert_positive_int(len_reg);
+
+      BLOCK_COMMENT("preparation {");
+
+      // No copying if len <= 0.
+      if (branchToEnd) {
+        __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
+      } else {
+        if (VM_Version::has_CompareBranch()) {
+          __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
+        } else {
+          __ z_ltgr(len_reg, len_reg);
+          __ z_bcr(Assembler::bcondNotPositive, Z_R14);
+        }
+      }
+
+      // Prefetch just one cache line. Speculative opt for short arrays.
+      // Do not use Z_R1 in prefetch. Is undefined here.
+      if (VM_Version::has_Prefetch()) {
+        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
+        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
+      }
+
+      BLOCK_COMMENT("} preparation");
+
+      // Save args only if really needed.
+      // Keep len test local to branch. Is generated only once.
+
+      BLOCK_COMMENT("mode selection {");
+
+      // Special handling for arrays with only a few elements.
+      // Nothing fancy: just an executed MVC.
+      if (log2_size > 0) {
+        __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
+      }
+      if (element_size != 8) {
+        __ z_cghi(len_reg, 256/element_size);
+        __ z_brnh(doMVC);
+        usedMVC = true;
+      }
+      if (element_size == 8) { // Long and oop arrays are always aligned.
+        __ z_cghi(len_reg, 256/element_size);
+        __ z_brnh(doMVCUnrolled);
+        usedMVCUnrolled = true;
+      }
+
+      // Prefetch another cache line. We, for sure, have more than one line to copy.
+      if (VM_Version::has_Prefetch()) {
+        __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
+        __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
+      }
+
+      if (restoreArgs) {
+        // Remember entry value of ARG2 to restore all arguments later from that knowledge.
+        __ z_lgr(save_reg, dst_reg);
+      }
+
+      __ z_cghi(len_reg, 4096/element_size);
+      if (log2_size == 0) {
+        __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
+      }
+      __ z_brnh(doMVCLOOP);
+
+      // Fall through to MVCLE case.
+
+      BLOCK_COMMENT("} mode selection");
+
+      // MVCLE: for long arrays
+      //   DW aligned: Best performance for sizes > 4kBytes.
+      //   unaligned:  Least complex for sizes > 256 bytes.
+      if (usedMVCLE) {
+        BLOCK_COMMENT("mode MVCLE {");
+
+        // Setup registers for mvcle.
+        //__ z_lgr(llen_reg, len_reg);// r1 <- r4  #bytes already in Z_R1, aka llen_reg.
+        __ z_lgr(laddr_reg, dst_reg); // r0 <- r3
+        __ z_lgr(raddr_reg, src_reg); // r4 <- r2
+        __ z_lgr(rlen_reg, llen_reg); // r5 <- r1
+
+        __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0);    // special: bypass cache
+        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache.
+        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);
+
+        if (restoreArgs) {
+          // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs.
+          // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required.
+          // Len_reg (Z_ARG3) is destroyed and must be restored.
+          __ z_slgr(laddr_reg, dst_reg);    // copied #bytes
+          if (log2_size > 0) {
+            __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
+          } else {
+            __ z_lgr(Z_ARG3, laddr_reg);
+          }
+        }
+        if (branchToEnd) {
+          __ z_bru(done);
+        } else {
+          __ z_br(Z_R14);
+        }
+        BLOCK_COMMENT("} mode MVCLE");
+      }
+      // No fallthru possible here.
+
+      //  MVCUnrolled: for short, aligned arrays.
+
+      if (usedMVCUnrolled) {
+        BLOCK_COMMENT("mode MVC unrolled {");
+        stride = 8;
+
+        // Generate unrolled MVC instructions.
+        for (int ii = 32; ii > 1; ii--) {
+          __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy
+          if (branchToEnd) {
+            __ z_bru(done);
+          } else {
+            __ z_br(Z_R14);
+          }
+        }
+
+        pcMVCblock_b = __ pc();
+        __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy
+        if (branchToEnd) {
+          __ z_bru(done);
+        } else {
+          __ z_br(Z_R14);
+        }
+
+        pcMVCblock_e = __ pc();
+        Label MVC_ListEnd;
+        __ bind(MVC_ListEnd);
+
+        // This is an absolute fast path:
+        // - Array len in bytes must be not greater than 256.
+        // - Array len in bytes must be an integer mult of DW
+        //   to save expensive handling of trailing bytes.
+        // - Argument restore is not done,
+        //   i.e. previous code must not alter arguments (this code doesn't either).
+
+        __ bind(doMVCUnrolled);
+
+        // Avoid mul, prefer shift where possible.
+        // Combine shift right (for #DW) with shift left (for block size).
+        // Set CC for zero test below (asm_assert).
+        // Note: #bytes comes in Z_R1, #DW in len_reg.
+        unsigned int MVCblocksize    = pcMVCblock_e - pcMVCblock_b;
+        unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).
+
+        if (log2_size > 0) { // Len was scaled into Z_R1.
+          switch (MVCblocksize) {
+
+            case  8: logMVCblocksize = 3;
+                     __ z_ltgr(Z_R0, Z_R1); // #bytes is index
+                     break;                 // reasonable size, use shift
+
+            case 16: logMVCblocksize = 4;
+                     __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size);
+                     break;                 // reasonable size, use shift
+
+            default: logMVCblocksize = 0;
+                     __ z_ltgr(Z_R0, len_reg); // #DW for mul
+                     break;                 // all other sizes: use mul
+          }
+        } else {
+          guarantee(log2_size, "doMVCUnrolled: only for DW entities");
+        }
+
+        // This test (and branch) is redundant. Previous code makes sure that
+        //  - element count > 0
+        //  - element size == 8.
+        // Thus, len reg should never be zero here. We insert an asm_assert() here,
+        // just to double-check and to be on the safe side.
+        __ asm_assert(false, "zero len cannot occur", 99);
+
+        __ z_larl(Z_R1, MVC_ListEnd);        // Get addr of last instr block.
+        // Avoid mul, prefer shift where possible.
+        if (logMVCblocksize == 0) {
+          __ z_mghi(Z_R0, MVCblocksize);
+        }
+        __ z_slgr(Z_R1, Z_R0);
+        __ z_br(Z_R1);
+        BLOCK_COMMENT("} mode MVC unrolled");
+      }
+      // No fallthru possible here.
+
+      // MVC execute template
+      // Must always generate. Usage may be switched on below.
+      // There is no suitable place after here to put the template.
+      // Executed via EX/EXRL with the length patched in at runtime.
+      __ bind(MVC_template);
+      __ z_mvc(0,0,dst_reg,0,src_reg);      // Instr template, never exec directly!
+
+
+      // MVC Loop: for medium-sized arrays
+
+      // Only for DW aligned arrays (src and dst).
+      // #bytes to copy must be at least 256!!!
+      // Non-aligned cases handled separately.
+      stride     = 256;
+      stride_reg = Z_R1;   // Holds #bytes when control arrives here.
+      ix_reg     = Z_ARG3; // Alias for len_reg.
+
+
+      if (usedMVCLOOP) {
+        BLOCK_COMMENT("mode MVC loop {");
+        __ bind(doMVCLOOP);
+
+        __ z_lcgr(ix_reg, Z_R1);         // Ix runs from -(n-2)*stride to 1*stride (inclusive).
+        __ z_llill(stride_reg, stride);
+        __ add2reg(ix_reg, 2*stride);    // Thus: increment ix by 2*stride.
+
+        __ bind(doMVCLOOPiterate);
+          __ z_mvc(0, stride-1, dst_reg, 0, src_reg);
+          __ add2reg(dst_reg, stride);
+          __ add2reg(src_reg, stride);
+          __ bind(doMVCLOOPcount);
+          __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate);
+
+        // Don't use add2reg() here, since we must set the condition code!
+        __ z_aghi(ix_reg, -2*stride);       // Compensate incr from above: zero diff means "all copied".
+
+        if (restoreArgs) {
+          __ z_lcgr(Z_R1, ix_reg);          // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
+          __ z_brnz(doMVCgeneral);          // We're not done yet, ix_reg is not zero.
+
+          // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
+          __ z_slgr(dst_reg, save_reg);     // copied #bytes
+          __ z_slgr(src_reg, dst_reg);      // = ARG1 (now restored)
+          if (log2_size) {
+            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
+          } else {
+            __ z_lgr(Z_ARG3, dst_reg);
+          }
+          __ z_lgr(Z_ARG2, save_reg);       // ARG2 now restored.
+
+          if (branchToEnd) {
+            __ z_bru(done);
+          } else {
+            __ z_br(Z_R14);
+          }
+
+        } else {
+            if (branchToEnd) {
+              __ z_brz(done);                        // CC set by aghi instr.
+          } else {
+              __ z_bcr(Assembler::bcondZero, Z_R14); // We're all done if zero.
+            }
+
+          __ z_lcgr(Z_R1, ix_reg);    // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
+          // __ z_bru(doMVCgeneral);  // fallthru
+        }
+        usedMVCgeneral = true;
+        BLOCK_COMMENT("} mode MVC loop");
+      }
+      // Fallthru to doMVCgeneral
+
+      // MVCgeneral: for short, unaligned arrays, after other copy operations
+
+      // Somewhat expensive due to use of EX instruction, but simple.
+      if (usedMVCgeneral) {
+        BLOCK_COMMENT("mode MVC general {");
+        __ bind(doMVCgeneral);
+
+        __ add2reg(len_reg, -1, Z_R1);             // Get #bytes-1 for EXECUTE.
+        if (VM_Version::has_ExecuteExtensions()) {
+          __ z_exrl(len_reg, MVC_template);        // Execute MVC with variable length.
+        } else {
+          __ z_larl(Z_R1, MVC_template);           // Get addr of instr template.
+          __ z_ex(len_reg, 0, Z_R0, Z_R1);         // Execute MVC with variable length.
+        }                                          // penalty: 9 ticks
+
+        if (restoreArgs) {
+          // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg
+          __ z_slgr(dst_reg, save_reg);            // Copied #bytes without the "doMVCgeneral" chunk
+          __ z_slgr(src_reg, dst_reg);             // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk
+          __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet.
+          if (log2_size) {
+            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3
+          } else {
+             __ z_lgr(Z_ARG3, dst_reg);
+          }
+          __ z_lgr(Z_ARG2, save_reg);              // ARG2 now restored.
+        }
+
+        if (usedMVC) {
+          if (branchToEnd) {
+            __ z_bru(done);
+          } else {
+            __ z_br(Z_R14);
+        }
+        } else {
+          if (!branchToEnd) __ z_br(Z_R14);
+        }
+        BLOCK_COMMENT("} mode MVC general");
+      }
+      // Fallthru possible if following block not generated.
+
+      // MVC: for short, unaligned arrays
+
+      // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks.
+      // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4.
+      if (usedMVC) {
+        BLOCK_COMMENT("mode MVC {");
+        __ bind(doMVC);
+
+        // get #bytes-1 for EXECUTE
+        if (log2_size) {
+          __ add2reg(Z_R1, -1);                // Length was scaled into Z_R1.
+        } else {
+          __ add2reg(Z_R1, -1, len_reg);       // Length was not scaled.
+        }
+
+        if (VM_Version::has_ExecuteExtensions()) {
+          __ z_exrl(Z_R1, MVC_template);       // Execute MVC with variable length.
+        } else {
+          __ z_lgr(Z_R0, Z_R5);                // Save ARG4, may be unnecessary.
+          __ z_larl(Z_R5, MVC_template);       // Get addr of instr template.
+          __ z_ex(Z_R1, 0, Z_R0, Z_R5);        // Execute MVC with variable length.
+          __ z_lgr(Z_R5, Z_R0);                // Restore ARG4, may be unnecessary.
+        }
+
+        if (!branchToEnd) {
+          __ z_br(Z_R14);
+        }
+        BLOCK_COMMENT("} mode MVC");
+      }
+
+      __ bind(done);
+
+      switch (element_size) {
+        case 1:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break;
+        case 2:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break;
+        case 4:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT int  "); break;
+        case 8:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break;
+        default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT      "); break;
+      }
+    }
+  }
+
+  // Generate stub for conjoint array copy. If "aligned" is true, the
+  // "from" and "to" addresses are assumed to be heapword aligned.
+  //
+  // Arguments for generated stub:
+  //   from:  Z_ARG1
+  //   to:    Z_ARG2
+  //   count: Z_ARG3 treated as signed
+  //
+  // branchToEnd: if false, the stub code returns via Z_R14 when done;
+  //              if true, it falls through to the caller's code.
+  void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {
+
+    // This is the zarch specific stub generator for general array copy tasks.
+    // It has the following prereqs and features:
+    //
+    // - Destructive overlap exists and is handled by reverse copy.
+    // - Destructive overlap exists if the leftmost byte of the target
+    //   does coincide with any of the source bytes (except the leftmost).
+    // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride)
+    // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine.
+    // - Z_ARG3 is USED but preserved by the stub routine.
+    // - Z_ARG4 is used as index register and is thus KILLed.
+    //
+    {
+      Register stride_reg = Z_R1;     // Stride & compare value in loop (negative element_size).
+      Register   data_reg = Z_R0;     // Holds value of currently processed element.
+      Register     ix_reg = Z_ARG4;   // Holds byte index of currently processed element.
+      Register    len_reg = Z_ARG3;   // Holds length (in #elements) of arrays.
+      Register    dst_reg = Z_ARG2;   // Holds left  operand addr.
+      Register    src_reg = Z_ARG1;   // Holds right operand addr.
+
+      assert(256%element_size == 0, "Element size must be power of 2.");
+      assert(element_size     <= 8, "Can't handle more than DW units.");
+
+      switch (element_size) {
+        case 1:  BLOCK_COMMENT("ARRAYCOPY CONJOINT byte  {"); break;
+        case 2:  BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break;
+        case 4:  BLOCK_COMMENT("ARRAYCOPY CONJOINT int   {"); break;
+        case 8:  BLOCK_COMMENT("ARRAYCOPY CONJOINT long  {"); break;
+        default: BLOCK_COMMENT("ARRAYCOPY CONJOINT       {"); break;
+      }
+
+      assert_positive_int(len_reg);
+
+      if (VM_Version::has_Prefetch()) {
+        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
+        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
+      }
+
+      // Scale element count to byte count (copy runs backwards from the end).
+      unsigned int log2_size = exact_log2(element_size);
+      if (log2_size) {
+        __ z_sllg(ix_reg, len_reg, log2_size);
+      } else {
+        __ z_lgr(ix_reg, len_reg);
+      }
+
+      // Optimize reverse copy loop.
+      // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks.
+      // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic.
+      // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.
+      Label countLoop1;
+      Label copyLoop1;
+      Label skipBY;
+      Label skipHW;
+      int   stride = -8;
+
+      __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.
+
+      if (element_size == 8)    // Nothing to do here.
+        __ z_bru(countLoop1);
+      else {                    // Do not generate dead code.
+        __ z_tmll(ix_reg, 7);   // Check the "odd" bits.
+        __ z_bre(countLoop1);   // There are none, very good!
+      }
+
+      if (log2_size == 0) {     // Handle leftover Byte.
+        __ z_tmll(ix_reg, 1);
+        __ z_bre(skipBY);
+        __ z_lb(data_reg,   -1, ix_reg, src_reg);
+        __ z_stcy(data_reg, -1, ix_reg, dst_reg);
+        __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
+        __ bind(skipBY);
+        // fallthru
+      }
+      if (log2_size <= 1) {     // Handle leftover HW.
+        __ z_tmll(ix_reg, 2);
+        __ z_bre(skipHW);
+        __ z_lhy(data_reg,  -2, ix_reg, src_reg);
+        __ z_sthy(data_reg, -2, ix_reg, dst_reg);
+        __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
+        __ bind(skipHW);
+        __ z_tmll(ix_reg, 4);
+        __ z_bre(countLoop1);
+        // fallthru
+      }
+      if (log2_size <= 2) {     // There are just 4 bytes (left) that need to be copied.
+        __ z_ly(data_reg,  -4, ix_reg, src_reg);
+        __ z_sty(data_reg, -4, ix_reg, dst_reg);
+        __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
+        __ z_bru(countLoop1);
+      }
+
+      // Control can never get to here. Never! Never ever!
+      __ z_illtrap(0x99);
+      __ bind(copyLoop1);
+      __ z_lg(data_reg,  0, ix_reg, src_reg);
+      __ z_stg(data_reg, 0, ix_reg, dst_reg);
+      __ bind(countLoop1);
+      __ z_brxhg(ix_reg, stride_reg, copyLoop1);
+
+      if (!branchToEnd)
+        __ z_br(Z_R14);
+
+      switch (element_size) {
+        case 1:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break;
+        case 2:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break;
+        case 4:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT int  "); break;
+        case 8:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break;
+        default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT      "); break;
+      }
+    }
+  }
+
+  // Generate stub for disjoint byte copy. If "aligned" is true, the
+  // "from" and "to" addresses are assumed to be heapword aligned.
+  address generate_disjoint_byte_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    // Zarch-specific stub for byte array copy.
+    // See generate_disjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    generate_disjoint_copy(aligned, 1, false, false);
+    return __ addr_at(entry_off);
+  }
+
+
+  address generate_disjoint_short_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for short array copy.
+    // See generate_disjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    generate_disjoint_copy(aligned, 2, false, false);
+    return __ addr_at(entry_off);
+  }
+
+
+  address generate_disjoint_int_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for int array copy.
+    // See generate_disjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    generate_disjoint_copy(aligned, 4, false, false);
+    return __ addr_at(entry_off);
+  }
+
+
+  address generate_disjoint_long_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for long array copy.
+    // See generate_disjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    generate_disjoint_copy(aligned, 8, false, false);
+    return __ addr_at(entry_off);
+  }
+
+
+  address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for oop array copy.
+    // See generate_disjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    unsigned int elem_size = UseCompressedOops ? 4 : 8;
+
+    gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);
+
+    // branchToEnd and restoreArgs: the copy must fall through to the post
+    // barrier below with Z_ARG2/Z_ARG3 intact.
+    generate_disjoint_copy(aligned, elem_size, true, true);
+
+    gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);
+
+    return __ addr_at(entry_off);
+  }
+
+
+  address generate_conjoint_byte_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for overlapping byte array copy.
+    // See generate_conjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    address disjoint_stub  = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy()
+                                     : StubRoutines::jbyte_disjoint_arraycopy();
+
+    // Take the fast disjoint path when no destructive overlap exists.
+    array_overlap_test(disjoint_stub, 0);
+    generate_conjoint_copy(aligned, 1, false);
+
+    return __ addr_at(entry_off);
+  }
+
+
+  address generate_conjoint_short_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for overlapping short array copy.
+    // See generate_conjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    address disjoint_stub  = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy()
+                                     : StubRoutines::jshort_disjoint_arraycopy();
+
+    // Take the fast disjoint path when no destructive overlap exists.
+    array_overlap_test(disjoint_stub, 1);
+    generate_conjoint_copy(aligned, 2, false);
+
+    return __ addr_at(entry_off);
+  }
+
+  address generate_conjoint_int_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for overlapping int array copy.
+    // See generate_conjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    address disjoint_stub  = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy()
+                                     : StubRoutines::jint_disjoint_arraycopy();
+
+    // Take the fast disjoint path when no destructive overlap exists.
+    array_overlap_test(disjoint_stub, 2);
+    generate_conjoint_copy(aligned, 4, false);
+
+    return __ addr_at(entry_off);
+  }
+
+  address generate_conjoint_long_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for overlapping long array copy.
+    // See generate_conjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    address disjoint_stub  = aligned ? StubRoutines::arrayof_jlong_disjoint_arraycopy()
+                                     : StubRoutines::jlong_disjoint_arraycopy();
+
+    // Take the fast disjoint path when no destructive overlap exists.
+    array_overlap_test(disjoint_stub, 3);
+    generate_conjoint_copy(aligned, 8, false);
+
+    return __ addr_at(entry_off);
+  }
+
+  address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // Zarch-specific stub for overlapping oop array copy.
+    // See generate_conjoint_copy for the list of prereqs and features.
+    unsigned int entry_off = __ offset();  // Stub entry point, returned to the caller.
+    unsigned int elem_size = UseCompressedOops ? 4 : 8;
+    unsigned int log2_size = UseCompressedOops ? 2 : 3;
+
+    address disjoint_stub = aligned ? StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized)
+                                    : StubRoutines::oop_disjoint_arraycopy(dest_uninitialized);
+
+    // Branch to the disjoint stub (if applicable) BEFORE emitting the
+    // pre barrier, so the pre barrier cannot be executed twice.
+    array_overlap_test(disjoint_stub, log2_size);
+
+    gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);
+
+    generate_conjoint_copy(aligned, elem_size, true);  // Must preserve ARG2, ARG3.
+
+    gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);
+
+    return __ addr_at(entry_off);
+  }
+
+
+  // Generate all arraycopy stubs and register their entry points with StubRoutines.
+  // Each element type gets disjoint and conjoint flavors, both in a plain and an
+  // arrayof_ (known-aligned) variant; oop copies additionally get an _uninit
+  // variant (dest_uninitialized == true).
+  void generate_arraycopy_stubs() {
+
+    // Note: the disjoint stubs must be generated first, some of
+    // the conjoint stubs use them.
+    StubRoutines::_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (false, "jbyte_disjoint_arraycopy");
+    StubRoutines::_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
+    StubRoutines::_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (false, "jint_disjoint_arraycopy");
+    StubRoutines::_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (false, "jlong_disjoint_arraycopy");
+    StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy", false);
+    StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (false, "oop_disjoint_arraycopy_uninit", true);
+
+    StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy (true, "arrayof_jbyte_disjoint_arraycopy");
+    StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
+    StubRoutines::_arrayof_jint_disjoint_arraycopy       = generate_disjoint_int_copy  (true, "arrayof_jint_disjoint_arraycopy");
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = generate_disjoint_long_copy (true, "arrayof_jlong_disjoint_arraycopy");
+    StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy", false);
+    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy  (true, "arrayof_oop_disjoint_arraycopy_uninit", true);
+
+    // Conjoint stubs branch to their disjoint counterparts (looked up via
+    // StubRoutines) when the overlap test finds the operands disjoint.
+    StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy (false, "jbyte_arraycopy");
+    StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, "jshort_arraycopy");
+    StubRoutines::_jint_arraycopy            = generate_conjoint_int_copy  (false, "jint_arraycopy");
+    StubRoutines::_jlong_arraycopy           = generate_conjoint_long_copy (false, "jlong_arraycopy");
+    StubRoutines::_oop_arraycopy             = generate_conjoint_oop_copy  (false, "oop_arraycopy", false);
+    StubRoutines::_oop_arraycopy_uninit      = generate_conjoint_oop_copy  (false, "oop_arraycopy_uninit", true);
+
+    StubRoutines::_arrayof_jbyte_arraycopy      = generate_conjoint_byte_copy (true, "arrayof_jbyte_arraycopy");
+    StubRoutines::_arrayof_jshort_arraycopy     = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
+    StubRoutines::_arrayof_jint_arraycopy       = generate_conjoint_int_copy  (true, "arrayof_jint_arraycopy");
+    StubRoutines::_arrayof_jlong_arraycopy      = generate_conjoint_long_copy (true, "arrayof_jlong_arraycopy");
+    StubRoutines::_arrayof_oop_arraycopy        = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy", false);
+    StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy  (true, "arrayof_oop_arraycopy_uninit", true);
+  }
+
+  // Generate a SafeFetch stub: read a 4- or 8-byte word from a potentially
+  // invalid address and, if the access faults, return a caller-provided error
+  // value instead of crashing. The stub's entry, faulting-instruction, and
+  // continuation addresses are reported through the three out parameters.
+  void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
+
+    // safefetch signatures:
+    //   int      SafeFetch32(int*      adr, int      errValue);
+    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
+    //
+    // arguments:
+    //   Z_ARG1 = adr
+    //   Z_ARG2 = errValue
+    //
+    // result:
+    //   Z_RET  = *adr or errValue
+
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    // entry point
+    // Load *adr into Z_ARG2, may fault.
+    // Note: Z_ARG2 holds errValue on entry. If the load faults, the VM's fault
+    // handling is expected to resume at *continuation_pc with Z_ARG2 untouched
+    // (still errValue); if it succeeds, Z_ARG2 now holds *adr. Either way the
+    // continuation just moves Z_ARG2 into the return register.
+    *entry = *fault_pc = __ pc();
+    switch (size) {
+      case 4:
+        // Sign extended int32_t.
+        __ z_lgf(Z_ARG2, 0, Z_ARG1);
+        break;
+      case 8:
+        // int64_t
+        __ z_lg(Z_ARG2, 0, Z_ARG1);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+
+    // Return errValue or *adr.
+    *continuation_pc = __ pc();
+    __ z_lgr(Z_RET, Z_ARG2);
+    __ z_br(Z_R14);
+
+  }
+
+  // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
+  //
+  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
+  //   Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
+  //            For in-place encryption/decryption, ARG1 and ARG2 can point
+  //            to the same piece of storage.
+  //   Z_ARG3 - Crypto key address (expanded key). The first n bits of
+  //            the expanded key constitute the original AES-<n> key (see below).
+  //
+  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
+  //
+  // Some remarks:
+  //   The crypto key, as passed from the caller to these encryption stubs,
+  //   is a so-called expanded key. It is derived from the original key
+  //   by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule
+  //   With the expanded key, the cipher/decipher task is decomposed in
+  //   multiple, less complex steps, called rounds. Sun SPARC and Intel
+  //   processors obviously implement support for those less complex steps.
+  //   z/Architecture provides instructions for full cipher/decipher complexity.
+  //   Therefore, we need the original, not the expanded key here.
+  //   Luckily, the first n bits of an AES-<n> expanded key are formed
+  //   by the original key itself. That takes us out of trouble. :-)
+  //   The key length (in bytes) relation is as follows:
+  //     original    expanded   rounds  key bit     keylen
+  //    key bytes   key bytes            length   in words
+  //           16         176       11      128         44
+  //           24         208       13      192         52
+  //           32         240       15      256         60
+  //
+  // The crypto instructions used in the AES* stubs have some specific register requirements.
+  //   Z_R0   holds the crypto function code. Please refer to the KM/KMC instruction
+  //          description in the "z/Architecture Principles of Operation" manual for details.
+  //   Z_R1   holds the parameter block address. The parameter block contains the cryptographic key
+  //          (KM instruction) and the chaining value (KMC instruction).
+  //   dst    must designate an even-numbered register, holding the address of the output message.
+  //   src    must designate an even/odd register pair, holding the address/length of the original message
+
+  // Helper function which generates code to
+  //  - load the function code in register fCode (== Z_R0),
+  //  - load the data block length (depends on cipher function) in register srclen,
+  //  - select between cipher and decipher function codes via the is_decipher argument.
+  // Emit code to load the crypto function code (cipher or decipher, selected by
+  // is_decipher) into fCode (== Z_R0) and the AES data block length into srclen.
+  // The AES variant (128/192/256) is selected at runtime by comparing keylen
+  // (expanded key length in 4-byte words) against 52.
+  void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {
+
+    BLOCK_COMMENT("Set fCode {"); {
+      Label fCode_set;
+      int   mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
+      // If all AES variants share the same data block length, srclen can be set
+      // once after the branches instead of once per variant.
+      bool  identical_dataBlk_len =  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk)
+                                  && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
+      // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
+      // The compare sets the condition code once; z_lghi does not change the CC,
+      // so all three branches below still test this compare result.
+      __ z_cghi(keylen, 52);
+      __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
+      if (!identical_dataBlk_len) {
+        __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
+      }
+      __ z_brh(fCode_set);  // keyLen >  52: AES256
+
+      __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
+      if (!identical_dataBlk_len) {
+        __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk);
+      }
+      __ z_bre(fCode_set);  // keyLen == 52: AES192
+
+      __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
+      if (!identical_dataBlk_len) {
+        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
+      }
+      // __ z_brl(fCode_set);  // keyLen <  52: AES128           // fallthru
+      __ bind(fCode_set);
+      if (identical_dataBlk_len) {
+        // All variants agree, so any of the _AES*_dataBlk constants works here.
+        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
+      }
+    }
+    BLOCK_COMMENT("} Set fCode");
+  }
+
+  // Push a parameter block for the cipher/decipher instruction on the stack.
+  // NOTE:
+  //   Before returning, the stub has to copy the chaining value from
+  //   the parmBlk, where it was updated by the crypto instruction, back
+  //   to the chaining value array the address of which was passed in the cv argument.
+  //   As all the available registers are used and modified by KMC, we need to save
+  //   the key length across the KMC instruction. We do so by spilling it to the stack,
+  //   just preceding the parmBlk (at (parmBlk - 8)).
+  // Emit code which builds the KMC parameter block on the stack and resizes the
+  // frame so the block lies within the stub's own frame. Resulting layout:
+  //   parmBlk      -> chaining value (cv), immediately followed by the crypto key,
+  //   parmBlk -  8 -> spilled keylen      (restored by generate_pop_parmBlk),
+  //   parmBlk - 16 -> spilled original SP (restored by generate_pop_parmBlk).
+  // parmBlk is aligned to AES_parmBlk_align. The AES variant is selected at
+  // runtime by comparing keylen (in 4-byte words) against 52; code is emitted
+  // only for the variants this CPU supports.
+  void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
+    const int AES_parmBlk_align    = 32;
+    const int AES_parmBlk_addspace = AES_parmBlk_align; // Must be multiple of AES_parmblk_align.
+    int       cv_len, key_len;
+    int       mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
+    Label     parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
+
+    BLOCK_COMMENT("push parmBlk {");
+    // One compare, up to three branches: brh/bre/brl all test the CC set here.
+    if (VM_Version::has_Crypto_AES()   ) { __ z_cghi(keylen, 52); }
+    if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); }  // keyLen >  52: AES256
+    if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); }  // keyLen == 52: AES192
+    if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); }  // keyLen <  52: AES128
+
+    // Security net: requested AES function not available on this CPU.
+    // NOTE:
+    //   As of now (March 2015), this safety net is not required. JCE policy files limit the
+    //   cryptographic strength of the keys used to 128 bit. If we have AES hardware support
+    //   at all, we have at least AES-128.
+    __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);
+
+    if (VM_Version::has_Crypto_AES128()) {
+      __ bind(parmBlk_128);
+      cv_len  = VM_Version::Cipher::_AES128_dataBlk;
+      key_len = VM_Version::Cipher::_AES128_parmBlk_C - cv_len;
+      // Reserve parmBlk_C + align bytes below SP, then round down to the next
+      // AES_parmBlk_align boundary (z_nill clears the low-order alignment bits).
+      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES128_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
+      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // align parameter block
+
+      // Resize the frame to accommodate for the aligned parameter block and other stuff.
+      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+      __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
+      __ z_stg(Z_SP,  -16, parmBlk);                   // Spill SP for easy revert.
+      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc..
+      __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+      __ z_aghi(parmBlk,  AES_parmBlk_addspace);       // Restore parameter block address.
+
+      __ z_mvc(0,      cv_len-1,  parmBlk, 0, cv);     // Copy cv.
+      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
+      __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
+      if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
+        __ z_bru(parmBlk_set);  // Fallthru otherwise.
+      }
+    }
+
+    if (VM_Version::has_Crypto_AES192()) {
+      __ bind(parmBlk_192);
+      cv_len  = VM_Version::Cipher::_AES192_dataBlk;
+      key_len = VM_Version::Cipher::_AES192_parmBlk_C - cv_len;
+      // Same scheme as the AES128 case above, with AES192 sizes.
+      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES192_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
+      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.
+
+      // Resize the frame to accommodate for the aligned parameter block and other stuff.
+      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+      __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
+      __ z_stg(Z_SP,  -16, parmBlk);                   // Spill SP for easy revert.
+      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc..
+      __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+      __ z_aghi(parmBlk, AES_parmBlk_addspace);        // Restore parameter block address.
+
+      __ z_mvc(0,      cv_len-1,  parmBlk, 0, cv);     // Copy cv.
+      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
+      __ z_lghi(fCode,    VM_Version::Cipher::_AES192 + mode);
+      if (VM_Version::has_Crypto_AES256()) {
+        __ z_bru(parmBlk_set);  // Fallthru otherwise.
+      }
+    }
+
+    if (VM_Version::has_Crypto_AES256()) {
+      __ bind(parmBlk_256);
+      cv_len  = VM_Version::Cipher::_AES256_dataBlk;
+      key_len = VM_Version::Cipher::_AES256_parmBlk_C - cv_len;
+      // Same scheme as the AES128 case above, with AES256 sizes.
+      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES256_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
+      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.
+
+      // Resize the frame to accommodate for the aligned parameter block and other stuff.
+      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+      __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
+      __ z_stg(Z_SP,  -16, parmBlk);                   // Spill SP for easy revert.
+      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc..
+      __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+      __ z_aghi(parmBlk,  AES_parmBlk_addspace);       // Restore parameter block address.
+
+      __ z_mvc(0,      cv_len-1,  parmBlk, 0, cv);     // Copy cv.
+      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
+      __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
+      // __ z_bru(parmBlk_set);  // fallthru
+    }
+
+    __ bind(parmBlk_set);
+    BLOCK_COMMENT("} push parmBlk");
+  }
+
+  // Pop a parameter block from the stack. The chaining value portion of the parameter block
+  // is copied back to the cv array as it is needed for subsequent cipher steps.
+  // The keylen value as well as the original SP (before resizing) was pushed to the stack
+  // when pushing the parameter block.
+  // Emit code which copies the (updated) chaining value from the parameter block
+  // back to the cv array and reverts the frame resize done by generate_push_parmBlk.
+  // keylen (spilled at parmBlk-8) and the original SP (spilled at parmBlk-16) are
+  // reloaded from the slots written when the parameter block was pushed.
+  void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {
+
+    BLOCK_COMMENT("pop parmBlk {");
+    // If all AES variants share the same data block (== cv) length, the runtime
+    // variant dispatch below can be skipped entirely.
+    bool identical_dataBlk_len =  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
+                                  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
+    if (identical_dataBlk_len) {
+      int cv_len = VM_Version::Cipher::_AES128_dataBlk;
+      __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
+    } else {
+      int cv_len;
+      Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
+      __ z_lg(keylen, -8, parmBlk);  // restore keylen
+      __ z_cghi(keylen, 52);
+      if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256);  // keyLen >  52: AES256
+      if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192);  // keyLen == 52: AES192
+      // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128);  // keyLen <  52: AES128  // fallthru
+
+      // Security net: there is no one here. If we would need it, we should have
+      // fallen into it already when pushing the parameter block.
+      if (VM_Version::has_Crypto_AES128()) {
+        __ bind(parmBlk_128);
+        cv_len = VM_Version::Cipher::_AES128_dataBlk;
+        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
+        if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
+          __ z_bru(parmBlk_set);
+        }
+      }
+
+      if (VM_Version::has_Crypto_AES192()) {
+        __ bind(parmBlk_192);
+        cv_len = VM_Version::Cipher::_AES192_dataBlk;
+        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
+        if (VM_Version::has_Crypto_AES256()) {
+          __ z_bru(parmBlk_set);
+        }
+      }
+
+      if (VM_Version::has_Crypto_AES256()) {
+        __ bind(parmBlk_256);
+        cv_len = VM_Version::Cipher::_AES256_dataBlk;
+        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
+        // __ z_bru(parmBlk_set);  // fallthru
+      }
+      __ bind(parmBlk_set);
+    }
+    __ z_lg(Z_SP, -16, parmBlk); // Revert resize_frame_absolute.
+    BLOCK_COMMENT("} pop parmBlk");
+  }
+
+  // Compute AES encrypt function.
+  // Generate the single-block AES encrypt stub (uses the KM instruction).
+  // See the call interface comment above. Returns the stub entry address.
+  address generate_AES_encryptBlock(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    Register       from    = Z_ARG1; // source byte array
+    Register       to      = Z_ARG2; // destination byte array
+    Register       key     = Z_ARG3; // expanded key array
+
+    // Note the register aliasing below: keylen and fCode share Z_R0 (keylen is
+    // fully consumed by generate_load_AES_fCode before fCode is written), src
+    // aliases from, and srclen/dst overwrite to/key. The code order ensures each
+    // value is copied away before its register is clobbered.
+    const Register keylen  = Z_R0;   // Temporarily (until fCode is set) holds the expanded key array length.
+    const Register fCode   = Z_R0;   // crypto function code
+    const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
+    const Register src     = Z_ARG1; // is Z_R2
+    const Register srclen  = Z_ARG2; // Overwrites destination address.
+    const Register dst     = Z_ARG3; // Overwrites expanded key address.
+
+    // Read key len of expanded key (in 4-byte words).
+    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Copy arguments to registers as required by crypto instruction.
+    __ z_lgr(parmBlk, key);          // crypto key (in T_INT array).
+    // __ z_lgr(src, from);          // Copy not needed, src/from are identical.
+    __ z_lgr(dst, to);               // Copy destination address to even register.
+
+    // Construct function code in Z_R0, data block length in Z_ARG2.
+    // (This clobbers srclen == Z_ARG2, hence the dst copy above comes first.)
+    generate_load_AES_fCode(keylen, fCode, srclen, false);
+
+    __ km(dst, src);          // Cipher the message.
+
+    __ z_br(Z_R14);
+
+    return __ addr_at(start_off);
+  }
+
+  // Compute AES decrypt function.
+  // Generate the single-block AES decrypt stub (uses the KM instruction with the
+  // decipher function code). See the call interface comment above.
+  // Returns the stub entry address.
+  address generate_AES_decryptBlock(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    Register       from    = Z_ARG1; // source byte array
+    Register       to      = Z_ARG2; // destination byte array
+    Register       key     = Z_ARG3; // expanded key array, not preset at entry!!!
+
+    // Register aliasing as in generate_AES_encryptBlock: keylen/fCode share Z_R0,
+    // src aliases from, srclen/dst overwrite to/key; the code order ensures every
+    // value is copied away before its register is clobbered.
+    const Register keylen  = Z_R0;   // Temporarily (until fCode is set) holds the expanded key array length.
+    const Register fCode   = Z_R0;   // crypto function code
+    const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
+    const Register src     = Z_ARG1; // is Z_R2
+    const Register srclen  = Z_ARG2; // Overwrites destination address.
+    const Register dst     = Z_ARG3; // Overwrites key address.
+
+    // Read key len of expanded key (in 4-byte words).
+    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Copy arguments to registers as required by crypto instruction.
+    __ z_lgr(parmBlk, key);     // Copy crypto key address.
+    // __ z_lgr(src, from);     // Copy not needed, src/from are identical.
+    __ z_lgr(dst, to);          // Copy destination address to even register.
+
+    // Construct function code in Z_R0, data block length in Z_ARG2.
+    // (This clobbers srclen == Z_ARG2, hence the dst copy above comes first.)
+    generate_load_AES_fCode(keylen, fCode, srclen, true);
+
+    __ km(dst, src);          // Cipher the message.
+
+    __ z_br(Z_R14);
+
+    return __ addr_at(start_off);
+  }
+
+  // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate
+  // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires
+  // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some
+  // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing.
+  // Stack space, on the contrary, is deallocated automatically when we return from the stub to the caller.
+  // *** WARNING ***
+  // Please note that we do not formally allocate stack space, nor do we
+  // update the stack pointer. Therefore, no function calls are allowed
+  // and nobody else must use the stack range where the parameter block
+  // is located.
+  // We align the parameter block to the next available octoword.
+  //
+  // Compute chained AES encrypt function.
+  // Generate the chained (CBC mode) AES encrypt stub (uses the KMC instruction).
+  // Returns the stub entry address; the stub returns msglen in Z_RET.
+  address generate_cipherBlockChaining_AES_encrypt(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    Register       from    = Z_ARG1; // source byte array (clear text)
+    Register       to      = Z_ARG2; // destination byte array (ciphered)
+    Register       key     = Z_ARG3; // expanded key array.
+    Register       cv      = Z_ARG4; // chaining value
+    const Register msglen  = Z_ARG5; // Total length of the msg to be encrypted. Value must be returned
+                                     // in Z_RET upon completion of this stub. Is 32-bit integer.
+
+    // Register aliasing: keylen/fCode share Z_R0, src aliases from, srclen/dst
+    // overwrite to/key. The KMC parameter block (cv + key) is built on the stack
+    // by generate_push_parmBlk before any of these registers are clobbered.
+    const Register keylen  = Z_R0;   // Expanded key length, as read from key array. Temp only.
+    const Register fCode   = Z_R0;   // crypto function code
+    const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
+    const Register src     = Z_ARG1; // is Z_R2
+    const Register srclen  = Z_ARG2; // Overwrites destination address.
+    const Register dst     = Z_ARG3; // Overwrites key address.
+
+    // Read key len of expanded key (in 4-byte words).
+    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
+    // Construct function code in Z_R0.
+    generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, false);
+
+    // Prepare other registers for instruction.
+    // __ z_lgr(src, from);     // Not needed, registers are the same.
+    __ z_lgr(dst, to);
+    __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required.
+
+    __ kmc(dst, src);           // Cipher the message.
+
+    // KMC updated the chaining value inside the parameter block; copy it back
+    // to the caller's cv array and revert the frame resize.
+    generate_pop_parmBlk(keylen, parmBlk, key, cv);
+
+    __ z_llgfr(Z_RET, msglen);  // We pass the offsets as ints, not as longs as required.
+    __ z_br(Z_R14);
+
+    return __ addr_at(start_off);
+  }
+
+  // Compute chained AES decrypt function.
+  // Generate the chained (CBC mode) AES decrypt stub (uses the KMC instruction
+  // with the decipher function code). Returns the stub entry address; the stub
+  // returns msglen in Z_RET.
+  address generate_cipherBlockChaining_AES_decrypt(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    Register       from    = Z_ARG1; // source byte array (ciphered)
+    Register       to      = Z_ARG2; // destination byte array (clear text)
+    Register       key     = Z_ARG3; // expanded key array, not preset at entry!!!
+    Register       cv      = Z_ARG4; // chaining value
+    const Register msglen  = Z_ARG5; // Total length of the msg to be decrypted. Value must be returned
+                                     // in Z_RET upon completion of this stub. Is 32-bit integer.
+
+    // Register aliasing as in the encrypt stub: keylen/fCode share Z_R0, src
+    // aliases from, srclen/dst overwrite to/key. The KMC parameter block
+    // (cv + key) is built on the stack before these registers are clobbered.
+    const Register keylen  = Z_R0;   // Expanded key length, as read from key array. Temp only.
+    const Register fCode   = Z_R0;   // crypto function code
+    const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
+    const Register src     = Z_ARG1; // is Z_R2
+    const Register srclen  = Z_ARG2; // Overwrites destination address.
+    const Register dst     = Z_ARG3; // Overwrites key address.
+
+    // Read key len of expanded key (in 4-byte words).
+    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
+    // Construct function code in Z_R0.
+    generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, true);
+
+    // Prepare other registers for instruction.
+    // __ z_lgr(src, from);  // Not needed, registers are the same.
+    __ z_lgr(dst, to);
+    // msglen is a 32-bit int: zero-extend to 64 bits as required by KMC.
+    // (Keep this consistent with the encrypt stub; a plain 64-bit copy would
+    // rely on the caller having cleared the upper half of the register.)
+    __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required.
+
+    __ kmc(dst, src);          // Decipher the message.
+
+    // KMC updated the chaining value inside the parameter block; copy it back
+    // to the caller's cv array and revert the frame resize.
+    generate_pop_parmBlk(keylen, parmBlk, key, cv);
+
+    __ z_llgfr(Z_RET, msglen); // We pass the offsets as ints, not as longs as required.
+    __ z_br(Z_R14);
+
+    return __ addr_at(start_off);
+  }
+
+
+  // Call interface for all SHA* stubs.
+  //
+  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
+  //   Z_ARG2 - current SHA state. Ptr to state area. This area serves as
+  //            parameter block as required by the crypto instruction.
+  //   Z_ARG3 - current byte offset in source data block.
+  //   Z_ARG4 - last byte offset in source data block.
+  //            (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed.
+  //
+  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
+  //
+  //   A few notes on the call interface:
+  //    - All stubs, whether they are single-block or multi-block, are assumed to
+  //      digest an integer multiple of the data block length of data. All data
+  //      blocks are digested using the intermediate message digest (KIMD) instruction.
+  //      Special end processing, as done by the KLMD instruction, seems to be
+  //      emulated by the calling code.
+  //
+  //    - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is
+  //      already accounted for.
+  //
+  //    - The current SHA state (the intermediate message digest value) is contained
+  //      in an area addressed by Z_ARG2. The area size depends on the SHA variant
+  //      and is accessible via the enum VM_Version::MsgDigest::_SHA<n>_parmBlk_I
+  //
+  //    - The single-block stub is expected to digest exactly one data block, starting
+  //      at the address passed in Z_ARG1.
+  //
+  //    - The multi-block stub is expected to digest all data blocks which start in
+  //      the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference
+  //      (srcLimit-srcOff), rounded up to the next multiple of the data block length,
+  //      gives the number of blocks to digest. It must be assumed that the calling code
+  //      provides for a large enough source data buffer.
+  //
+  // Compute SHA-1 function.
+  // Generate the SHA-1 digest stub (uses the KIMD instruction).
+  //   multiBlock - digest all full blocks in [srcOff, srcLimit) if true,
+  //                exactly one data block otherwise.
+  // See the call interface comment above. Returns the stub entry address.
+  address generate_SHA1_stub(bool multiBlock, const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
+
+    const Register srcBuff        = Z_ARG1; // Points to first block to process (offset already added).
+    const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter for kimd register pairs.
+    const Register srcOff         = Z_ARG3; // int
+    const Register srcLimit       = Z_ARG4; // Only passed in multiBlock case. int
+
+    const Register SHAState_local = Z_R1;
+    const Register SHAState_save  = Z_ARG3; // Currently unused (the KLMD special-case path below is disabled).
+    const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
+    Label useKLMD, rtn;
+
+    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1);   // function code
+    __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
+
+    if (multiBlock) {  // Process everything from offset to limit.
+
+      // The following description is valid if we get a raw (unpimped) source data buffer,
+      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
+      // the calling convention for these stubs is different. We leave the description in
+      // to inform the reader what must be happening hidden in the calling code.
+      //
+      // The data block to be processed can have arbitrary length, i.e. its length does not
+      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
+      // two different paths. If the length is an integer multiple, we use KIMD, saving us
+      // to copy the SHA state back and forth. If the length is odd, we copy the SHA state
+      // to the stack, execute a KLMD instruction on it and copy the result back to the
+      // caller's SHA state location.
+
+      // Total #srcBuff blocks to process.
+      if (VM_Version::has_DistinctOpnds()) {
+        __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference
+        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);   // round up
+        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
+        __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value.
+        __ z_llgfr(srcBufLen, srcBufLen);      // Cast to 64-bit.
+      } else {
+        // Same computation without distinct-operands facility instructions.
+        __ z_lgfr(srcBufLen, srcLimit);        // Exact difference. srcLimit passed as int.
+        __ z_sgfr(srcBufLen, srcOff);          // SrcOff passed as int, now properly casted to long.
+        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);   // round up
+        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
+        __ z_lgr(srcLimit, srcOff);            // SrcLimit temporarily holds return value.
+        __ z_agr(srcLimit, srcBufLen);
+      }
+
+      // Integral #blocks to digest?
+      // As a result of the calculations above, srcBufLen MUST be an integer
+      // multiple of _SHA1_dataBlk, or else we are in big trouble.
+      // We insert an asm_assert into the KLMD case to guard against that.
+      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);
+      __ z_brc(Assembler::bcondNotAllZero, useKLMD);
+
+      // Process all full blocks.
+      __ kimd(srcBuff);
+
+      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
+    } else {  // Process one data block only.
+      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk);   // #srcBuff bytes to process
+      __ kimd(srcBuff);
+      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff);            // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed.
+    }
+
+    __ bind(rtn);
+    __ z_br(Z_R14);
+
+    if (multiBlock) {
+      __ bind(useKLMD);
+
+#if 1
+      // Security net: this stub is believed to be called for full-sized data blocks only
+      // NOTE: The following code is believed to be correct, but it is not tested.
+      // NOTE(review): the message text says "SHA128" although this is the SHA-1 stub.
+      __ stop_static("SHA128 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
+#endif
+    }
+
+    return __ addr_at(start_off);
+  }
+
+  // Compute SHA-256 function.
+  address generate_SHA256_stub(bool multiBlock, const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
+
+    const Register srcBuff        = Z_ARG1;
+    const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter.
+    const Register SHAState_local = Z_R1;
+    const Register SHAState_save  = Z_ARG3;
+    const Register srcOff         = Z_ARG3;
+    const Register srcLimit       = Z_ARG4;
+    const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
+    Label useKLMD, rtn;
+
+    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256); // function code
+    __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
+
+    if (multiBlock) {  // Process everything from offset to limit.
+      // The following description is valid if we get a raw (unpimped) source data buffer,
+      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
+      // the calling convention for these stubs is different. We leave the description in
+      // to inform the reader what must be happening hidden in the calling code.
+      //
+      // The data block to be processed can have arbitrary length, i.e. its length does not
+      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
+      // two different paths. If the length is an integer multiple, we use KIMD, saving us
+      // to copy the SHA state back and forth. If the length is odd, we copy the SHA state
+      // to the stack, execute a KLMD instruction on it and copy the result back to the
+      // caller's SHA state location.
+
+      // total #srcBuff blocks to process
+      if (VM_Version::has_DistinctOpnds()) {
+        __ z_srk(srcBufLen, srcLimit, srcOff);   // exact difference
+        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up
+        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
+        __ z_ark(srcLimit, srcOff, srcBufLen);   // Srclimit temporarily holds return value.
+        __ z_llgfr(srcBufLen, srcBufLen);        // Cast to 64-bit.
+      } else {
+        __ z_lgfr(srcBufLen, srcLimit);          // exact difference
+        __ z_sgfr(srcBufLen, srcOff);
+        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up
+        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
+        __ z_lgr(srcLimit, srcOff);              // Srclimit temporarily holds return value.
+        __ z_agr(srcLimit, srcBufLen);
+      }
+
+      // Integral #blocks to digest?
+      // As a result of the calculations above, srcBufLen MUST be an integer
+      // multiple of _SHA1_dataBlk, or else we are in big trouble.
+      // We insert an asm_assert into the KLMD case to guard against that.
+      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);
+      __ z_brc(Assembler::bcondNotAllZero, useKLMD);
+
+      // Process all full blocks.
+      __ kimd(srcBuff);
+
+      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
+    } else {  // Process one data block only.
+      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk); // #srcBuff bytes to process
+      __ kimd(srcBuff);
+      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff);          // Offset of first unprocessed byte in buffer.
+    }
+
+    __ bind(rtn);
+    __ z_br(Z_R14);
+
+    if (multiBlock) {
+      __ bind(useKLMD);
+#if 1
+      // Security net: this stub is believed to be called for full-sized data blocks only.
+      // NOTE:
+      //   The following code is believed to be correct, but is is not tested.
+      __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
+#endif
+    }
+
+    return __ addr_at(start_off);
+  }
+
+  // Compute SHA-512 function.
+  //
+  // Arguments:
+  //   Z_ARG1 - srcBuff:  address of the source data
+  //   Z_ARG2 - SHAState: address of the SHA-512 state (serves as KIMD parameter block)
+  //   Z_ARG3 - srcOff:   offset of the first byte to process (multiBlock variant only)
+  //   Z_ARG4 - srcLimit: offset of the first byte NOT to process (multiBlock variant only)
+  // Result:
+  //   Z_RET  - offset of the first unprocessed byte in the buffer
+  address generate_SHA512_stub(bool multiBlock, const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int start_off = __ offset();   // Remember stub start address (is rtn value).
+
+    const Register srcBuff        = Z_ARG1;
+    const Register SHAState       = Z_ARG2; // Only on entry. Reused soon thereafter.
+    const Register SHAState_local = Z_R1;
+    const Register SHAState_save  = Z_ARG3;
+    const Register srcOff         = Z_ARG3;
+    const Register srcLimit       = Z_ARG4;
+    const Register srcBufLen      = Z_ARG2; // Destroys state address, must be copied before.
+    Label useKLMD, rtn;
+
+    __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512); // function code
+    __ z_lgr(SHAState_local, SHAState);                                 // SHAState == parameter block
+
+    if (multiBlock) {  // Process everything from offset to limit.
+      // The following description is valid if we get a raw (unpimped) source data buffer,
+      // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
+      // the calling convention for these stubs is different. We leave the description in
+      // to inform the reader what must be happening hidden in the calling code.
+      //
+      // The data block to be processed can have arbitrary length, i.e. its length does not
+      // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
+      // two different paths. If the length is an integer multiple, we use KIMD, saving us
+      // to copy the SHA state back and forth. If the length is odd, we copy the SHA state
+      // to the stack, execute a KLMD instruction on it and copy the result back to the
+      // caller's SHA state location.
+
+      // total #srcBuff blocks to process
+      if (VM_Version::has_DistinctOpnds()) {
+        __ z_srk(srcBufLen, srcLimit, srcOff);   // exact difference
+        __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up
+        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
+        __ z_ark(srcLimit, srcOff, srcBufLen);   // srcLimit temporarily holds return value.
+        __ z_llgfr(srcBufLen, srcBufLen);        // Cast to 64-bit.
+      } else {
+        __ z_lgfr(srcBufLen, srcLimit);          // exact difference
+        __ z_sgfr(srcBufLen, srcOff);
+        __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up
+        __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
+        __ z_lgr(srcLimit, srcOff);              // srcLimit temporarily holds return value.
+        __ z_agr(srcLimit, srcBufLen);
+      }
+
+      // integral #blocks to digest?
+      // As a result of the calculations above, srcBufLen MUST be an integer
+      // multiple of _SHA512_dataBlk, or else we are in big trouble.
+      // We insert an asm_assert into the KLMD case to guard against that.
+      __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);
+      __ z_brc(Assembler::bcondNotAllZero, useKLMD);
+
+      // Process all full blocks.
+      __ kimd(srcBuff);
+
+      __ z_lgr(Z_RET, srcLimit);  // Offset of first unprocessed byte in buffer.
+    } else {  // Process one data block only.
+      __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk); // #srcBuff bytes to process
+      __ kimd(srcBuff);
+      __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff);          // Offset of first unprocessed byte in buffer.
+    }
+
+    __ bind(rtn);
+    __ z_br(Z_R14);
+
+    if (multiBlock) {
+      __ bind(useKLMD);
+#if 1
+      // Security net: this stub is believed to be called for full-sized data blocks only
+      // NOTE:
+      //   The following code is believed to be correct, but it is not tested.
+      __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
+#endif
+    }
+
+    return __ addr_at(start_off);
+  }
+
+
+
+  // Arguments:
+  //   Z_ARG1  - int   crc
+  //   Z_ARG2  - byte* buf
+  //   Z_ARG3  - int   length (of buffer)
+  //
+  // Result:
+  //   Z_RET   - int   crc result
+  //
+  // Compute CRC32 function.
+  address generate_CRC32_updateBytes(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    // arguments to kernel_crc32:
+    Register       crc     = Z_ARG1;  // Current checksum, preset by caller or result from previous call, int.
+    Register       data    = Z_ARG2;  // source byte array
+    Register       dataLen = Z_ARG3;  // #bytes to process, int
+    Register       table   = Z_ARG4;  // crc table address
+    const Register t0      = Z_R10;   // work reg for kernel* emitters
+    const Register t1      = Z_R11;   // work reg for kernel* emitters
+    const Register t2      = Z_R12;   // work reg for kernel* emitters
+    const Register t3      = Z_R13;   // work reg for kernel* emitters
+
+    assert_different_registers(crc, data, dataLen, table);
+
+    // We pass these values as ints, not as longs as required by C calling convention.
+    // Crc used as int. Zero-extend the length to 64 bits.
+    __ z_llgfr(dataLen, dataLen);
+
+    StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
+
+    __ resize_frame(-(6*8), Z_R0, true); // Grow frame by 6*8 bytes of add'l spill space.
+    __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP);  // Spill Z_R10..Z_R13 to make them available as work registers.
+    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3);
+    __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP);   // Restore Z_R10..Z_R13 from the stack.
+    __ resize_frame(+(6*8), Z_R0, true); // Shrink frame back to its original size.
+
+    __ z_llgfr(Z_RET, crc);  // Updated crc is function result. No copying required, just zero upper 32 bits.
+    __ z_br(Z_R14);          // Result already in Z_RET == Z_ARG1.
+
+    return __ addr_at(start_off);
+  }
+
+
+  // Arguments:
+  //   Z_ARG1    - x address
+  //   Z_ARG2    - x length
+  //   Z_ARG3    - y address
+  //   Z_ARG4    - y length
+  //   Z_ARG5    - z address
+  //   160[Z_SP] - z length
+  //
+  // Thin wrapper around MacroAssembler::multiply_to_len, which does the
+  // actual multiplication (and saves/restores the tmp registers listed below).
+  address generate_multiplyToLen() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+
+    address start = __ pc();
+
+    const Register x    = Z_ARG1;
+    const Register xlen = Z_ARG2;
+    const Register y    = Z_ARG3;
+    const Register ylen = Z_ARG4;
+    const Register z    = Z_ARG5;
+    // zlen is passed on the stack:
+    // Address zlen(Z_SP, _z_abi(remaining_cargs));
+
+    // Next registers will be saved on stack in multiply_to_len().
+    const Register tmp1 = Z_tmp_1;
+    const Register tmp2 = Z_tmp_2;
+    const Register tmp3 = Z_tmp_3;
+    const Register tmp4 = Z_tmp_4;
+    const Register tmp5 = Z_R9;
+
+    BLOCK_COMMENT("Entry:");
+
+    // Lengths arrive as 32-bit ints; zero-extend to 64 bits.
+    __ z_llgfr(xlen, xlen);
+    __ z_llgfr(ylen, ylen);
+
+    __ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5);
+
+    __ z_br(Z_R14);  // Return to caller.
+
+    return start;
+  }
+
+  // Generates the stubs needed early in VM startup (before universe_init)
+  // and initializes their entry points.
+  void generate_initial() {
+    // Entry points that exist in all platforms.
+    // Note: This is code that could be shared among different
+    // platforms - however the benefit seems to be smaller than the
+    // disadvantage of having a much more complicated generator
+    // structure. See also comment in stubRoutines.hpp.
+    StubRoutines::_forward_exception_entry                 = generate_forward_exception();
+
+    StubRoutines::_call_stub_entry                         = generate_call_stub(StubRoutines::_call_stub_return_address);
+    StubRoutines::_catch_exception_entry                   = generate_catch_exception();
+
+    // Build this early so it's available for the interpreter.
+    // Note: this stub was accidentally generated twice here; the duplicate
+    // generation (which orphaned the first copy in the code cache) has been removed.
+    StubRoutines::_throw_StackOverflowError_entry          =
+      generate_throw_exception("StackOverflowError throw_exception",
+                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
+
+    //----------------------------------------------------------------------
+    // Entry points that are platform specific.
+
+    if (UseCRC32Intrinsics) {
+      // We have no CRC32 table on z/Architecture.
+      StubRoutines::_crc_table_adr    = (address)StubRoutines::zarch::_crc_table;
+      StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
+    }
+
+    // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+    StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table;
+  }
+
+
+  // Generates the remaining stubs (second pass, after universe_init)
+  // and initializes their entry points.
+  void generate_all() {
+    // Generates all stubs and initializes the entry points.
+
+    StubRoutines::zarch::_partial_subtype_check            = generate_partial_subtype_check();
+
+    // These entry points require SharedInfo::stack0 to be set up in non-core builds.
+    StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
+    StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
+    StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
+
+    StubRoutines::zarch::_handler_for_unsafe_access_entry  =  generate_handler_for_unsafe_access();
+
+    // Support for verify_oop (must happen after universe_init).
+    StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop_subroutine();
+
+    // Arraycopy stubs used by compilers.
+    generate_arraycopy_stubs();
+
+    // safefetch stubs
+    generate_safefetch("SafeFetch32", sizeof(int),      &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc);
+    generate_safefetch("SafeFetchN",  sizeof(intptr_t), &StubRoutines::_safefetchN_entry,  &StubRoutines::_safefetchN_fault_pc,  &StubRoutines::_safefetchN_continuation_pc);
+
+    // Generate AES intrinsics code.
+    if (UseAESIntrinsics) {
+      StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock("AES_encryptBlock");
+      StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock("AES_decryptBlock");
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt("AES_encryptBlock_chaining");
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt("AES_decryptBlock_chaining");
+    }
+
+    // Generate SHA1/SHA256/SHA512 intrinsics code.
+    if (UseSHA1Intrinsics) {
+      StubRoutines::_sha1_implCompress     = generate_SHA1_stub(false,   "SHA1_singleBlock");
+      StubRoutines::_sha1_implCompressMB   = generate_SHA1_stub(true,    "SHA1_multiBlock");
+    }
+    if (UseSHA256Intrinsics) {
+      StubRoutines::_sha256_implCompress   = generate_SHA256_stub(false, "SHA256_singleBlock");
+      StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(true,  "SHA256_multiBlock");
+    }
+    if (UseSHA512Intrinsics) {
+      StubRoutines::_sha512_implCompress   = generate_SHA512_stub(false, "SHA512_singleBlock");
+      StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(true,  "SHA512_multiBlock");
+    }
+
+#ifdef COMPILER2
+    // Math intrinsics, C2 only. Montgomery ops call straight into the runtime.
+    if (UseMultiplyToLenIntrinsic) {
+      StubRoutines::_multiplyToLen = generate_multiplyToLen();
+    }
+    if (UseMontgomeryMultiplyIntrinsic) {
+      StubRoutines::_montgomeryMultiply
+        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
+    }
+    if (UseMontgomerySquareIntrinsic) {
+      StubRoutines::_montgomerySquare
+        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
+    }
+#endif
+  }
+
+ public:
+  // Constructing the generator emits all stubs of the selected pass into 'code'.
+  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
+    // Replace the standard masm with a special one:
+    _masm = new MacroAssembler(code);
+
+    // Base value for the stub counter emitted by stub_prolog (ASSERT builds);
+    // 0x100 vs 0x200 presumably distinguishes the two generation passes - TODO confirm.
+    _stub_count = !all ? 0x100 : 0x200;
+    if (all) {
+      generate_all();
+    } else {
+      generate_initial();
+    }
+  }
+
+ private:
+  int _stub_count;  // Running stub id, emitted into each stub prolog in ASSERT builds.
+
+  // Called before each stub is generated; embeds debug info and aligns the start.
+  void stub_prolog(StubCodeDesc* cdesc) {
+#ifdef ASSERT
+    // Put extra information in the stub code, to make it more readable.
+    // Write the high part of the address.
+    // [RGV] Check if there is a dependency on the size of this prolog.
+    __ emit_32((intptr_t)cdesc >> 32);
+    __ emit_32((intptr_t)cdesc);
+    __ emit_32(++_stub_count);
+#endif
+    align(true);
+  }
+
+  // Pad the instruction stream to an alignment boundary.
+  // Header alignment pads with zero halfwords (never executed);
+  // intra-stub alignment pads with nops - presumably because that
+  // padding may be reached by execution (TODO confirm).
+  void align(bool at_header = false) {
+    // z/Architecture cache line size is 256 bytes.
+    // There is no obvious benefit in aligning stub
+    // code to cache lines. Use CodeEntryAlignment instead.
+    const unsigned int icache_line_size      = CodeEntryAlignment;
+    const unsigned int icache_half_line_size = MIN2<unsigned int>(32, CodeEntryAlignment);
+
+    if (at_header) {
+      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
+        __ emit_16(0);
+      }
+    } else {
+      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
+        __ z_nop();
+      }
+    }
+  }
+
+};
+
+// VM entry point: constructing the StubGenerator emits the stubs as a side effect.
+void StubGenerator_generate(CodeBuffer* code, bool all) {
+  StubGenerator g(code, all);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/stubRoutines_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+
+// Implementation of the platform-specific part of StubRoutines - for
+// a description of how to extend it, see the stubRoutines.hpp file.
+
+address StubRoutines::zarch::_handler_for_unsafe_access_entry = NULL;
+
+address StubRoutines::zarch::_partial_subtype_check = NULL;
+
+// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+address StubRoutines::zarch::_trot_table_addr = NULL;
+
+int StubRoutines::zarch::_atomic_memory_operation_lock = StubRoutines::zarch::unlocked;
+
+#define __ masm->
+
+// Emit code that loads the address of the CRC lookup table into 'table'.
+// In ASSERT builds, additionally emit a runtime safety net that verifies
+// both the loaded address and a well-known table entry.
+void StubRoutines::zarch::generate_load_crc_table_addr(MacroAssembler* masm, Register table) {
+
+  // NOTE(review): _crc_table_adr is consumed here, one line before the
+  // ASSERT-only NULL check below - the check cannot guard this use.
+  __ load_absolute_address(table, StubRoutines::_crc_table_adr);
+#ifdef ASSERT
+  assert(_crc_table_adr != NULL, "CRC lookup table address must be initialized by now");
+  {
+    Label L;
+    __ load_const_optimized(Z_R0, StubRoutines::_crc_table_adr);
+    __ z_cgr(table, Z_R0);  // safety net
+    __ z_bre(L);
+    __ z_illtrap();
+    __ asm_assert_eq("crc_table: external word relocation required for load_absolute_address", 0x33);
+    __ bind(L);
+  }
+  {
+    Label L;
+    // 0x77073096 is _crc_table[0][1] (see the table definition below).
+    __ load_const_optimized(Z_R0, 0x77073096UL);
+    __ z_cl(Z_R0, Address(table, 4));  // safety net
+    __ z_bre(L);
+    __ z_l(Z_R0, Address(table, 4));   // Load data from memory, we know the constant we compared against.
+    __ z_illtrap();
+    __ asm_assert_eq("crc_table: address or contents seems to be messed up", 0x22);
+    __ bind(L);
+  }
+#endif
+}
+
+// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+// Emit code that loads the address of the trot translate table into 'table'.
+// In ASSERT builds, additionally verify the table's contents and emit a
+// runtime safety net checking both the loaded address and a known entry.
+void StubRoutines::zarch::generate_load_trot_table_addr(MacroAssembler* masm, Register table) {
+
+  RelocationHolder   rspec = external_word_Relocation::spec((address)_trot_table);
+  __ relocate(rspec);
+  __ load_absolute_address(table, _trot_table_addr);
+#ifdef ASSERT
+    assert(_trot_table_addr != NULL, "Translate table address must be initialized by now");
+    assert((p2i(_trot_table_addr) & (TROT_ALIGNMENT-1)) == 0, "Translate table alignment error");
+    // The table must be the identity mapping: trot_table[i] == i for i in 0..255.
+    for (int i = 0; i < 256; i++) {
+      assert(i == *((jshort*)(_trot_table_addr+2*i)), "trot_table[%d] = %d", i, *((jshort*)(_trot_table_addr+2*i)));
+    }
+    {
+      Label L;
+      __ load_const_optimized(Z_R0, StubRoutines::zarch::_trot_table_addr);
+      __ z_cgr(table, Z_R0);  // safety net
+      __ z_bre(L);
+      __ z_illtrap();
+      // Fixed: message previously said "crc_table" (copy-paste from the CRC routine).
+      __ asm_assert_eq("trot_table: external word relocation does not work for load_absolute_address", 0x33);
+      __ bind(L);
+    }
+    {
+      Label L;
+      // 0x0004000500060007 is _trot_table[1] (see the table definition below).
+      __ load_const_optimized(Z_R0, 0x0004000500060007UL);
+      __ z_clg(Z_R0, Address(table, 8));  // safety net
+      __ z_bre(L);
+      __ z_lg(Z_R0, Address(table, 8));   // Load data from memory, we know the constant we compared against.
+      __ z_illtrap();
+      __ asm_assert_eq("trot_table: address or contents seems to be messed up", 0x22);
+      __ bind(L);
+    }
+#endif
+}
+
+
+/**
+ *  trot_table[]
+ *
+ *  Translate table for the trot instruction (string inflate intrinsic).
+ *  It is the identity mapping of byte values to 16-bit values: entry i
+ *  holds i, for i in 0x00..0xff. Each jlong packs four consecutive
+ *  16-bit entries (big-endian, as on z/Architecture).
+ */
+
+jlong StubRoutines::zarch::_trot_table[TROT_COLUMN_SIZE] = {
+    0x0000000100020003UL, 0x0004000500060007UL, 0x00080009000a000bUL, 0x000c000d000e000fUL,
+    0x0010001100120013UL, 0x0014001500160017UL, 0x00180019001a001bUL, 0x001c001d001e001fUL,
+    0x0020002100220023UL, 0x0024002500260027UL, 0x00280029002a002bUL, 0x002c002d002e002fUL,
+    0x0030003100320033UL, 0x0034003500360037UL, 0x00380039003a003bUL, 0x003c003d003e003fUL,
+    0x0040004100420043UL, 0x0044004500460047UL, 0x00480049004a004bUL, 0x004c004d004e004fUL,
+    0x0050005100520053UL, 0x0054005500560057UL, 0x00580059005a005bUL, 0x005c005d005e005fUL,
+    0x0060006100620063UL, 0x0064006500660067UL, 0x00680069006a006bUL, 0x006c006d006e006fUL,
+    0x0070007100720073UL, 0x0074007500760077UL, 0x00780079007a007bUL, 0x007c007d007e007fUL,
+    0x0080008100820083UL, 0x0084008500860087UL, 0x00880089008a008bUL, 0x008c008d008e008fUL,
+    0x0090009100920093UL, 0x0094009500960097UL, 0x00980099009a009bUL, 0x009c009d009e009fUL,
+    0x00a000a100a200a3UL, 0x00a400a500a600a7UL, 0x00a800a900aa00abUL, 0x00ac00ad00ae00afUL,
+    0x00b000b100b200b3UL, 0x00b400b500b600b7UL, 0x00b800b900ba00bbUL, 0x00bc00bd00be00bfUL,
+    0x00c000c100c200c3UL, 0x00c400c500c600c7UL, 0x00c800c900ca00cbUL, 0x00cc00cd00ce00cfUL,
+    0x00d000d100d200d3UL, 0x00d400d500d600d7UL, 0x00d800d900da00dbUL, 0x00dc00dd00de00dfUL,
+    0x00e000e100e200e3UL, 0x00e400e500e600e7UL, 0x00e800e900ea00ebUL, 0x00ec00ed00ee00efUL,
+    0x00f000f100f200f3UL, 0x00f400f500f600f7UL, 0x00f800f900fa00fbUL, 0x00fc00fd00fe00ffUL
+  };
+
+
+// crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.h
+juint StubRoutines::zarch::_crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE] = {
+  {
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL
+#ifdef  CRC32_BYFOUR
+  },
+  {
+    0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+    0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+    0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+    0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+    0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+    0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+    0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+    0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+    0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+    0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+    0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+    0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+    0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+    0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+    0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+    0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+    0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+    0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+    0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+    0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+    0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+    0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+    0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+    0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+    0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+    0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+    0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+    0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+    0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+    0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+    0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+    0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+    0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+    0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+    0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+    0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+    0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+    0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+    0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+    0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+    0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+    0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+    0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+    0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+    0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+    0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+    0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+    0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+    0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+    0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+    0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+    0x9324fd72UL
+  },
+  {
+    0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+    0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+    0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+    0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+    0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+    0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+    0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+    0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+    0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+    0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+    0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+    0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+    0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+    0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+    0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+    0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+    0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+    0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+    0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+    0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+    0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+    0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+    0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+    0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+    0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+    0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+    0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+    0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+    0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+    0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+    0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+    0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+    0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+    0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+    0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+    0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+    0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+    0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+    0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+    0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+    0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+    0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+    0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+    0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+    0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+    0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+    0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+    0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+    0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+    0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+    0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+    0xbe9834edUL
+  },
+  {
+    0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+    0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+    0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+    0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+    0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+    0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+    0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+    0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+    0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+    0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+    0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+    0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+    0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+    0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+    0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+    0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+    0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+    0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+    0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+    0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+    0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+    0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+    0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+    0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+    0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+    0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+    0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+    0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+    0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+    0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+    0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+    0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+    0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+    0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+    0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+    0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+    0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+    0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+    0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+    0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+    0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+    0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+    0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+    0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+    0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+    0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+    0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+    0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+    0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+    0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+    0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+    0xde0506f1UL
+  },
+  {
+    0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
+    0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
+    0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
+    0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
+    0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
+    0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
+    0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
+    0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
+    0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
+    0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
+    0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
+    0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
+    0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
+    0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
+    0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
+    0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
+    0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
+    0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
+    0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
+    0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
+    0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
+    0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
+    0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
+    0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
+    0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
+    0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
+    0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
+    0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
+    0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
+    0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
+    0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
+    0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
+    0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
+    0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
+    0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
+    0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
+    0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
+    0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
+    0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
+    0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
+    0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
+    0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
+    0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
+    0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
+    0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
+    0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
+    0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
+    0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
+    0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
+    0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
+    0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
+    0x8def022dUL
+  },
+  {
+    0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
+    0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
+    0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
+    0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
+    0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
+    0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
+    0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
+    0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
+    0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
+    0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
+    0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
+    0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
+    0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
+    0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
+    0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
+    0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
+    0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
+    0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
+    0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
+    0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
+    0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
+    0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
+    0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
+    0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
+    0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
+    0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
+    0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
+    0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
+    0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
+    0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
+    0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
+    0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
+    0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
+    0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
+    0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
+    0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
+    0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
+    0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
+    0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
+    0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
+    0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
+    0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
+    0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
+    0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
+    0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
+    0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
+    0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
+    0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
+    0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
+    0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
+    0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
+    0x72fd2493UL
+  },
+  {
+    0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
+    0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
+    0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
+    0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
+    0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
+    0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
+    0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
+    0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
+    0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
+    0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
+    0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
+    0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
+    0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
+    0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
+    0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
+    0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
+    0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
+    0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
+    0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
+    0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
+    0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
+    0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
+    0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
+    0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
+    0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
+    0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
+    0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
+    0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
+    0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
+    0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
+    0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
+    0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
+    0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
+    0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
+    0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
+    0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
+    0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
+    0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
+    0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
+    0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
+    0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
+    0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
+    0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
+    0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
+    0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
+    0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
+    0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
+    0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
+    0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
+    0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
+    0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
+    0xed3498beUL
+  },
+  {
+    0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
+    0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
+    0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
+    0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
+    0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
+    0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
+    0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
+    0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
+    0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
+    0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
+    0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
+    0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
+    0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
+    0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
+    0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
+    0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
+    0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
+    0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
+    0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
+    0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
+    0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
+    0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
+    0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
+    0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
+    0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
+    0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
+    0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
+    0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
+    0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
+    0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
+    0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
+    0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
+    0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
+    0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
+    0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
+    0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
+    0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
+    0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
+    0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
+    0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
+    0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
+    0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
+    0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
+    0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
+    0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
+    0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
+    0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
+    0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
+    0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
+    0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
+    0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
+    0xf10605deUL
+#endif
+  }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/stubRoutines_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP
+#define CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP
+
+// This file holds the platform specific parts of the StubRoutines
+// definition. See stubRoutines.hpp for a description on how to extend it.
+
+// True iff return_pc is the distinguished return address installed by the call
+// stub, i.e. returning through it leaves compiled/interpreted code and re-enters
+// the call stub (used to recognize the entry frame during stack walking).
+static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; }
+
+enum { // Platform dependent constants.
+  // Sizes of the two stub code buffers (see stubGenerator). If a buffer is too
+  // small the assembler will crash while generating stubs; simply increase then.
+  // TODO: May be able to shrink this a lot
+  code_size1 = 20000,      // Buffer for initial stubs.
+  code_size2 = 20000       // Buffer for all remaining stubs.
+};
+
+// MethodHandles adapters: code buffer size reserved for the method handle
+// adapter stubs on this platform (increase if adapter generation runs out).
+enum method_handles_platform_dependent_constants {
+  method_handles_adapters_code_size = 5000
+};
+
+// CRC32 lookup table layout: one column of 256 entries per input byte lane.
+#define CRC32_COLUMN_SIZE 256
+// CRC32_BYFOUR selects the slicing-by-4/8 algorithm, which needs 8 tables;
+// without it a single byte-at-a-time table suffices.
+#define CRC32_BYFOUR
+#ifdef CRC32_BYFOUR
+  #define CRC32_TABLES 8
+#else
+  #define CRC32_TABLES 1
+#endif
+
+// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+#define TROT_ALIGNMENT   8  // Required by instruction,
+                            // guaranteed by jlong table element type.
+// 256 jchar entries, expressed in units of jlong table elements.
+#define TROT_COLUMN_SIZE (256*sizeof(jchar)/sizeof(jlong))
+
+// Platform-specific (z/Architecture) part of StubRoutines: static storage for
+// shared stub entry points and lookup tables, plus helpers to materialize
+// table addresses in generated code.
+class zarch {
+ friend class StubGenerator;
+
+ public:
+  enum { nof_instance_allocators = 10 };
+
+  // allocator lock values
+  enum {
+    unlocked = 0,
+    locked   = 1
+  };
+
+ private:
+  // Entry point of the handler invoked on faults in unsafe accesses.
+  static address _handler_for_unsafe_access_entry;
+
+  // Lock word guarding atomic_compare_and_exchange/atomic_increment (see below).
+  static int _atomic_memory_operation_lock;
+
+  static address _partial_subtype_check;
+  // CRC32 lookup tables (CRC32_TABLES columns of CRC32_COLUMN_SIZE entries each).
+  static juint   _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
+
+  // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+  static address _trot_table_addr;
+  static jlong   _trot_table[TROT_COLUMN_SIZE];
+
+ public:
+  // Global lock for everyone who needs to use atomic_compare_and_exchange
+  // or atomic_increment -- should probably use more locks for more
+  // scalability -- for instance one for each eden space or group of.
+
+  // Address of the lock for atomic_compare_and_exchange.
+  static int* atomic_memory_operation_lock_addr() { return &_atomic_memory_operation_lock; }
+
+  // Accessor and mutator for _atomic_memory_operation_lock.
+  static int atomic_memory_operation_lock() { return _atomic_memory_operation_lock; }
+  static void set_atomic_memory_operation_lock(int value) { _atomic_memory_operation_lock = value; }
+
+  static address handler_for_unsafe_access_entry()        { return _handler_for_unsafe_access_entry; }
+
+  static address partial_subtype_check()                  { return _partial_subtype_check; }
+
+  // Emit code into masm that loads the address of _crc_table into 'table'.
+  static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
+
+  // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+  // Emit code into masm that loads the address of _trot_table into 'table'.
+  static void generate_load_trot_table_addr(MacroAssembler* masm, Register table);
+};
+
+#endif // CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/templateInterpreterGenerator_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,2398 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/abstractInterpreter.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "interpreter/templateTable.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+
+
+// Size of interpreter code.  Increase if too small.  Interpreter will
+// fail with a guarantee ("not enough space for interpreter generation")
+// if too small.
+// Run with +PrintInterpreter to get the VM to print out the size.
+// Max size with JVMTI
+int TemplateInterpreter::InterpreterCodeSize = 320*K;
+
+#undef  __
+#ifdef PRODUCT
+  #define __ _masm->
+#else
+  #define __ _masm->
+//  #define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#endif
+
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define BIND(label)        __ bind(label); BLOCK_COMMENT(#label ":")
+
+#define oop_tmp_offset     _z_ijava_state_neg(oop_tmp)
+
+//-----------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_slow_signature_handler() {
+  //
+  // New slow_signature handler that respects the z/Architecture
+  // C calling conventions.
+  //
+  // We get called by the native entry code with our output register
+  // area == 8. First we call InterpreterRuntime::get_result_handler
+  // to copy the pointer to the signature string temporarily to the
+  // first C-argument and to return the result_handler in
+  // Z_RET. Since native_entry will copy the jni-pointer to the
+  // first C-argument slot later on, it's OK to occupy this slot
+  // temporarily. Then we copy the argument list on the java
+  // expression stack into native varargs format on the native stack
+  // and load arguments into argument registers. Integer arguments in
+  // the varargs vector will be sign-extended to 8 bytes.
+  //
+  // On entry:
+  //   Z_ARG1  - intptr_t*       Address of java argument list in memory.
+  //   Z_state - cppInterpreter* Address of interpreter state for
+  //                               this method
+  //   Z_method
+  //
+  // On exit (just before return instruction):
+  //   Z_RET contains the address of the result_handler.
+  //   Z_ARG2 is not updated for static methods and contains "this" otherwise.
+  //   Z_ARG3-Z_ARG5 contain the first 3 arguments of types other than float and double.
+  //   Z_FARG1-Z_FARG4 contain the first 4 arguments of type float or double.
+
+  const int LogSizeOfCase = 3;
+
+  const int max_fp_register_arguments   = Argument::n_float_register_parameters;
+  const int max_int_register_arguments  = Argument::n_register_parameters - 2;  // First 2 are reserved.
+
+  const Register arg_java       = Z_tmp_2;
+  const Register arg_c          = Z_tmp_3;
+  const Register signature      = Z_R1_scratch; // Is a string.
+  const Register fpcnt          = Z_R0_scratch;
+  const Register argcnt         = Z_tmp_4;
+  const Register intSlot        = Z_tmp_1;
+  const Register sig_end        = Z_tmp_1; // Assumed end of signature (only used in do_object).
+  const Register target_sp      = Z_tmp_1;
+  const FloatRegister floatSlot = Z_F1;
+
+  const int d_signature         = _z_abi(gpr6); // Only spill space, register contents not affected.
+  const int d_fpcnt             = _z_abi(gpr7); // Only spill space, register contents not affected.
+
+  unsigned int entry_offset = __ offset();
+
+  BLOCK_COMMENT("slow_signature_handler {");
+
+  // We use target_sp for storing arguments in the C frame.
+  __ save_return_pc();
+
+  __ z_stmg(Z_R10,Z_R13,-32,Z_SP);
+  __ push_frame_abi160(32);
+
+  __ z_lgr(arg_java, Z_ARG1);
+
+  Register   method = Z_ARG2; // Directly load into correct argument register.
+
+  __ get_method(method);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_signature), Z_thread, method);
+
+  // Move signature to callee saved register.
+  // Don't directly write to stack. Frame is used by VM call.
+  __ z_lgr(Z_tmp_1, Z_RET);
+
+  // Reload method. Register may have been altered by VM call.
+  __ get_method(method);
+
+  // Get address of result handler.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_result_handler), Z_thread, method);
+
+  // Save signature address to stack.
+  __ z_stg(Z_tmp_1, d_signature, Z_SP);
+
+  // Don't overwrite return value (Z_RET, Z_ARG1) in rest of the method !
+
+  {
+    Label   isStatic;
+
+    // Test if static.
+    // We can test the bit directly.
+    // Path is Z_method->_access_flags._flags.
+    // We only support flag bits in the least significant byte (assert !).
+    // Therefore add 3 to address that byte within "_flags".
+    // Reload method. VM call above may have destroyed register contents
+    __ get_method(method);
+    __ testbit(method2_(method, access_flags), JVM_ACC_STATIC_BIT);
+    method = noreg;  // end of life
+    __ z_btrue(isStatic);
+
+    // For non-static functions, pass "this" in Z_ARG2 and copy it to 2nd C-arg slot.
+    // Need to box the Java object here, so we use arg_java
+    // (address of current Java stack slot) as argument and
+    // don't dereference it as in case of ints, floats, etc..
+    __ z_lgr(Z_ARG2, arg_java);
+    __ add2reg(arg_java, -BytesPerWord);
+    __ bind(isStatic);
+  }
+
+  // argcnt == 0 corresponds to 3rd C argument.
+  //   arg #1 (result handler) and
+  //   arg #2 (this, for non-statics), unused else
+  // are reserved and pre-filled above.
+  // arg_java points to the corresponding Java argument here. It
+  // has been decremented by one argument (this) in case of non-static.
+  __ clear_reg(argcnt, true, false);  // Don't set CC.
+  __ z_lg(target_sp, 0, Z_SP);
+  __ add2reg(arg_c, _z_abi(remaining_cargs), target_sp);
+  // No floating-point args parsed so far.
+  __ clear_mem(Address(Z_SP, d_fpcnt), 8);
+
+  NearLabel   move_intSlot_to_ARG, move_floatSlot_to_FARG;
+  NearLabel   loop_start, loop_start_restore, loop_end;
+  NearLabel   do_int, do_long, do_float, do_double;
+  NearLabel   do_dontreachhere, do_object, do_array, do_boxed;
+
+#ifdef ASSERT
+  // Signature needs to point to '(' (== 0x28) at entry.
+  __ z_lg(signature, d_signature, Z_SP);
+  __ z_cli(0, signature, (int) '(');
+  __ z_brne(do_dontreachhere);
+#endif
+
+  __ bind(loop_start_restore);
+  __ z_lg(signature, d_signature, Z_SP);  // Restore signature ptr, destroyed by move_XX_to_ARG.
+
+  BIND(loop_start);
+  // Advance to next argument type token from the signature.
+  __ add2reg(signature, 1);
+
+  // Use CLI, works well on all CPU versions.
+    __ z_cli(0, signature, (int) ')');
+    __ z_bre(loop_end);                // end of signature
+    __ z_cli(0, signature, (int) 'L');
+    __ z_bre(do_object);               // object     #9
+    __ z_cli(0, signature, (int) 'F');
+    __ z_bre(do_float);                // float      #7
+    __ z_cli(0, signature, (int) 'J');
+    __ z_bre(do_long);                 // long       #6
+    __ z_cli(0, signature, (int) 'B');
+    __ z_bre(do_int);                  // byte       #1
+    __ z_cli(0, signature, (int) 'Z');
+    __ z_bre(do_int);                  // boolean    #2
+    __ z_cli(0, signature, (int) 'C');
+    __ z_bre(do_int);                  // char       #3
+    __ z_cli(0, signature, (int) 'S');
+    __ z_bre(do_int);                  // short      #4
+    __ z_cli(0, signature, (int) 'I');
+    __ z_bre(do_int);                  // int        #5
+    __ z_cli(0, signature, (int) 'D');
+    __ z_bre(do_double);               // double     #8
+    __ z_cli(0, signature, (int) '[');
+    __ z_bre(do_array);                // array      #10
+
+  __ bind(do_dontreachhere);
+
+  __ unimplemented("ShouldNotReachHere in slow_signature_handler", 120);
+
+  // Array argument
+  BIND(do_array);
+
+  {
+    Label   start_skip, end_skip;
+
+    __ bind(start_skip);
+
+    // Advance to next type tag from signature.
+    __ add2reg(signature, 1);
+
+    // Use CLI, works well on all CPU versions.
+    __ z_cli(0, signature, (int) '[');
+    __ z_bre(start_skip);               // Skip further brackets.
+
+    __ z_cli(0, signature, (int) '9');
+    __ z_brh(end_skip);                 // no optional size
+
+    __ z_cli(0, signature, (int) '0');
+    __ z_brnl(start_skip);              // Skip optional size.
+
+    __ bind(end_skip);
+
+    __ z_cli(0, signature, (int) 'L');
+    __ z_brne(do_boxed);                // If not array of objects: go directly to do_boxed.
+  }
+
+  //  OOP argument
+  BIND(do_object);
+  // Pass by an object's type name.
+  {
+    Label   L;
+
+    __ add2reg(sig_end, 4095, signature);     // Assume object type name is shorter than 4k.
+    __ load_const_optimized(Z_R0, (int) ';'); // Type name terminator (must be in Z_R0!).
+    __ MacroAssembler::search_string(sig_end, signature);
+    __ z_brl(L);
+    __ z_illtrap();  // No semicolon found: internal error or object name too long.
+    __ bind(L);
+    __ z_lgr(signature, sig_end);
+    // fallthru to do_boxed
+  }
+
+  // Need to box the Java object here, so we use arg_java
+  // (address of current Java stack slot) as argument and
+  // don't dereference it as in case of ints, floats, etc..
+
+  // UNBOX argument
+  // Load reference and check for NULL.
+  Label  do_int_Entry4Boxed;
+  __ bind(do_boxed);
+  {
+    __ load_and_test_long(intSlot, Address(arg_java));
+    __ z_bre(do_int_Entry4Boxed);
+    __ z_lgr(intSlot, arg_java);
+    __ z_bru(do_int_Entry4Boxed);
+  }
+
+  // INT argument
+
+  // (also for byte, boolean, char, short)
+  // Use lgf for load (sign-extend) and stg for store.
+  BIND(do_int);
+  __ z_lgf(intSlot, 0, arg_java);
+
+  __ bind(do_int_Entry4Boxed);
+  __ add2reg(arg_java, -BytesPerWord);
+  // If argument fits into argument register, go and handle it, otherwise continue.
+  __ compare32_and_branch(argcnt, max_int_register_arguments,
+                          Assembler::bcondLow, move_intSlot_to_ARG);
+  __ z_stg(intSlot, 0, arg_c);
+  __ add2reg(arg_c, BytesPerWord);
+  __ z_bru(loop_start);
+
+  // LONG argument
+
+  BIND(do_long);
+  __ add2reg(arg_java, -2*BytesPerWord);  // Decrement first to have positive displacement for lg.
+  __ z_lg(intSlot, BytesPerWord, arg_java);
+  // If argument fits into argument register, go and handle it, otherwise continue.
+  __ compare32_and_branch(argcnt, max_int_register_arguments,
+                          Assembler::bcondLow, move_intSlot_to_ARG);
+  __ z_stg(intSlot, 0, arg_c);
+  __ add2reg(arg_c, BytesPerWord);
+  __ z_bru(loop_start);
+
+  // FLOAT argument
+
+  BIND(do_float);
+  __ z_le(floatSlot, 0, arg_java);
+  __ add2reg(arg_java, -BytesPerWord);
+  assert(max_fp_register_arguments <= 255, "always true");  // safety net
+  __ z_cli(d_fpcnt+7, Z_SP, max_fp_register_arguments);
+  __ z_brl(move_floatSlot_to_FARG);
+  __ z_ste(floatSlot, 4, arg_c);
+  __ add2reg(arg_c, BytesPerWord);
+  __ z_bru(loop_start);
+
+  // DOUBLE argument
+
+  BIND(do_double);
+  __ add2reg(arg_java, -2*BytesPerWord);  // Decrement first to have positive displacement for lg.
+  __ z_ld(floatSlot, BytesPerWord, arg_java);
+  assert(max_fp_register_arguments <= 255, "always true");  // safety net
+  __ z_cli(d_fpcnt+7, Z_SP, max_fp_register_arguments);
+  __ z_brl(move_floatSlot_to_FARG);
+  __ z_std(floatSlot, 0, arg_c);
+  __ add2reg(arg_c, BytesPerWord);
+  __ z_bru(loop_start);
+
+  // Method exit, all arguments processed.
+  __ bind(loop_end);
+  __ pop_frame();
+  __ restore_return_pc();
+  __ z_lmg(Z_R10,Z_R13,-32,Z_SP);
+  __ z_br(Z_R14);
+
+  // Copy int arguments.
+
+  Label  iarg_caselist;   // Distance between each case has to be a power of 2
+                          // (= 1 << LogSizeOfCase).
+  __ align(16);
+  BIND(iarg_caselist);
+  __ z_lgr(Z_ARG3, intSlot);    // 4 bytes
+  __ z_bru(loop_start_restore); // 4 bytes
+
+  __ z_lgr(Z_ARG4, intSlot);
+  __ z_bru(loop_start_restore);
+
+  __ z_lgr(Z_ARG5, intSlot);
+  __ z_bru(loop_start_restore);
+
+  __ align(16);
+  __ bind(move_intSlot_to_ARG);
+  __ z_stg(signature, d_signature, Z_SP);       // Spill since signature == Z_R1_scratch.
+  __ z_larl(Z_R1_scratch, iarg_caselist);
+  __ z_sllg(Z_R0_scratch, argcnt, LogSizeOfCase);
+  __ add2reg(argcnt, 1);
+  __ z_agr(Z_R1_scratch, Z_R0_scratch);
+  __ z_bcr(Assembler::bcondAlways, Z_R1_scratch);
+
+  // Copy float arguments.
+
+  Label  farg_caselist;   // Distance between each case has to be a power of 2
+                          // (= 1 << LogSizeOfCase), padded with nop.
+  __ align(16);
+  BIND(farg_caselist);
+  __ z_ldr(Z_FARG1, floatSlot); // 2 bytes
+  __ z_bru(loop_start_restore); // 4 bytes
+  __ z_nop();                   // 2 bytes
+
+  __ z_ldr(Z_FARG2, floatSlot);
+  __ z_bru(loop_start_restore);
+  __ z_nop();
+
+  __ z_ldr(Z_FARG3, floatSlot);
+  __ z_bru(loop_start_restore);
+  __ z_nop();
+
+  __ z_ldr(Z_FARG4, floatSlot);
+  __ z_bru(loop_start_restore);
+  __ z_nop();
+
+  __ align(16);
+  __ bind(move_floatSlot_to_FARG);
+  __ z_stg(signature, d_signature, Z_SP);        // Spill since signature == Z_R1_scratch.
+  __ z_lg(Z_R0_scratch, d_fpcnt, Z_SP);          // Need old value for indexing.
+  __ add2mem_64(Address(Z_SP, d_fpcnt), 1, Z_R1_scratch); // Increment index.
+  __ z_larl(Z_R1_scratch, farg_caselist);
+  __ z_sllg(Z_R0_scratch, Z_R0_scratch, LogSizeOfCase);
+  __ z_agr(Z_R1_scratch, Z_R0_scratch);
+  __ z_bcr(Assembler::bcondAlways, Z_R1_scratch);
+
+  BLOCK_COMMENT("} slow_signature_handler");
+
+  return __ addr_at(entry_offset);
+}
+
+address TemplateInterpreterGenerator::generate_result_handler_for (BasicType type) {
+  address entry = __ pc();
+
+  assert(Z_tos == Z_RET, "Result handler: must move result!");
+  assert(Z_ftos == Z_FRET, "Result handler: must move float result!");
+
+  switch (type) {
+    case T_BOOLEAN:
+      __ c2bool(Z_tos);
+      break;
+    case T_CHAR:
+      __ and_imm(Z_tos, 0xffff);
+      break;
+    case T_BYTE:
+      __ z_lbr(Z_tos, Z_tos);
+      break;
+    case T_SHORT:
+      __ z_lhr(Z_tos, Z_tos);
+      break;
+    case T_INT:
+    case T_LONG:
+    case T_VOID:
+    case T_FLOAT:
+    case T_DOUBLE:
+      break;
+    case T_OBJECT:
+      // Retrieve result from frame...
+      __ mem2reg_opt(Z_tos, Address(Z_fp, oop_tmp_offset));
+      // and verify it.
+      __ verify_oop(Z_tos);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  __ z_br(Z_R14);      // Return from result handler.
+  return entry;
+}
+
+// Abstract method entry.
+// Attempt to execute abstract method. Throw exception.
+address TemplateInterpreterGenerator::generate_abstract_entry(void) {
+  unsigned int entry_offset = __ offset();
+
+  // Caller could be the call_stub or a compiled method (x86 version is wrong!).
+
+  BLOCK_COMMENT("abstract_entry {");
+
+  // Implement call of InterpreterRuntime::throw_AbstractMethodError.
+  __ set_top_ijava_frame_at_SP_as_last_Java_frame(Z_SP, Z_R1);
+  __ save_return_pc();       // Save Z_R14.
+  __ push_frame_abi160(0);   // Without new frame the RT call could overwrite the saved Z_R14.
+
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError), Z_thread);
+
+  __ pop_frame();
+  __ restore_return_pc();    // Restore Z_R14.
+  __ reset_last_Java_frame();
+
+  // Restore caller sp for c2i case.
+  __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+  // branch to SharedRuntime::generate_forward_exception() which handles all possible callers,
+  // i.e. call stub, compiled method, interpreted method.
+  __ load_absolute_address(Z_tmp_1, StubRoutines::forward_exception_entry());
+  __ z_br(Z_tmp_1);
+
+  BLOCK_COMMENT("} abstract_entry");
+
+  return __ addr_at(entry_offset);
+}
+
+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
+#if INCLUDE_ALL_GCS
+  if (UseG1GC) {
+    // Inputs:
+    //  Z_ARG1 - receiver
+    //
+    // What we do:
+    //  - Load the referent field address.
+    //  - Load the value in the referent field.
+    //  - Pass that value to the pre-barrier.
+    //
+    // In the case of G1 this will record the value of the
+    // referent in an SATB buffer if marking is active.
+    // This will cause concurrent marking to mark the referent
+    // field as live.
+
+    Register  scratch1 = Z_tmp_2;
+    Register  scratch2 = Z_tmp_3;
+    Register  pre_val  = Z_RET;   // return value
+    // Z_esp is callers operand stack pointer, i.e. it points to the parameters.
+    Register  Rargp    = Z_esp;
+
+    Label     slow_path;
+    address   entry = __ pc();
+
+    const int referent_offset = java_lang_ref_Reference::referent_offset;
+    guarantee(referent_offset > 0, "referent offset not initialized");
+
+    BLOCK_COMMENT("Reference_get {");
+
+    //  If the receiver is null then it is OK to jump to the slow path.
+    __ load_and_test_long(pre_val, Address(Rargp, Interpreter::stackElementSize)); // Get receiver.
+    __ z_bre(slow_path);
+
+    //  Load the value of the referent field.
+    __ load_heap_oop(pre_val, referent_offset, pre_val);
+
+    // Restore caller sp for c2i case.
+    __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    // Note:
+    //   With these parameters the write_barrier_pre does not
+    //   generate instructions to load the previous value.
+    __ g1_write_barrier_pre(noreg,      // obj
+                            noreg,      // offset
+                            pre_val,    // pre_val
+                            noreg,      // no new val to preserve
+                            scratch1,   // tmp
+                            scratch2,   // tmp
+                            true);      // pre_val_needed
+
+    __ z_br(Z_R14);
+
+    // Branch to previously generated regular method entry.
+    __ bind(slow_path);
+
+    address meth_entry = Interpreter::entry_for_kind(Interpreter::zerolocals);
+    __ jump_to_entry(meth_entry, Z_R1);
+
+    BLOCK_COMMENT("} Reference_get");
+
+    return entry;
+  }
+#endif // INCLUDE_ALL_GCS
+
+  return NULL;
+}
+
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
+  address entry = __ pc();
+
+  DEBUG_ONLY(__ verify_esp(Z_esp, Z_ARG5));
+
+  // Restore bcp under the assumption that the current frame is still
+  // interpreted.
+  __ restore_bcp();
+
+  // Expression stack must be empty before entering the VM if an
+  // exception happened.
+  __ empty_expression_stack();
+  // Throw exception.
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+  return entry;
+}
+
+//
+// Args:
+//   Z_ARG3: aberrant index
+//
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char * name) {
+  address entry = __ pc();
+  address excp = CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException);
+
+  // Expression stack must be empty before entering the VM if an
+  // exception happened.
+  __ empty_expression_stack();
+
+  // Setup parameters.
+  // Leave out the name and use register for array to create more detailed exceptions.
+  __ load_absolute_address(Z_ARG2, (address) name);
+  __ call_VM(noreg, excp, Z_ARG2, Z_ARG3);
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
+  address entry = __ pc();
+
+  // Object is at TOS.
+  __ pop_ptr(Z_ARG2);
+
+  // Expression stack must be empty before entering the VM if an
+  // exception happened.
+  __ empty_expression_stack();
+
+  __ call_VM(Z_ARG1,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException),
+             Z_ARG2);
+
+  DEBUG_ONLY(__ should_not_reach_here();)
+
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) {
+  assert(!pass_oop || message == NULL, "either oop or message but not both");
+  address entry = __ pc();
+
+  BLOCK_COMMENT("exception_handler_common {");
+
+  // Expression stack must be empty before entering the VM if an
+  // exception happened.
+  __ empty_expression_stack();
+  if (name != NULL) {
+    __ load_absolute_address(Z_ARG2, (address)name);
+  } else {
+    __ clear_reg(Z_ARG2, true, false);
+  }
+
+  if (pass_oop) {
+    __ call_VM(Z_tos,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception),
+               Z_ARG2, Z_tos /*object (see TT::aastore())*/);
+  } else {
+    if (message != NULL) {
+      __ load_absolute_address(Z_ARG3, (address)message);
+    } else {
+      __ clear_reg(Z_ARG3, true, false);
+    }
+    __ call_VM(Z_tos,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception),
+               Z_ARG2, Z_ARG3);
+  }
+  // Throw exception.
+  __ load_absolute_address(Z_R1_scratch, Interpreter::throw_exception_entry());
+  __ z_br(Z_R1_scratch);
+
+  BLOCK_COMMENT("} exception_handler_common");
+
+  return entry;
+}
+
+// Unused, should never pass by.
+address TemplateInterpreterGenerator::generate_continuation_for (TosState state) {
+  address entry = __ pc();
+  __ should_not_reach_here();
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_return_entry_for (TosState state, int step, size_t index_size) {
+  address entry = __ pc();
+
+  BLOCK_COMMENT("return_entry {");
+
+  // Pop i2c extension or revert top-2-parent-resize done by interpreted callees.
+  Register sp_before_i2c_extension = Z_bcp;
+  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
+  __ z_lg(sp_before_i2c_extension, Address(Z_fp, _z_ijava_state_neg(top_frame_sp)));
+  __ resize_frame_absolute(sp_before_i2c_extension, Z_locals/*tmp*/, true/*load_fp*/);
+
+  // TODO(ZASM): necessary??
+  //  // and NULL it as marker that esp is now tos until next java call
+  //  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_esp();
+
+  if (state == atos) {
+    __ profile_return_type(Z_tmp_1, Z_tos, Z_tmp_2);
+  }
+
+  Register cache  = Z_tmp_1;
+  Register size   = Z_tmp_1;
+  Register offset = Z_tmp_2;
+  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
+                                    ConstantPoolCacheEntry::flags_offset());
+  __ get_cache_and_index_at_bcp(cache, offset, 1, index_size);
+
+  // #args is in rightmost byte of the _flags field.
+  __ z_llgc(size, Address(cache, offset, flags_offset+(sizeof(size_t)-1)));
+  __ z_sllg(size, size, Interpreter::logStackElementSize); // Each argument size in bytes.
+  __ z_agr(Z_esp, size);                                   // Pop arguments.
+  __ dispatch_next(state, step);
+
+  BLOCK_COMMENT("} return_entry");
+
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_deopt_entry_for (TosState state,
+                                                               int step) {
+  address entry = __ pc();
+
+  BLOCK_COMMENT("deopt_entry {");
+
+  // TODO(ZASM): necessary? NULL last_sp until next java call
+  // __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_esp();
+
+  // Handle exceptions.
+  {
+    Label L;
+    __ load_and_test_long(Z_R0/*pending_exception*/, thread_(pending_exception));
+    __ z_bre(L);
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::throw_pending_exception));
+    __ should_not_reach_here();
+    __ bind(L);
+  }
+  __ dispatch_next(state, step);
+
+  BLOCK_COMMENT("} deopt_entry");
+
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_safept_entry_for (TosState state,
+                                                                address runtime_entry) {
+  address entry = __ pc();
+  __ push(state);
+  __ call_VM(noreg, runtime_entry);
+  __ dispatch_via(vtos, Interpreter::_normal_table.table_for (vtos));
+  return entry;
+}
+
+//
+// Helpers for commoning out cases in the various type of method entries.
+//
+
+// Increment invocation count & check for overflow.
+//
+// Note: checking for negative value instead of overflow
+// so we have a 'sticky' overflow test.
+//
+// Z_ARG2: method (see generate_fixed_frame())
+//
+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) {
+  Label done;
+  Register method = Z_ARG2; // Generate_fixed_frame() copies Z_method into Z_ARG2.
+  Register m_counters = Z_ARG4;
+
+  BLOCK_COMMENT("counter_incr {");
+
+  // Note: In tiered we increment either counters in method or in MDO depending
+  // if we are profiling or not.
+  if (TieredCompilation) {
+    int increment = InvocationCounter::count_increment;
+    if (ProfileInterpreter) {
+      NearLabel no_mdo;
+      Register mdo = m_counters;
+      // Are we profiling?
+      __ load_and_test_long(mdo, method2_(method, method_data));
+      __ branch_optimized(Assembler::bcondZero, no_mdo);
+      // Increment counter in the MDO.
+      const Address mdo_invocation_counter(mdo, MethodData::invocation_counter_offset() +
+                                           InvocationCounter::counter_offset());
+      const Address mask(mdo, MethodData::invoke_mask_offset());
+      __ increment_mask_and_jump(mdo_invocation_counter, increment, mask,
+                                 Z_R1_scratch, false, Assembler::bcondZero,
+                                 overflow);
+      __ z_bru(done);
+      __ bind(no_mdo);
+    }
+
+    // Increment counter in MethodCounters.
+    const Address invocation_counter(m_counters,
+                                     MethodCounters::invocation_counter_offset() +
+                                     InvocationCounter::counter_offset());
+    // Get address of MethodCounters object.
+    __ get_method_counters(method, m_counters, done);
+    const Address mask(m_counters, MethodCounters::invoke_mask_offset());
+    __ increment_mask_and_jump(invocation_counter,
+                               increment, mask,
+                               Z_R1_scratch, false, Assembler::bcondZero,
+                               overflow);
+  } else {
+    Register counter_sum = Z_ARG3; // The result of this piece of code.
+    Register tmp         = Z_R1_scratch;
+#ifdef ASSERT
+    {
+      NearLabel ok;
+      __ get_method(tmp);
+      __ compare64_and_branch(method, tmp, Assembler::bcondEqual, ok);
+      __ z_illtrap(0x66);
+      __ bind(ok);
+    }
+#endif
+
+    // Get address of MethodCounters object.
+    __ get_method_counters(method, m_counters, done);
+    // Update standard invocation counters.
+    __ increment_invocation_counter(m_counters, counter_sum);
+    if (ProfileInterpreter) {
+      __ add2mem_32(Address(m_counters, MethodCounters::interpreter_invocation_counter_offset()), 1, tmp);
+      if (profile_method != NULL) {
+        const Address profile_limit(m_counters, MethodCounters::interpreter_profile_limit_offset());
+        __ z_cl(counter_sum, profile_limit);
+        __ branch_optimized(Assembler::bcondLow, *profile_method_continue);
+        // If no method data exists, go to profile_method.
+        __ test_method_data_pointer(tmp, *profile_method);
+      }
+    }
+
+    const Address invocation_limit(m_counters, MethodCounters::interpreter_invocation_limit_offset());
+    __ z_cl(counter_sum, invocation_limit);
+    __ branch_optimized(Assembler::bcondNotLow, *overflow);
+  }
+
+  __ bind(done);
+
+  BLOCK_COMMENT("} counter_incr");
+}
+
+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {
+  // InterpreterRuntime::frequency_counter_overflow takes two
+  // arguments, the first (thread) is passed by call_VM, the second
+  // indicates if the counter overflow occurs at a backwards branch
+  // (NULL bcp). We pass zero for it. The call returns the address
+  // of the verified entry point for the method or NULL if the
+  // compilation did not complete (either went background or bailed
+  // out).
+  __ clear_reg(Z_ARG2);
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow),
+             Z_ARG2);
+  __ z_bru(do_continue);
+}
+
+// Check that a frame of frame_size bytes fits on the stack without running
+// into the guard zone. If it does not fit, restore SP to the sender SP and
+// branch to the shared-runtime StackOverflowError stub.
+//
+// Args:
+//   frame_size - requested frame size in bytes (read, not modified)
+//   tmp1       - scratch register (killed)
+//
+// Z_R1_scratch is used as a second temp; Z_R10 must hold the sender SP.
+void TemplateInterpreterGenerator::generate_stack_overflow_check(Register frame_size, Register tmp1) {
+  Register tmp2 = Z_R1_scratch;
+  const int page_size = os::vm_page_size();
+  NearLabel after_frame_check;
+
+  BLOCK_COMMENT("stack_overflow_check {");
+
+  // tmp2 (Z_R1_scratch) is clobbered below, so the arguments must not alias it.
+  assert_different_registers(frame_size, tmp1, tmp2);
+
+  // Stack banging is sufficient overflow check if frame_size < page_size.
+  if (Immediate::is_uimm(page_size, 15)) {
+    __ z_chi(frame_size, page_size);
+    __ z_brl(after_frame_check);
+  } else {
+    __ load_const_optimized(tmp1, page_size);
+    __ compareU32_and_branch(frame_size, tmp1, Assembler::bcondLow, after_frame_check);
+  }
+
+  // Get the stack base, and in debug, verify it is non-zero.
+  __ z_lg(tmp1, thread_(stack_base));
+#ifdef ASSERT
+  address reentry = NULL;
+  NearLabel base_not_zero;
+  __ compareU64_and_branch(tmp1, (intptr_t)0L, Assembler::bcondNotEqual, base_not_zero);
+  reentry = __ stop_chain_static(reentry, "stack base is zero in generate_stack_overflow_check");
+  __ bind(base_not_zero);
+#endif
+
+  // Get the stack size, and in debug, verify it is non-zero.
+  assert(sizeof(size_t) == sizeof(intptr_t), "wrong load size");
+  __ z_lg(tmp2, thread_(stack_size));
+#ifdef ASSERT
+  NearLabel size_not_zero;
+  __ compareU64_and_branch(tmp2, (intptr_t)0L, Assembler::bcondNotEqual, size_not_zero);
+  reentry = __ stop_chain_static(reentry, "stack size is zero in generate_stack_overflow_check");
+  __ bind(size_not_zero);
+#endif
+
+  // Compute the beginning of the protected zone minus the requested frame size.
+  __ z_sgr(tmp1, tmp2);
+  __ add2reg(tmp1, JavaThread::stack_guard_zone_size());
+
+  // Add in the size of the frame (which is the same as subtracting it from the
+  // SP, which would take another register).
+  __ z_agr(tmp1, frame_size);
+
+  // The frame is greater than one page in size, so check against
+  // the bottom of the stack.
+  __ compareU64_and_branch(Z_SP, tmp1, Assembler::bcondHigh, after_frame_check);
+
+  // The stack will overflow, throw an exception.
+
+  // Restore SP to sender's sp. This is necessary if the sender's frame is an
+  // extended compiled frame (see gen_c2i_adapter()) and safer anyway in case of
+  // JSR292 adaptations.
+  __ resize_frame_absolute(Z_R10, tmp1, true/*load_fp*/);
+
+  // Note also that the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ load_absolute_address(tmp1, StubRoutines::throw_StackOverflowError_entry());
+  __ z_br(tmp1);
+
+  // If you get to here, then there is enough stack space.
+  __ bind(after_frame_check);
+
+  BLOCK_COMMENT("} stack_overflow_check");
+}
+
+// Allocate monitor and lock method (asm interpreter).
+//
+// Args:
+//   Z_locals: locals
+//
+// Kills: Z_tmp_1, Z_tmp_2, and Z_ARG3/Z_ARG4/Z_ARG5 (temps for monitor allocation).
+void TemplateInterpreterGenerator::lock_method(void) {
+
+  BLOCK_COMMENT("lock_method {");
+
+  // Synchronize method.
+  const Register method = Z_tmp_2;
+  __ get_method(method);
+
+#ifdef ASSERT
+  address reentry = NULL;
+  {
+    Label L;
+    // Guard against generating lock code for a method that is not synchronized.
+    __ testbit(method2_(method, access_flags), JVM_ACC_SYNCHRONIZED_BIT);
+    __ z_btrue(L);
+    reentry = __ stop_chain_static(reentry, "method doesn't need synchronization");
+    __ bind(L);
+  }
+#endif // ASSERT
+
+  // Get synchronization object.
+  // NOTE(review): 'object' deliberately aliases 'method' (both are Z_tmp_2).
+  // The loads below read the method register before overwriting it -- confirm
+  // load_mirror tolerates dst == method.
+  const Register object = Z_tmp_2;
+
+  {
+    Label     done;
+    Label     static_method;
+
+    // Static methods lock the class mirror, non-static methods lock the receiver.
+    __ testbit(method2_(method, access_flags), JVM_ACC_STATIC_BIT);
+    __ z_btrue(static_method);
+
+    // non-static method: Load receiver obj from stack.
+    __ mem2reg_opt(object, Address(Z_locals, Interpreter::local_offset_in_bytes(0)));
+    __ z_bru(done);
+
+    __ bind(static_method);
+
+    // Lock the java mirror.
+    __ load_mirror(object, method);
+#ifdef ASSERT
+    {
+      NearLabel L;
+      __ compare64_and_branch(object, (intptr_t) 0, Assembler::bcondNotEqual, L);
+      reentry = __ stop_chain_static(reentry, "synchronization object is NULL");
+      __ bind(L);
+    }
+#endif // ASSERT
+
+    __ bind(done);
+  }
+
+  __ add_monitor_to_stack(true, Z_ARG3, Z_ARG4, Z_ARG5); // Allocate monitor elem.
+  // Store object and lock it.
+  __ get_monitors(Z_tmp_1);
+  __ reg2mem_opt(object, Address(Z_tmp_1, BasicObjectLock::obj_offset_in_bytes()));
+  __ lock_object(Z_tmp_1, object);
+
+  BLOCK_COMMENT("} lock_method");
+}
+
+// Generate a fixed interpreter frame. This is identical setup for
+// interpreted methods and for native methods hence the shared code.
+//
+// Registers alive
+//   Z_thread   - JavaThread*
+//   Z_SP       - old stack pointer
+//   Z_method   - callee's method
+//   Z_esp      - parameter list (slot 'above' last param)
+//   Z_R14      - return pc, to be stored in caller's frame
+//   Z_R10      - sender sp, note: Z_tmp_1 is Z_R10!
+//
+// Registers updated
+//   Z_SP       - new stack pointer
+//   Z_esp      - callee's operand stack pointer
+//                points to the slot above the value on top
+//   Z_locals   - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord)
+//   Z_bcp      - the bytecode pointer
+//   Z_fp       - the frame pointer, thereby killing Z_method
+//   Z_ARG2     - copy of Z_method
+//
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+
+  //  stack layout
+  //
+  //   F1 [TOP_IJAVA_FRAME_ABI]              <-- Z_SP, Z_R10 (see note below)
+  //      [F1's operand stack (unused)]
+  //      [F1's outgoing Java arguments]     <-- Z_esp
+  //      [F1's operand stack (non args)]
+  //      [monitors]      (optional)
+  //      [IJAVA_STATE]
+  //
+  //   F2 [PARENT_IJAVA_FRAME_ABI]
+  //      ...
+  //
+  //  0x000
+  //
+  // Note: Z_R10, the sender sp, will be below Z_SP if F1 was extended by a c2i adapter.
+
+  //=============================================================================
+  // Allocate space for locals other than the parameters, the
+  // interpreter state, monitors, and the expression stack.
+
+  const Register local_count     = Z_ARG5;
+  const Register fp              = Z_tmp_2;
+
+  BLOCK_COMMENT("generate_fixed_frame {");
+
+  {
+  // local registers
+  const Register top_frame_size  = Z_ARG2;
+  const Register sp_after_resize = Z_ARG3;
+  const Register max_stack       = Z_ARG4;
+
+  // local_count = method->constMethod->max_locals();
+  __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+  __ z_llgh(local_count, Address(Z_R1_scratch, ConstMethod::size_of_locals_offset()));
+
+  if (native_call) {
+    // If we're calling a native method, we replace max_stack (which is
+    // zero) with space for the worst-case signature handler varargs
+    // vector, which is:
+    //   max_stack = max(Argument::n_register_parameters, parameter_count+2);
+    //
+    // We add two slots to the parameter_count, one for the jni
+    // environment and one for a possible native mirror. We allocate
+    // space for at least the number of ABI registers, even though
+    // InterpreterRuntime::slow_signature_handler won't write more than
+    // parameter_count+2 words when it creates the varargs vector at the
+    // top of the stack. The generated slow signature handler will just
+    // load trash into registers beyond the necessary number. We're
+    // still going to cut the stack back by the ABI register parameter
+    // count so as to get SP+16 pointing at the ABI outgoing parameter
+    // area, so we need to allocate at least that much even though we're
+    // going to throw it away.
+    //
+
+    __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+    __ z_llgh(max_stack,  Address(Z_R1_scratch, ConstMethod::size_of_parameters_offset()));
+    __ add2reg(max_stack, 2);
+
+    NearLabel passing_args_on_stack;
+
+    // max_stack in bytes
+    __ z_sllg(max_stack, max_stack, LogBytesPerWord);
+
+    // max_stack = max(max_stack, size of the register argument area).
+    int argument_registers_in_bytes = Argument::n_register_parameters << LogBytesPerWord;
+    __ compare64_and_branch(max_stack, argument_registers_in_bytes, Assembler::bcondNotLow, passing_args_on_stack);
+
+    __ load_const_optimized(max_stack, argument_registers_in_bytes);
+
+    __ bind(passing_args_on_stack);
+  } else {
+    // !native_call
+    // max_stack temporarily holds the ConstMethod* here.
+    __ z_lg(max_stack, method_(const));
+
+    // Calculate number of non-parameter locals (in slots):
+    __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+    __ z_sh(local_count, Address(Z_R1_scratch, ConstMethod::size_of_parameters_offset()));
+
+    // max_stack = method->max_stack();
+    __ z_llgh(max_stack, Address(max_stack, ConstMethod::max_stack_offset()));
+    // max_stack in bytes
+    __ z_sllg(max_stack, max_stack, LogBytesPerWord);
+  }
+
+  // Resize (i.e. normally shrink) the top frame F1 ...
+  //   F1      [TOP_IJAVA_FRAME_ABI]          <-- Z_SP, Z_R10
+  //           F1's operand stack (free)
+  //           ...
+  //           F1's operand stack (free)      <-- Z_esp
+  //           F1's outgoing Java arg m
+  //           ...
+  //           F1's outgoing Java arg 0
+  //           ...
+  //
+  //  ... into a parent frame (Z_R10 holds F1's SP before any modification, see also above)
+  //
+  //           +......................+
+  //           :                      :        <-- Z_R10, saved below as F0's z_ijava_state.sender_sp
+  //           :                      :
+  //   F1      [PARENT_IJAVA_FRAME_ABI]        <-- Z_SP       \
+  //           F0's non arg local                             | = delta
+  //           ...                                            |
+  //           F0's non arg local              <-- Z_esp      /
+  //           F1's outgoing Java arg m
+  //           ...
+  //           F1's outgoing Java arg 0
+  //           ...
+  //
+  // then push the new top frame F0.
+  //
+  //   F0      [TOP_IJAVA_FRAME_ABI]    = frame::z_top_ijava_frame_abi_size \
+  //           [operand stack]          = max_stack                          | = top_frame_size
+  //           [IJAVA_STATE]            = frame::z_ijava_state_size         /
+
+  // sp_after_resize = Z_esp - delta
+  //
+  // delta = PARENT_IJAVA_FRAME_ABI + (locals_count - params_count)
+
+  __ add2reg(sp_after_resize, (Interpreter::stackElementSize) - (frame::z_parent_ijava_frame_abi_size), Z_esp);
+  __ z_sllg(Z_R0_scratch, local_count, LogBytesPerWord); // Params have already been subtracted from local_count.
+  __ z_slgr(sp_after_resize, Z_R0_scratch);
+
+  // top_frame_size = TOP_IJAVA_FRAME_ABI + max_stack + size of interpreter state
+  __ add2reg(top_frame_size,
+             frame::z_top_ijava_frame_abi_size +
+               frame::z_ijava_state_size +
+               frame::interpreter_frame_monitor_size() * wordSize,
+             max_stack);
+
+  // Check if there's room for the new frame...
+  Register frame_size = max_stack; // Reuse the register for max_stack.
+  __ z_lgr(frame_size, Z_SP);
+  __ z_sgr(frame_size, sp_after_resize);
+  __ z_agr(frame_size, top_frame_size);
+  generate_stack_overflow_check(frame_size, fp/*tmp1*/);
+
+  // NOTE(review): the compare is DEBUG_ONLY while asm_assert_eq is emitted
+  // unconditionally; this relies on asm_assert expanding to nothing in
+  // product builds -- confirm against macroAssembler_s390.
+  DEBUG_ONLY(__ z_cg(Z_R14, _z_abi16(return_pc), Z_SP));
+  __ asm_assert_eq("killed Z_R14", 0);
+  __ resize_frame_absolute(sp_after_resize, fp, true);
+  __ save_return_pc(Z_R14);
+
+  // ... and push the new frame F0.
+  __ push_frame(top_frame_size, fp, true /*copy_sp*/, false);
+  }
+
+  //=============================================================================
+  // Initialize the new frame F0: initialize interpreter state.
+
+  {
+  // locals
+  const Register local_addr = Z_ARG4;
+
+  BLOCK_COMMENT("generate_fixed_frame: initialize interpreter state {");
+
+#ifdef ASSERT
+  // Set the magic number (using local_addr as tmp register).
+  __ load_const_optimized(local_addr, frame::z_istate_magic_number);
+  __ z_stg(local_addr, _z_ijava_state_neg(magic), fp);
+#endif
+
+  // Save sender SP from F1 (i.e. before it was potentially modified by an
+  // adapter) into F0's interpreter state. We use it as well to revert
+  // resizing the frame above.
+  __ z_stg(Z_R10, _z_ijava_state_neg(sender_sp), fp);
+
+  // Load cp cache and save it at the end of this block.
+  __ z_lg(Z_R1_scratch, Address(Z_method,    Method::const_offset()));
+  __ z_lg(Z_R1_scratch, Address(Z_R1_scratch, ConstMethod::constants_offset()));
+  __ z_lg(Z_R1_scratch, Address(Z_R1_scratch, ConstantPool::cache_offset_in_bytes()));
+
+  // z_ijava_state->method = method;
+  __ z_stg(Z_method, _z_ijava_state_neg(method), fp);
+
+  // Point locals at the first argument. Method's locals are the
+  // parameters on top of caller's expression stack.
+  // Tos points past last Java argument.
+
+  __ z_lg(Z_locals, Address(Z_method, Method::const_offset()));
+  __ z_llgh(Z_locals /*parameter_count words*/,
+            Address(Z_locals, ConstMethod::size_of_parameters_offset()));
+  __ z_sllg(Z_locals /*parameter_count bytes*/, Z_locals /*parameter_count*/, LogBytesPerWord);
+  __ z_agr(Z_locals, Z_esp);
+  // z_ijava_state->locals - i*BytesPerWord points to i-th Java local (i starts at 0)
+  // z_ijava_state->locals = Z_esp + parameter_count bytes
+  __ z_stg(Z_locals, _z_ijava_state_neg(locals), fp);
+
+  // z_ijava_state->oop_temp = NULL;
+  __ store_const(Address(fp, oop_tmp_offset), 0);
+
+  // Initialize z_ijava_state->mdx.
+  Register Rmdp = Z_bcp;
+  // native_call: assert that mdo == NULL
+  // Note: in debug builds the check is also emitted for native calls so the
+  // "native methods don't have a mdo" stop below can fire.
+  const bool check_for_mdo = !native_call DEBUG_ONLY(|| native_call);
+  if (ProfileInterpreter && check_for_mdo) {
+#ifdef FAST_DISPATCH
+    // FAST_DISPATCH and ProfileInterpreter are mutually exclusive since
+    // they both use I2.
+    assert(0, "FAST_DISPATCH and +ProfileInterpreter are mutually exclusive");
+#endif // FAST_DISPATCH
+    Label get_continue;
+
+    // mdx is the MethodData* plus data offset, or NULL if no mdo was allocated yet.
+    __ load_and_test_long(Rmdp, method_(method_data));
+    __ z_brz(get_continue);
+    DEBUG_ONLY(if (native_call) __ stop("native methods don't have a mdo"));
+    __ add2reg(Rmdp, in_bytes(MethodData::data_offset()));
+    __ bind(get_continue);
+  }
+  __ z_stg(Rmdp, _z_ijava_state_neg(mdx), fp);
+
+  // Initialize z_ijava_state->bcp and Z_bcp.
+  if (native_call) {
+    __ clear_reg(Z_bcp); // Must initialize. Will get written into frame where GC reads it.
+  } else {
+    __ z_lg(Z_bcp, method_(const));
+    __ add2reg(Z_bcp, in_bytes(ConstMethod::codes_offset()));
+  }
+  __ z_stg(Z_bcp, _z_ijava_state_neg(bcp), fp);
+
+  // no monitors and empty operand stack
+  // => z_ijava_state->monitors points to the top slot in IJAVA_STATE.
+  // => Z_ijava_state->esp points one slot above into the operand stack.
+  // z_ijava_state->monitors = fp - frame::z_ijava_state_size - Interpreter::stackElementSize;
+  // z_ijava_state->esp = Z_esp = z_ijava_state->monitors;
+  __ add2reg(Z_esp, -frame::z_ijava_state_size, fp);
+  __ z_stg(Z_esp, _z_ijava_state_neg(monitors), fp);
+  __ add2reg(Z_esp, -Interpreter::stackElementSize);
+  __ z_stg(Z_esp, _z_ijava_state_neg(esp), fp);
+
+  // z_ijava_state->cpoolCache = Z_R1_scratch (see load above);
+  __ z_stg(Z_R1_scratch, _z_ijava_state_neg(cpoolCache), fp);
+
+  // Get mirror and store it in the frame as GC root for this Method*.
+  __ load_mirror(Z_R1_scratch, Z_method);
+  __ z_stg(Z_R1_scratch, _z_ijava_state_neg(mirror), fp);
+
+  BLOCK_COMMENT("} generate_fixed_frame: initialize interpreter state");
+
+  //=============================================================================
+  if (!native_call) {
+    // Fill locals with 0x0s.
+    NearLabel locals_zeroed;
+    NearLabel doXC;
+
+    // Local_count is already num_locals_slots - num_param_slots.
+    __ compare64_and_branch(local_count, (intptr_t)0L, Assembler::bcondNotHigh, locals_zeroed);
+
+    // Advance local_addr to point behind locals (creates positive incr. in loop).
+    __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+    __ z_llgh(Z_R0_scratch,
+              Address(Z_R1_scratch, ConstMethod::size_of_locals_offset()));
+    // NOTE(review): special-cased because Z_R0 presumably cannot serve as a
+    // base register in the LA instruction add2reg would emit -- confirm.
+    if (Z_R0_scratch == Z_R0) {
+      __ z_aghi(Z_R0_scratch, -1);
+    } else {
+      __ add2reg(Z_R0_scratch, -1);
+    }
+    __ z_lgr(local_addr/*locals*/, Z_locals);
+    __ z_sllg(Z_R0_scratch, Z_R0_scratch, LogBytesPerWord);
+    __ z_sllg(local_count, local_count, LogBytesPerWord); // Local_count are non param locals.
+    __ z_sgr(local_addr, Z_R0_scratch);
+
+    if (VM_Version::has_Prefetch()) {
+      __ z_pfd(0x02, 0, Z_R0, local_addr);
+      __ z_pfd(0x02, 256, Z_R0, local_addr);
+    }
+
+    // Can't optimise for Z10 using "compare and branch" (immediate value is too big).
+    // Choose zeroing strategy: XC (executed, max 256 bytes) vs. MVCLE.
+    __ z_cghi(local_count, 256);
+    __ z_brnh(doXC);
+
+    // MVCLE: Initialize if quite a lot locals.
+    //  __ bind(doMVCLE);
+    __ z_lgr(Z_R0_scratch, local_addr);
+    __ z_lgr(Z_R1_scratch, local_count);
+    __ clear_reg(Z_ARG2);        // Src len of MVCLE is zero.
+
+    __ MacroAssembler::move_long_ext(Z_R0_scratch, Z_ARG1, 0);
+    __ z_bru(locals_zeroed);
+
+    // XC template, executed via EX/EXRL with the real length patched in.
+    Label  XC_template;
+    __ bind(XC_template);
+    __ z_xc(0, 0, local_addr, 0, local_addr);
+
+    __ bind(doXC);
+    __ z_bctgr(local_count, Z_R0);                  // Get #bytes-1 for EXECUTE.
+    if (VM_Version::has_ExecuteExtensions()) {
+      __ z_exrl(local_count, XC_template);          // Execute XC with variable length.
+    } else {
+      __ z_larl(Z_R1_scratch, XC_template);
+      __ z_ex(local_count, 0, Z_R0, Z_R1_scratch);  // Execute XC with variable length.
+    }
+
+    __ bind(locals_zeroed);
+  }
+
+  }
+  // Finally set the frame pointer, destroying Z_method.
+  assert(Z_fp == Z_method, "maybe set Z_fp earlier if other register than Z_method");
+  // Oprofile analysis suggests to keep a copy in a register to be used by
+  // generate_counter_incr().
+  __ z_lgr(Z_ARG2, Z_method);
+  __ z_lgr(Z_fp, fp);
+
+  BLOCK_COMMENT("} generate_fixed_frame");
+}
+
+// Various method entries
+
+// Math function, frame manager must set up an interpreter state, etc.
+//
+// Generates a frameless fast-path entry for java.lang.Math intrinsics.
+// Returns NULL if the kind is not handled here; the caller then falls
+// back to a vanilla (interpreted) entry.
+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
+
+  if (!InlineIntrinsics) { return NULL; } // Generate a vanilla entry.
+
+  // Only support absolute value and square root.
+  if (kind != Interpreter::java_lang_math_abs && kind != Interpreter::java_lang_math_sqrt) {
+    return NULL;
+  }
+
+  BLOCK_COMMENT("math_entry {");
+
+  address math_entry = __ pc();
+
+  if (kind == Interpreter::java_lang_math_abs) {
+    // Load operand from stack.
+    __ mem2freg_opt(Z_FRET, Address(Z_esp, Interpreter::stackElementSize));
+    // LPDBR: load positive (clears the sign bit) => absolute value.
+    __ z_lpdbr(Z_FRET);
+  } else {
+    // sqrt
+    // Can use memory operand directly (SQDB: square root from storage).
+    __ z_sqdb(Z_FRET, Interpreter::stackElementSize, Z_esp);
+  }
+
+  // Restore caller sp for c2i case.
+  __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+  // We are done, return.
+  __ z_br(Z_R14);
+
+  BLOCK_COMMENT("} math_entry");
+
+  return math_entry;
+}
+
+// Interpreter stub for calling a native method. (asm interpreter).
+// This sets up a somewhat different looking stack for calling the
+// native method than the typical interpreter frame setup.
+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
+  // Determine code generation flags.
+  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+  // Interpreter entry for ordinary Java methods.
+  //
+  // Registers alive
+  //   Z_SP          - stack pointer
+  //   Z_thread      - JavaThread*
+  //   Z_method      - callee's method (method to be invoked)
+  //   Z_esp         - operand (or expression) stack pointer of caller. one slot above last arg.
+  //   Z_R10         - sender sp (before modifications, e.g. by c2i adapter
+  //                   and as well by generate_fixed_frame below)
+  //   Z_R14         - return address to caller (call_stub or c2i_adapter)
+  //
+  // Registers updated
+  //   Z_SP          - stack pointer
+  //   Z_fp          - callee's framepointer
+  //   Z_esp         - callee's operand stack pointer
+  //                   points to the slot above the value on top
+  //   Z_locals      - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord)
+  //   Z_tos         - integer result, if any
+  //   z_ftos        - floating point result, if any
+  //
+  // Stack layout at this point:
+  //
+  //   F1      [TOP_IJAVA_FRAME_ABI]         <-- Z_SP, Z_R10 (Z_R10 will be below Z_SP if
+  //                                                          frame was extended by c2i adapter)
+  //           [outgoing Java arguments]     <-- Z_esp
+  //           ...
+  //   PARENT  [PARENT_IJAVA_FRAME_ABI]
+  //           ...
+  //
+
+  address entry_point = __ pc();
+
+  // Make sure registers are different!
+  assert_different_registers(Z_thread, Z_method, Z_esp);
+
+  BLOCK_COMMENT("native_entry {");
+
+  // Make sure method is native and not abstract.
+#ifdef ASSERT
+  address reentry = NULL;
+  { Label L;
+    __ testbit(method_(access_flags), JVM_ACC_NATIVE_BIT);
+    __ z_btrue(L);
+    reentry = __ stop_chain_static(reentry, "tried to execute non-native method as native");
+    __ bind(L);
+  }
+  { Label L;
+    __ testbit(method_(access_flags), JVM_ACC_ABSTRACT_BIT);
+    __ z_bfalse(L);
+    reentry = __ stop_chain_static(reentry, "tried to execute abstract method as non-abstract");
+    __ bind(L);
+  }
+#endif // ASSERT
+
+#ifdef ASSERT
+  // Save the return PC into the callers frame for assertion in generate_fixed_frame.
+  __ save_return_pc(Z_R14);
+#endif
+
+  // Generate the code to allocate the interpreter stack frame.
+  generate_fixed_frame(true);
+
+  const Address do_not_unlock_if_synchronized(Z_thread, JavaThread::do_not_unlock_if_synchronized_offset());
+  // Since at this point in the method invocation the exception handler
+  // would try to exit the monitor of synchronized methods which hasn't
+  // been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. If any exception was thrown by
+  // runtime, exception handling i.e. unlock_if_synchronized_method will
+  // check this thread local flag.
+  __ z_mvi(do_not_unlock_if_synchronized, true);
+
+  // Increment invocation count and check for overflow.
+  NearLabel invocation_counter_overflow;
+  if (inc_counter) {
+    generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
+  }
+
+  Label continue_after_compile;
+  __ bind(continue_after_compile);
+
+  bang_stack_shadow_pages(true);
+
+  // Reset the _do_not_unlock_if_synchronized flag.
+  __ z_mvi(do_not_unlock_if_synchronized, false);
+
+  // Check for synchronized methods.
+  // This must happen AFTER invocation_counter check and stack overflow check,
+  // so method is not locked if overflows.
+  if (synchronized) {
+    lock_method();
+  } else {
+    // No synchronization necessary.
+#ifdef ASSERT
+    { Label L;
+      __ get_method(Z_R1_scratch);
+      __ testbit(method2_(Z_R1_scratch, access_flags), JVM_ACC_SYNCHRONIZED_BIT);
+      __ z_bfalse(L);
+      reentry = __ stop_chain_static(reentry, "method needs synchronization");
+      __ bind(L);
+    }
+#endif // ASSERT
+  }
+
+  // start execution
+
+  // jvmti support
+  __ notify_method_entry();
+
+  //=============================================================================
+  // Get and call the signature handler.
+  const Register Rmethod                 = Z_tmp_2;
+  const Register signature_handler_entry = Z_tmp_1;
+  const Register Rresult_handler         = Z_tmp_3;
+  Label call_signature_handler;
+
+  assert_different_registers(Z_fp, Rmethod, signature_handler_entry, Rresult_handler);
+  // Rresult_handler must survive the native call below.
+  assert(Rresult_handler->is_nonvolatile(), "Rresult_handler must be in a non-volatile register");
+
+  // Reload method.
+  __ get_method(Rmethod);
+
+  // Check for signature handler.
+  __ load_and_test_long(signature_handler_entry, method2_(Rmethod, signature_handler));
+  __ z_brne(call_signature_handler);
+
+  // Method has never been called. Either generate a specialized
+  // handler or point to the slow one.
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call),
+             Rmethod);
+
+  // Reload method.
+  __ get_method(Rmethod);
+
+  // Reload signature handler, it must have been created/assigned in the meantime.
+  __ z_lg(signature_handler_entry, method2_(Rmethod, signature_handler));
+
+  __ bind(call_signature_handler);
+
+  // We have a TOP_IJAVA_FRAME here, which belongs to us.
+  __ set_top_ijava_frame_at_SP_as_last_Java_frame(Z_SP, Z_R1/*tmp*/);
+
+  // Call signature handler and pass locals address in Z_ARG1.
+  __ z_lgr(Z_ARG1, Z_locals);
+  __ call_stub(signature_handler_entry);
+  // Save result handler returned by signature handler.
+  __ z_lgr(Rresult_handler, Z_RET);
+
+  // Reload method (the slow signature handler may block for GC).
+  __ get_method(Rmethod);
+
+  // Pass mirror handle if static call.
+  {
+    Label method_is_not_static;
+    __ testbit(method2_(Rmethod, access_flags), JVM_ACC_STATIC_BIT);
+    __ z_bfalse(method_is_not_static);
+    // Get mirror.
+    __ load_mirror(Z_R1, Rmethod);
+    // z_ijava_state.oop_temp = pool_holder->klass_part()->java_mirror();
+    __ z_stg(Z_R1, oop_tmp_offset, Z_fp);
+    // Pass handle to mirror as 2nd argument to JNI method.
+    __ add2reg(Z_ARG2, oop_tmp_offset, Z_fp);
+    __ bind(method_is_not_static);
+  }
+
+  // Pass JNIEnv address as first parameter.
+  __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
+
+  // Note: last java frame has been set above already. The pc from there
+  // is precise enough.
+
+  // Get native function entry point before we change the thread state.
+  __ z_lg(Z_R1/*native_method_entry*/, method2_(Rmethod, native_function));
+
+  //=============================================================================
+  // Transition from _thread_in_Java to _thread_in_native. As soon as
+  // we make this change the safepoint code needs to be certain that
+  // the last Java frame we established is good. The pc in that frame
+  // just need to be near here not an actual return address.
+#ifdef ASSERT
+  {
+    NearLabel L;
+    __ mem2reg_opt(Z_R14, Address(Z_thread, JavaThread::thread_state_offset()), false /*32 bits*/);
+    __ compareU32_and_branch(Z_R14, _thread_in_Java, Assembler::bcondEqual, L);
+    reentry = __ stop_chain_static(reentry, "Wrong thread state in native stub");
+    __ bind(L);
+  }
+#endif
+
+  // Memory ordering: Z does not reorder store/load with subsequent load. That's strong enough.
+  __ set_thread_state(_thread_in_native);
+
+  //=============================================================================
+  // Call the native method. Argument registers must not have been
+  // overwritten since "__ call_stub(signature_handler);" (except for
+  // ARG1 and ARG2 for static methods).
+
+  __ call_c(Z_R1/*native_method_entry*/);
+
+  // NOTE: frame::interpreter_frame_result() depends on these stores.
+  __ z_stg(Z_RET, _z_ijava_state_neg(lresult), Z_fp);
+  __ freg2mem_opt(Z_FRET, Address(Z_fp, _z_ijava_state_neg(fresult)));
+  // Keep a copy of the integer result in a register surviving the VM calls below.
+  const Register Rlresult = signature_handler_entry;
+  assert(Rlresult->is_nonvolatile(), "Rlresult must be in a non-volatile register");
+  __ z_lgr(Rlresult, Z_RET);
+
+  // Z_method may no longer be valid, because of GC.
+
+  // Block, if necessary, before resuming in _thread_in_Java state.
+  // In order for GC to work, don't clear the last_Java_sp until after
+  // blocking.
+
+  //=============================================================================
+  // Switch thread to "native transition" state before reading the
+  // synchronization state. This additional state is necessary
+  // because reading and testing the synchronization state is not
+  // atomic w.r.t. GC, as this scenario demonstrates: Java thread A,
+  // in _thread_in_native state, loads _not_synchronized and is
+  // preempted. VM thread changes sync state to synchronizing and
+  // suspends threads for GC. Thread A is resumed to finish this
+  // native method, but doesn't block here since it didn't see any
+  // synchronization in progress, and escapes.
+
+  __ set_thread_state(_thread_in_native_trans);
+  if (UseMembar) {
+    __ z_fence();
+  } else {
+    // Write serialization page so VM thread can do a pseudo remote
+    // membar. We use the current thread pointer to calculate a thread
+    // specific offset to write to within the page. This minimizes bus
+    // traffic due to cache line collision.
+    __ serialize_memory(Z_thread, Z_R1, Z_R0);
+  }
+  // Now before we return to java we must look for a current safepoint
+  // (a new safepoint can not start since we entered native_trans).
+  // We must check here because a current safepoint could be modifying
+  // the callers registers right this moment.
+
+  // Check for safepoint operation in progress and/or pending suspend requests.
+  {
+    Label Continue, do_safepoint;
+    __ generate_safepoint_check(do_safepoint, Z_R1, true);
+    // Check for suspend.
+    __ load_and_test_int(Z_R0/*suspend_flags*/, thread_(suspend_flags));
+    __ z_bre(Continue); // 0 -> no flag set -> not suspended
+    __ bind(do_safepoint);
+    __ z_lgr(Z_ARG1, Z_thread);
+    __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
+    __ bind(Continue);
+  }
+
+  //=============================================================================
+  // Back in Interpreter Frame.
+
+  // We are in thread_in_native_trans here and back in the normal
+  // interpreter frame. We don't have to do anything special about
+  // safepoints and we can switch to Java mode anytime we are ready.
+
+  // Note: frame::interpreter_frame_result has a dependency on how the
+  // method result is saved across the call to post_method_exit. For
+  // native methods it assumes that the non-FPU/non-void result is
+  // saved in z_ijava_state.lresult and a FPU result in z_ijava_state.fresult. If
+  // this changes then the interpreter_frame_result implementation
+  // will need to be updated too.
+
+  //=============================================================================
+  // Back in Java.
+
+  // Memory ordering: Z does not reorder store/load with subsequent
+  // load. That's strong enough.
+  __ set_thread_state(_thread_in_Java);
+
+  __ reset_last_Java_frame();
+
+  // We reset the JNI handle block only after unboxing the result; see below.
+
+  // The method register is junk from after the thread_in_native transition
+  // until here. Also can't call_VM until the bcp has been
+  // restored. Need bcp for throwing exception below so get it now.
+  __ get_method(Rmethod);
+
+  // Restore Z_bcp to have legal interpreter frame,
+  // i.e., bci == 0 <=> Z_bcp == code_base().
+  __ z_lg(Z_bcp, Address(Rmethod, Method::const_offset())); // get constMethod
+  __ add2reg(Z_bcp, in_bytes(ConstMethod::codes_offset())); // get codebase
+
+  if (CheckJNICalls) {
+    // clear_pending_jni_exception_check
+    __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
+  }
+
+  // Check if the native method returns an oop, and if so, move it
+  // from the jni handle to z_ijava_state.oop_temp. This is
+  // necessary, because we reset the jni handle block below.
+  // NOTE: frame::interpreter_frame_result() depends on this, too.
+  { NearLabel no_oop_result, store_oop_result;
+  __ load_absolute_address(Z_R1, AbstractInterpreter::result_handler(T_OBJECT));
+  __ compareU64_and_branch(Z_R1, Rresult_handler, Assembler::bcondNotEqual, no_oop_result);
+  __ compareU64_and_branch(Rlresult, (intptr_t)0L, Assembler::bcondEqual, store_oop_result);
+  __ z_lg(Rlresult, 0, Rlresult);  // unbox
+  __ bind(store_oop_result);
+  __ z_stg(Rlresult, oop_tmp_offset, Z_fp);
+  __ verify_oop(Rlresult);
+  __ bind(no_oop_result);
+  }
+
+  // Reset handle block.
+  __ z_lg(Z_R1/*active_handles*/, thread_(active_handles));
+  __ clear_mem(Address(Z_R1, JNIHandleBlock::top_offset_in_bytes()), 4);
+
+  // Handle exceptions (exception handling will handle unlocking!).
+  {
+    Label L;
+    __ load_and_test_long(Z_R0/*pending_exception*/, thread_(pending_exception));
+    __ z_bre(L);
+    __ MacroAssembler::call_VM(noreg,
+                               CAST_FROM_FN_PTR(address,
+                               InterpreterRuntime::throw_pending_exception));
+    __ should_not_reach_here();
+    __ bind(L);
+  }
+
+  if (synchronized) {
+    Register Rfirst_monitor = Z_ARG2;
+    // The only monitor is the one allocated by lock_method, directly below IJAVA_STATE.
+    __ add2reg(Rfirst_monitor, -(frame::z_ijava_state_size + (int)sizeof(BasicObjectLock)), Z_fp);
+#ifdef ASSERT
+    NearLabel ok;
+    __ z_lg(Z_R1, _z_ijava_state_neg(monitors), Z_fp);
+    __ compareU64_and_branch(Rfirst_monitor, Z_R1, Assembler::bcondEqual, ok);
+    reentry = __ stop_chain_static(reentry, "native_entry:unlock: inconsistent z_ijava_state.monitors");
+    __ bind(ok);
+#endif
+    __ unlock_object(Rfirst_monitor);
+  }
+
+  // JVMTI support. Result has already been saved above to the frame.
+  __ notify_method_exit(true/*native_method*/, ilgl, InterpreterMacroAssembler::NotifyJVMTI);
+
+  // Move native method result back into proper registers and return.
+  // C++ interpreter does not use result handler. So do we need to here? TODO(ZASM): check if correct.
+  { NearLabel no_oop_or_null;
+  __ mem2freg_opt(Z_FRET, Address(Z_fp, _z_ijava_state_neg(fresult)));
+  __ load_and_test_long(Z_RET, Address(Z_fp, _z_ijava_state_neg(lresult)));
+  __ z_bre(no_oop_or_null); // No unboxing if the result is NULL.
+  __ load_absolute_address(Z_R1, AbstractInterpreter::result_handler(T_OBJECT));
+  __ compareU64_and_branch(Z_R1, Rresult_handler, Assembler::bcondNotEqual, no_oop_or_null);
+  __ z_lg(Z_RET, oop_tmp_offset, Z_fp);
+  __ verify_oop(Z_RET);
+  __ bind(no_oop_or_null);
+  }
+
+  // Pop the native method's interpreter frame.
+  __ pop_interpreter_frame(Z_R14 /*return_pc*/, Z_ARG2/*tmp1*/, Z_ARG3/*tmp2*/);
+
+  // Return to caller.
+  __ z_br(Z_R14);
+
+  if (inc_counter) {
+    // Handle overflow of counter and compile method.
+    __ bind(invocation_counter_overflow);
+    generate_counter_overflow(continue_after_compile);
+  }
+
+  BLOCK_COMMENT("} native_entry");
+
+  return entry_point;
+}
+
+//
+// Generic interpreted method entry to template interpreter.
+//
+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
+  address entry_point = __ pc();
+
+  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+  // Interpreter entry for ordinary Java methods.
+  //
+  // Registers alive
+  //   Z_SP       - stack pointer
+  //   Z_thread   - JavaThread*
+  //   Z_method   - callee's method (method to be invoked)
+  //   Z_esp      - operand (or expression) stack pointer of caller. one slot above last arg.
+  //   Z_R10      - sender sp (before modifications, e.g. by the c2i adapter
+  //                           as well as by generate_fixed_frame below)
+  //   Z_R14      - return address to caller (call_stub or c2i_adapter)
+  //
+  // Registers updated
+  //   Z_SP       - stack pointer
+  //   Z_fp       - callee's framepointer
+  //   Z_esp      - callee's operand stack pointer
+  //                points to the slot above the value on top
+  //   Z_locals   - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord)
+  //   Z_tos      - integer result, if any
+  //   z_ftos     - floating point result, if any
+  //
+  //
+  // stack layout at this point:
+  //
+  //   F1      [TOP_IJAVA_FRAME_ABI]         <-- Z_SP, Z_R10 (Z_R10 will be below Z_SP if
+  //                                                          frame was extended by c2i adapter)
+  //           [outgoing Java arguments]     <-- Z_esp
+  //           ...
+  //   PARENT  [PARENT_IJAVA_FRAME_ABI]
+  //           ...
+  //
+  // stack layout before dispatching the first bytecode:
+  //
+  //   F0      [TOP_IJAVA_FRAME_ABI]         <-- Z_SP
+  //           [operand stack]               <-- Z_esp
+  //           monitor (optional, can grow)
+  //           [IJAVA_STATE]
+  //   F1      [PARENT_IJAVA_FRAME_ABI]      <-- Z_fp (== *Z_SP)
+  //           [F0's locals]                 <-- Z_locals
+  //           [F1's operand stack]
+  //           [F1's monitors] (optional)
+  //           [IJAVA_STATE]
+
+  // Make sure registers are different!
+  assert_different_registers(Z_thread, Z_method, Z_esp);
+
+  BLOCK_COMMENT("normal_entry {");
+
+  // Make sure method is not native and not abstract.
+  // Rethink these assertions - they can be simplified and shared.
+#ifdef ASSERT
+  address reentry = NULL;
+  { Label L;
+    __ testbit(method_(access_flags), JVM_ACC_NATIVE_BIT);
+    __ z_bfalse(L);
+    reentry = __ stop_chain_static(reentry, "tried to execute native method as non-native");
+    __ bind(L);
+  }
+  { Label L;
+    __ testbit(method_(access_flags), JVM_ACC_ABSTRACT_BIT);
+    __ z_bfalse(L);
+    reentry = __ stop_chain_static(reentry, "tried to execute abstract method as non-abstract");
+    __ bind(L);
+  }
+#endif // ASSERT
+
+#ifdef ASSERT
+  // Save the return PC into the callers frame for assertion in generate_fixed_frame.
+  __ save_return_pc(Z_R14);
+#endif
+
+  // Generate the code to allocate the interpreter stack frame.
+  generate_fixed_frame(false);
+
+  const Address do_not_unlock_if_synchronized(Z_thread, JavaThread::do_not_unlock_if_synchronized_offset());
+  // Since at this point in the method invocation the exception handler
+  // would try to exit the monitor of synchronized methods which hasn't
+  // been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. If any exception was thrown by
+  // runtime, exception handling i.e. unlock_if_synchronized_method will
+  // check this thread local flag.
+  __ z_mvi(do_not_unlock_if_synchronized, true);
+
+  __ profile_parameters_type(Z_tmp_2, Z_ARG3, Z_ARG4);
+
+  // Increment invocation counter and check for overflow.
+  //
+  // Note: checking for negative value instead of overflow so we have a 'sticky'
+  // overflow test (may be of importance as soon as we have true MT/MP).
+  NearLabel invocation_counter_overflow;
+  NearLabel profile_method;
+  NearLabel profile_method_continue;
+  NearLabel Lcontinue;
+  if (inc_counter) {
+    generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue);
+    if (ProfileInterpreter) {
+      __ bind(profile_method_continue);
+    }
+  }
+  __ bind(Lcontinue);
+
+  bang_stack_shadow_pages(false);
+
+  // Reset the _do_not_unlock_if_synchronized flag.
+  __ z_mvi(do_not_unlock_if_synchronized, false);
+
+  // Check for synchronized methods.
+  // Must happen AFTER invocation_counter check and stack overflow check,
+  // so the method is not locked if the counter overflows.
+  if (synchronized) {
+    // Allocate monitor and lock method.
+    lock_method();
+  } else {
+#ifdef ASSERT
+    { Label L;
+      __ get_method(Z_R1_scratch);
+      __ testbit(method2_(Z_R1_scratch, access_flags), JVM_ACC_SYNCHRONIZED_BIT);
+      __ z_bfalse(L);
+      reentry = __ stop_chain_static(reentry, "method needs synchronization");
+      __ bind(L);
+    }
+#endif // ASSERT
+  }
+
+  // start execution
+
+#ifdef ASSERT
+  __ verify_esp(Z_esp, Z_R1_scratch);
+
+  __ verify_thread();
+#endif
+
+  // jvmti support
+  __ notify_method_entry();
+
+  // Start executing instructions.
+  __ dispatch_next(vtos);
+  // Dispatch_next does not return.
+  DEBUG_ONLY(__ should_not_reach_here());
+
+  // Invocation counter overflow.
+  if (inc_counter) {
+    if (ProfileInterpreter) {
+      // We have decided to profile this method in the interpreter.
+      __ bind(profile_method);
+
+      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+      __ set_method_data_pointer_for_bcp();
+      __ z_bru(profile_method_continue);
+    }
+
+    // Handle invocation counter overflow.
+    __ bind(invocation_counter_overflow);
+    generate_counter_overflow(Lcontinue);
+  }
+
+  BLOCK_COMMENT("} normal_entry");
+
+  return entry_point;
+}
+
+// Method entry for static native methods:
+//   int java.util.zip.CRC32.update(int crc, int b)
+address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
+
+  if (UseCRC32Intrinsics) {
+    uint64_t entry_off = __ offset();
+    Label    slow_path;
+
+    // If we need a safepoint check, generate full interpreter entry.
+    __ generate_safepoint_check(slow_path, Z_R1, false);
+
+    BLOCK_COMMENT("CRC32_update {");
+
+    // We don't generate local frame and don't align stack because
+    // we don't even call stub code (we generate the code inline)
+    // and there is no safepoint on this path.
+
+    // Load java parameters.
+    // Z_esp is caller's operand stack pointer, i.e. it points to the parameters.
+    const Register argP    = Z_esp;
+    const Register crc     = Z_ARG1;  // crc value
+    const Register data    = Z_ARG2;  // address of java byte value (kernel_crc32 needs address)
+    const Register dataLen = Z_ARG3;  // source data len (1 byte). Not used because calling the single-byte emitter.
+    const Register table   = Z_ARG4;  // address of crc32 table
+
+    // Arguments are reversed on java expression stack.
+    __ z_la(data, 3+1*wordSize, argP);  // byte value (stack address).
+                                        // Being passed as an int, the single byte is at offset +3.
+    __ z_llgf(crc, 2 * wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register.
+
+    StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
+    __ kernel_crc32_singleByte(crc, data, dataLen, table, Z_R1);
+
+    // Restore caller sp for c2i case.
+    __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+    __ z_br(Z_R14);
+
+    BLOCK_COMMENT("} CRC32_update");
+
+    // Use a previously generated vanilla native entry as the slow path.
+    BIND(slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), Z_R1);
+    return __ addr_at(entry_off);
+  }
+
+  return NULL;
+}
+
+
+// Method entry for static native methods:
+//   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+//   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+  if (UseCRC32Intrinsics) {
+    uint64_t entry_off = __ offset();
+    Label    slow_path;
+
+    // If we need a safepoint check, generate full interpreter entry.
+    __ generate_safepoint_check(slow_path, Z_R1, false);
+
+    // We don't generate local frame and don't align stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters.
+    // Z_esp is caller's operand stack pointer, i.e. it points to the parameters.
+    const Register argP    = Z_esp;
+    const Register crc     = Z_ARG1;  // crc value
+    const Register data    = Z_ARG2;  // address of java byte array
+    const Register dataLen = Z_ARG3;  // source data len
+    const Register table   = Z_ARG4;  // address of crc32 table
+    const Register t0      = Z_R10;   // work reg for kernel* emitters
+    const Register t1      = Z_R11;   // work reg for kernel* emitters
+    const Register t2      = Z_R12;   // work reg for kernel* emitters
+    const Register t3      = Z_R13;   // work reg for kernel* emitters
+
+    // Arguments are reversed on java expression stack.
+    // Calculate address of start element.
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { // Used for "updateByteBuffer direct".
+      // crc     @ (SP + 5W) (32bit)
+      // buf     @ (SP + 3W) (64bit ptr to long array)
+      // off     @ (SP + 2W) (32bit)
+      // dataLen @ (SP + 1W) (32bit)
+      // data = buf + off
+      BLOCK_COMMENT("CRC32_updateByteBuffer {");
+      __ z_llgf(crc,    5*wordSize, argP);  // current crc state
+      __ z_lg(data,    3*wordSize, argP);   // start of byte buffer
+      __ z_agf(data,    2*wordSize, argP);  // Add byte buffer offset.
+      __ z_lgf(dataLen, 1*wordSize, argP);  // #bytes to process
+    } else {                         // Used for "updateBytes update".
+      // crc     @ (SP + 4W) (32bit)
+      // buf     @ (SP + 3W) (64bit ptr to byte array)
+      // off     @ (SP + 2W) (32bit)
+      // dataLen @ (SP + 1W) (32bit)
+      // data = buf + off + base_offset
+      BLOCK_COMMENT("CRC32_updateBytes {");
+      __ z_llgf(crc,    4*wordSize, argP);  // current crc state
+      __ z_lg(data,    3*wordSize, argP);   // start of byte buffer
+      __ z_agf(data,    2*wordSize, argP);  // Add byte buffer offset.
+      __ z_lgf(dataLen, 1*wordSize, argP);  // #bytes to process
+      __ z_aghi(data, arrayOopDesc::base_offset_in_bytes(T_BYTE));
+    }
+
+    StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
+
+    __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 4 registers (t0..t3).
+    __ z_stmg(t0, t3, 1*8, Z_SP);        // Spill regs 10..13 to make them available as work registers.
+    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3);
+    __ z_lmg(t0, t3, 1*8, Z_SP);         // Restore regs 10..13 from stack.
+
+    // Restore caller sp for c2i case.
+    __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+    __ z_br(Z_R14);
+
+    BLOCK_COMMENT("} CRC32_update{Bytes|ByteBuffer}");
+
+    // Use a previously generated vanilla native entry as the slow path.
+    BIND(slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), Z_R1);
+    return __ addr_at(entry_off);
+  }
+
+  return NULL;
+}
+
+// Not supported
+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+  // No CRC32C intrinsic entry is generated on this platform. Returning NULL
+  // presumably makes the caller fall back to the regular method entry — confirm
+  // against AbstractInterpreterGenerator's handling of NULL entries.
+  return NULL;
+}
+
+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
+  // Quick & dirty stack overflow checking: bang the stack & handle trap.
+  // Note that we do the banging after the frame is setup, since the exception
+  // handling code expects to find a valid interpreter frame on the stack.
+  // Doing the banging earlier fails if the caller frame is not an interpreter
+  // frame.
+  // (Also, the exception throwing code expects to unlock any synchronized
+  // method receiver, so do the banging after locking the receiver.)
+
+  // Bang each page in the shadow zone. We can't assume it's been done for
+  // an interpreter frame with greater than a page of locals, so each page
+  // needs to be checked. Only true for non-native. For native, we only bang the last page.
+  if (!UseStackBanging) {
+    return;
+  }
+  const int page_size    = os::vm_page_size();
+  const int shadow_pages = (int)(JavaThread::stack_shadow_zone_size() / page_size);
+  // Native methods touch only the last shadow page; Java methods touch them all.
+  const int first_page   = native_call ? shadow_pages : 1;
+  for (int page = first_page; page <= shadow_pages; page++) {
+    __ bang_stack_with_offset(page * page_size);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateInterpreterGenerator::generate_throw_exception() {
+
+  BLOCK_COMMENT("throw_exception {");
+
+  // Entry point in previous activation (i.e., if the caller was interpreted).
+  Interpreter::_rethrow_exception_entry = __ pc();
+  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Frame accessors use Z_fp.
+  // Z_ARG1 (==Z_tos): exception
+  // Z_ARG2          : Return address/pc that threw exception.
+  __ restore_bcp();    // R13 points to call/send.
+  __ restore_locals();
+
+  // Fallthrough, no need to restore Z_esp.
+
+  // Entry point for exceptions thrown within interpreter code.
+  Interpreter::_throw_exception_entry = __ pc();
+  // Expression stack is undefined here.
+  // Z_ARG1 (==Z_tos): exception
+  // Z_bcp: exception bcp
+  __ verify_oop(Z_ARG1);
+  __ z_lgr(Z_ARG2, Z_ARG1);
+
+  // Expression stack must be empty before entering the VM in case of
+  // an exception.
+  __ empty_expression_stack();
+  // Find exception handler address and preserve exception oop.
+  const Register Rpreserved_exc_oop = Z_tmp_1;
+  __ call_VM(Rpreserved_exc_oop,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception),
+             Z_ARG2);
+  // Z_RET: exception handler entry point
+  // Z_bcp: bcp for exception handler
+  __ push_ptr(Rpreserved_exc_oop); // Push exception which is now the only value on the stack.
+  __ z_br(Z_RET); // Jump to exception handler (may be _remove_activation_entry!).
+
+  // If the exception is not handled in the current frame the frame is
+  // removed and the exception is rethrown (i.e. exception
+  // continuation is _rethrow_exception).
+  //
+  // Note: At this point the bci is still the bci for the instruction
+  // which caused the exception and the expression stack is
+  // empty. Thus, for any VM calls at this point, GC will find a legal
+  // oop map (with empty expression stack).
+
+  //
+  // JVMTI PopFrame support
+  //
+
+  Interpreter::_remove_activation_preserving_args_entry = __ pc();
+  __ z_lg(Z_fp, _z_parent_ijava_frame_abi(callers_sp), Z_SP);
+  __ empty_expression_stack();
+  // Set the popframe_processing bit in pending_popframe_condition
+  // indicating that we are currently handling popframe, so that
+  // call_VMs that may happen later do not trigger new popframe
+  // handling cycles.
+  __ load_sized_value(Z_tmp_1, Address(Z_thread, JavaThread::popframe_condition_offset()), 4, false /*signed*/);
+  __ z_oill(Z_tmp_1, JavaThread::popframe_processing_bit);
+  __ z_sty(Z_tmp_1, thread_(popframe_condition));
+
+  {
+    // Check to see whether we are returning to a deoptimized frame.
+    // (The PopFrame call ensures that the caller of the popped frame is
+    // either interpreted or compiled and deoptimizes it if compiled.)
+    // In this case, we can't call dispatch_next() after the frame is
+    // popped, but instead must save the incoming arguments and restore
+    // them after deoptimization has occurred.
+    //
+    // Note that we don't compare the return PC against the
+    // deoptimization blob's unpack entry because of the presence of
+    // adapter frames in C2.
+    NearLabel caller_not_deoptimized;
+    __ z_lg(Z_ARG1, _z_parent_ijava_frame_abi(return_pc), Z_fp);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), Z_ARG1);
+    __ compareU64_and_branch(Z_RET, (intptr_t)0, Assembler::bcondNotEqual, caller_not_deoptimized);
+
+    // Compute size of arguments for saving when returning to
+    // deoptimized caller.
+    __ get_method(Z_ARG2);
+    __ z_lg(Z_ARG2, Address(Z_ARG2, Method::const_offset()));
+    __ z_llgh(Z_ARG2, Address(Z_ARG2, ConstMethod::size_of_parameters_offset()));
+    __ z_sllg(Z_ARG2, Z_ARG2, Interpreter::logStackElementSize); // Convert #slots to #bytes.
+    __ restore_locals();
+    // Compute address of args to be saved.
+    __ z_lgr(Z_ARG3, Z_locals);
+    __ z_slgr(Z_ARG3, Z_ARG2);
+    __ add2reg(Z_ARG3, wordSize);
+    // Save these arguments.
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args),
+                    Z_thread, Z_ARG2, Z_ARG3);
+
+    __ remove_activation(vtos, Z_R14,
+                         /* throw_monitor_exception */ false,
+                         /* install_monitor_exception */ false,
+                         /* notify_jvmdi */ false);
+
+    // Inform deoptimization that it is responsible for restoring
+    // these arguments.
+    __ store_const(thread_(popframe_condition),
+                   JavaThread::popframe_force_deopt_reexecution_bit,
+                   Z_tmp_1, false);
+
+    // Continue in deoptimization handler.
+    __ z_br(Z_R14);
+
+    __ bind(caller_not_deoptimized);
+  }
+
+  // Clear the popframe condition flag.
+  __ clear_mem(thread_(popframe_condition), sizeof(int));
+
+  __ remove_activation(vtos,
+                       noreg,  // Retaddr is not used.
+                       false,  // throw_monitor_exception
+                       false,  // install_monitor_exception
+                       false); // notify_jvmdi
+  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_esp();
+  // The method data pointer was incremented already during
+  // call profiling. We have to restore the mdp for the current bcp.
+  if (ProfileInterpreter) {
+    __ set_method_data_pointer_for_bcp();
+  }
+#if INCLUDE_JVMTI
+  {
+    Label L_done;
+
+    __ z_cli(0, Z_bcp, Bytecodes::_invokestatic);
+    __ z_brc(Assembler::bcondNotEqual, L_done);
+
+    // The member name argument must be restored if _invokestatic is
+    // re-executed after a PopFrame call.  Detect such a case in the
+    // InterpreterRuntime function and return the member name
+    // argument, or NULL.
+    __ z_lg(Z_ARG2, Address(Z_locals));
+    __ get_method(Z_ARG3);
+    __ call_VM(Z_tmp_1,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),
+               Z_ARG2, Z_ARG3, Z_bcp);
+
+    __ z_ltgr(Z_tmp_1, Z_tmp_1);
+    __ z_brc(Assembler::bcondEqual, L_done);
+
+    __ z_stg(Z_tmp_1, Address(Z_esp, wordSize));
+    __ bind(L_done);
+  }
+#endif // INCLUDE_JVMTI
+  __ dispatch_next(vtos);
+  // End of PopFrame support.
+  Interpreter::_remove_activation_entry = __ pc();
+
+  // In between activations - previous activation type unknown yet.
+  // Compute the continuation point; the continuation point expects the
+  // following registers set up:
+  //
+  // Z_ARG1 (==Z_tos): exception
+  // Z_ARG2          : return address/pc that threw exception
+
+  Register return_pc = Z_tmp_1;
+  Register handler   = Z_tmp_2;
+   assert(return_pc->is_nonvolatile(), "use non-volatile reg. to preserve exception pc");
+   assert(handler->is_nonvolatile(),   "use non-volatile reg. to handler pc");
+  __ asm_assert_ijava_state_magic(return_pc/*tmp*/); // The top frame should be an interpreter frame.
+  __ z_lg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_fp);
+
+  // Moved removing the activation after VM call, because the new top
+  // frame does not necessarily have the z_abi_160 required for a VM
+  // call (e.g. if it is compiled).
+
+  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+                                         SharedRuntime::exception_handler_for_return_address),
+                        Z_thread, return_pc);
+  __ z_lgr(handler, Z_RET); // Save exception handler.
+
+  // Preserve exception over this code sequence.
+  __ pop_ptr(Z_ARG1);
+  __ set_vm_result(Z_ARG1);
+  // Remove the activation (without doing throws on illegalMonitorExceptions).
+  __ remove_activation(vtos, noreg/*ret.pc already loaded*/, false/*throw exc*/, true/*install exc*/, false/*notify jvmti*/);
+  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
+
+  __ get_vm_result(Z_ARG1);     // Restore exception.
+  __ verify_oop(Z_ARG1);
+  __ z_lgr(Z_ARG2, return_pc);  // Restore return address.
+
+#ifdef ASSERT
+  // The return_pc in the new top frame is dead... at least that's my
+  // current understanding. To assert this I overwrite it.
+  // Note: for compiled frames the handler is the deopt blob
+  // which writes Z_ARG2 into the return_pc slot.
+  __ load_const_optimized(return_pc, 0xb00b1);
+  __ z_stg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_SP);
+#endif
+
+  // Z_ARG1 (==Z_tos): exception
+  // Z_ARG2          : return address/pc that threw exception
+
+  // Note that an "issuing PC" is actually the next PC after the call.
+  __ z_br(handler);         // Jump to exception handler of caller.
+
+  BLOCK_COMMENT("} throw_exception");
+}
+
+//
+// JVMTI ForceEarlyReturn support
+//
+address TemplateInterpreterGenerator::generate_earlyret_entry_for (TosState state) {
+  address entry = __ pc();
+
+  BLOCK_COMMENT("earlyret_entry {");
+
+  __ z_lg(Z_fp, _z_parent_ijava_frame_abi(callers_sp), Z_SP);
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_esp();
+  __ empty_expression_stack();
+  __ load_earlyret_value(state);
+
+  // Clear the earlyret state in the thread-local JvmtiThreadState.
+  Register RjvmtiState = Z_tmp_1;
+  __ z_lg(RjvmtiState, thread_(jvmti_thread_state));
+  __ store_const(Address(RjvmtiState, JvmtiThreadState::earlyret_state_offset()),
+                 JvmtiThreadState::earlyret_inactive, 4, 4, Z_R0_scratch);
+
+  __ remove_activation(state,
+                       Z_tmp_1, // retaddr (branched to below)
+                       false,   // throw_monitor_exception
+                       false,   // install_monitor_exception
+                       true);   // notify_jvmdi
+  __ z_br(Z_tmp_1);
+
+  BLOCK_COMMENT("} earlyret_entry");
+
+  return entry;
+}
+
+//-----------------------------------------------------------------------------
+// Helper for vtos entry point generation.
+
+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
+                                                         address& bep,
+                                                         address& cep,
+                                                         address& sep,
+                                                         address& aep,
+                                                         address& iep,
+                                                         address& lep,
+                                                         address& fep,
+                                                         address& dep,
+                                                         address& vep) {
+  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
+  Label L;
+  // Each non-vtos entry pushes its tos-cached value onto the expression stack
+  // and then branches (or falls through) to the common vtos entry below.
+  aep = __ pc(); __ push_ptr(); __ z_bru(L);
+  fep = __ pc(); __ push_f();   __ z_bru(L);
+  dep = __ pc(); __ push_d();   __ z_bru(L);
+  lep = __ pc(); __ push_l();   __ z_bru(L);
+  bep = cep = sep =
+  iep = __ pc(); __ push_i();
+  vep = __ pc();
+  __ bind(L);
+  generate_and_dispatch(t);
+}
+
+//-----------------------------------------------------------------------------
+
+#ifndef PRODUCT
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
+  address entry = __ pc();
+  NearLabel counter_below_trace_threshold;
+
+  if (TraceBytecodesAt > 0) {
+    // Skip runtime call, if the trace threshold is not yet reached.
+    __ load_absolute_address(Z_tmp_1, (address)&BytecodeCounter::_counter_value);
+    __ load_absolute_address(Z_tmp_2, (address)&TraceBytecodesAt);
+    __ load_sized_value(Z_tmp_1, Address(Z_tmp_1), 4, false /*signed*/);
+    __ load_sized_value(Z_tmp_2, Address(Z_tmp_2), 8, false /*signed*/);
+    __ compareU64_and_branch(Z_tmp_1, Z_tmp_2, Assembler::bcondLow, counter_below_trace_threshold);
+  }
+
+  // ltos/dtos values occupy two expression stack slots.
+  int offset2 = state == ltos || state == dtos ? 2 : 1;
+
+  __ push(state);
+  // Preserved return pointer is in Z_R14.
+  // InterpreterRuntime::trace_bytecode() preserves and returns the value passed as second argument.
+  __ z_lgr(Z_ARG2, Z_R14);
+  __ z_lg(Z_ARG3, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)));
+  if (WizardMode) {
+    __ z_lgr(Z_ARG4, Z_esp); // Trace Z_esp in WizardMode.
+  } else {
+    __ z_lg(Z_ARG4, Address(Z_esp, Interpreter::expr_offset_in_bytes(offset2)));
+  }
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), Z_ARG2, Z_ARG3, Z_ARG4);
+  __ z_lgr(Z_R14, Z_RET); // Restore return address (see above).
+  __ pop(state);
+
+  __ bind(counter_below_trace_threshold);
+  __ z_br(Z_R14); // return
+
+  return entry;
+}
+
+// Make feasible for old CPUs.
+void TemplateInterpreterGenerator::count_bytecode() {
+  // Increment BytecodeCounter::_counter_value with a 32-bit add in memory.
+  __ load_absolute_address(Z_R1_scratch, (address) &BytecodeCounter::_counter_value);
+  __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode(Template * t) {
+  __ load_absolute_address(Z_R1_scratch, (address)&BytecodeHistogram::_counters[ t->bytecode() ]);
+  __ add2mem_32(Address(Z_R1_scratch), 1, Z_tmp_1);
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template * t) {
+  Address  index_addr(Z_tmp_1, (intptr_t) 0);
+  Register index = Z_tmp_2;
+
+  // Load previous index.
+  __ load_absolute_address(Z_tmp_1, (address) &BytecodePairHistogram::_index);
+  __ mem2reg_opt(index, index_addr, false);
+
+  // Merge with current bytecode (shift + or) and store as new previous index.
+  __ z_srl(index, BytecodePairHistogram::log2_number_of_codes);
+  __ load_const_optimized(Z_R0_scratch,
+                          (int)t->bytecode() << BytecodePairHistogram::log2_number_of_codes);
+  __ z_or(index, Z_R0_scratch);
+  __ reg2mem_opt(index, index_addr, false);
+
+  // Load counter array's address.
+  __ z_lgfr(index, index);   // Sign extend for addressing.
+  __ z_sllg(index, index, LogBytesPerInt);  // index2bytes
+  __ load_absolute_address(Z_R1_scratch,
+                           (address) &BytecodePairHistogram::_counters);
+  // Add index and increment counter.
+  __ z_agr(Z_R1_scratch, index);
+  __ add2mem_32(Address(Z_R1_scratch), 1, Z_tmp_1);
+}
+
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
+  // Call a little run-time stub to avoid blow-up for each bytecode.
+  // The run-time stub saves the right registers, depending on
+  // the tosca in-state for the given template.
+  address entry = Interpreter::trace_code(t->tos_in());
+  guarantee(entry != NULL, "entry must have been generated");
+  __ call_stub(entry);
+}
+
+void TemplateInterpreterGenerator::stop_interpreter_at() {
+  NearLabel L;
+
+  // Skip the breakpoint call while the counter is below StopInterpreterAt.
+  __ load_absolute_address(Z_tmp_1, (address)&BytecodeCounter::_counter_value);
+  __ load_absolute_address(Z_tmp_2, (address)&StopInterpreterAt);
+  __ load_sized_value(Z_tmp_1, Address(Z_tmp_1), 4, false /*signed*/);
+  __ load_sized_value(Z_tmp_2, Address(Z_tmp_2), 8, false /*signed*/);
+  __ compareU64_and_branch(Z_tmp_1, Z_tmp_2, Assembler::bcondLow, L);
+  assert(Z_tmp_1->is_nonvolatile(), "must be nonvolatile to preserve Z_tos");
+  assert(Z_F8->is_nonvolatile(), "must be nonvolatile to preserve Z_ftos");
+  __ z_lgr(Z_tmp_1, Z_tos);      // Save tos.
+  __ z_lgr(Z_tmp_2, Z_bytecode); // Save Z_bytecode.
+  __ z_ldr(Z_F8, Z_ftos);        // Save ftos.
+  // Use -XX:StopInterpreterAt=<num> to set the limit
+  // and break at breakpoint().
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, breakpoint), false);
+  __ z_lgr(Z_tos, Z_tmp_1);      // Restore tos.
+  __ z_lgr(Z_bytecode, Z_tmp_2); // Restore Z_bytecode.
+  __ z_ldr(Z_ftos, Z_F8);        // Restore ftos.
+  __ bind(L);
+}
+
+#endif // !PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/templateTable_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,4250 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/templateTable.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+
+#ifdef PRODUCT
+#define __ _masm->
+#define BLOCK_COMMENT(str)
+#define BIND(label)        __ bind(label);
+#else
+#define __ (PRODUCT_ONLY(false&&)Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define BIND(label)        __ bind(label); BLOCK_COMMENT(#label ":")
+#endif
+
+// The assumed minimum size of a BranchTableBlock.
+// The actual size of each block heavily depends on the CPU capabilities and,
+// of course, on the logic implemented in each block.
+#ifdef ASSERT
+  #define BTB_MINSIZE 256
+#else
+  #define BTB_MINSIZE  64
+#endif
+
+#ifdef ASSERT
+// Macro to open a BranchTableBlock (a piece of code that is branched to by a calculated branch).
+// The ASSERT flavor additionally emits a run-time check that the computed
+// dispatch target (br_tab + flags) equals the current PC, trapping otherwise.
+#define BTB_BEGIN(lbl, alignment, name)                                        \
+  __ align_address(alignment);                                                 \
+  __ bind(lbl);                                                                \
+  { unsigned int b_off = __ offset();                                          \
+    uintptr_t   b_addr = (uintptr_t)__ pc();                                   \
+    __ z_larl(Z_R0, (int64_t)0);     /* Check current address alignment. */    \
+    __ z_slgr(Z_R0, br_tab);         /* Current Address must be equal    */    \
+    __ z_slgr(Z_R0, flags);          /* to calculated branch target.     */    \
+    __ z_brc(Assembler::bcondLogZero, 3); /* skip trap if ok. */               \
+    __ z_illtrap(0x55);                                                        \
+    guarantee(b_addr%alignment == 0, "bad alignment at begin of block" name);
+
+// Macro to close a BranchTableBlock (a piece of code that is branched to by a calculated branch).
+// Verifies at assembly time that the emitted block fits into its alignment slot.
+#define BTB_END(lbl, alignment, name)                                          \
+    uintptr_t   e_addr = (uintptr_t)__ pc();                                   \
+    unsigned int e_off = __ offset();                                          \
+    unsigned int len   = e_off-b_off;                                          \
+    if (len > alignment) {                                                     \
+      tty->print_cr("%4d of %4d @ " INTPTR_FORMAT ": Block len for %s",        \
+                    len, alignment, e_addr-len, name);                         \
+      guarantee(len <= alignment, "block too large");                          \
+    }                                                                          \
+    guarantee(len == e_addr-b_addr, "block len mismatch");                     \
+  }
+#else
+// Macro to open a BranchTableBlock (a piece of code that is branched to by a calculated branch).
+#define BTB_BEGIN(lbl, alignment, name)                                        \
+  __ align_address(alignment);                                                 \
+  __ bind(lbl);                                                                \
+  { unsigned int b_off = __ offset();                                          \
+    uintptr_t   b_addr = (uintptr_t)__ pc();                                   \
+    guarantee(b_addr%alignment == 0, "bad alignment at begin of block" name);
+
+// Macro to close a BranchTableBlock (a piece of code that is branched to by a calculated branch).
+#define BTB_END(lbl, alignment, name)                                          \
+    uintptr_t   e_addr = (uintptr_t)__ pc();                                   \
+    unsigned int e_off = __ offset();                                          \
+    unsigned int len   = e_off-b_off;                                          \
+    if (len > alignment) {                                                     \
+      tty->print_cr("%4d of %4d @ " INTPTR_FORMAT ": Block len for %s",        \
+                    len, alignment, e_addr-len, name);                         \
+      guarantee(len <= alignment, "block too large");                          \
+    }                                                                          \
+    guarantee(len == e_addr-b_addr, "block len mismatch");                     \
+  }
+#endif // ASSERT
+
+// Platform-dependent initialization.
+
+void TemplateTable::pd_initialize() {
+  // No platform-specific initialization is required on s390.
+}
+
+// Address computation: local variables
+
+// Address of local slot #n, relative to Z_locals.
+static inline Address iaddress(int n) {
+  return Address(Z_locals, Interpreter::local_offset_in_bytes(n));
+}
+
+// Longs occupy two slots; the value is addressed via the second slot (n + 1).
+static inline Address laddress(int n) {
+  return iaddress(n + 1);
+}
+
+static inline Address faddress(int n) {
+  return iaddress(n);
+}
+
+static inline Address daddress(int n) {
+  return laddress(n);
+}
+
+static inline Address aaddress(int n) {
+  return iaddress(n);
+}
+
+// Register-indexed variant. Side effect: converts the slot index in r to a
+// byte offset (shift left) in place, unless masm is NULL.
+// Pass NULL, if no shift instruction should be emitted.
+static inline Address iaddress(InterpreterMacroAssembler *masm, Register r) {
+  if (masm) {
+    masm->z_sllg(r, r, LogBytesPerWord);  // index2bytes
+  }
+  return Address(Z_locals, r, Interpreter::local_offset_in_bytes(0));
+}
+
+// Pass NULL, if no shift instruction should be emitted.
+static inline Address laddress(InterpreterMacroAssembler *masm, Register r) {
+  if (masm) {
+    masm->z_sllg(r, r, LogBytesPerWord);  // index2bytes
+  }
+  return Address(Z_locals, r, Interpreter::local_offset_in_bytes(1) );
+}
+
+static inline Address faddress(InterpreterMacroAssembler *masm, Register r) {
+  return iaddress(masm, r);
+}
+
+static inline Address daddress(InterpreterMacroAssembler *masm, Register r) {
+  return laddress(masm, r);
+}
+
+static inline Address aaddress(InterpreterMacroAssembler *masm, Register r) {
+  return iaddress(masm, r);
+}
+
+// At top of Java expression stack which may be different than esp(). It
+// isn't for category 1 objects.
+static inline Address at_tos(int slot = 0) {
+  return Address(Z_esp, Interpreter::expr_offset_in_bytes(slot));
+}
+
+// Condition conversion: return the branch condition that is taken when the
+// given TemplateTable condition does NOT hold (i.e. the inverse condition).
+static Assembler::branch_condition j_not(TemplateTable::Condition cc) {
+  Assembler::branch_condition inverse = Assembler::bcondZero;
+  switch (cc) {
+    case TemplateTable::equal        : inverse = Assembler::bcondNotEqual; break;
+    case TemplateTable::not_equal    : inverse = Assembler::bcondEqual;    break;
+    case TemplateTable::less         : inverse = Assembler::bcondNotLow;   break;
+    case TemplateTable::less_equal   : inverse = Assembler::bcondHigh;     break;
+    case TemplateTable::greater      : inverse = Assembler::bcondNotHigh;  break;
+    case TemplateTable::greater_equal: inverse = Assembler::bcondLow;      break;
+    default:
+      ShouldNotReachHere();
+      break;
+  }
+  return inverse;
+}
+
+// Do an oop store like *(base + offset) = val
+// offset can be a register or a constant.
+// Emits the store together with the GC barrier code required by the
+// currently selected barrier set (G1 pre/post barrier or card marking).
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Register base,
+                         RegisterOrConstant offset,
+                         Register val,
+                         bool val_is_null, // == false does not guarantee that val really is not equal NULL.
+                         Register tmp1,    // If tmp3 is volatile, either tmp1 or tmp2 must be
+                         Register tmp2,    // non-volatile to hold a copy of pre_val across runtime calls.
+                         Register tmp3,    // Ideally, this tmp register is non-volatile, as it is used to
+                                           // hold pre_val (must survive runtime calls).
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  BLOCK_COMMENT("do_oop_store {");
+  assert(val != noreg, "val must always be valid, even if it is zero");
+  assert_different_registers(tmp1, tmp2, tmp3, val, base, offset.register_or_noreg());
+  __ verify_oop(val);
+  switch (barrier) {
+#if INCLUDE_ALL_GCS
+    case BarrierSet::G1SATBCTLogging:
+      {
+#ifdef ASSERT
+        if (val_is_null) { // Check if the flag setting reflects reality.
+          Label OK;
+          __ z_ltgr(val, val);
+          __ z_bre(OK);
+          __ z_illtrap(0x11);
+          __ bind(OK);
+        }
+#endif
+        Register pre_val = tmp3;
+        // Load and record the previous value.
+        __ g1_write_barrier_pre(base, offset, pre_val, val,
+                                tmp1, tmp2,
+                                false);  // Needs to hold pre_val in non_volatile register?
+
+        if (val_is_null) {
+          // NULL stores need no post barrier (no inter-region reference created).
+          __ store_heap_oop_null(val, offset, base);
+        } else {
+          Label Done;
+          // val_is_null == false does not guarantee that val really is not equal NULL.
+          // Checking for this case dynamically has some cost, but also some benefit (in GC).
+          // It's hard to say if cost or benefit is greater.
+          { Label OK;
+            __ z_ltgr(val, val);
+            __ z_brne(OK);
+            __ store_heap_oop_null(val, offset, base);
+            __ z_bru(Done);
+            __ bind(OK);
+          }
+          // G1 barrier needs uncompressed oop for region cross check.
+          // Store_heap_oop compresses the oop in the argument register.
+          Register val_work = val;
+          if (UseCompressedOops) {
+            val_work = tmp3;
+            __ z_lgr(val_work, val);
+          }
+          __ store_heap_oop_not_null(val_work, offset, base);
+
+          // We need precise card marks for oop array stores.
+          // Otherwise, cardmarking the object which contains the oop is sufficient.
+          if (precise && !(offset.is_constant() && offset.as_constant() == 0)) {
+            __ add2reg_with_index(base,
+                                  offset.constant_or_zero(),
+                                  offset.register_or_noreg(),
+                                  base);
+          }
+          __ g1_write_barrier_post(base /* store_adr */, val, tmp1, tmp2, tmp3);
+          __ bind(Done);
+        }
+      }
+      break;
+#endif // INCLUDE_ALL_GCS
+    case BarrierSet::CardTableForRS:
+    case BarrierSet::CardTableExtension:
+    {
+      // Simple card marking: store, then dirty the card covering the address.
+      if (val_is_null) {
+        __ store_heap_oop_null(val, offset, base);
+      } else {
+        __ store_heap_oop(val, offset, base);
+        // Flatten object address if needed.
+        if (precise && ((offset.register_or_noreg() != noreg) || (offset.constant_or_zero() != 0))) {
+          __ load_address(base, Address(base, offset.register_or_noreg(), offset.constant_or_zero()));
+        }
+        __ card_write_barrier_post(base, tmp1);
+      }
+    }
+    break;
+  case BarrierSet::ModRef:
+    // fall through
+  default:
+    ShouldNotReachHere();
+
+  }
+  BLOCK_COMMENT("} do_oop_store");
+}
+
+// Address of the byte at the given offset from the current bytecode pointer.
+Address TemplateTable::at_bcp(int offset) {
+  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
+  return Address(Z_bcp, offset);
+}
+
+// Rewrite the current bytecode in the instruction stream to its faster
+// variant bc. Handles the putfield special case (no quickening while the
+// cp cache put_code is still zero) and JVMTI breakpoints (rewrite must go
+// through the breakpoint table in that case).
+void TemplateTable::patch_bytecode(Bytecodes::Code bc,
+                                   Register        bc_reg,
+                                   Register        temp_reg,
+                                   bool            load_bc_into_bc_reg, // = true
+                                   int             byte_no) {
+  if (!RewriteBytecodes) { return; }
+
+  NearLabel L_patch_done;
+  BLOCK_COMMENT("patch_bytecode {");
+
+  switch (bc) {
+    case Bytecodes::_fast_aputfield:
+    case Bytecodes::_fast_bputfield:
+    case Bytecodes::_fast_zputfield:
+    case Bytecodes::_fast_cputfield:
+    case Bytecodes::_fast_dputfield:
+    case Bytecodes::_fast_fputfield:
+    case Bytecodes::_fast_iputfield:
+    case Bytecodes::_fast_lputfield:
+    case Bytecodes::_fast_sputfield:
+      {
+        // We skip bytecode quickening for putfield instructions when
+        // the put_code written to the constant pool cache is zero.
+        // This is required so that every execution of this instruction
+        // calls out to InterpreterRuntime::resolve_get_put to do
+        // additional, required work.
+        assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+        assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+        __ get_cache_and_index_and_bytecode_at_bcp(Z_R1_scratch, bc_reg,
+                                                   temp_reg, byte_no, 1);
+        __ load_const_optimized(bc_reg, bc);
+        __ compareU32_and_branch(temp_reg, (intptr_t)0,
+                                 Assembler::bcondZero, L_patch_done);
+      }
+      break;
+    default:
+      assert(byte_no == -1, "sanity");
+      // The pair bytecodes have already done the load.
+      if (load_bc_into_bc_reg) {
+        __ load_const_optimized(bc_reg, bc);
+      }
+      break;
+  }
+
+  if (JvmtiExport::can_post_breakpoint()) {
+
+    Label   L_fast_patch;
+
+    // If a breakpoint is present we can't rewrite the stream directly.
+    __ z_cli(at_bcp(0), Bytecodes::_breakpoint);
+    __ z_brne(L_fast_patch);
+    __ get_method(temp_reg);
+    // Let breakpoint table handling rewrite to quicker bytecode.
+    __ call_VM_static(noreg,
+                      CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at),
+                      temp_reg, Z_R13, bc_reg);
+    __ z_bru(L_patch_done);
+
+    __ bind(L_fast_patch);
+  }
+
+#ifdef ASSERT
+  // Sanity check: the bytecode at bcp must be either the unpatched java
+  // code for bc, or already the target fast bytecode (benign race).
+  NearLabel   L_okay;
+
+  // We load into 64 bits, since this works on any CPU.
+  __ z_llgc(temp_reg, at_bcp(0));
+  __ compareU32_and_branch(temp_reg, Bytecodes::java_code(bc),
+                            Assembler::bcondEqual, L_okay        );
+  __ compareU32_and_branch(temp_reg, bc_reg, Assembler::bcondEqual, L_okay);
+  __ stop_static("patching the wrong bytecode");
+  __ bind(L_okay);
+#endif
+
+  // Patch bytecode.
+  __ z_stc(bc_reg, at_bcp(0));
+
+  __ bind(L_patch_done);
+  BLOCK_COMMENT("} patch_bytecode");
+}
+
+// Individual instructions
+
+void TemplateTable::nop() {
+  transition(vtos, vtos);
+}
+
+void TemplateTable::shouldnotreachhere() {
+  transition(vtos, vtos);
+  __ stop("shouldnotreachhere bytecode");
+}
+
+// aconst_null: push a NULL reference.
+void TemplateTable::aconst_null() {
+  transition(vtos, atos);
+  __ clear_reg(Z_tos, true, false);
+}
+
+void TemplateTable::iconst(int value) {
+  transition(vtos, itos);
+  // Zero extension of the iconst makes zero extension at runtime obsolete.
+  __ load_const_optimized(Z_tos, ((unsigned long)(unsigned int)value));
+}
+
+void TemplateTable::lconst(int value) {
+  transition(vtos, ltos);
+  __ load_const_optimized(Z_tos, value);
+}
+
+// No pc-relative load/store for floats.
+// Constants 1.0f/2.0f are materialized via absolute loads from static storage.
+void TemplateTable::fconst(int value) {
+  transition(vtos, ftos);
+  static float   one = 1.0f, two = 2.0f;
+
+  switch (value) {
+    case 0:
+      __ z_lzer(Z_ftos);  // Load float zero directly.
+      return;
+    case 1:
+      __ load_absolute_address(Z_R1_scratch, (address) &one);
+      __ mem2freg_opt(Z_ftos, Address(Z_R1_scratch), false);
+      return;
+    case 2:
+      __ load_absolute_address(Z_R1_scratch, (address) &two);
+      __ mem2freg_opt(Z_ftos, Address(Z_R1_scratch), false);
+      return;
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+}
+
+void TemplateTable::dconst(int value) {
+  transition(vtos, dtos);
+  static double one = 1.0;
+
+  switch (value) {
+    case 0:
+      __ z_lzdr(Z_ftos);  // Load double zero directly.
+      return;
+    case 1:
+      __ load_absolute_address(Z_R1_scratch, (address) &one);
+      __ mem2freg_opt(Z_ftos, Address(Z_R1_scratch));
+      return;
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+}
+
+// bipush: push sign-extended byte operand.
+void TemplateTable::bipush() {
+  transition(vtos, itos);
+  __ z_lb(Z_tos, at_bcp(1));
+}
+
+// sipush: push sign-extended short operand.
+void TemplateTable::sipush() {
+  transition(vtos, itos);
+  __ get_2_byte_integer_at_bcp(Z_tos, 1, InterpreterMacroAssembler::Signed);
+}
+
+
+// ldc/ldc_w: push a single-word constant pool entry (class, float, or int).
+// Class entries (resolved or not) go through the runtime; String/Object
+// entries have been rewritten to fast_aldc and do not reach this code.
+void TemplateTable::ldc(bool wide) {
+  transition(vtos, vtos);
+  Label call_ldc, notFloat, notClass, Done;
+  const Register RcpIndex = Z_tmp_1;
+  const Register Rtags = Z_ARG2;
+
+  if (wide) {
+    __ get_2_byte_integer_at_bcp(RcpIndex, 1, InterpreterMacroAssembler::Unsigned);
+  } else {
+    __ z_llgc(RcpIndex, at_bcp(1));
+  }
+
+  __ get_cpool_and_tags(Z_tmp_2, Rtags);
+
+  const int      base_offset = ConstantPool::header_size() * wordSize;
+  const int      tags_offset = Array<u1>::base_offset_in_bytes();
+  const Register Raddr_type = Rtags;
+
+  // Get address of type.
+  __ add2reg_with_index(Raddr_type, tags_offset, RcpIndex, Rtags);
+
+  __ z_cli(0, Raddr_type, JVM_CONSTANT_UnresolvedClass);
+  __ z_bre(call_ldc);    // Unresolved class - get the resolved class.
+
+  __ z_cli(0, Raddr_type, JVM_CONSTANT_UnresolvedClassInError);
+  __ z_bre(call_ldc);    // Unresolved class in error state - call into runtime
+                         // to throw the error from the first resolution attempt.
+
+  __ z_cli(0, Raddr_type, JVM_CONSTANT_Class);
+  __ z_brne(notClass);   // Resolved class - need to call vm to get java
+                         // mirror of the class.
+
+  // We deal with a class. Call vm to do the appropriate.
+  __ bind(call_ldc);
+  __ load_const_optimized(Z_ARG2, wide);
+  call_VM(Z_RET, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), Z_ARG2);
+  __ push_ptr(Z_RET);
+  __ z_bru(Done);
+
+  // Not a class.
+  __ bind(notClass);
+  Register RcpOffset = RcpIndex;
+  __ z_sllg(RcpOffset, RcpIndex, LogBytesPerWord); // Convert index to offset.
+  __ z_cli(0, Raddr_type, JVM_CONSTANT_Float);
+  __ z_brne(notFloat);
+
+  // ftos
+  __ mem2freg_opt(Z_ftos, Address(Z_tmp_2, RcpOffset, base_offset), false);
+  __ push_f();
+  __ z_bru(Done);
+
+  __ bind(notFloat);
+#ifdef ASSERT
+  {
+    Label   L;
+
+    __ z_cli(0, Raddr_type, JVM_CONSTANT_Integer);
+    __ z_bre(L);
+    // String and Object are rewritten to fast_aldc.
+    __ stop("unexpected tag type in ldc");
+
+    __ bind(L);
+  }
+#endif
+
+  // itos
+  __ mem2reg_opt(Z_tos, Address(Z_tmp_2, RcpOffset, base_offset), false);
+  __ push_i(Z_tos);
+
+  __ bind(Done);
+}
+
+// Fast path for caching oop constants.
+// %%% We should use this to handle Class and String constants also.
+// %%% It will simplify the ldc/primitive path considerably.
+void TemplateTable::fast_aldc(bool wide) {
+  transition(vtos, atos);
+
+  const Register index = Z_tmp_2;
+  int            index_size = wide ? sizeof(u2) : sizeof(u1);
+  Label          L_resolved;
+
+  // We are resolved if the resolved reference cache entry contains a
+  // non-null object (CallSite, etc.).
+  __ get_cache_index_at_bcp(index, 1, index_size);  // Load index.
+  __ load_resolved_reference_at_index(Z_tos, index);
+  __ z_ltgr(Z_tos, Z_tos);
+  __ z_brne(L_resolved);
+
+  // First time invocation - must resolve first.
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+  __ load_const_optimized(Z_ARG1, (int)bytecode());
+  __ call_VM(Z_tos, entry, Z_ARG1);
+
+  __ bind(L_resolved);
+  __ verify_oop(Z_tos);
+}
+
+// ldc2_w: push a two-word constant pool entry (long or double).
+void TemplateTable::ldc2_w() {
+  transition(vtos, vtos);
+  Label Long, Done;
+
+  // Z_tmp_1 = index of cp entry
+  __ get_2_byte_integer_at_bcp(Z_tmp_1, 1, InterpreterMacroAssembler::Unsigned);
+
+  __ get_cpool_and_tags(Z_tmp_2, Z_tos);
+
+  const int base_offset = ConstantPool::header_size() * wordSize;
+  const int tags_offset = Array<u1>::base_offset_in_bytes();
+
+  // Get address of type.
+  __ add2reg_with_index(Z_tos, tags_offset, Z_tos, Z_tmp_1);
+
+  // Index needed in both branches, so calculate here.
+  __ z_sllg(Z_tmp_1, Z_tmp_1, LogBytesPerWord);  // index2bytes
+
+  // Check type.
+  __ z_cli(0, Z_tos, JVM_CONSTANT_Double);
+  __ z_brne(Long);
+
+  // dtos
+  __ mem2freg_opt(Z_ftos, Address(Z_tmp_2, Z_tmp_1, base_offset));
+  __ push_d();
+  __ z_bru(Done);
+
+  __ bind(Long);
+  // ltos
+  __ mem2reg_opt(Z_tos, Address(Z_tmp_2, Z_tmp_1, base_offset));
+  __ push_l();
+
+  __ bind(Done);
+}
+
+// Load the (unsigned byte) local variable index at bcp+offset and negate it.
+// The negation is presumably because locals live at decreasing addresses
+// relative to Z_locals, so the index is applied as a negative byte offset
+// by iaddress()/laddress() after scaling — see those helpers.
+void TemplateTable::locals_index(Register reg, int offset) {
+  __ z_llgc(reg, at_bcp(offset));
+  __ z_lcgr(reg);
+}
+
+void TemplateTable::iload() {
+  iload_internal();
+}
+
+// Variant used when bytecode rewriting is not allowed (e.g. during JVMTI).
+void TemplateTable::nofast_iload() {
+  iload_internal(may_not_rewrite);
+}
+
+// iload, optionally rewriting frequent pairs (iload/iload -> fast_iload2,
+// iload/caload -> fast_icaload) before loading the local into tos.
+void TemplateTable::iload_internal(RewriteControl rc) {
+  transition(vtos, itos);
+
+  if (RewriteFrequentPairs && rc == may_rewrite) {
+    NearLabel rewrite, done;
+    const Register bc = Z_ARG4;
+
+    assert(Z_R1_scratch != bc, "register damaged");
+
+    // Get next byte.
+    __ z_llgc(Z_R1_scratch, at_bcp(Bytecodes::length_for (Bytecodes::_iload)));
+
+    // If _iload, wait to rewrite to iload2. We only want to rewrite the
+    // last two iloads in a pair. Comparing against fast_iload means that
+    // the next bytecode is neither an iload or a caload, and therefore
+    // an iload pair.
+    __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_iload,
+                             Assembler::bcondEqual, done);
+
+    __ load_const_optimized(bc, Bytecodes::_fast_iload2);
+    __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_iload,
+                             Assembler::bcondEqual, rewrite);
+
+    // If _caload, rewrite to fast_icaload.
+    __ load_const_optimized(bc, Bytecodes::_fast_icaload);
+    __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_caload,
+                             Assembler::bcondEqual, rewrite);
+
+    // Rewrite so iload doesn't check again.
+    __ load_const_optimized(bc, Bytecodes::_fast_iload);
+
+    // rewrite
+    // bc: fast bytecode
+    __ bind(rewrite);
+    patch_bytecode(Bytecodes::_iload, bc, Z_R1_scratch, false);
+
+    __ bind(done);
+
+  }
+
+  // Get the local value into tos.
+  locals_index(Z_R1_scratch);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false);
+}
+
+// fast_iload2: two consecutive iloads merged into one template.
+// Operand bytes: index1 at bcp+1, index2 at bcp+3.
+void TemplateTable::fast_iload2() {
+  transition(vtos, itos);
+
+  locals_index(Z_R1_scratch);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false);
+  __ push_i(Z_tos);
+  locals_index(Z_R1_scratch, 3);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false);
+}
+
+void TemplateTable::fast_iload() {
+  transition(vtos, itos);
+
+  locals_index(Z_R1_scratch);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false);
+}
+
+void TemplateTable::lload() {
+  transition(vtos, ltos);
+
+  locals_index(Z_R1_scratch);
+  __ mem2reg_opt(Z_tos, laddress(_masm, Z_R1_scratch));
+}
+
+void TemplateTable::fload() {
+  transition(vtos, ftos);
+
+  locals_index(Z_R1_scratch);
+  __ mem2freg_opt(Z_ftos, faddress(_masm, Z_R1_scratch), false);
+}
+
+void TemplateTable::dload() {
+  transition(vtos, dtos);
+
+  locals_index(Z_R1_scratch);
+  __ mem2freg_opt(Z_ftos, daddress(_masm, Z_R1_scratch));
+}
+
+void TemplateTable::aload() {
+  transition(vtos, atos);
+
+  locals_index(Z_R1_scratch);
+  __ mem2reg_opt(Z_tos, aaddress(_masm, Z_R1_scratch));
+}
+
+// Like locals_index(), but for wide bytecodes with a two-byte index at bcp+2.
+void TemplateTable::locals_index_wide(Register reg) {
+  __ get_2_byte_integer_at_bcp(reg, 2, InterpreterMacroAssembler::Unsigned);
+  __ z_lcgr(reg);
+}
+
+void TemplateTable::wide_iload() {
+  transition(vtos, itos);
+
+  locals_index_wide(Z_tmp_1);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Z_tmp_1), false);
+}
+
+void TemplateTable::wide_lload() {
+  transition(vtos, ltos);
+
+  locals_index_wide(Z_tmp_1);
+  __ mem2reg_opt(Z_tos, laddress(_masm, Z_tmp_1));
+}
+
+void TemplateTable::wide_fload() {
+  transition(vtos, ftos);
+
+  locals_index_wide(Z_tmp_1);
+  __ mem2freg_opt(Z_ftos, faddress(_masm, Z_tmp_1), false);
+}
+
+void TemplateTable::wide_dload() {
+  transition(vtos, dtos);
+
+  locals_index_wide(Z_tmp_1);
+  __ mem2freg_opt(Z_ftos, daddress(_masm, Z_tmp_1));
+}
+
+void TemplateTable::wide_aload() {
+  transition(vtos, atos);
+
+  locals_index_wide(Z_tmp_1);
+  __ mem2reg_opt(Z_tos, aaddress(_masm, Z_tmp_1));
+}
+
+// Null-check the array, bounds-check the index, and scale the index in
+// place by 'shift' (index becomes a byte offset). On a failed bounds check,
+// control transfers to the ArrayIndexOutOfBounds handler and does not return.
+// Kills Z_ARG2/Z_ARG3 on the failure path.
+void TemplateTable::index_check(Register array, Register index, unsigned int shift) {
+  assert_different_registers(Z_R1_scratch, array, index);
+
+  // Check array.
+  __ null_check(array, Z_R0_scratch, arrayOopDesc::length_offset_in_bytes());
+
+  // Sign extend index for use by indexed load.
+  __ z_lgfr(index, index);
+
+  // Check index.
+  // Unsigned compare: a negative index appears as a huge unsigned value,
+  // so one compare catches both index < 0 and index >= length.
+  Label index_ok;
+  __ z_cl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
+  __ z_brl(index_ok);
+  __ lgr_if_needed(Z_ARG3, index); // See generate_ArrayIndexOutOfBounds_handler().
+  // Give back the array to create more detailed exceptions.
+  __ lgr_if_needed(Z_ARG2, array); // See generate_ArrayIndexOutOfBounds_handler().
+  __ load_absolute_address(Z_R1_scratch,
+                           Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+  __ z_bcr(Assembler::bcondAlways, Z_R1_scratch);
+  __ bind(index_ok);
+
+  if (shift > 0)
+    __ z_sllg(index, index, shift);
+}
+
+void TemplateTable::iaload() {
+  transition(itos, itos);
+
+  __ pop_ptr(Z_tmp_1);  // array
+  // Index is in Z_tos.
+  Register index = Z_tos;
+  index_check(Z_tmp_1, index, LogBytesPerInt); // Kills Z_ARG3.
+  // Load the value.
+  __ mem2reg_opt(Z_tos,
+                 Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_INT)),
+                 false);
+}
+
+void TemplateTable::laload() {
+  transition(itos, ltos);
+
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : index
+  // Z_tmp_2 : array
+  Register index = Z_tos;
+  index_check(Z_tmp_2, index, LogBytesPerLong);
+  __ mem2reg_opt(Z_tos,
+                 Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_LONG)));
+}
+
+void TemplateTable::faload() {
+  transition(itos, ftos);
+
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : index
+  // Z_tmp_2 : array
+  Register index = Z_tos;
+  index_check(Z_tmp_2, index, LogBytesPerInt);
+  __ mem2freg_opt(Z_ftos,
+                  Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
+                  false);
+}
+
+void TemplateTable::daload() {
+  transition(itos, dtos);
+
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : index
+  // Z_tmp_2 : array
+  Register index = Z_tos;
+  index_check(Z_tmp_2, index, LogBytesPerLong);
+  __ mem2freg_opt(Z_ftos,
+                  Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
+}
+
+void TemplateTable::aaload() {
+  transition(itos, atos);
+
+  unsigned const int shift = LogBytesPerHeapOop;
+  __ pop_ptr(Z_tmp_1);  // array
+  // Index is in Z_tos.
+  Register index = Z_tos;
+  index_check(Z_tmp_1, index, shift);
+  // Now load array element.
+  __ load_heap_oop(Z_tos,
+                   Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ verify_oop(Z_tos);
+}
+
+void TemplateTable::baload() {
+  transition(itos, itos);
+
+  __ pop_ptr(Z_tmp_1);
+  // Z_tos   : index
+  // Z_tmp_1 : array
+  Register index = Z_tos;
+  index_check(Z_tmp_1, index, 0);
+  __ z_lb(Z_tos,
+          Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
+}
+
+void TemplateTable::caload() {
+  transition(itos, itos);
+
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : index
+  // Z_tmp_2 : array
+  Register index = Z_tos;
+  index_check(Z_tmp_2, index, LogBytesPerShort);
+  // Load into 64 bits, works on all CPUs.
+  __ z_llgh(Z_tos,
+            Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+}
+
+// Iload followed by caload frequent pair.
+void TemplateTable::fast_icaload() {
+  transition(vtos, itos);
+
+  // Load index out of locals.
+  locals_index(Z_R1_scratch);
+  __ mem2reg_opt(Z_ARG3, iaddress(_masm, Z_R1_scratch), false);
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  __ pop_ptr(Z_tmp_2);
+  index_check(Z_tmp_2, Z_ARG3, LogBytesPerShort);
+  // Load into 64 bits, works on all CPUs.
+  __ z_llgh(Z_tos,
+            Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+}
+
+void TemplateTable::saload() {
+  transition(itos, itos);
+
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : index
+  // Z_tmp_2 : array
+  Register index = Z_tos;
+  index_check(Z_tmp_2, index, LogBytesPerShort);
+  __ z_lh(Z_tos,
+          Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
+}
+
+// Loads from a fixed local slot n (iload_0..iload_3 etc.).
+
+void TemplateTable::iload(int n) {
+  transition(vtos, itos);
+  __ z_ly(Z_tos, iaddress(n));
+}
+
+void TemplateTable::lload(int n) {
+  transition(vtos, ltos);
+  __ z_lg(Z_tos, laddress(n));
+}
+
+void TemplateTable::fload(int n) {
+  transition(vtos, ftos);
+  __ mem2freg_opt(Z_ftos, faddress(n), false);
+}
+
+void TemplateTable::dload(int n) {
+  transition(vtos, dtos);
+  __ mem2freg_opt(Z_ftos, daddress(n));
+}
+
+void TemplateTable::aload(int n) {
+  transition(vtos, atos);
+  __ mem2reg_opt(Z_tos, aaddress(n));
+}
+
+void TemplateTable::aload_0() {
+  aload_0_internal();
+}
+
+// Variant used when bytecode rewriting is not allowed.
+void TemplateTable::nofast_aload_0() {
+  aload_0_internal(may_not_rewrite);
+}
+
+void TemplateTable::aload_0_internal(RewriteControl rc) {
+  transition(vtos, atos);
+
+  // According to bytecode histograms, the pairs:
+  //
+  // _aload_0, _fast_igetfield
+  // _aload_0, _fast_agetfield
+  // _aload_0, _fast_fgetfield
+  //
+  // occur frequently. If RewriteFrequentPairs is set, the (slow)
+  // _aload_0 bytecode checks if the next bytecode is either
+  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
+  // rewrites the current bytecode into a pair bytecode; otherwise it
+  // rewrites the current bytecode into _fast_aload_0 that doesn't do
+  // the pair check anymore.
+  //
+  // Note: If the next bytecode is _getfield, the rewrite must be
+  //       delayed, otherwise we may miss an opportunity for a pair.
+  //
+  // Also rewrite frequent pairs
+  //   aload_0, aload_1
+  //   aload_0, iload_1
+  // These bytecodes with a small amount of code are most profitable
+  // to rewrite.
+  if (!(RewriteFrequentPairs && (rc == may_rewrite))) {
+    aload(0);
+    return;
+  }
+
+  NearLabel rewrite, done;
+  const Register bc = Z_ARG4;
+
+  assert(Z_R1_scratch != bc, "register damaged");
+  // Get next byte.
+  __ z_llgc(Z_R1_scratch, at_bcp(Bytecodes::length_for (Bytecodes::_aload_0)));
+
+  // Do actual aload_0.
+  aload(0);
+
+  // If _getfield then wait with rewrite.
+  __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_getfield,
+                           Assembler::bcondEqual, done);
+
+  // If _igetfield then rewrite to _fast_iaccess_0.
+  assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0)
+            == Bytecodes::_aload_0, "fix bytecode definition");
+
+  __ load_const_optimized(bc, Bytecodes::_fast_iaccess_0);
+  __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_igetfield,
+                           Assembler::bcondEqual, rewrite);
+
+  // If _agetfield then rewrite to _fast_aaccess_0.
+  assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0)
+            == Bytecodes::_aload_0, "fix bytecode definition");
+
+  __ load_const_optimized(bc, Bytecodes::_fast_aaccess_0);
+  __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_agetfield,
+                           Assembler::bcondEqual, rewrite);
+
+  // If _fgetfield then rewrite to _fast_faccess_0.
+  assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0)
+            == Bytecodes::_aload_0, "fix bytecode definition");
+
+  __ load_const_optimized(bc, Bytecodes::_fast_faccess_0);
+  __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_fgetfield,
+                           Assembler::bcondEqual, rewrite);
+
+  // Else rewrite to _fast_aload0.
+  assert(Bytecodes::java_code(Bytecodes::_fast_aload_0)
+            == Bytecodes::_aload_0, "fix bytecode definition");
+  __ load_const_optimized(bc, Bytecodes::_fast_aload_0);
+
+  // rewrite
+  // bc: fast bytecode
+  __ bind(rewrite);
+
+  patch_bytecode(Bytecodes::_aload_0, bc, Z_R1_scratch, false);
+  // Reload local 0 because of VM call inside patch_bytecode().
+  // this may trigger GC and thus change the oop.
+  aload(0);
+
+  __ bind(done);
+}
+
+// istore: store int TOS into the local variable whose index is embedded
+// in the bytecode stream. 'false' selects a 32-bit store.
+void TemplateTable::istore() {
+  transition(itos, vtos);
+  locals_index(Z_R1_scratch);
+  __ reg2mem_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false);
+}
+
+// lstore: store long TOS into the local variable at the bytecode-embedded index.
+void TemplateTable::lstore() {
+  transition(ltos, vtos);
+  locals_index(Z_R1_scratch);
+  __ reg2mem_opt(Z_tos, laddress(_masm, Z_R1_scratch));
+}
+
+// fstore: store float TOS into the local variable at the bytecode-embedded index.
+void TemplateTable::fstore() {
+  transition(ftos, vtos);
+  locals_index(Z_R1_scratch);
+  // Pass 'false' to request a single-precision (4-byte) store, consistent
+  // with fstore(n), wide_fstore and fastore. Without the flag freg2mem_opt
+  // would emit its default (double-width) store into the float slot.
+  __ freg2mem_opt(Z_ftos, faddress(_masm, Z_R1_scratch), false);
+}
+
+// dstore: store double TOS into the local variable at the bytecode-embedded
+// index (default freg2mem_opt width is double).
+void TemplateTable::dstore() {
+  transition(dtos, vtos);
+  locals_index(Z_R1_scratch);
+  __ freg2mem_opt(Z_ftos, daddress(_masm, Z_R1_scratch));
+}
+
+// astore: pop a reference (or return address, hence vtos in) from the
+// expression stack and store it into the indexed local.
+void TemplateTable::astore() {
+  transition(vtos, vtos);
+  __ pop_ptr(Z_tos);
+  locals_index(Z_R1_scratch);
+  __ reg2mem_opt(Z_tos, aaddress(_masm, Z_R1_scratch));
+}
+
+// wide_istore: like istore but with a 2-byte (wide) local index.
+void TemplateTable::wide_istore() {
+  transition(vtos, vtos);
+  __ pop_i(Z_tos);
+  locals_index_wide(Z_tmp_1);
+  __ reg2mem_opt(Z_tos, iaddress(_masm, Z_tmp_1), false);
+}
+
+// wide_lstore: like lstore but with a 2-byte (wide) local index.
+void TemplateTable::wide_lstore() {
+  transition(vtos, vtos);
+  __ pop_l(Z_tos);
+  locals_index_wide(Z_tmp_1);
+  __ reg2mem_opt(Z_tos, laddress(_masm, Z_tmp_1));
+}
+
+// wide_fstore: like fstore but with a 2-byte (wide) local index.
+// 'false' selects a single-precision (4-byte) store.
+void TemplateTable::wide_fstore() {
+  transition(vtos, vtos);
+  __ pop_f(Z_ftos);
+  locals_index_wide(Z_tmp_1);
+  __ freg2mem_opt(Z_ftos, faddress(_masm, Z_tmp_1), false);
+}
+
+// wide_dstore: like dstore but with a 2-byte (wide) local index.
+void TemplateTable::wide_dstore() {
+  transition(vtos, vtos);
+  __ pop_d(Z_ftos);
+  locals_index_wide(Z_tmp_1);
+  __ freg2mem_opt(Z_ftos, daddress(_masm, Z_tmp_1));
+}
+
+// wide_astore: like astore but with a 2-byte (wide) local index.
+void TemplateTable::wide_astore() {
+  transition(vtos, vtos);
+  __ pop_ptr(Z_tos);
+  locals_index_wide(Z_tmp_1);
+  __ reg2mem_opt(Z_tos, aaddress(_masm, Z_tmp_1));
+}
+
+// iastore: store int TOS into an int array element.
+// Pops index and arrayref; index_check throws if arrayref is NULL or the
+// index is out of bounds, and scales the index by the element size.
+void TemplateTable::iastore() {
+  transition(itos, vtos);
+
+  Register index = Z_ARG3; // Index_check expects index in Z_ARG3.
+  // Value is in Z_tos ...
+  __ pop_i(index);        // index
+  __ pop_ptr(Z_tmp_1);    // array
+  index_check(Z_tmp_1, index, LogBytesPerInt);
+  // ... and then move the value.
+  __ reg2mem_opt(Z_tos,
+                 Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_INT)),
+                 false);
+}
+
+// lastore: store long TOS into a long array element.
+// Pops index and arrayref; index_check handles NULL array / bounds and
+// scales the index by the element size.
+void TemplateTable::lastore() {
+  transition(ltos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  index_check(Z_tmp_2, Z_ARG3, LogBytesPerLong); // Prefer index in Z_ARG3.
+  __ reg2mem_opt(Z_tos,
+                 Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_LONG)));
+}
+
+// fastore: store float TOS into a float array element ('false' = 4-byte store).
+void TemplateTable::fastore() {
+  transition(ftos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_ftos  : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  index_check(Z_tmp_2, Z_ARG3, LogBytesPerInt); // Prefer index in Z_ARG3.
+  __ freg2mem_opt(Z_ftos,
+                  Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
+                  false);
+}
+
+// dastore: store double TOS into a double array element.
+void TemplateTable::dastore() {
+  transition(dtos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_ftos  : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  index_check(Z_tmp_2, Z_ARG3, LogBytesPerLong); // Prefer index in Z_ARG3.
+  __ freg2mem_opt(Z_ftos,
+                  Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
+}
+
+// aastore: store reference TOS into an object array element, with the
+// dynamic array-store type check required by the JVM spec.
+// The three operands are read non-destructively (load_ptr) and only popped
+// at the very end, so the expression stack stays intact across the check.
+void TemplateTable::aastore() {
+  NearLabel is_null, ok_is_subtype, done;
+  transition(vtos, vtos);
+
+  // stack: ..., array, index, value
+
+  Register Rvalue = Z_tos;
+  Register Rarray = Z_ARG2;
+  Register Rindex = Z_ARG3; // Convention for index_check().
+
+  __ load_ptr(0, Rvalue);
+  __ z_l(Rindex, Address(Z_esp, Interpreter::expr_offset_in_bytes(1)));
+  __ load_ptr(2, Rarray);
+
+  unsigned const int shift = LogBytesPerHeapOop;
+  index_check(Rarray, Rindex, shift); // side effect: Rindex = Rindex << shift
+  Register Rstore_addr  = Rindex;
+  // Address where the store goes to, i.e. &(Rarry[index])
+  __ load_address(Rstore_addr, Address(Rarray, Rindex, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+
+  // do array store check - check for NULL value first.
+  // A NULL may be stored into any reference array without a type check.
+  __ compareU64_and_branch(Rvalue, (intptr_t)0, Assembler::bcondEqual, is_null);
+
+  Register Rsub_klass   = Z_ARG4;
+  Register Rsuper_klass = Z_ARG5;
+  __ load_klass(Rsub_klass, Rvalue);
+  // Load superklass: the element klass of the destination array.
+  __ load_klass(Rsuper_klass, Rarray);
+  __ z_lg(Rsuper_klass, Address(Rsuper_klass, ObjArrayKlass::element_klass_offset()));
+
+  // Generate a fast subtype check.  Branch to ok_is_subtype if no failure.
+  // Throw if failure.
+  Register tmp1 = Z_tmp_1;
+  Register tmp2 = Z_tmp_2;
+  __ gen_subtype_check(Rsub_klass, Rsuper_klass, tmp1, tmp2, ok_is_subtype);
+
+  // Fall through on failure.
+  // Object is in Rvalue == Z_tos.
+  assert(Rvalue == Z_tos, "that's the expected location");
+  __ load_absolute_address(tmp1, Interpreter::_throw_ArrayStoreException_entry);
+  __ z_br(tmp1);
+
+  // Come here on success.
+  __ bind(ok_is_subtype);
+
+  // Now store using the appropriate barrier (pre/post GC barrier per _bs->kind()).
+  Register tmp3 = Rsub_klass;
+  do_oop_store(_masm, Rstore_addr, (intptr_t)0/*offset*/, Rvalue, false/*val==null*/,
+               tmp3, tmp2, tmp1, _bs->kind(), true);
+  __ z_bru(done);
+
+  // Have a NULL in Rvalue.
+  __ bind(is_null);
+  __ profile_null_seen(tmp1);
+
+  // Store a NULL. No type check needed; barrier still applied.
+  do_oop_store(_masm, Rstore_addr, (intptr_t)0/*offset*/, Rvalue, true/*val==null*/,
+               tmp3, tmp2, tmp1, _bs->kind(), true);
+
+  // Pop stack arguments (array, index, value).
+  __ bind(done);
+  __ add2reg(Z_esp, 3 * Interpreter::stackElementSize);
+}
+
+
+// bastore: store int TOS (truncated to a byte by STC) into a byte/boolean
+// array element.
+void TemplateTable::bastore() {
+  transition(itos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : value
+  // Z_ARG3 : index
+  // Z_tmp_2 : array
+  // No index shift necessary - pass 0.
+  index_check(Z_tmp_2, Z_ARG3, 0); // Prefer index in Z_ARG3.
+  __ z_stc(Z_tos,
+           Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
+}
+
+// castore: store int TOS (truncated to 2 bytes by STH) into a char array element.
+void TemplateTable::castore() {
+  transition(itos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  Register index = Z_ARG3; // prefer index in Z_ARG3
+  index_check(Z_tmp_2, index, LogBytesPerShort);
+  __ z_sth(Z_tos,
+           Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+}
+
+// sastore: identical code to castore (short and char elements have the same
+// size and store instruction).
+void TemplateTable::sastore() {
+  castore();
+}
+
+// istore_<n>: store int TOS into local n (constant index, 32-bit store).
+void TemplateTable::istore(int n) {
+  transition(itos, vtos);
+  __ reg2mem_opt(Z_tos, iaddress(n), false);
+}
+
+// lstore_<n>: store long TOS into local n (constant index).
+void TemplateTable::lstore(int n) {
+  transition(ltos, vtos);
+  __ reg2mem_opt(Z_tos, laddress(n));
+}
+
+// fstore_<n>: store float TOS into local n ('false' = 4-byte store).
+void TemplateTable::fstore(int n) {
+  transition(ftos, vtos);
+  __ freg2mem_opt(Z_ftos, faddress(n), false);
+}
+
+// dstore_<n>: store double TOS into local n (constant index).
+void TemplateTable::dstore(int n) {
+  transition(dtos, vtos);
+  __ freg2mem_opt(Z_ftos, daddress(n));
+}
+
+// astore_<n>: pop a reference (or return address) and store it into local n.
+void TemplateTable::astore(int n) {
+  transition(vtos, vtos);
+  __ pop_ptr(Z_tos);
+  __ reg2mem_opt(Z_tos, aaddress(n));
+}
+
+// pop: discard the top expression stack slot (stack grows downward,
+// so popping means incrementing Z_esp).
+void TemplateTable::pop() {
+  transition(vtos, vtos);
+  __ add2reg(Z_esp, Interpreter::stackElementSize);
+}
+
+// pop2: discard the top two expression stack slots.
+void TemplateTable::pop2() {
+  transition(vtos, vtos);
+  __ add2reg(Z_esp, 2 * Interpreter::stackElementSize);
+}
+
+// dup: duplicate the top stack slot.
+void TemplateTable::dup() {
+  transition(vtos, vtos);
+  __ load_ptr(0, Z_tos);
+  __ push_ptr(Z_tos);
+  // stack: ..., a, a
+}
+
+// dup_x1: duplicate the top slot and insert the copy two slots down.
+void TemplateTable::dup_x1() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b
+  __ load_ptr(0, Z_tos);          // load b
+  __ load_ptr(1, Z_R0_scratch);   // load a
+  __ store_ptr(1, Z_tos);         // store b
+  __ store_ptr(0, Z_R0_scratch);  // store a
+  __ push_ptr(Z_tos);             // push b
+  // stack: ..., b, a, b
+}
+
+// dup_x2: duplicate the top slot and insert the copy three slots down.
+void TemplateTable::dup_x2() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b, c
+  __ load_ptr(0, Z_R0_scratch);   // load c
+  __ load_ptr(2, Z_R1_scratch);   // load a
+  __ store_ptr(2, Z_R0_scratch);  // store c in a
+  __ push_ptr(Z_R0_scratch);      // push c
+  // stack: ..., c, b, c, c
+  __ load_ptr(2, Z_R0_scratch);   // load b
+  __ store_ptr(2, Z_R1_scratch);  // store a in b
+  // stack: ..., c, a, c, c
+  __ store_ptr(1, Z_R0_scratch);  // store b in c
+  // stack: ..., c, a, b, c
+}
+
+// dup2: duplicate the top two stack slots.
+void TemplateTable::dup2() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b
+  __ load_ptr(1, Z_R0_scratch);  // load a
+  __ push_ptr(Z_R0_scratch);     // push a
+  __ load_ptr(1, Z_R0_scratch);  // load b
+  __ push_ptr(Z_R0_scratch);     // push b
+  // stack: ..., a, b, a, b
+}
+
+// dup2_x1: duplicate the top two slots and insert the copies three slots down.
+void TemplateTable::dup2_x1() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b, c
+  __ load_ptr(0, Z_R0_scratch);  // load c
+  __ load_ptr(1, Z_R1_scratch);  // load b
+  __ push_ptr(Z_R1_scratch);     // push b
+  __ push_ptr(Z_R0_scratch);     // push c
+  // stack: ..., a, b, c, b, c
+  __ store_ptr(3, Z_R0_scratch); // store c in b
+  // stack: ..., a, c, c, b, c
+  __ load_ptr( 4, Z_R0_scratch); // load a
+  __ store_ptr(2, Z_R0_scratch); // store a in 2nd c
+  // stack: ..., a, c, a, b, c
+  __ store_ptr(4, Z_R1_scratch); // store b in a
+  // stack: ..., b, c, a, b, c
+}
+
+// dup2_x2: duplicate the top two slots and insert the copies four slots down.
+void TemplateTable::dup2_x2() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b, c, d
+  __ load_ptr(0, Z_R0_scratch);   // load d
+  __ load_ptr(1, Z_R1_scratch);   // load c
+  __ push_ptr(Z_R1_scratch);      // push c
+  __ push_ptr(Z_R0_scratch);      // push d
+  // stack: ..., a, b, c, d, c, d
+  __ load_ptr(4, Z_R1_scratch);   // load b
+  __ store_ptr(2, Z_R1_scratch);  // store b in d
+  __ store_ptr(4, Z_R0_scratch);  // store d in b
+  // stack: ..., a, d, c, b, c, d
+  __ load_ptr(5, Z_R0_scratch);   // load a
+  __ load_ptr(3, Z_R1_scratch);   // load c
+  __ store_ptr(3, Z_R0_scratch);  // store a in c
+  __ store_ptr(5, Z_R1_scratch);  // store c in a
+  // stack: ..., c, d, a, b, c, d
+}
+
+// swap: exchange the top two stack slots.
+void TemplateTable::swap() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b
+  __ load_ptr(1, Z_R0_scratch);  // load a
+  __ load_ptr(0, Z_R1_scratch);  // load b
+  __ store_ptr(0, Z_R0_scratch);  // store a in b
+  __ store_ptr(1, Z_R1_scratch);  // store b in a
+  // stack: ..., b, a
+}
+
+void TemplateTable::iop2(Operation op) {
+  transition(itos, itos);
+  switch (op) {
+    case add  :                           __ z_ay(Z_tos,  __ stackTop()); __ pop_i(); break;
+    case sub  :                           __ z_sy(Z_tos,  __ stackTop()); __ pop_i(); __ z_lcr(Z_tos, Z_tos); break;
+    case mul  :                           __ z_msy(Z_tos, __ stackTop()); __ pop_i(); break;
+    case _and :                           __ z_ny(Z_tos,  __ stackTop()); __ pop_i(); break;
+    case _or  :                           __ z_oy(Z_tos,  __ stackTop()); __ pop_i(); break;
+    case _xor :                           __ z_xy(Z_tos,  __ stackTop()); __ pop_i(); break;
+    case shl  : __ z_lr(Z_tmp_1, Z_tos);
+                __ z_nill(Z_tmp_1, 31);  // Lowest 5 bits are shiftamount.
+                                          __ pop_i(Z_tos);   __ z_sll(Z_tos, 0,  Z_tmp_1); break;
+    case shr  : __ z_lr(Z_tmp_1, Z_tos);
+                __ z_nill(Z_tmp_1, 31);  // Lowest 5 bits are shiftamount.
+                                          __ pop_i(Z_tos);   __ z_sra(Z_tos, 0,  Z_tmp_1); break;
+    case ushr : __ z_lr(Z_tmp_1, Z_tos);
+                __ z_nill(Z_tmp_1, 31);  // Lowest 5 bits are shiftamount.
+                                          __ pop_i(Z_tos);   __ z_srl(Z_tos, 0,  Z_tmp_1); break;
+    default   : ShouldNotReachHere(); break;
+  }
+  return;
+}
+
+void TemplateTable::lop2(Operation op) {
+  transition(ltos, ltos);
+
+  switch (op) {
+    case add  :  __ z_ag(Z_tos,  __ stackTop()); __ pop_l(); break;
+    case sub  :  __ z_sg(Z_tos,  __ stackTop()); __ pop_l(); __ z_lcgr(Z_tos, Z_tos); break;
+    case mul  :  __ z_msg(Z_tos, __ stackTop()); __ pop_l(); break;
+    case _and :  __ z_ng(Z_tos,  __ stackTop()); __ pop_l(); break;
+    case _or  :  __ z_og(Z_tos,  __ stackTop()); __ pop_l(); break;
+    case _xor :  __ z_xg(Z_tos,  __ stackTop()); __ pop_l(); break;
+    default   : ShouldNotReachHere(); break;
+  }
+  return;
+}
+
+// Common part of idiv/irem.
+// Pops the dividend from the stack (divisor is in Z_tos), throws the given
+// exception on division by zero, then divides with DSGFR.
+// DSGFR leaves the remainder in the even register (Z_tmp_1) and the
+// quotient in the odd register (Z_tmp_2). min_int / -1 needs no special
+// handling here: the 64-bit quotient cannot overflow, and the caller
+// truncates it back to 32 bits.
+static void idiv_helper(InterpreterMacroAssembler * _masm, address exception) {
+  NearLabel not_null;
+
+  // Use register pair Z_tmp_1, Z_tmp_2 for DIVIDE SINGLE.
+  assert(Z_tmp_1->successor() == Z_tmp_2, " need even/odd register pair for idiv/irem");
+
+  // Get dividend.
+  __ pop_i(Z_tmp_2);
+
+  // If divisor == 0 throw exception.
+  __ compare32_and_branch(Z_tos, (intptr_t) 0,
+                          Assembler::bcondNotEqual, not_null   );
+  __ load_absolute_address(Z_R1_scratch, exception);
+  __ z_br(Z_R1_scratch);
+
+  __ bind(not_null);
+
+  __ z_lgfr(Z_tmp_2, Z_tmp_2);   // Sign extend dividend.
+  __ z_dsgfr(Z_tmp_1, Z_tos);    // Do it.
+}
+
+// idiv: int division; quotient ends up in the odd register of the pair.
+void TemplateTable::idiv() {
+  transition(itos, itos);
+
+  idiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry);
+  __ z_llgfr(Z_tos, Z_tmp_2);     // Result is in Z_tmp_2.
+}
+
+// irem: int remainder; remainder ends up in the even register of the pair.
+void TemplateTable::irem() {
+  transition(itos, itos);
+
+  idiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry);
+  __ z_llgfr(Z_tos, Z_tmp_1);     // Result is in Z_tmp_1.
+}
+
+// lmul: long multiply, second operand in Z_tos, first on the stack.
+void TemplateTable::lmul() {
+  transition(ltos, ltos);
+
+  // Multiply with memory operand.
+  __ z_msg(Z_tos, __ stackTop());
+  __ pop_l();  // Pop operand.
+}
+
+// Common part of ldiv/lrem.
+//
+// Input:
+//     Z_tos := the divisor (dividend still on stack)
+//
+// Updated registers:
+//     Z_tmp_1 := pop_l() % Z_tos     ; if is_ldiv == false
+//     Z_tmp_2 := pop_l() / Z_tos     ; if is_ldiv == true
+//
+// Throws the given exception on division by zero. The divisor == -1 case is
+// handled without DSGR because min_long / -1 would overflow the 64-bit
+// quotient (fixed-point-divide exception): for ldiv the dividend is negated
+// up front (for min_long the negation wraps back to min_long, which is the
+// correct Java result), for lrem the remainder is preset to 0. Only when the
+// divisor is != -1 is the negation undone and the real divide performed.
+static void ldiv_helper(InterpreterMacroAssembler * _masm, address exception, bool is_ldiv) {
+  NearLabel not_null, done;
+
+  // Use register pair Z_tmp_1, Z_tmp_2 for DIVIDE SINGLE.
+  assert(Z_tmp_1->successor() == Z_tmp_2,
+         " need even/odd register pair for idiv/irem");
+
+  // Get dividend.
+  __ pop_l(Z_tmp_2);
+
+  // If divisor == 0 throw exception.
+  __ compare64_and_branch(Z_tos, (intptr_t)0, Assembler::bcondNotEqual, not_null);
+  __ load_absolute_address(Z_R1_scratch, exception);
+  __ z_br(Z_R1_scratch);
+
+  __ bind(not_null);
+  // Special case for dividend == min_long (0x8000...0000) and divisor == -1.
+  if (is_ldiv) {
+    // result := Z_tmp_2 := - dividend
+    __ z_lcgr(Z_tmp_2, Z_tmp_2);
+  } else {
+    // result remainder := Z_tmp_1 := 0
+    __ clear_reg(Z_tmp_1, true, false);  // Don't set CC.
+  }
+
+  // if divisor == -1 goto done
+  __ compare64_and_branch(Z_tos, -1, Assembler::bcondEqual, done);
+  if (is_ldiv)
+    // Restore sign, because divisor != -1.
+    __ z_lcgr(Z_tmp_2, Z_tmp_2);
+  __ z_dsgr(Z_tmp_1, Z_tos);    // Do it.
+  __ bind(done);
+}
+
+// ldiv: long division; quotient ends up in the odd register of the pair.
+void TemplateTable::ldiv() {
+  transition(ltos, ltos);
+
+  ldiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry, true /*is_ldiv*/);
+  __ z_lgr(Z_tos, Z_tmp_2);     // Result is in Z_tmp_2.
+}
+
+// lrem: long remainder; remainder ends up in the even register of the pair.
+void TemplateTable::lrem() {
+  transition(ltos, ltos);
+
+  ldiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry, false /*is_ldiv*/);
+  __ z_lgr(Z_tos, Z_tmp_1);     // Result is in Z_tmp_1.
+}
+
+// lshl: long shift left. Shift amount is the int in Z_tos; the 64-bit shift
+// instruction uses only the low 6 bits of it, matching Java's (count & 0x3f).
+void TemplateTable::lshl() {
+  transition(itos, ltos);
+
+  // Z_tos: shift amount
+  __ pop_l(Z_tmp_1);              // Get the value to be shifted.
+  __ z_sllg(Z_tos, Z_tmp_1, 0, Z_tos);
+}
+
+// lshr: long arithmetic shift right (shift amount in Z_tos, low 6 bits used).
+void TemplateTable::lshr() {
+  transition(itos, ltos);
+
+  // Z_tos: shift amount
+  __ pop_l(Z_tmp_1);              // Get the value to be shifted.
+  __ z_srag(Z_tos, Z_tmp_1, 0, Z_tos);
+}
+
+// lushr: long logical shift right (shift amount in Z_tos, low 6 bits used).
+void TemplateTable::lushr() {
+  transition(itos, ltos);
+
+  // Z_tos: shift amount
+  __ pop_l(Z_tmp_1);              // Get the value to be shifted.
+  __ z_srlg(Z_tos, Z_tmp_1, 0, Z_tos);
+}
+
+void TemplateTable::fop2(Operation op) {
+  transition(ftos, ftos);
+
+  switch (op) {
+    case add:
+      // Add memory operand.
+      __ z_aeb(Z_ftos, __ stackTop()); __ pop_f(); return;
+    case sub:
+      // Sub memory operand.
+      __ z_ler(Z_F1, Z_ftos);    // first operand
+      __ pop_f(Z_ftos);          // second operand from stack
+      __ z_sebr(Z_ftos, Z_F1);
+      return;
+    case mul:
+      // Multiply with memory operand.
+      __ z_meeb(Z_ftos, __ stackTop()); __ pop_f(); return;
+    case div:
+      __ z_ler(Z_F1, Z_ftos);    // first operand
+      __ pop_f(Z_ftos);          // second operand from stack
+      __ z_debr(Z_ftos, Z_F1);
+      return;
+    case rem:
+      // Do runtime call.
+      __ z_ler(Z_FARG2, Z_ftos);  // divisor
+      __ pop_f(Z_FARG1);          // dividend
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
+      // Result should be in the right place (Z_ftos == Z_FRET).
+      return;
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+}
+
+void TemplateTable::dop2(Operation op) {
+  transition(dtos, dtos);
+
+  switch (op) {
+    case add:
+      // Add memory operand.
+      __ z_adb(Z_ftos, __ stackTop()); __ pop_d(); return;
+    case sub:
+      // Sub memory operand.
+      __ z_ldr(Z_F1, Z_ftos);    // first operand
+      __ pop_d(Z_ftos);          // second operand from stack
+      __ z_sdbr(Z_ftos, Z_F1);
+      return;
+    case mul:
+      // Multiply with memory operand.
+      __ z_mdb(Z_ftos, __ stackTop()); __ pop_d(); return;
+    case div:
+      __ z_ldr(Z_F1, Z_ftos);    // first operand
+      __ pop_d(Z_ftos);          // second operand from stack
+      __ z_ddbr(Z_ftos, Z_F1);
+      return;
+    case rem:
+      // Do runtime call.
+      __ z_ldr(Z_FARG2, Z_ftos);  // divisor
+      __ pop_d(Z_FARG1);          // dividend
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
+      // Result should be in the right place (Z_ftos == Z_FRET).
+      return;
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+}
+
+// ineg: negate int TOS (load complement).
+void TemplateTable::ineg() {
+  transition(itos, itos);
+  __ z_lcr(Z_tos);
+}
+
+// lneg: negate long TOS (load complement, 64-bit).
+void TemplateTable::lneg() {
+  transition(ltos, ltos);
+  __ z_lcgr(Z_tos);
+}
+
+// fneg: negate float TOS.
+void TemplateTable::fneg() {
+  transition(ftos, ftos);
+  __ z_lcebr(Z_ftos, Z_ftos);
+}
+
+// dneg: negate double TOS.
+void TemplateTable::dneg() {
+  transition(dtos, dtos);
+  __ z_lcdbr(Z_ftos, Z_ftos);
+}
+
+// iinc: add the signed byte constant at bcp+2 to the indexed int local,
+// in place (32-bit load-add-store; TOS is untouched).
+void TemplateTable::iinc() {
+  transition(vtos, vtos);
+
+  Address local;
+  __ z_lb(Z_R0_scratch, at_bcp(2)); // Get constant.
+  locals_index(Z_R1_scratch);
+  local = iaddress(_masm, Z_R1_scratch);
+  __ z_a(Z_R0_scratch, local);
+  __ reg2mem_opt(Z_R0_scratch, local, false);
+}
+
+// wide_iinc: like iinc but with a 2-byte local index and a signed 2-byte
+// increment (at bcp+4).
+void TemplateTable::wide_iinc() {
+  transition(vtos, vtos);
+
+  // Z_tmp_1 := increment
+  __ get_2_byte_integer_at_bcp(Z_tmp_1, 4, InterpreterMacroAssembler::Signed);
+  // Z_tmp_2 := index of local to increment (shifted to a byte offset)
+  locals_index_wide(Z_tmp_2);
+  // Load, increment, and store (32-bit result; the wider add is harmless
+  // because only the low 32 bits are stored back).
+  __ access_local_int(Z_tmp_2, Z_tos);
+  __ z_agr(Z_tos,  Z_tmp_1);
+  // Shifted index is still in Z_tmp_2.
+  __ reg2mem_opt(Z_tos, Address(Z_locals, Z_tmp_2), false);
+}
+
+
+// convert: all primitive conversion bytecodes (i2l, i2f, ..., d2f).
+// The ASSERT block below only validates that the template's declared
+// in/out TOS states match the bytecode being generated.
+void TemplateTable::convert() {
+  // Checking
+#ifdef ASSERT
+  TosState   tos_in  = ilgl;
+  TosState   tos_out = ilgl;
+
+  switch (bytecode()) {
+    case Bytecodes::_i2l:
+    case Bytecodes::_i2f:
+    case Bytecodes::_i2d:
+    case Bytecodes::_i2b:
+    case Bytecodes::_i2c:
+    case Bytecodes::_i2s:
+      tos_in = itos;
+      break;
+    case Bytecodes::_l2i:
+    case Bytecodes::_l2f:
+    case Bytecodes::_l2d:
+      tos_in = ltos;
+      break;
+    case Bytecodes::_f2i:
+    case Bytecodes::_f2l:
+    case Bytecodes::_f2d:
+      tos_in = ftos;
+      break;
+    case Bytecodes::_d2i:
+    case Bytecodes::_d2l:
+    case Bytecodes::_d2f:
+      tos_in = dtos;
+      break;
+    default :
+      ShouldNotReachHere();
+  }
+  switch (bytecode()) {
+    case Bytecodes::_l2i:
+    case Bytecodes::_f2i:
+    case Bytecodes::_d2i:
+    case Bytecodes::_i2b:
+    case Bytecodes::_i2c:
+    case Bytecodes::_i2s:
+      tos_out = itos;
+      break;
+    case Bytecodes::_i2l:
+    case Bytecodes::_f2l:
+    case Bytecodes::_d2l:
+      tos_out = ltos;
+      break;
+    case Bytecodes::_i2f:
+    case Bytecodes::_l2f:
+    case Bytecodes::_d2f:
+      tos_out = ftos;
+      break;
+    case Bytecodes::_i2d:
+    case Bytecodes::_l2d:
+    case Bytecodes::_f2d:
+      tos_out = dtos;
+      break;
+    default :
+      ShouldNotReachHere();
+  }
+
+  transition(tos_in, tos_out);
+#endif // ASSERT
+
+  // Conversion
+  Label done;
+  switch (bytecode()) {
+    case Bytecodes::_i2l:
+      __ z_lgfr(Z_tos, Z_tos);
+      return;
+    case Bytecodes::_i2f:
+      __ z_cefbr(Z_ftos, Z_tos);
+      return;
+    case Bytecodes::_i2d:
+      __ z_cdfbr(Z_ftos, Z_tos);
+      return;
+    case Bytecodes::_i2b:
+      // Sign extend least significant byte.
+      __ move_reg_if_needed(Z_tos, T_BYTE, Z_tos, T_INT);
+      return;
+    case Bytecodes::_i2c:
+      // Zero extend 2 least significant bytes.
+      __ move_reg_if_needed(Z_tos, T_CHAR, Z_tos, T_INT);
+      return;
+    case Bytecodes::_i2s:
+      // Sign extend 2 least significant bytes.
+      __ move_reg_if_needed(Z_tos, T_SHORT, Z_tos, T_INT);
+      return;
+    case Bytecodes::_l2i:
+      // Sign-extend not needed here, upper 4 bytes of int value in register are ignored.
+      return;
+    case Bytecodes::_l2f:
+      __ z_cegbr(Z_ftos, Z_tos);
+      return;
+    case Bytecodes::_l2d:
+      __ z_cdgbr(Z_ftos, Z_tos);
+      return;
+    case Bytecodes::_f2i:
+    case Bytecodes::_f2l:
+      // Preload result 0; compare the float with itself to detect NaN
+      // (unordered) and keep the 0 in that case. Java mandates NaN -> 0.
+      __ clear_reg(Z_tos, true, false);  // Don't set CC.
+      __ z_cebr(Z_ftos, Z_ftos);
+      __ z_brno(done); // NaN -> 0
+      if (bytecode() == Bytecodes::_f2i)
+        __ z_cfebr(Z_tos, Z_ftos, Assembler::to_zero);
+      else // bytecode() == Bytecodes::_f2l
+        __ z_cgebr(Z_tos, Z_ftos, Assembler::to_zero);
+      break;
+    case Bytecodes::_f2d:
+      __ move_freg_if_needed(Z_ftos, T_DOUBLE, Z_ftos, T_FLOAT);
+      return;
+    case Bytecodes::_d2i:
+    case Bytecodes::_d2l:
+      // Same NaN -> 0 scheme as f2i/f2l, with double-precision compare/convert.
+      __ clear_reg(Z_tos, true, false);  // Don't set CC.
+      __ z_cdbr(Z_ftos, Z_ftos);
+      __ z_brno(done); // NaN -> 0
+      if (bytecode() == Bytecodes::_d2i)
+        __ z_cfdbr(Z_tos, Z_ftos, Assembler::to_zero);
+      else // Bytecodes::_d2l
+        __ z_cgdbr(Z_tos, Z_ftos, Assembler::to_zero);
+      break;
+    case Bytecodes::_d2f:
+      __ move_freg_if_needed(Z_ftos, T_FLOAT, Z_ftos, T_DOUBLE);
+      return;
+    default:
+      ShouldNotReachHere();
+  }
+  __ bind(done);
+}
+
+// lcmp: compare two longs, push -1/0/1.
+// On machines with LOAD/STORE ON CONDITION the result is selected branch-free;
+// the immediate loads (LGHI) after the compare do not change the CC.
+void TemplateTable::lcmp() {
+  transition(ltos, itos);
+
+  Label   done;
+  Register val1 = Z_R0_scratch;
+  Register val2 = Z_R1_scratch;
+
+  if (VM_Version::has_LoadStoreConditional()) {
+    __ pop_l(val1);           // pop value 1.
+    __ z_lghi(val2,  -1);     // lt value
+    __ z_cgr(val1, Z_tos);    // Compare with Z_tos (value 2). Protect CC under all circumstances.
+    __ z_lghi(val1,   1);     // gt value
+    __ z_lghi(Z_tos,  0);     // eq value
+
+    __ z_locgr(Z_tos, val1, Assembler::bcondHigh);
+    __ z_locgr(Z_tos, val2, Assembler::bcondLow);
+  } else {
+    __ pop_l(val1);           // Pop value 1.
+    __ z_cgr(val1, Z_tos);    // Compare with Z_tos (value 2). Protect CC under all circumstances.
+
+    __ z_lghi(Z_tos,  0);     // eq value
+    __ z_bre(done);
+
+    __ z_lghi(Z_tos,  1);     // gt value
+    __ z_brh(done);
+
+    __ z_lghi(Z_tos, -1);     // lt value
+  }
+
+  __ bind(done);
+}
+
+
+// float_cmp: fcmpl/fcmpg/dcmpl/dcmpg. Compares value1 (popped) with
+// value2 (Z_ftos) and pushes -1/0/1. unordered_result selects the NaN
+// result: +1 for fcmpg/dcmpg (== 1), -1 for fcmpl/dcmpl.
+void TemplateTable::float_cmp(bool is_float, int unordered_result) {
+  Label done;
+
+  if (is_float) {
+    __ pop_f(Z_FARG2);
+    __ z_cebr(Z_FARG2, Z_ftos);
+  } else {
+    __ pop_d(Z_FARG2);
+    __ z_cdbr(Z_FARG2, Z_ftos);
+  }
+
+  if (VM_Version::has_LoadStoreConditional()) {
+    Register one       = Z_R0_scratch;
+    Register minus_one = Z_R1_scratch;
+    // LGHI does not change the CC, so the compare result survives the loads.
+    __ z_lghi(minus_one,  -1);
+    __ z_lghi(one,  1);
+    __ z_lghi(Z_tos, 0);
+    // Fold the unordered case into +1 or -1 depending on the bytecode variant.
+    __ z_locgr(Z_tos, one,       unordered_result == 1 ? Assembler::bcondHighOrNotOrdered : Assembler::bcondHigh);
+    __ z_locgr(Z_tos, minus_one, unordered_result == 1 ? Assembler::bcondLow              : Assembler::bcondLowOrNotOrdered);
+  } else {
+    // Z_FARG2 == Z_ftos
+    __ clear_reg(Z_tos, false, false);
+    __ z_bre(done);
+
+    // F_ARG2 > Z_Ftos, or unordered
+    __ z_lhi(Z_tos, 1);
+    __ z_brc(unordered_result == 1 ? Assembler::bcondHighOrNotOrdered : Assembler::bcondHigh, done);
+
+    // F_ARG2 < Z_FTOS, or unordered
+    __ z_lhi(Z_tos, -1);
+
+    __ bind(done);
+  }
+}
+
+// branch: common code for goto/goto_w/jsr/jsr_w and the if_* templates.
+// Reads the signed 2- or 4-byte branch offset from the bytecode stream,
+// bumps Z_bcp, maintains backedge counters (for profiling and on-stack
+// replacement) on backward branches, and dispatches to the target bytecode.
+void TemplateTable::branch(bool is_jsr, bool is_wide) {
+  const Register   bumped_count = Z_tmp_1;
+  const Register   method       = Z_tmp_2;
+  const Register   m_counters   = Z_R1_scratch;
+  const Register   mdo          = Z_tos;
+
+  BLOCK_COMMENT("TemplateTable::branch {");
+  __ get_method(method);
+  __ profile_taken_branch(mdo, bumped_count);
+
+  const ByteSize ctr_offset = InvocationCounter::counter_offset();
+  const ByteSize be_offset  = MethodCounters::backedge_counter_offset()   + ctr_offset;
+  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ctr_offset;
+
+  // Get (wide) offset to disp.
+  const Register disp = Z_ARG5;
+  if (is_wide) {
+    __ get_4_byte_integer_at_bcp(disp, 1);
+  } else {
+    __ get_2_byte_integer_at_bcp(disp, 1, InterpreterMacroAssembler::Signed);
+  }
+
+  // Handle all the JSR stuff here, then exit.
+  // It's much shorter and cleaner than intermingling with the
+  // non-JSR normal-branch stuff occurring below.
+  if (is_jsr) {
+    // Compute return address as bci in Z_tos.
+    // bci = bcp - (start of bytecodes) = bcp - (const_offset + codes_offset);
+    // the bytecode length (5 or 3) is added so the bci refers to the
+    // instruction following the jsr.
+    __ z_lgr(Z_R1_scratch, Z_bcp);
+    __ z_sg(Z_R1_scratch, Address(method, Method::const_offset()));
+    __ add2reg(Z_tos, (is_wide ? 5 : 3) - in_bytes(ConstMethod::codes_offset()), Z_R1_scratch);
+
+    // Bump bcp to target of JSR.
+    __ z_agr(Z_bcp, disp);
+    // Push return address for "ret" on stack.
+    __ push_ptr(Z_tos);
+    // And away we go!
+    __ dispatch_next(vtos);
+    return;
+  }
+
+  // Normal (non-jsr) branch handling.
+
+  // Bump bytecode pointer by displacement (take the branch).
+  __ z_agr(Z_bcp, disp);
+
+  assert(UseLoopCounter || !UseOnStackReplacement,
+         "on-stack-replacement requires loop counters");
+
+  NearLabel backedge_counter_overflow;
+  NearLabel profile_method;
+  NearLabel dispatch;
+  int       increment = InvocationCounter::count_increment;
+
+  if (UseLoopCounter) {
+    // Increment backedge counter for backward branches.
+    // disp: target offset
+    // Z_bcp: target bcp
+    // Z_locals: locals pointer
+    //
+    // Count only if backward branch (disp <= 0); forward branches skip
+    // straight to dispatch.
+    __ compare32_and_branch(disp, (intptr_t)0, Assembler::bcondHigh, dispatch);
+
+    if (TieredCompilation) {
+      Label noCounters;
+
+      if (ProfileInterpreter) {
+        NearLabel   no_mdo;
+
+        // Are we profiling?
+        __ load_and_test_long(mdo, Address(method, Method::method_data_offset()));
+        __ branch_optimized(Assembler::bcondZero, no_mdo);
+
+        // Increment the MDO backedge counter.
+        const Address mdo_backedge_counter(mdo, MethodData::backedge_counter_offset() + InvocationCounter::counter_offset());
+
+        const Address mask(mdo, MethodData::backedge_mask_offset());
+        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
+                                   Z_ARG2, false, Assembler::bcondZero,
+                                   UseOnStackReplacement ? &backedge_counter_overflow : NULL);
+        __ z_bru(dispatch);
+        __ bind(no_mdo);
+      }
+
+      // Increment backedge counter in MethodCounters*.
+      // get_method_counters branches to noCounters if the MethodCounters
+      // object does not exist and cannot be allocated.
+      __ get_method_counters(method, m_counters, noCounters);
+      const Address mask(m_counters, MethodCounters::backedge_mask_offset());
+      __ increment_mask_and_jump(Address(m_counters, be_offset),
+                                 increment, mask,
+                                 Z_ARG2, false, Assembler::bcondZero,
+                                 UseOnStackReplacement ? &backedge_counter_overflow : NULL);
+      __ bind(noCounters);
+    } else {
+      Register counter = Z_tos;
+      Label    noCounters;
+      // Get address of MethodCounters object.
+      __ get_method_counters(method, m_counters, noCounters);
+      // Increment backedge counter.
+      __ increment_backedge_counter(m_counters, counter);
+
+      if (ProfileInterpreter) {
+        // Test to see if we should create a method data obj.
+        __ z_cl(counter, Address(m_counters, MethodCounters::interpreter_profile_limit_offset()));
+        __ z_brl(dispatch);
+
+        // If no method data exists, go to profile method.
+        __ test_method_data_pointer(Z_ARG4/*result unused*/, profile_method);
+
+        if (UseOnStackReplacement) {
+          // Check for overflow against 'bumped_count' which is the MDO taken count.
+          __ z_cl(bumped_count, Address(m_counters, MethodCounters::interpreter_backward_branch_limit_offset()));
+          __ z_brl(dispatch);
+
+          // When ProfileInterpreter is on, the backedge_count comes
+          // from the methodDataOop, which value does not get reset on
+          // the call to frequency_counter_overflow(). To avoid
+          // excessive calls to the overflow routine while the method is
+          // being compiled, add a second test to make sure the overflow
+          // function is called only once every overflow_frequency.
+          const int overflow_frequency = 1024;
+          __ and_imm(bumped_count, overflow_frequency - 1);
+          __ z_brz(backedge_counter_overflow);
+
+        }
+      } else {
+        if (UseOnStackReplacement) {
+          // Check for overflow against 'counter', which is the sum of the
+          // counters.
+          __ z_cl(counter, Address(m_counters, MethodCounters::interpreter_backward_branch_limit_offset()));
+          __ z_brh(backedge_counter_overflow);
+        }
+      }
+      __ bind(noCounters);
+    }
+
+    __ bind(dispatch);
+  }
+
+  // Pre-load the next target bytecode into Z_bytecode.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t) 0));
+
+  // Continue with the bytecode @ target.
+  // Z_tos: Return bci for jsr's, unused otherwise.
+  // Z_bytecode: target bytecode
+  // Z_bcp: target bcp
+  __ dispatch_only(vtos);
+
+  // Out-of-line code runtime calls.
+  if (UseLoopCounter) {
+    if (ProfileInterpreter) {
+      // Out-of-line code to allocate method data oop.
+      __ bind(profile_method);
+
+      __ call_VM(noreg,
+                 CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+      __ z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t) 0));  // Restore target bytecode.
+      __ set_method_data_pointer_for_bcp();
+      __ z_bru(dispatch);
+    }
+
+    if (UseOnStackReplacement) {
+
+      // invocation counter overflow
+      __ bind(backedge_counter_overflow);
+
+      __ z_lcgr(Z_ARG2, disp); // Z_ARG2 := -disp
+      __ z_agr(Z_ARG2, Z_bcp); // Z_ARG2 := branch target bcp - disp == branch bcp
+      __ call_VM(noreg,
+                 CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow),
+                 Z_ARG2);
+
+      // Z_RET: osr nmethod (osr ok) or NULL (osr not possible).
+      __ compare64_and_branch(Z_RET, (intptr_t) 0, Assembler::bcondEqual, dispatch);
+
+      // Nmethod may have been invalidated (VM may block upon call_VM return).
+      __ z_cliy(nmethod::state_offset(), Z_RET, nmethod::in_use);
+      __ z_brne(dispatch);
+
+      // Migrate the interpreter frame off of the stack.
+
+      __ z_lgr(Z_tmp_1, Z_RET); // Save the nmethod.
+
+      call_VM(noreg,
+              CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
+
+      // Z_RET is OSR buffer, move it to expected parameter location.
+      __ lgr_if_needed(Z_ARG1, Z_RET);
+
+      // Pop the interpreter frame ...
+      __ pop_interpreter_frame(Z_R14, Z_ARG2/*tmp1*/, Z_ARG3/*tmp2*/);
+
+      // ... and begin the OSR nmethod.
+      __ z_lg(Z_R1_scratch, Address(Z_tmp_1, nmethod::osr_entry_point_offset()));
+      __ z_br(Z_R1_scratch);
+    }
+  }
+  BLOCK_COMMENT("} TemplateTable::branch");
+}
+
+void TemplateTable::if_0cmp(Condition cc) {
+  transition(itos, vtos);
+
+  // Compare the int on tos against zero. The taken path is expected to be
+  // the common one (loops use backward branches), so only the not-taken
+  // case performs a jump here.
+  NearLabel fallthru;
+  __ compare32_and_branch(Z_tos, (intptr_t) 0, j_not(cc), fallthru);
+  branch(false, false);  // Taken: compute target bcp and dispatch.
+  __ bind(fallthru);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+void TemplateTable::if_icmp(Condition cc) {
+  transition(itos, vtos);
+
+  // Pop the first operand; the second is on tos. Taken branches are assumed
+  // to be the common case (loops use backward branches), so only the
+  // not-taken path jumps.
+  NearLabel fallthru;
+  __ pop_i(Z_R0_scratch);
+  __ compare32_and_branch(Z_R0_scratch, Z_tos, j_not(cc), fallthru);
+  branch(false, false);  // Taken: compute target bcp and dispatch.
+  __ bind(fallthru);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+void TemplateTable::if_nullcmp(Condition cc) {
+  transition(atos, vtos);
+
+  // Compare the reference on tos against NULL (full 64-bit compare).
+  // Taken branches are assumed to be the common case, so only the
+  // not-taken path jumps.
+  NearLabel fallthru;
+  __ compare64_and_branch(Z_tos, (intptr_t) 0, j_not(cc), fallthru);
+  branch(false, false);  // Taken: compute target bcp and dispatch.
+  __ bind(fallthru);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+void TemplateTable::if_acmp(Condition cc) {
+  transition(atos, vtos);
+  // Assume branch is more often taken than not (loops use backward branches).
+  NearLabel not_taken;
+  __ pop_ptr(Z_ARG2);        // First operand (next-to-top of stack).
+  __ verify_oop(Z_ARG2);
+  __ verify_oop(Z_tos);
+  // Reference comparison: unsigned 64-bit compare of the two oops.
+  __ compareU64_and_branch(Z_tos, Z_ARG2, j_not(cc), not_taken);
+  branch(false, false);
+  __ bind(not_taken);
+  // NOTE(review): the sibling if_* templates pass Z_tos here; presumably the
+  // argument is only a scratch register for the profiler, but Z_ARG3 looks
+  // inconsistent — confirm against profile_not_taken_branch's contract.
+  __ profile_not_taken_branch(Z_ARG3);
+}
+
+// ret: return from a jsr subroutine. Reload bcp from the local variable
+// holding the return bci and dispatch the bytecode found there.
+void TemplateTable::ret() {
+  transition(vtos, vtos);
+
+  locals_index(Z_tmp_1);
+  // Get return bci, compute return bcp. Must load 64 bits.
+  __ mem2reg_opt(Z_tmp_1, iaddress(_masm, Z_tmp_1));
+  __ profile_ret(Z_tmp_1, Z_tmp_2);
+  __ get_method(Z_tos);
+  // bcp = ConstMethod* + codes_offset + bci.
+  __ mem2reg_opt(Z_R1_scratch, Address(Z_tos, Method::const_offset()));
+  __ load_address(Z_bcp, Address(Z_R1_scratch, Z_tmp_1, ConstMethod::codes_offset()));
+  __ dispatch_next(vtos);
+}
+
+// wide_ret: same as ret, but the local variable index is a wide (16-bit)
+// operand.
+void TemplateTable::wide_ret() {
+  transition(vtos, vtos);
+
+  locals_index_wide(Z_tmp_1);
+  // Get return bci, compute return bcp.
+  __ mem2reg_opt(Z_tmp_1, aaddress(_masm, Z_tmp_1));
+  __ profile_ret(Z_tmp_1, Z_tmp_2);
+  __ get_method(Z_tos);
+  // bcp = ConstMethod* + codes_offset + bci.
+  __ mem2reg_opt(Z_R1_scratch, Address(Z_tos, Method::const_offset()));
+  __ load_address(Z_bcp, Address(Z_R1_scratch, Z_tmp_1, ConstMethod::codes_offset()));
+  __ dispatch_next(vtos);
+}
+
+// tableswitch: range-check the key in Z_tos against [low, high] and jump
+// via the table of 4-byte relative offsets following the aligned operands.
+// Operand layout after alignment: default offset, low, high, jump offsets.
+void TemplateTable::tableswitch () {
+  transition(itos, vtos);
+
+  NearLabel default_case, continue_execution;
+  Register  bcp = Z_ARG5;
+  // Align bcp.
+  __ load_address(bcp, at_bcp(BytesPerInt));
+  __ z_nill(bcp, (-BytesPerInt) & 0xffff);
+
+  // Load lo & hi.
+  Register low  = Z_tmp_1;
+  Register high = Z_tmp_2;
+
+  // Load low into 64 bits, since used for address calculation.
+  __ mem2reg_signed_opt(low, Address(bcp, BytesPerInt));
+  __ mem2reg_opt(high, Address(bcp, 2 * BytesPerInt), false);
+  // Sign extend "label" value for address calculation.
+  __ z_lgfr(Z_tos, Z_tos);
+
+  // Check against lo & hi.
+  __ compare32_and_branch(Z_tos, low, Assembler::bcondLow, default_case);
+  __ compare32_and_branch(Z_tos, high, Assembler::bcondHigh, default_case);
+
+  // Lookup dispatch offset.
+  __ z_sgr(Z_tos, low);
+  Register jump_table_offset = Z_ARG3;
+  // Index2offset; index in Z_tos is killed by profile_switch_case.
+  __ z_sllg(jump_table_offset, Z_tos, LogBytesPerInt);
+  __ profile_switch_case(Z_tos, Z_ARG4 /*tmp for mdp*/, low/*tmp*/, Z_bytecode/*tmp*/);
+
+  Register index = Z_tmp_2;  // Re-uses high's register; high is dead here.
+
+  // Load index sign extended for addressing.
+  __ mem2reg_signed_opt(index, Address(bcp, jump_table_offset, 3 * BytesPerInt));
+
+  // Continue execution.
+  __ bind(continue_execution);
+
+  // Load next bytecode.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, index));
+  __ z_agr(Z_bcp, index); // Advance bcp.
+  __ dispatch_only(vtos);
+
+  // Handle default.
+  __ bind(default_case);
+
+  __ profile_switch_default(Z_tos);
+  // Load the default offset, then share the dispatch code above.
+  __ mem2reg_signed_opt(index, Address(bcp));
+  __ z_bru(continue_execution);
+}
+
+// lookupswitch is always rewritten (to fast_linearswitch or
+// fast_binaryswitch) before execution, so reaching this template is a bug.
+void TemplateTable::lookupswitch () {
+  transition(itos, itos);
+  __ stop("lookupswitch bytecode should have been rewritten");
+}
+
+// fast_linearswitch: rewritten lookupswitch using a linear backward scan
+// over the (match, offset) int pairs. Operand layout after alignment:
+// default offset, npairs, then npairs pairs of (match, offset).
+void TemplateTable::fast_linearswitch () {
+  transition(itos, vtos);
+
+  Label    loop_entry, loop, found, continue_execution;
+  Register bcp = Z_ARG5;
+
+  // Align bcp.
+  __ load_address(bcp, at_bcp(BytesPerInt));
+  __ z_nill(bcp, (-BytesPerInt) & 0xffff);
+
+  // Start search with last case.
+  Register current_case_offset = Z_tmp_1;
+
+  // Load npairs and convert to a byte offset (one pair = 2 ints = 8 bytes).
+  __ mem2reg_signed_opt(current_case_offset, Address(bcp, BytesPerInt));
+  __ z_sllg(current_case_offset, current_case_offset, LogBytesPerWord);   // index2bytes
+  __ z_bru(loop_entry);
+
+  // table search
+  __ bind(loop);
+
+  // Compare the key against the match value of the current pair
+  // (pairs start at 2*BytesPerInt past the aligned operand start).
+  __ z_c(Z_tos, Address(bcp, current_case_offset, 2 * BytesPerInt));
+  __ z_bre(found);
+
+  __ bind(loop_entry);
+  __ z_aghi(current_case_offset, -2 * BytesPerInt);  // Decrement.
+  __ z_brnl(loop);
+
+  // default case
+  Register   offset = Z_tmp_2;
+
+  __ profile_switch_default(Z_tos);
+  // Load offset sign extended for addressing.
+  __ mem2reg_signed_opt(offset, Address(bcp));
+  __ z_bru(continue_execution);
+
+  // Entry found -> get offset.
+  __ bind(found);
+  __ mem2reg_signed_opt(offset, Address(bcp, current_case_offset, 3 * BytesPerInt));
+  // Profile that this case was taken.
+  Register current_case_idx = Z_ARG4;
+  __ z_srlg(current_case_idx, current_case_offset, LogBytesPerWord); // bytes2index
+  __ profile_switch_case(current_case_idx, Z_tos, bcp, Z_bytecode);
+
+  // Continue execution.
+  __ bind(continue_execution);
+
+  // Load next bytecode.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, offset, 0));
+  __ z_agr(Z_bcp, offset); // Advance bcp.
+  __ dispatch_only(vtos);
+}
+
+
+// fast_binaryswitch: rewritten lookupswitch using binary search over the
+// sorted (match, offset) pairs. The array pointer is positioned at the
+// first pair; length sits at array[-1], the default offset at array[-2]
+// (in BytesPerInt units).
+void TemplateTable::fast_binaryswitch() {
+
+  transition(itos, vtos);
+
+  // Implementation using the following core algorithm:
+  //
+  // int binary_search(int key, LookupswitchPair* array, int n) {
+  //   // Binary search according to "Methodik des Programmierens" by
+  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
+  //   int i = 0;
+  //   int j = n;
+  //   while (i+1 < j) {
+  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
+  //     // with      Q: for all i: 0 <= i < n: key < a[i]
+  //     // where a stands for the array and assuming that the (inexisting)
+  //     // element a[n] is infinitely big.
+  //     int h = (i + j) >> 1;
+  //     // i < h < j
+  //     if (key < array[h].fast_match()) {
+  //       j = h;
+  //     } else {
+  //       i = h;
+  //     }
+  //   }
+  //   // R: a[i] <= key < a[i+1] or Q
+  //   // (i.e., if key is within array, i is the correct index)
+  //   return i;
+  // }
+
+  // Register allocation
+  // Note: Since we use the indices in address operands, we do all the
+  // computation in 64 bits.
+  const Register key   = Z_tos; // Already set (tosca).
+  const Register array = Z_tmp_1;
+  const Register i     = Z_tmp_2;
+  const Register j     = Z_ARG5;
+  const Register h     = Z_ARG4;
+  const Register temp  = Z_R1_scratch;
+
+  // Find array start.
+  __ load_address(array, at_bcp(3 * BytesPerInt));
+  __ z_nill(array, (-BytesPerInt) & 0xffff);   // align
+
+  // Initialize i & j.
+  __ clear_reg(i, true, false);  // i = 0;  Don't set CC.
+  __ mem2reg_signed_opt(j, Address(array, -BytesPerInt)); // j = length(array);
+
+  // And start.
+  Label entry;
+  __ z_bru(entry);
+
+  // binary search loop
+  {
+    NearLabel   loop;
+
+    __ bind(loop);
+
+    // int h = (i + j) >> 1;
+    __ add2reg_with_index(h, 0, i, j); // h = i + j;
+    __ z_srag(h, h, 1);                // h = (i + j) >> 1;
+
+    // if (key < array[h].fast_match()) {
+    //   j = h;
+    // } else {
+    //   i = h;
+    // }
+
+    // Convert array[h].match to native byte-ordering before compare.
+    // One pair is 2 ints = 8 bytes, hence the LogBytesPerWord scaling.
+    __ z_sllg(temp, h, LogBytesPerWord);   // index2bytes
+    __ mem2reg_opt(temp, Address(array, temp), false);
+
+    NearLabel  else_;
+
+    __ compare32_and_branch(key, temp, Assembler::bcondNotLow, else_);
+    // j = h if (key <  array[h].fast_match())
+    __ z_lgr(j, h);
+    __ z_bru(entry); // continue
+
+    __ bind(else_);
+
+    // i = h if (key >= array[h].fast_match())
+    __ z_lgr(i, h);  // and fallthrough
+
+    // while (i+1 < j)
+    __ bind(entry);
+
+    // if (i + 1 < j) continue search
+    __ add2reg(h, 1, i);
+    __ compare64_and_branch(h, j, Assembler::bcondLow, loop);
+  }
+
+  // End of binary search, result index is i (must check again!).
+  NearLabel default_case;
+
+  // h is no longer needed, so use it to hold the byte offset.
+  __ z_sllg(h, i, LogBytesPerWord);   // index2bytes
+  __ mem2reg_opt(temp, Address(array, h), false);
+  __ compare32_and_branch(key, temp, Assembler::bcondNotEqual, default_case);
+
+  // entry found -> j = offset
+  __ mem2reg_signed_opt(j, Address(array, h, BytesPerInt));
+  __ profile_switch_case(i, key, array, Z_bytecode);
+  // Load next bytecode.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, j));
+  __ z_agr(Z_bcp, j);       // Advance bcp.
+  __ dispatch_only(vtos);
+
+  // default case -> j = default offset
+  __ bind(default_case);
+
+  __ profile_switch_default(i);
+  // Default offset lives two ints before the array start.
+  __ mem2reg_signed_opt(j, Address(array, -2 * BytesPerInt));
+  // Load next bytecode.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, j));
+  __ z_agr(Z_bcp, j);       // Advance bcp.
+  __ dispatch_only(vtos);
+}
+
+// Generate the return bytecodes. For _return_register_finalizer, first call
+// into the runtime to register the receiver for finalization when its class
+// carries JVM_ACC_HAS_FINALIZER.
+void TemplateTable::_return(TosState state) {
+  transition(state, state);
+  assert(_desc->calls_vm(),
+         "inconsistent calls_vm information"); // call in remove_activation
+
+  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
+    Register Rthis  = Z_ARG2;
+    Register Rklass = Z_ARG5;
+    Label skip_register_finalizer;
+    assert(state == vtos, "only valid state");
+    __ z_lg(Rthis, aaddress(0));  // Load the receiver (local 0).
+    __ load_klass(Rklass, Rthis);
+    // Skip the runtime call unless the klass has a finalizer.
+    __ testbit(Address(Rklass, Klass::access_flags_offset()), exact_log2(JVM_ACC_HAS_FINALIZER));
+    __ z_bfalse(skip_register_finalizer);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), Rthis);
+    __ bind(skip_register_finalizer);
+  }
+
+  // Pop the interpreter frame; the return address ends up in Z_R14.
+  __ remove_activation(state, Z_R14);
+  __ z_br(Z_R14);
+}
+
+// ----------------------------------------------------------------------------
+// Resolve the constant pool cache entry for the current bytecode if it has
+// not been resolved yet, leaving the cache pointer in Rcache and the entry's
+// byte offset in cpe_offset.
+// NOTE: Cpe_offset is already computed as byte offset, so we must not
+// shift it afterwards!
+void TemplateTable::resolve_cache_and_index(int byte_no,
+                                            Register Rcache,
+                                            Register cpe_offset,
+                                            size_t index_size) {
+  BLOCK_COMMENT("resolve_cache_and_index {");
+  NearLabel      resolved;
+  const Register bytecode_in_cpcache = Z_R1_scratch;
+  assert_different_registers(Rcache, cpe_offset, bytecode_in_cpcache);
+
+  // The nofast variants resolve exactly like their plain counterparts,
+  // so compare against the plain bytecode stored in the cp cache entry.
+  Bytecodes::Code code = bytecode();
+  switch (code) {
+    case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+    case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+    default:
+      break; // All other bytecodes resolve under their own code.
+  }
+
+  {
+    assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, cpe_offset, bytecode_in_cpcache, byte_no, 1, index_size);
+    // Have we resolved this bytecode?
+    __ compare32_and_branch(bytecode_in_cpcache, (int)code, Assembler::bcondEqual, resolved);
+  }
+
+  // Resolve first time through.
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+  __ load_const_optimized(Z_ARG2, (int) code);
+  __ call_VM(noreg, entry, Z_ARG2);
+
+  // Update registers with resolved info.
+  __ get_cache_and_index_at_bcp(Rcache, cpe_offset, 1, index_size);
+  __ bind(resolved);
+  BLOCK_COMMENT("} resolve_cache_and_index");
+}
+
+// The Rcache and index registers must be set before call.
+// Index is already a byte offset, don't shift!
+// Loads the field offset (off), the flags word (flags) and, for static
+// fields, the holder class's java mirror (obj) from the cp cache entry.
+// NOTE(review): the default argument on is_static sits on the out-of-line
+// definition — unusual; confirm the declaration carries no default.
+void TemplateTable::load_field_cp_cache_entry(Register obj,
+                                              Register cache,
+                                              Register index,
+                                              Register off,
+                                              Register flags,
+                                              bool is_static = false) {
+  assert_different_registers(cache, index, flags, off);
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+  // Field offset
+  __ mem2reg_opt(off, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
+  // Flags. Must load 64 bits.
+  __ mem2reg_opt(flags, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
+
+  // klass overwrite register
+  if (is_static) {
+    // f1 holds the holder klass; replace it by its java mirror, which is
+    // the object static field accesses operate on.
+    __ mem2reg_opt(obj, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
+    __ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
+  }
+}
+
+// Loads the target method (and, if requested, the itable index) plus the
+// flags word from the cp cache entry for an invoke bytecode, resolving the
+// entry first unless is_invokevfinal.
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
+                                               bool is_invokevirtual,
+                                               bool is_invokevfinal, // NOTE(review): commented "unused" upstream, but it is tested below — confirm.
+                                               bool is_invokedynamic) {
+  BLOCK_COMMENT("load_invoke_cp_cache_entry {");
+  // Setup registers.
+  const Register cache     = Z_ARG1;
+  const Register cpe_offset= flags;  // Aliases flags; flags is written last.
+  const ByteSize base_off  = ConstantPoolCache::base_offset();
+  const ByteSize f1_off    = ConstantPoolCacheEntry::f1_offset();
+  const ByteSize f2_off    = ConstantPoolCacheEntry::f2_offset();
+  const ByteSize flags_off = ConstantPoolCacheEntry::flags_offset();
+  const int method_offset  = in_bytes(base_off + ((byte_no == f2_byte) ? f2_off : f1_off));
+  const int flags_offset   = in_bytes(base_off + flags_off);
+  // Access constant pool cache fields.
+  const int index_offset   = in_bytes(base_off + f2_off);
+
+  assert_different_registers(method, itable_index, flags, cache);
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+
+  if (is_invokevfinal) {
+    // Already resolved.
+     assert(itable_index == noreg, "register not used");
+     __ get_cache_and_index_at_bcp(cache, cpe_offset, 1);
+  } else {
+    // Need to resolve.
+    resolve_cache_and_index(byte_no, cache, cpe_offset, is_invokedynamic ? sizeof(u4) : sizeof(u2));
+  }
+  __ z_lg(method, Address(cache, cpe_offset, method_offset));
+
+  if (itable_index != noreg) {
+    __ z_lg(itable_index, Address(cache, cpe_offset, index_offset));
+  }
+
+  // Only load the lower 4 bytes and fill high bytes of flags with zeros.
+  // Callers depend on this zero-extension!!!
+  // Attention: overwrites cpe_offset == flags
+  __ z_llgf(flags, Address(cache, cpe_offset, flags_offset + (BytesPerLong-BytesPerInt)));
+
+  BLOCK_COMMENT("} load_invoke_cp_cache_entry");
+}
+
+// The registers cache and index expected to be set before call.
+// Correct values of the cache and index registers are preserved.
+// Posts a JVMTI field-access event when an access watch is set; otherwise
+// generates only a cheap counter check.
+void TemplateTable::jvmti_post_field_access(Register cache, Register index,
+                                            bool is_static, bool has_tos) {
+
+  // Do the JVMTI work here to avoid disturbing the register state below.
+  // We use the Z_ARG registers here because they are also the registers the
+  // VM call below expects its arguments in.
+  if (!JvmtiExport::can_post_field_access()) {
+    return;
+  }
+
+  // Check to see if a field access watch has been set before we
+  // take the time to call into the VM.
+  Label exit;
+  assert_different_registers(cache, index, Z_tos);
+  __ load_absolute_address(Z_tos, (address)JvmtiExport::get_field_access_count_addr());
+  __ load_and_test_int(Z_R0, Address(Z_tos));
+  __ z_brz(exit);
+
+  // Index is returned as byte offset, do not shift!
+  __ get_cache_and_index_at_bcp(Z_ARG3, Z_R1_scratch, 1);
+
+  // cache entry pointer
+  __ add2reg_with_index(Z_ARG3,
+                        in_bytes(ConstantPoolCache::base_offset()),
+                        Z_ARG3, Z_R1_scratch);
+
+  if (is_static) {
+    __ clear_reg(Z_ARG2, true, false); // NULL object reference. Don't set CC.
+  } else {
+    __ mem2reg_opt(Z_ARG2, at_tos());  // Get object pointer without popping it.
+    __ verify_oop(Z_ARG2);
+  }
+  // Z_ARG2: object pointer or NULL
+  // Z_ARG3: cache entry pointer
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
+             Z_ARG2, Z_ARG3);
+  // The VM call may have blocked; reload cache and index for the caller.
+  __ get_cache_and_index_at_bcp(cache, index, 1);
+
+  __ bind(exit);
+}
+
+// Pop an object reference off the expression stack into r, null-check it
+// and (in debug builds) verify it is a valid oop.
+void TemplateTable::pop_and_check_object(Register r) {
+  __ pop_ptr(r);
+  __ null_check(r);  // for field access must check obj.
+  __ verify_oop(r);
+}
+
+// Generate getfield (is_static == false) / getstatic (is_static == true):
+// resolve the cp cache entry, load the field offset and type flags, then
+// dispatch through a branch table indexed by the tos state to the
+// type-specific load/push code. rc controls bytecode rewriting.
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
+  transition(vtos, vtos);
+
+  const Register cache = Z_tmp_1;
+  const Register index = Z_tmp_2;
+  const Register obj   = Z_tmp_1;  // Same register as cache.
+  const Register off   = Z_ARG2;
+  const Register flags = Z_ARG1;
+  const Register bc    = Z_tmp_1;  // Uses same reg as obj, so don't mix them.
+
+  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
+  jvmti_post_field_access(cache, index, is_static, false);
+  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+
+  if (!is_static) {
+    // Obj is on the stack.
+    pop_and_check_object(obj);
+  }
+
+  // Displacement is 0, so any store instruction will be fine on any CPU.
+  const Address field(obj, off);
+
+  Label    is_Byte, is_Bool, is_Int, is_Short, is_Char,
+           is_Long, is_Float, is_Object, is_Double;
+  Label    is_badState8, is_badState9, is_badStateA, is_badStateB,
+           is_badStateC, is_badStateD, is_badStateE, is_badStateF,
+           is_badState;
+  Label    branchTable, atosHandler,  Done;
+  Register br_tab       = Z_R1_scratch;
+  bool     do_rewrite   = !is_static && (rc == may_rewrite);
+  bool     dont_rewrite = (is_static || (rc == may_not_rewrite));
+
+  assert(do_rewrite == !dont_rewrite, "Oops, code is not fit for that");
+  assert(btos == 0, "change code, btos != 0");
+
+  // Calculate branch table size. Generated code size depends on ASSERT and on bytecode rewriting.
+  // NOTE(review): both arms of this #ifdef are identical; contrast with
+  // putfield_or_static where the non-ASSERT table is larger. Confirm the
+  // intended non-ASSERT entry size before simplifying.
+#ifdef ASSERT
+  const unsigned int bsize = dont_rewrite ? BTB_MINSIZE*1 : BTB_MINSIZE*4;
+#else
+  const unsigned int bsize = dont_rewrite ? BTB_MINSIZE*1 : BTB_MINSIZE*4;
+#endif
+
+  // Calculate address of branch table entry and branch there.
+  {
+    // Extract the tos state bits from flags and scale them by the branch
+    // table entry size, then branch into the table.
+    const int bit_shift = exact_log2(bsize); // Size of each branch table entry.
+    const int r_bitpos  = 63 - bit_shift;
+    const int l_bitpos  = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1;
+    const int n_rotate  = (bit_shift-ConstantPoolCacheEntry::tos_state_shift);
+    __ z_larl(br_tab, branchTable);
+    __ rotate_then_insert(flags, flags, l_bitpos, r_bitpos, n_rotate, true);
+  }
+  __ z_bc(Assembler::bcondAlways, 0, flags, br_tab);
+
+  __ align_address(bsize);
+  BIND(branchTable);
+
+  // btos
+  BTB_BEGIN(is_Byte, bsize, "getfield_or_static:is_Byte");
+  __ z_lb(Z_tos, field);
+  __ push(btos);
+  // Rewrite bytecode to be faster.
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_bgetfield, bc, Z_ARG5);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Byte, bsize, "getfield_or_static:is_Byte");
+
+  // ztos
+  BTB_BEGIN(is_Bool, bsize, "getfield_or_static:is_Bool");
+  __ z_lb(Z_tos, field);
+  __ push(ztos);
+  // Rewrite bytecode to be faster.
+  if (do_rewrite) {
+    // Use btos rewriting, no truncating to t/f bit is needed for getfield.
+    patch_bytecode(Bytecodes::_fast_bgetfield, bc, Z_ARG5);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Bool, bsize, "getfield_or_static:is_Bool");
+
+  // ctos
+  BTB_BEGIN(is_Char, bsize, "getfield_or_static:is_Char");
+  // Load into 64 bits, works on all CPUs.
+  __ z_llgh(Z_tos, field);
+  __ push(ctos);
+  // Rewrite bytecode to be faster.
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_cgetfield, bc, Z_ARG5);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Char, bsize, "getfield_or_static:is_Char");
+
+  // stos
+  BTB_BEGIN(is_Short, bsize, "getfield_or_static:is_Short");
+  __ z_lh(Z_tos, field);
+  __ push(stos);
+  // Rewrite bytecode to be faster.
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_sgetfield, bc, Z_ARG5);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Short, bsize, "getfield_or_static:is_Short");
+
+  // itos
+  BTB_BEGIN(is_Int, bsize, "getfield_or_static:is_Int");
+  __ mem2reg_opt(Z_tos, field, false);
+  __ push(itos);
+  // Rewrite bytecode to be faster.
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_igetfield, bc, Z_ARG5);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Int, bsize, "getfield_or_static:is_Int");
+
+  // ltos
+  BTB_BEGIN(is_Long, bsize, "getfield_or_static:is_Long");
+  __ mem2reg_opt(Z_tos, field);
+  __ push(ltos);
+  // Rewrite bytecode to be faster.
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_lgetfield, bc, Z_ARG5);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Long, bsize, "getfield_or_static:is_Long");
+
+  // ftos
+  BTB_BEGIN(is_Float, bsize, "getfield_or_static:is_Float");
+  __ mem2freg_opt(Z_ftos, field, false);
+  __ push(ftos);
+  // Rewrite bytecode to be faster.
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_fgetfield, bc, Z_ARG5);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Float, bsize, "getfield_or_static:is_Float");
+
+  // dtos
+  BTB_BEGIN(is_Double, bsize, "getfield_or_static:is_Double");
+  __ mem2freg_opt(Z_ftos, field);
+  __ push(dtos);
+  // Rewrite bytecode to be faster.
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_dgetfield, bc, Z_ARG5);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Double, bsize, "getfield_or_static:is_Double");
+
+  // atos
+  BTB_BEGIN(is_Object, bsize, "getfield_or_static:is_Object");
+  __ z_bru(atosHandler);
+  BTB_END(is_Object, bsize, "getfield_or_static:is_Object");
+
+  // Bad state detection comes at no extra runtime cost.
+  BTB_BEGIN(is_badState8, bsize, "getfield_or_static:is_badState8");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badState8, bsize, "getfield_or_static:is_badState8");
+  BTB_BEGIN(is_badState9, bsize, "getfield_or_static:is_badState9");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badState9, bsize, "getfield_or_static:is_badState9");
+  BTB_BEGIN(is_badStateA, bsize, "getfield_or_static:is_badStateA");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateA, bsize, "getfield_or_static:is_badStateA");
+  BTB_BEGIN(is_badStateB, bsize, "getfield_or_static:is_badStateB");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateB, bsize, "getfield_or_static:is_badStateB");
+  BTB_BEGIN(is_badStateC, bsize, "getfield_or_static:is_badStateC");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateC, bsize, "getfield_or_static:is_badStateC");
+  BTB_BEGIN(is_badStateD, bsize, "getfield_or_static:is_badStateD");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateD, bsize, "getfield_or_static:is_badStateD");
+  BTB_BEGIN(is_badStateE, bsize, "getfield_or_static:is_badStateE");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateE, bsize, "getfield_or_static:is_badStateE");
+  BTB_BEGIN(is_badStateF, bsize, "getfield_or_static:is_badStateF");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateF, bsize, "getfield_or_static:is_badStateF");
+
+  __ align_address(64);
+  BIND(is_badState);  // Do this outside branch table. Needs a lot of space.
+  {
+    // b_off/e_off are currently unused — presumably kept for code-size
+    // measurement during bring-up; confirm before removing.
+    unsigned int b_off = __ offset();
+    if (is_static) {
+      __ stop_static("Bad state in getstatic");
+    } else {
+      __ stop_static("Bad state in getfield");
+    }
+    unsigned int e_off = __ offset();
+  }
+
+  __ align_address(64);
+  BIND(atosHandler);  // Oops are really complicated to handle.
+                      // There is a lot of code generated.
+                      // Therefore: generate the handler outside of branch table.
+                      // There is no performance penalty. The additional branch
+                      // to here is compensated for by the fallthru to "Done".
+  {
+    // b_off/e_off are currently unused — see note above.
+    unsigned int b_off = __ offset();
+    __ load_heap_oop(Z_tos, field);
+    __ verify_oop(Z_tos);
+    __ push(atos);
+    if (do_rewrite) {
+      patch_bytecode(Bytecodes::_fast_agetfield, bc, Z_ARG5);
+    }
+    unsigned int e_off = __ offset();
+  }
+
+  BIND(Done);
+}
+
+// getfield: instance field read, rewriting allowed.
+void TemplateTable::getfield(int byte_no) {
+  BLOCK_COMMENT("getfield  {");
+  getfield_or_static(byte_no, false);
+  BLOCK_COMMENT("} getfield");
+}
+
+// nofast_getfield: instance field read with bytecode rewriting suppressed.
+void TemplateTable::nofast_getfield(int byte_no) {
+  getfield_or_static(byte_no, false, may_not_rewrite);
+}
+
+// getstatic: static field read.
+void TemplateTable::getstatic(int byte_no) {
+  BLOCK_COMMENT("getstatic {");
+  getfield_or_static(byte_no, true);
+  BLOCK_COMMENT("} getstatic");
+}
+
+// The registers cache and index expected to be set before call.  The
+// function may destroy various registers, just not the cache and
+// index registers.
+// Posts a JVMTI field-modification event when a modification watch is set;
+// otherwise generates only a cheap counter check.
+void TemplateTable::jvmti_post_field_mod(Register cache,
+                                         Register index, bool is_static) {
+  transition(vtos, vtos);
+
+  if (!JvmtiExport::can_post_field_modification()) {
+    return;
+  }
+
+  BLOCK_COMMENT("jvmti_post_field_mod {");
+
+  // Check to see if a field modification watch has been set before
+  // we take the time to call into the VM.
+  Label    L1;
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+  assert_different_registers(cache, index, Z_tos);
+
+  __ load_absolute_address(Z_tos, (address)JvmtiExport::get_field_modification_count_addr());
+  __ load_and_test_int(Z_R0, Address(Z_tos));
+  __ z_brz(L1);
+
+  // Index is returned as byte offset, do not shift!
+  __ get_cache_and_index_at_bcp(Z_ARG3, Z_R1_scratch, 1);
+
+  if (is_static) {
+    // Life is simple. Null out the object pointer.
+    __ clear_reg(Z_ARG2, true, false);   // Don't set CC.
+  } else {
+    // Life is harder. The stack holds the value on top, followed by
+    // the object. We don't know the size of the value, though. It
+    // could be one or two words depending on its type. As a result,
+    // we must find the type to determine where the object is.
+    // Load the low half of the flags word and extract the tos state.
+    __ mem2reg_opt(Z_ARG4,
+                   Address(Z_ARG3, Z_R1_scratch,
+                           in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()) +
+                           (BytesPerLong - BytesPerInt)),
+                   false);
+    __ z_srl(Z_ARG4, ConstantPoolCacheEntry::tos_state_shift);
+    // Make sure we don't need to mask Z_ARG4 for tos_state after the above shift.
+    ConstantPoolCacheEntry::verify_tos_state_shift();
+    __ mem2reg_opt(Z_ARG2, at_tos(1));  // Initially assume a one word jvalue.
+
+    NearLabel   load_dtos, cont;
+
+    // ltos and dtos occupy two stack slots; re-load the object from one
+    // slot further down for those.
+    __ compareU32_and_branch(Z_ARG4, (intptr_t) ltos,
+                              Assembler::bcondNotEqual, load_dtos);
+    __ mem2reg_opt(Z_ARG2, at_tos(2)); // ltos (two word jvalue)
+    __ z_bru(cont);
+
+    __ bind(load_dtos);
+    __ compareU32_and_branch(Z_ARG4, (intptr_t)dtos, Assembler::bcondNotEqual, cont);
+    __ mem2reg_opt(Z_ARG2, at_tos(2)); // dtos (two word jvalue)
+
+    __ bind(cont);
+  }
+  // cache entry pointer
+
+  __ add2reg_with_index(Z_ARG3, in_bytes(cp_base_offset), Z_ARG3, Z_R1_scratch);
+
+  // object(tos)
+  __ load_address(Z_ARG4, Address(Z_esp, Interpreter::stackElementSize));
+  // Z_ARG2: object pointer set up above (NULL if static)
+  // Z_ARG3: cache entry pointer
+  // Z_ARG4: jvalue object on the stack
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
+             Z_ARG2, Z_ARG3, Z_ARG4);
+  // The VM call may have blocked; reload cache and index for the caller.
+  __ get_cache_and_index_at_bcp(cache, index, 1);
+
+  __ bind(L1);
+  BLOCK_COMMENT("} jvmti_post_field_mod");
+}
+
+
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
+  transition(vtos, vtos);
+
+  const Register cache         = Z_tmp_1;
+  const Register index         = Z_ARG5;
+  const Register obj           = Z_tmp_1;
+  const Register off           = Z_tmp_2;
+  const Register flags         = Z_R1_scratch;
+  const Register br_tab        = Z_ARG5;
+  const Register bc            = Z_tmp_1;
+  const Register oopStore_tmp1 = Z_R1_scratch;
+  const Register oopStore_tmp2 = Z_ARG5;
+  const Register oopStore_tmp3 = Z_R0_scratch;
+
+  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
+  jvmti_post_field_mod(cache, index, is_static);
+  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+  // begin of life for:
+  //   obj, off   long life range
+  //   flags      short life range, up to branch into branch table
+  // end of life for:
+  //   cache, index
+
+  const Address field(obj, off);
+  Label is_Byte, is_Bool, is_Int, is_Short, is_Char,
+        is_Long, is_Float, is_Object, is_Double;
+  Label is_badState8, is_badState9, is_badStateA, is_badStateB,
+        is_badStateC, is_badStateD, is_badStateE, is_badStateF,
+        is_badState;
+  Label branchTable, atosHandler, Done;
+  bool  do_rewrite   = !is_static && (rc == may_rewrite);
+  bool  dont_rewrite = (is_static || (rc == may_not_rewrite));
+
+  assert(do_rewrite == !dont_rewrite, "Oops, code is not fit for that");
+
+  assert(btos == 0, "change code, btos != 0");
+
+#ifdef ASSERT
+  const unsigned int bsize = is_static ? BTB_MINSIZE*1 : BTB_MINSIZE*4;
+#else
+  const unsigned int bsize = is_static ? BTB_MINSIZE*1 : BTB_MINSIZE*8;
+#endif
+
+  // Calculate address of branch table entry and branch there.
+  {
+    const int bit_shift = exact_log2(bsize); // Size of each branch table entry.
+    const int r_bitpos  = 63 - bit_shift;
+    const int l_bitpos  = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1;
+    const int n_rotate  = (bit_shift-ConstantPoolCacheEntry::tos_state_shift);
+    __ z_larl(br_tab, branchTable);
+    __ rotate_then_insert(flags, flags, l_bitpos, r_bitpos, n_rotate, true);
+    __ z_bc(Assembler::bcondAlways, 0, flags, br_tab);
+  }
+  // end of life for:
+  //   flags, br_tab
+
+  __ align_address(bsize);
+  BIND(branchTable);
+
+  // btos
+  BTB_BEGIN(is_Byte, bsize, "putfield_or_static:is_Byte");
+  __ pop(btos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ z_stc(Z_tos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_bputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Byte, bsize, "putfield_or_static:is_Byte");
+
+  // ztos
+  BTB_BEGIN(is_Bool, bsize, "putfield_or_static:is_Bool");
+  __ pop(ztos);
+  if (do_rewrite) {
+    pop_and_check_object(obj);
+  }
+  __ z_nilf(Z_tos, 0x1);
+  __ z_stc(Z_tos, field);
+  if (!is_static) {
+    patch_bytecode(Bytecodes::_fast_zputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Bool, bsize, "putfield_or_static:is_Bool");
+
+  // ctos
+  BTB_BEGIN(is_Char, bsize, "putfield_or_static:is_Char");
+  __ pop(ctos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ z_sth(Z_tos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_cputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Char, bsize, "putfield_or_static:is_Char");
+
+  // stos
+  BTB_BEGIN(is_Short, bsize, "putfield_or_static:is_Short");
+  __ pop(stos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ z_sth(Z_tos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_sputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Short, bsize, "putfield_or_static:is_Short");
+
+  // itos
+  BTB_BEGIN(is_Int, bsize, "putfield_or_static:is_Int");
+  __ pop(itos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ reg2mem_opt(Z_tos, field, false);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_iputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Int, bsize, "putfield_or_static:is_Int");
+
+  // ltos
+  BTB_BEGIN(is_Long, bsize, "putfield_or_static:is_Long");
+  __ pop(ltos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ reg2mem_opt(Z_tos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_lputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Long, bsize, "putfield_or_static:is_Long");
+
+  // ftos
+  BTB_BEGIN(is_Float, bsize, "putfield_or_static:is_Float");
+  __ pop(ftos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ freg2mem_opt(Z_ftos, field, false);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_fputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Float, bsize, "putfield_or_static:is_Float");
+
+  // dtos
+  BTB_BEGIN(is_Double, bsize, "putfield_or_static:is_Double");
+  __ pop(dtos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ freg2mem_opt(Z_ftos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_dputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Double, bsize, "putfield_or_static:is_Double");
+
+  // atos
+  BTB_BEGIN(is_Object, bsize, "putfield_or_static:is_Object");
+  __ z_bru(atosHandler);
+  BTB_END( is_Object, bsize, "putfield_or_static:is_Object");
+
+  // Bad state detection comes at no extra runtime cost.
+  BTB_BEGIN(is_badState8, bsize, "putfield_or_static:is_badState8");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badState8, bsize, "putfield_or_static:is_badState8");
+  BTB_BEGIN(is_badState9, bsize, "putfield_or_static:is_badState9");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badState9, bsize, "putfield_or_static:is_badState9");
+  BTB_BEGIN(is_badStateA, bsize, "putfield_or_static:is_badStateA");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateA, bsize, "putfield_or_static:is_badStateA");
+  BTB_BEGIN(is_badStateB, bsize, "putfield_or_static:is_badStateB");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateB, bsize, "putfield_or_static:is_badStateB");
+  BTB_BEGIN(is_badStateC, bsize, "putfield_or_static:is_badStateC");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateC, bsize, "putfield_or_static:is_badStateC");
+  BTB_BEGIN(is_badStateD, bsize, "putfield_or_static:is_badStateD");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateD, bsize, "putfield_or_static:is_badStateD");
+  BTB_BEGIN(is_badStateE, bsize, "putfield_or_static:is_badStateE");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateE, bsize, "putfield_or_static:is_badStateE");
+  BTB_BEGIN(is_badStateF, bsize, "putfield_or_static:is_badStateF");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateF, bsize, "putfield_or_static:is_badStateF");
+
+  __ align_address(64);
+  BIND(is_badState);  // Do this outside branch table. Needs a lot of space.
+  {
+    unsigned int b_off = __ offset();
+    if (is_static) __ stop_static("Bad state in putstatic");
+    else            __ stop_static("Bad state in putfield");
+    unsigned int e_off = __ offset();
+  }
+
+  __ align_address(64);
+  BIND(atosHandler);  // Oops are really complicated to handle.
+                      // There is a lot of code generated.
+                      // Therefore: generate the handler outside of branch table.
+                      // There is no performance penalty. The additional branch
+                      // to here is compensated for by the fallthru to "Done".
+  {
+    unsigned int b_off = __ offset();
+    __ pop(atos);
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    // Store into the field
+    do_oop_store(_masm, obj, off, Z_tos, false,
+                 oopStore_tmp1, oopStore_tmp2, oopStore_tmp3, _bs->kind(), false);
+    if (do_rewrite) {
+      patch_bytecode(Bytecodes::_fast_aputfield, bc, Z_ARG5, true, byte_no);
+    }
+    // __ z_bru(Done); // fallthru
+    unsigned int e_off = __ offset();
+  }
+
+  BIND(Done);
+
+  // Check for volatile store.
+  Label notVolatile;
+
+  __ testbit(Z_ARG4, ConstantPoolCacheEntry::is_volatile_shift);
+  __ z_brz(notVolatile);
+  __ z_fence();
+
+  BIND(notVolatile);
+}
+
+// putfield bytecode: non-static field store. Delegates to the shared
+// putfield_or_static generator with is_static == false and the default
+// rewrite control, which permits patching to a _fast_xputfield bytecode.
+void TemplateTable::putfield(int byte_no) {
+  BLOCK_COMMENT("putfield  {");
+  putfield_or_static(byte_no, false);
+  BLOCK_COMMENT("} putfield");
+}
+
+// Same as putfield, but with rc == may_not_rewrite: the generated code must
+// never patch the bytecode to a fast variant.
+void TemplateTable::nofast_putfield(int byte_no) {
+  putfield_or_static(byte_no, false, may_not_rewrite);
+}
+
+// putstatic bytecode: static field store. Uses the shared generator with
+// is_static == true (no receiver on the stack, no bytecode rewriting of the
+// receiver handling path).
+void TemplateTable::putstatic(int byte_no) {
+  BLOCK_COMMENT("putstatic {");
+  putfield_or_static(byte_no, true);
+  BLOCK_COMMENT("} putstatic");
+}
+
+// Generate the JVMTI field-modification notification for the _fast_xputfield
+// bytecodes. Emits nothing if the JVM cannot post field modifications at all;
+// otherwise emits a runtime check of the modification-watch counter and, when
+// a watch is set, calls InterpreterRuntime::post_field_modification.
+//
+// Push the tos value back to the stack.
+// gc will find oops there and update.
+void TemplateTable::jvmti_post_fast_field_mod() {
+
+  if (!JvmtiExport::can_post_field_modification()) {
+    return;
+  }
+
+  // Check to see if a field modification watch has been set before
+  // we take the time to call into the VM.
+  Label   exit;
+
+  BLOCK_COMMENT("jvmti_post_fast_field_mod {");
+
+  // Fast exit if no field-modification watch is currently set.
+  __ load_absolute_address(Z_R1_scratch,
+                           (address) JvmtiExport::get_field_modification_count_addr());
+  __ load_and_test_int(Z_R0_scratch, Address(Z_R1_scratch));
+  __ z_brz(exit);
+
+  Register obj = Z_tmp_1;
+
+  __ pop_ptr(obj);                  // Copy the object pointer from tos.
+  __ verify_oop(obj);
+  __ push_ptr(obj);                 // Put the object pointer back on tos.
+
+  // Save tos values before call_VM() clobbers them. Since we have
+  // to do it for every data type, we use the saved values as the
+  // jvalue object.
+  switch (bytecode()) {          // Load values into the jvalue object.
+    case Bytecodes::_fast_aputfield:
+      __ push_ptr(Z_tos);
+      break;
+    case Bytecodes::_fast_bputfield:
+    case Bytecodes::_fast_zputfield:
+    case Bytecodes::_fast_sputfield:
+    case Bytecodes::_fast_cputfield:
+    case Bytecodes::_fast_iputfield:
+      __ push_i(Z_tos);
+      break;
+    case Bytecodes::_fast_dputfield:
+      __ push_d();
+      break;
+    case Bytecodes::_fast_fputfield:
+      __ push_f();
+      break;
+    case Bytecodes::_fast_lputfield:
+      __ push_l(Z_tos);
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+
+  // jvalue on the stack
+  __ load_address(Z_ARG4, Address(Z_esp, Interpreter::stackElementSize));
+  // Access constant pool cache entry.
+  __ get_cache_entry_pointer_at_bcp(Z_ARG3, Z_tos, 1);
+  __ verify_oop(obj);
+
+  // obj   : object pointer copied above
+  // Z_ARG3: cache entry pointer
+  // Z_ARG4: jvalue object on the stack
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
+             obj, Z_ARG3, Z_ARG4);
+
+  // Pop the saved jvalue back into tos; cases mirror the save switch above.
+  switch (bytecode()) {             // Restore tos values.
+    case Bytecodes::_fast_aputfield:
+      __ pop_ptr(Z_tos);
+      break;
+    case Bytecodes::_fast_bputfield:
+    case Bytecodes::_fast_zputfield:
+    case Bytecodes::_fast_sputfield:
+    case Bytecodes::_fast_cputfield:
+    case Bytecodes::_fast_iputfield:
+      __ pop_i(Z_tos);
+      break;
+    case Bytecodes::_fast_dputfield:
+      __ pop_d(Z_ftos);
+      break;
+    case Bytecodes::_fast_fputfield:
+      __ pop_f(Z_ftos);
+      break;
+    case Bytecodes::_fast_lputfield:
+      __ pop_l(Z_tos);
+      break;
+  }
+
+  __ bind(exit);
+  BLOCK_COMMENT("} jvmti_post_fast_field_mod");
+}
+
+// Generate code for the rewritten _fast_xputfield bytecodes. Unlike the slow
+// putfield path there is no branch table: the bytecode itself already encodes
+// the field type, so a single store sequence is emitted per template.
+void TemplateTable::fast_storefield(TosState state) {
+  transition(state, vtos);
+
+  ByteSize base = ConstantPoolCache::base_offset();
+  jvmti_post_fast_field_mod();
+
+  // Access constant pool cache.
+  Register cache = Z_tmp_1;
+  Register index = Z_tmp_2;
+  Register flags = Z_ARG5;
+
+  // Index comes in bytes, don't shift afterwards!
+  __ get_cache_and_index_at_bcp(cache, index, 1);
+
+  // Test for volatile.
+  // Flags must survive the do_oop_store call below, hence the assert that
+  // the chosen register is not volatile across (leaf) runtime calls.
+  assert(!flags->is_volatile(), "do_oop_store could perform leaf RT call");
+  __ z_lg(flags, Address(cache, index, base + ConstantPoolCacheEntry::flags_offset()));
+
+  // Replace index with field offset from cache entry.
+  Register field_offset = index;
+  __ z_lg(field_offset, Address(cache, index, base + ConstantPoolCacheEntry::f2_offset()));
+
+  // Get object from stack.
+  Register   obj = cache;
+
+  pop_and_check_object(obj);
+
+  // field address
+  const Address   field(obj, field_offset);
+
+  // access field
+  switch (bytecode()) {
+    case Bytecodes::_fast_aputfield:
+      // Oop store with card-mark/barrier handling.
+      do_oop_store(_masm, obj, field_offset, Z_tos, false,
+                   Z_ARG2, Z_ARG3, Z_ARG4, _bs->kind(), false);
+      break;
+    case Bytecodes::_fast_lputfield:
+      __ reg2mem_opt(Z_tos, field);
+      break;
+    case Bytecodes::_fast_iputfield:
+      __ reg2mem_opt(Z_tos, field, false);
+      break;
+    case Bytecodes::_fast_zputfield:
+      // Normalize boolean to 0/1 before the byte store.
+      __ z_nilf(Z_tos, 0x1);
+      // fall through to bputfield
+    case Bytecodes::_fast_bputfield:
+      __ z_stc(Z_tos, field);
+      break;
+    case Bytecodes::_fast_sputfield:
+      // fall through
+    case Bytecodes::_fast_cputfield:
+      __ z_sth(Z_tos, field);
+      break;
+    case Bytecodes::_fast_fputfield:
+      __ freg2mem_opt(Z_ftos, field, false);
+      break;
+    case Bytecodes::_fast_dputfield:
+      __ freg2mem_opt(Z_ftos, field);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  //  Check for volatile store.
+  Label notVolatile;
+
+  __ testbit(flags, ConstantPoolCacheEntry::is_volatile_shift);
+  __ z_brz(notVolatile);
+  __ z_fence();
+
+  __ bind(notVolatile);
+}
+
+// Generate code for the rewritten _fast_xgetfield bytecodes: load a field of
+// the receiver in Z_tos. The bytecode encodes the field type, so each case
+// emits exactly one typed load.
+void TemplateTable::fast_accessfield(TosState state) {
+  transition(atos, state);
+
+  Register obj = Z_tos;
+
+  // Do the JVMTI work here to avoid disturbing the register state below
+  if (JvmtiExport::can_post_field_access()) {
+    // Check to see if a field access watch has been set before we
+    // take the time to call into the VM.
+    Label cont;
+
+    __ load_absolute_address(Z_R1_scratch,
+                             (address)JvmtiExport::get_field_access_count_addr());
+    __ load_and_test_int(Z_R0_scratch, Address(Z_R1_scratch));
+    __ z_brz(cont);
+
+    // Access constant pool cache entry.
+
+    __ get_cache_entry_pointer_at_bcp(Z_ARG3, Z_tmp_1, 1);
+    __ verify_oop(obj);
+    __ push_ptr(obj);  // Save object pointer before call_VM() clobbers it.
+    __ z_lgr(Z_ARG2, obj);
+
+    // Z_ARG2: object pointer copied above
+    // Z_ARG3: cache entry pointer
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
+               Z_ARG2, Z_ARG3);
+    __ pop_ptr(obj); // Restore object pointer.
+
+    __ bind(cont);
+  }
+
+  // Access constant pool cache.
+  Register   cache = Z_tmp_1;
+  Register   index = Z_tmp_2;
+
+  // Index comes in bytes, don't shift afterwards!
+  __ get_cache_and_index_at_bcp(cache, index, 1);
+  // Replace index with field offset from cache entry.
+  __ mem2reg_opt(index,
+                 Address(cache, index,
+                         ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
+
+  __ verify_oop(obj);
+  __ null_check(obj);
+
+  Address field(obj, index);
+
+  // access field
+  switch (bytecode()) {
+    case Bytecodes::_fast_agetfield:
+      __ load_heap_oop(Z_tos, field);
+      __ verify_oop(Z_tos);
+      return;
+    case Bytecodes::_fast_lgetfield:
+      __ mem2reg_opt(Z_tos, field);
+      return;
+    case Bytecodes::_fast_igetfield:
+      __ mem2reg_opt(Z_tos, field, false);
+      return;
+    case Bytecodes::_fast_bgetfield:
+      __ z_lb(Z_tos, field);
+      return;
+    case Bytecodes::_fast_sgetfield:
+      __ z_lh(Z_tos, field);
+      return;
+    case Bytecodes::_fast_cgetfield:
+      __ z_llgh(Z_tos, field);   // Load into 64 bits, works on all CPUs.
+      return;
+    case Bytecodes::_fast_fgetfield:
+      __ mem2freg_opt(Z_ftos, field, false);
+      return;
+    case Bytecodes::_fast_dgetfield:
+      __ mem2freg_opt(Z_ftos, field);
+      return;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Generate code for the fused fast access bytecodes (aload_0 followed by a
+// fast getfield): load local 0 as the receiver and read the field directly.
+void TemplateTable::fast_xaccess(TosState state) {
+  transition(vtos, state);
+
+  Register receiver = Z_tos;
+  // Get receiver.
+  __ mem2reg_opt(Z_tos, aaddress(0));
+
+  // Access constant pool cache.
+  Register cache = Z_tmp_1;
+  Register index = Z_tmp_2;
+
+  // Index comes in bytes, don't shift afterwards!
+  // Cache index is at bcp offset 2 because the fused bytecode pair is
+  // [aload_0, fast_xgetfield, cp index].
+  __ get_cache_and_index_at_bcp(cache, index, 2);
+  // Replace index with field offset from cache entry.
+  __ mem2reg_opt(index,
+                 Address(cache, index,
+                         ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
+
+  // Make sure exception is reported in correct bcp range (getfield is
+  // next instruction).
+  __ add2reg(Z_bcp, 1);
+  __ null_check(receiver);
+  switch (state) {
+    case itos:
+      __ mem2reg_opt(Z_tos, Address(receiver, index), false);
+      break;
+    case atos:
+      __ load_heap_oop(Z_tos, Address(receiver, index));
+      __ verify_oop(Z_tos);
+      break;
+    case ftos:
+      __ mem2freg_opt(Z_ftos, Address(receiver, index));
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  // Reset bcp to original position.
+  __ add2reg(Z_bcp, -1);
+}
+
+//-----------------------------------------------------------------------------
+// Calls
+
+// Common setup for all invoke bytecodes: resolve the constant pool cache
+// entry, optionally push the appendix (invokedynamic/invokehandle), load the
+// receiver, and load the bytecode's return-entry address into Z_R14.
+// On exit: method/index/recv/flags are filled as requested by the caller.
+void TemplateTable::prepare_invoke(int byte_no,
+                                   Register method,  // linked method (or i-klass)
+                                   Register index,   // itable index, MethodType, etc.
+                                   Register recv,    // If caller wants to see it.
+                                   Register flags) { // If caller wants to test it.
+  // Determine flags.
+  const Bytecodes::Code code = bytecode();
+  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
+  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
+  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
+  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
+  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
+  const bool load_receiver       = (recv != noreg);
+  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+
+  // Setup registers & access constant pool cache.
+  if (recv  == noreg) { recv  = Z_ARG1; }
+  if (flags == noreg) { flags = Z_ARG2; }
+  assert_different_registers(method, Z_R14, index, recv, flags);
+
+  BLOCK_COMMENT("prepare_invoke {");
+
+  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
+
+  // Maybe push appendix to arguments.
+  if (is_invokedynamic || is_invokehandle) {
+    Label L_no_push;
+    Register resolved_reference = Z_R1_scratch;
+    __ testbit(flags, ConstantPoolCacheEntry::has_appendix_shift);
+    __ z_bfalse(L_no_push);
+    // Push the appendix as a trailing parameter.
+    // This must be done before we get the receiver,
+    // since the parameter_size includes it.
+    __ load_resolved_reference_at_index(resolved_reference, index);
+    __ verify_oop(resolved_reference);
+    __ push_ptr(resolved_reference);  // Push appendix (MethodType, CallSite, etc.).
+    __ bind(L_no_push);
+  }
+
+  // Load receiver if needed (after appendix is pushed so parameter size is correct).
+  if (load_receiver) {
+    assert(!is_invokedynamic, "");
+    // recv := int2long(flags & ConstantPoolCacheEntry::parameter_size_mask) << 3
+    // Flags is zero-extended int2long when loaded during load_invoke_cp_cache_entry().
+    // Only the least significant byte (psize) of flags is used.
+    {
+      // RISBG: extract the parameter-size byte from flags and scale it by the
+      // stack element size in a single rotate-then-insert instruction.
+      const unsigned int logSES = Interpreter::logStackElementSize;
+      const int bit_shift = logSES;
+      const int r_bitpos  = 63 - bit_shift;
+      const int l_bitpos  = r_bitpos - ConstantPoolCacheEntry::parameter_size_bits + 1;
+      const int n_rotate  = bit_shift;
+      assert(ConstantPoolCacheEntry::parameter_size_mask == 255, "adapt bitpositions");
+      __ rotate_then_insert(recv, flags, l_bitpos, r_bitpos, n_rotate, true);
+    }
+    // Recv now contains #arguments * StackElementSize.
+
+    Address recv_addr(Z_esp, recv);
+    __ z_lg(recv, recv_addr);
+    __ verify_oop(recv);
+  }
+
+  // Compute return type.
+  // ret_type is used by callers (invokespecial, invokestatic) at least.
+  Register ret_type = Z_R1_scratch;
+  assert_different_registers(ret_type, method);
+
+  const address table_addr = (address)Interpreter::invoke_return_entry_table_for(code);
+  __ load_absolute_address(Z_R14, table_addr);
+
+  {
+    // Scale the tos state from flags into a table offset, again via a single
+    // rotate-then-insert.
+    const int bit_shift = LogBytesPerWord;           // Size of each table entry.
+    const int r_bitpos  = 63 - bit_shift;
+    const int l_bitpos  = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1;
+    const int n_rotate  = bit_shift-ConstantPoolCacheEntry::tos_state_shift;
+    __ rotate_then_insert(ret_type, flags, l_bitpos, r_bitpos, n_rotate, true);
+    // Make sure we don't need to mask flags for tos_state after the above shift.
+    ConstantPoolCacheEntry::verify_tos_state_shift();
+  }
+
+    __ z_lg(Z_R14, Address(Z_R14, ret_type)); // Load return address.
+  BLOCK_COMMENT("} prepare_invoke");
+}
+
+
+// Dispatch a virtual call: either a final method call (f2 holds the Method*)
+// or a vtable-indexed dispatch (f2 holds the vtable index). Does not return
+// to the caller at runtime -- both paths end in jump_from_interpreted.
+void TemplateTable::invokevirtual_helper(Register index,
+                                         Register recv,
+                                         Register flags) {
+  // Uses temporary registers Z_tmp_2, Z_ARG4.
+  assert_different_registers(index, recv, Z_tmp_2, Z_ARG4);
+
+  // Test for an invoke of a final method.
+  Label notFinal;
+
+  BLOCK_COMMENT("invokevirtual_helper {");
+
+  __ testbit(flags, ConstantPoolCacheEntry::is_vfinal_shift);
+  __ z_brz(notFinal);
+
+  const Register method = index;  // Method must be Z_ARG3.
+  assert(method == Z_ARG3, "method must be second argument for interpreter calling convention");
+
+  // Do the call - the index is actually the method to call.
+  // That is, f2 is a vtable index if !is_vfinal, else f2 is a method.
+
+  // It's final, need a null check here!
+  __ null_check(recv);
+
+  // Profile this call.
+  __ profile_final_call(Z_tmp_2);
+  __ profile_arguments_type(Z_tmp_2, method, Z_ARG5, true); // Argument type profiling.
+  __ jump_from_interpreted(method, Z_tmp_2);
+
+  __ bind(notFinal);
+
+  // Get receiver klass.
+  // Implicit null check via the klass-field load.
+  __ null_check(recv, Z_R0_scratch, oopDesc::klass_offset_in_bytes());
+  __ load_klass(Z_tmp_2, recv);
+
+  // Profile this call.
+  __ profile_virtual_call(Z_tmp_2, Z_ARG4, Z_ARG5);
+
+  // Get target method & entry point.
+  // Scale vtable index to a byte offset into the vtable.
+  __ z_sllg(index, index, exact_log2(vtableEntry::size_in_bytes()));
+  __ mem2reg_opt(method,
+                 Address(Z_tmp_2, index,
+                         InstanceKlass::vtable_start_offset() + in_ByteSize(vtableEntry::method_offset_in_bytes())));
+  __ profile_arguments_type(Z_ARG4, method, Z_ARG5, true);
+  __ jump_from_interpreted(method, Z_ARG4);
+  BLOCK_COMMENT("} invokevirtual_helper");
+}
+
+// invokevirtual bytecode: resolve via prepare_invoke, then dispatch through
+// invokevirtual_helper (final-method fast path or vtable lookup).
+void TemplateTable::invokevirtual(int byte_no) {
+  transition(vtos, vtos);
+
+  assert(byte_no == f2_byte, "use this argument");
+  prepare_invoke(byte_no,
+                 Z_ARG3,  // method or vtable index
+                 noreg,   // unused itable index
+                 Z_ARG1,  // recv
+                 Z_ARG2); // flags
+
+  // Z_ARG3 : index
+  // Z_ARG1 : receiver
+  // Z_ARG2 : flags
+  invokevirtual_helper(Z_ARG3, Z_ARG1, Z_ARG2);
+}
+
+// invokespecial bytecode: statically bound call (constructors, private and
+// super methods). The resolved Method* comes from f1; the receiver is only
+// needed for the null check.
+void TemplateTable::invokespecial(int byte_no) {
+  transition(vtos, vtos);
+
+  assert(byte_no == f1_byte, "use this argument");
+  Register Rmethod = Z_tmp_2;
+  prepare_invoke(byte_no, Rmethod, noreg, // Get f1 method.
+                 Z_ARG3);   // Get receiver also for null check.
+  __ verify_oop(Z_ARG3);
+  __ null_check(Z_ARG3);
+  // Do the call.
+  __ profile_call(Z_ARG2);
+  __ profile_arguments_type(Z_ARG2, Rmethod, Z_ARG5, false);
+  __ jump_from_interpreted(Rmethod, Z_R1_scratch);
+}
+
+// invokestatic bytecode: statically bound call without a receiver. The
+// resolved Method* comes from f1.
+void TemplateTable::invokestatic(int byte_no) {
+  transition(vtos, vtos);
+
+  assert(byte_no == f1_byte, "use this argument");
+  Register Rmethod = Z_tmp_2;
+  prepare_invoke(byte_no, Rmethod);   // Get f1 method.
+  // Do the call.
+  __ profile_call(Z_ARG2);
+  __ profile_arguments_type(Z_ARG2, Rmethod, Z_ARG5, false);
+  __ jump_from_interpreted(Rmethod, Z_R1_scratch);
+}
+
+// Outdated feature, and we don't support it.
+// The template is still registered, so emit a guaranteed-trap stub instead of
+// real code.
+void TemplateTable::fast_invokevfinal(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f2_byte, "use this argument");
+  __ stop("fast_invokevfinal not used on linuxs390x");
+}
+
+// invokeinterface bytecode: itable-based dispatch, with a special case for
+// java.lang.Object methods invoked via invokeinterface (forced virtual) and
+// explicit error paths for AbstractMethodError and
+// IncompatibleClassChangeError.
+void TemplateTable::invokeinterface(int byte_no) {
+  transition(vtos, vtos);
+
+  assert(byte_no == f1_byte, "use this argument");
+  Register interface = Z_tos;
+  Register index = Z_ARG3;
+  Register receiver = Z_tmp_1;
+  Register flags = Z_ARG5;
+
+  BLOCK_COMMENT("invokeinterface {");
+
+  // Destroys Z_ARG1 and Z_ARG2, thus use Z_ARG4 and copy afterwards.
+  prepare_invoke(byte_no, Z_ARG4, index,  // Get f1 klassOop, f2 itable index.
+                 receiver, flags);
+
+  // Z_R14 (== Z_bytecode) : return entry
+
+  __ z_lgr(interface, Z_ARG4);
+
+  // Special case of invokeinterface called for virtual method of
+  // java.lang.Object. See cpCacheOop.cpp for details.
+  // This code isn't produced by javac, but could be produced by
+  // another compliant java compiler.
+  Label notMethod;
+  __ testbit(flags, ConstantPoolCacheEntry::is_forced_virtual_shift);
+  __ z_brz(notMethod);
+  // invokevirtual_helper never falls through at runtime (it ends in
+  // jump_from_interpreted on both of its paths).
+  invokevirtual_helper(index, receiver, flags);
+  __ bind(notMethod);
+
+  // Get receiver klass into klass - also a null check.
+  Register klass = flags;
+
+  __ restore_locals();
+  __ load_klass(klass, receiver);
+
+  // Profile this call.
+  __ profile_virtual_call(klass, Z_ARG2/*mdp*/, Z_ARG4/*scratch*/);
+
+  NearLabel  no_such_interface, no_such_method;
+  Register   method = Z_tmp_2;
+
+  // TK 2010-08-24: save the index to Z_ARG4. needed in case of an error
+  //                in throw_AbstractMethodErrorByTemplateTable
+  __ z_lgr(Z_ARG4, index);
+  // TK 2011-03-24: copy also klass because it could be changed in
+  //                lookup_interface_method
+  __ z_lgr(Z_ARG2, klass);
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                              klass, interface, index,
+                              // outputs: method, scan temp. reg
+                              method, Z_tmp_2, Z_R1_scratch,
+                              no_such_interface);
+
+  // Check for abstract method error.
+  // Note: This should be done more efficiently via a throw_abstract_method_error
+  // interpreter entry point and a conditional jump to it in case of a null
+  // method.
+  __ compareU64_and_branch(method, (intptr_t) 0,
+                            Assembler::bcondZero, no_such_method);
+
+  __ profile_arguments_type(Z_ARG3, method, Z_ARG5, true);
+
+  // Do the call.
+  __ jump_from_interpreted(method, Z_ARG5);
+  __ should_not_reach_here();
+
+  // exception handling code follows...
+  // Note: Must restore interpreter registers to canonical
+  // state for exception handling to work correctly!
+
+  __ bind(no_such_method);
+
+  // Throw exception.
+  __ restore_bcp();      // Bcp must be correct for exception handler   (was destroyed).
+  __ restore_locals();   // Make sure locals pointer is correct as well (was destroyed).
+  // TK 2010-08-24: Call throw_AbstractMethodErrorByTemplateTable now with the
+  //                relevant information for generating a better error message
+  __ call_VM(noreg,
+              CAST_FROM_FN_PTR(address,
+                               InterpreterRuntime::throw_AbstractMethodError),
+              Z_ARG2, interface, Z_ARG4);
+  // The call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  __ bind(no_such_interface);
+
+  // Throw exception.
+  __ restore_bcp();      // Bcp must be correct for exception handler   (was destroyed).
+  __ restore_locals();   // Make sure locals pointer is correct as well (was destroyed).
+  // TK 2010-08-24: Call throw_IncompatibleClassChangeErrorByTemplateTable now with the
+  //                relevant information for generating a better error message
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::throw_IncompatibleClassChangeError),
+             Z_ARG2, interface);
+  // The call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  BLOCK_COMMENT("} invokeinterface");
+  return;
+}
+
+// invokehandle bytecode: MethodHandle invocation. prepare_invoke resolves the
+// f2 method and pushes the appendix (MethodType) if present; here we only
+// null-check the receiver and jump.
+void TemplateTable::invokehandle(int byte_no) {
+  transition(vtos, vtos);
+
+  const Register method = Z_tmp_2;
+  const Register recv   = Z_ARG5;
+  const Register mtype  = Z_tmp_1;
+  prepare_invoke(byte_no,
+                 method, mtype,   // Get f2 method, f1 MethodType.
+                 recv);
+  __ verify_method_ptr(method);
+  __ verify_oop(recv);
+  __ null_check(recv);
+
+  // Note: Mtype is already pushed (if necessary) by prepare_invoke.
+
+  // FIXME: profile the LambdaForm also.
+  __ profile_final_call(Z_ARG2);
+  __ profile_arguments_type(Z_ARG3, method, Z_ARG5, true);
+
+  __ jump_from_interpreted(method, Z_ARG3);
+}
+
+// invokedynamic bytecode: call through the resolved adapter method; the
+// CallSite appendix (if any) has already been pushed by prepare_invoke.
+void TemplateTable::invokedynamic(int byte_no) {
+  transition(vtos, vtos);
+
+  const Register Rmethod   = Z_tmp_2;
+  const Register Rcallsite = Z_tmp_1;
+
+  prepare_invoke(byte_no, Rmethod, Rcallsite);
+
+  // Rmethod holds the adapter method to invoke (it is the register passed to
+  // jump_from_interpreted below). Rcallsite received prepare_invoke's index
+  // argument, used there to locate the resolved CallSite appendix.
+  // NOTE(review): the original comments here labeled Rmethod as "CallSite
+  // object (from f1)" and Rcallsite as "MH.linkToCallSite method (from f2)",
+  // which contradicts the use of Rmethod as the call target -- verify the
+  // exact f1/f2 mapping against load_invoke_cp_cache_entry.
+
+  // Note: Callsite is already pushed by prepare_invoke.
+
+  // TODO: should make a type profile for any invokedynamic that takes a ref argument.
+  // Profile this call.
+  __ profile_call(Z_ARG2);
+  __ profile_arguments_type(Z_ARG2, Rmethod, Z_ARG5, false);
+  __ jump_from_interpreted(Rmethod, Z_ARG2);
+}
+
+//-----------------------------------------------------------------------------
+// Allocation
+
+// Original comment on "allow_shared_alloc":
+// Always go the slow path.
+//  + Eliminated optimization within the template-based interpreter:
+//    If an allocation is done within the interpreter without using
+//    tlabs, the interpreter tries to do the allocation directly
+//    on the heap.
+//  + That means the profiling hooks are not considered and allocations
+//    get lost for the profiling framework.
+//  + However, we do not think that this optimization is really needed,
+//    so we now always go the slow path through the VM in this case --
+//    spec jbb2005 shows no measurable performance degradation.
+// _new bytecode: allocate an instance. Fast path allocates in the TLAB and
+// initializes fields (via MVCLE) and header inline; everything else -- class
+// not resolved, not fully initialized, has finalizer/slow-path bit, TLAB
+// full -- goes to InterpreterRuntime::_new. Shared-eden allocation is
+// deliberately disabled (see comment above this template).
+void TemplateTable::_new() {
+  transition(vtos, atos);
+  address prev_instr_address = NULL;
+  Register tags  = Z_tmp_1;
+  Register RallocatedObject   = Z_tos;
+  Register cpool = Z_ARG2;
+  Register tmp = Z_ARG3; // RobjectFields==tmp and Rsize==offset must be a register pair.
+  Register offset = Z_ARG4;
+  Label slow_case;
+  Label done;
+  Label initialize_header;
+  Label initialize_object; // Including clearing the fields.
+  Label allocate_shared;
+
+  BLOCK_COMMENT("TemplateTable::_new {");
+  __ get_2_byte_integer_at_bcp(offset/*dest*/, 1, InterpreterMacroAssembler::Unsigned);
+  __ get_cpool_and_tags(cpool, tags);
+  // Make sure the class we're about to instantiate has been resolved.
+  // This is done before loading InstanceKlass to be consistent with the order
+  // how Constant Pool is updated (see ConstantPool::klass_at_put).
+  const int tags_offset = Array<u1>::base_offset_in_bytes();
+  __ load_address(tmp, Address(tags, offset, tags_offset));
+  __ z_cli(0, tmp, JVM_CONSTANT_Class);
+  __ z_brne(slow_case);
+
+  __ z_sllg(offset, offset, LogBytesPerWord); // Convert cp index to byte offset.
+  // Get InstanceKlass.
+  Register iklass = cpool;
+  __ z_lg(iklass, Address(cpool, offset, sizeof(ConstantPool)));
+
+  // Make sure klass is initialized & doesn't have finalizer.
+  // Make sure klass is fully initialized.
+  // Choose the short or long immediate-compare form depending on whether the
+  // state offset fits into an unsigned 12-bit displacement.
+  const int state_offset = in_bytes(InstanceKlass::init_state_offset());
+  if (Immediate::is_uimm12(state_offset)) {
+    __ z_cli(state_offset, iklass, InstanceKlass::fully_initialized);
+  } else {
+    __ z_cliy(state_offset, iklass, InstanceKlass::fully_initialized);
+  }
+  __ z_brne(slow_case);
+
+  // Get instance_size in InstanceKlass (scaled to a count of bytes).
+  // The slow-path bit in the layout helper covers finalizers etc.
+  Register Rsize = offset;
+  const int mask = 1 << Klass::_lh_instance_slow_path_bit;
+  __ z_llgf(Rsize, Address(iklass, Klass::layout_helper_offset()));
+  __ z_tmll(Rsize, mask);
+  __ z_btrue(slow_case);
+
+  // Allocate the instance
+  // 1) Try to allocate in the TLAB.
+  // 2) If fail and the object is large allocate in the shared Eden.
+  // 3) If the above fails (or is not applicable), go to a slow case
+  // (creates a new TLAB, etc.).
+
+  // Always go the slow path. See comment above this template.
+  const bool allow_shared_alloc = false;
+
+  if (UseTLAB) {
+    // Bump-pointer allocation: new top = old top + size; overflow -> slow case.
+    Register RoldTopValue = RallocatedObject;
+    Register RnewTopValue = tmp;
+    __ z_lg(RoldTopValue, Address(Z_thread, JavaThread::tlab_top_offset()));
+    __ load_address(RnewTopValue, Address(RoldTopValue, Rsize));
+    __ z_cg(RnewTopValue, Address(Z_thread, JavaThread::tlab_end_offset()));
+    __ z_brh(allow_shared_alloc ? allocate_shared : slow_case);
+    __ z_stg(RnewTopValue, Address(Z_thread, JavaThread::tlab_top_offset()));
+    if (ZeroTLAB) {
+      // The fields have been already cleared.
+      __ z_bru(initialize_header);
+    } else {
+      // Initialize both the header and fields.
+      if (allow_shared_alloc) {
+        __ z_bru(initialize_object);
+      } else {
+        // Fallthrough to initialize_object, but assert that it is on fall through path.
+        prev_instr_address = __ pc();
+      }
+    }
+  }
+
+  if (allow_shared_alloc) {
+    // Allocation in shared Eden not implemented, because sapjvm allocation trace does not allow it.
+    Unimplemented();
+  }
+
+  if (UseTLAB) {
+    Register RobjectFields = tmp;
+    Register Rzero = Z_R1_scratch;
+
+    assert(ZeroTLAB || prev_instr_address == __ pc(),
+           "must not omit jump to initialize_object above, as it is not on the fall through path");
+    __ clear_reg(Rzero, true /*whole reg*/, false); // Load 0L into Rzero. Don't set CC.
+
+    // The object is initialized before the header. If the object size is
+    // zero, go directly to the header initialization.
+    __ bind(initialize_object);
+    __ z_aghi(Rsize, (int)-sizeof(oopDesc)); // Subtract header size, set CC.
+    __ z_bre(initialize_header);             // Jump if size of fields is zero.
+
+    // Initialize object fields.
+    // See documentation for MVCLE instruction!!!
+    // MVCLE operates on two even/odd register pairs, hence the parity asserts.
+    assert(RobjectFields->encoding() % 2 == 0, "RobjectFields must be an even register");
+    assert(Rsize->encoding() == (RobjectFields->encoding()+1),
+           "RobjectFields and Rsize must be a register pair");
+    assert(Rzero->encoding() % 2 == 1, "Rzero must be an odd register");
+
+    // Set Rzero to 0 and use it as src length, then mvcle will copy nothing
+    // and fill the object with the padding value 0.
+    __ add2reg(RobjectFields, sizeof(oopDesc), RallocatedObject);
+    __ move_long_ext(RobjectFields, as_Register(Rzero->encoding() - 1), 0);
+
+    // Initialize object header only.
+    __ bind(initialize_header);
+    if (UseBiasedLocking) {
+      Register prototype = RobjectFields;
+      __ z_lg(prototype, Address(iklass, Klass::prototype_header_offset()));
+      __ z_stg(prototype, Address(RallocatedObject, oopDesc::mark_offset_in_bytes()));
+    } else {
+      __ store_const(Address(RallocatedObject, oopDesc::mark_offset_in_bytes()),
+                     (long)markOopDesc::prototype());
+    }
+
+    __ store_klass_gap(Rzero, RallocatedObject);  // Zero klass gap for compressed oops.
+    __ store_klass(iklass, RallocatedObject);     // Store klass last.
+
+    {
+      SkipIfEqual skip(_masm, &DTraceAllocProbes, false, Z_ARG5 /*scratch*/);
+      // Trigger dtrace event for fastpath.
+      __ push(atos); // Save the return value.
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), RallocatedObject);
+      __ pop(atos); // Restore the return value.
+    }
+    __ z_bru(done);
+  }
+
+  // slow case
+  __ bind(slow_case);
+  __ get_constant_pool(Z_ARG2);
+  __ get_2_byte_integer_at_bcp(Z_ARG3/*dest*/, 1, InterpreterMacroAssembler::Unsigned);
+  call_VM(Z_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), Z_ARG2, Z_ARG3);
+  __ verify_oop(Z_tos);
+
+  // continue
+  __ bind(done);
+
+  BLOCK_COMMENT("} TemplateTable::_new");
+}
+
+// newarray: allocate a one-dimensional array of a primitive type.
+// In:  unsigned byte at bcp+1 = element type code, Z_tos = length (itos).
+// Out: Z_tos = new array oop (atos).
+void TemplateTable::newarray() {
+  transition(itos, atos);
+
+  // Call runtime.
+  __ z_llgc(Z_ARG2, at_bcp(1));   // type
+  // size in Z_tos
+  call_VM(Z_RET,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
+          Z_ARG2, Z_tos);
+  // NOTE(review): the result is requested in Z_RET while atos expects it in
+  // Z_tos; presumably Z_RET aliases Z_tos on this port (anewarray below uses
+  // Z_tos directly) -- confirm.
+}
+
+// anewarray: allocate a one-dimensional array of object references.
+// In:  2-byte cp index at bcp+1 = element class, Z_tos = length (itos).
+// Out: Z_tos = new array oop (atos).
+void TemplateTable::anewarray() {
+  transition(itos, atos);
+  __ get_2_byte_integer_at_bcp(Z_ARG3, 1, InterpreterMacroAssembler::Unsigned);
+  __ get_constant_pool(Z_ARG2);
+  __ z_llgfr(Z_ARG4, Z_tos);  // Zero-extend the 32-bit length for the call.
+  call_VM(Z_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
+          Z_ARG2, Z_ARG3, Z_ARG4);
+}
+
+// arraylength: replace the array oop in Z_tos by its length (int).
+void TemplateTable::arraylength() {
+  transition(atos, itos);
+
+  int offset = arrayOopDesc::length_offset_in_bytes();
+
+  // Null check; 'offset' identifies the access to the length field
+  // (see null_check for how the fault is attributed).
+  __ null_check(Z_tos, Z_R0_scratch, offset);
+  __ mem2reg_opt(Z_tos, Address(Z_tos, offset), false);  // false: 32-bit load, length is an int.
+}
+
+// checkcast: throw ClassCastException if the object in Z_tos is not an
+// instance of the class named by the 2-byte cp index at bcp+1.
+// NULL passes the check. The object stays in Z_tos (atos -> atos).
+void TemplateTable::checkcast() {
+  transition(atos, atos);
+
+  NearLabel done, is_null, ok_is_subtype, quicked, resolved;
+
+  BLOCK_COMMENT("checkcast {");
+  // If object is NULL, we are almost done.
+  __ compareU64_and_branch(Z_tos, (intptr_t) 0, Assembler::bcondZero, is_null);
+
+  // Get cpool & tags index.
+  Register cpool = Z_tmp_1;
+  Register tags = Z_tmp_2;
+  Register index = Z_ARG5;
+
+  __ get_cpool_and_tags(cpool, tags);
+  __ get_2_byte_integer_at_bcp(index, 1, InterpreterMacroAssembler::Unsigned);
+  // See if bytecode has already been quicked.
+  // Note: For CLI, we would have to add the index to the tags pointer first,
+  // thus load and compare in a "classic" manner.
+  __ z_llgc(Z_R0_scratch,
+            Address(tags, index, Array<u1>::base_offset_in_bytes()));
+  __ compareU64_and_branch(Z_R0_scratch, JVM_CONSTANT_Class,
+                           Assembler::bcondEqual, quicked);
+
+  // Not quicked yet: resolve the class via the runtime.
+  __ push(atos); // Save receiver for result, and for GC.
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  __ get_vm_result_2(Z_tos);
+
+  Register   receiver = Z_ARG4;
+  Register   klass = Z_tos;
+  Register   subklass = Z_ARG5;
+
+  __ pop_ptr(receiver); // restore receiver
+  __ z_bru(resolved);
+
+  // Get superklass in klass and subklass in subklass.
+  __ bind(quicked);
+
+  __ z_lgr(Z_ARG4, Z_tos);  // Save receiver.
+  __ z_sllg(index, index, LogBytesPerWord);  // index2bytes for addressing
+  __ mem2reg_opt(klass, Address(cpool, index, sizeof(ConstantPool)));
+
+  __ bind(resolved);
+
+  __ load_klass(subklass, receiver);
+
+  // Generate subtype check. Object in receiver.
+  // Superklass in klass. Subklass in subklass.
+  __ gen_subtype_check(subklass, klass, Z_ARG3, Z_tmp_1, ok_is_subtype);
+
+  // Come here on failure.
+  __ push_ptr(receiver);
+  // Object is at TOS, target klass oop still in klass (Z_tos) by convention
+  // ("rax" in the x86 version of this comment).
+  __ z_brul((address) Interpreter::_throw_ClassCastException_entry);
+
+  // Come here on success.
+  __ bind(ok_is_subtype);
+
+  __ z_lgr(Z_tos, receiver); // Restore object.
+
+  // Collect counts on whether this test sees NULLs a lot or not.
+  if (ProfileInterpreter) {
+    __ z_bru(done);
+    __ bind(is_null);
+    __ profile_null_seen(Z_tmp_1);
+  } else {
+    __ bind(is_null);   // Same as 'done'.
+  }
+
+  __ bind(done);
+  BLOCK_COMMENT("} checkcast");
+}
+
+// instanceof: test whether the object in Z_tos is an instance of the class
+// named by the 2-byte cp index at bcp+1 (atos -> itos, result 0 or 1).
+void TemplateTable::instanceof() {
+  transition(atos, itos);
+
+  NearLabel done, is_null, ok_is_subtype, quicked, resolved;
+
+  BLOCK_COMMENT("instanceof {");
+  // If object is NULL, we are almost done.
+  __ compareU64_and_branch(Z_tos, (intptr_t) 0, Assembler::bcondZero, is_null);
+
+  // Get cpool & tags index.
+  Register cpool = Z_tmp_1;
+  Register tags = Z_tmp_2;
+  Register index = Z_ARG5;
+
+  __ get_cpool_and_tags(cpool, tags);
+  __ get_2_byte_integer_at_bcp(index, 1, InterpreterMacroAssembler::Unsigned);
+  // See if bytecode has already been quicked.
+  // Note: For CLI, we would have to add the index to the tags pointer first,
+  // thus load and compare in a "classic" manner.
+  __ z_llgc(Z_R0_scratch,
+            Address(tags, index, Array<u1>::base_offset_in_bytes()));
+  __ compareU64_and_branch(Z_R0_scratch, JVM_CONSTANT_Class, Assembler::bcondEqual, quicked);
+
+  // Not quicked yet: resolve the class via the runtime.
+  __ push(atos); // Save receiver for result, and for GC.
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  __ get_vm_result_2(Z_tos);
+
+  // Note: receiver and subklass deliberately alias Z_tmp_2. The receiver is
+  // only needed to load its klass; load_klass below overwrites it in place.
+  Register receiver = Z_tmp_2;
+  Register klass = Z_tos;
+  Register subklass = Z_tmp_2;
+
+  __ pop_ptr(receiver); // Restore receiver.
+  __ verify_oop(receiver);
+  __ load_klass(subklass, subklass);
+  __ z_bru(resolved);
+
+  // Get superklass in klass and subklass in subklass.
+  __ bind(quicked);
+
+  __ load_klass(subklass, Z_tos);
+  __ z_sllg(index, index, LogBytesPerWord);  // index2bytes for addressing
+  __ mem2reg_opt(klass,
+                 Address(cpool, index, sizeof(ConstantPool)));
+
+  __ bind(resolved);
+
+  // Generate subtype check.
+  // Superklass in klass. Subklass in subklass.
+  __ gen_subtype_check(subklass, klass, Z_ARG4, Z_ARG5, ok_is_subtype);
+
+  // Come here on failure.
+  __ clear_reg(Z_tos, true, false);
+  __ z_bru(done);
+
+  // Come here on success.
+  __ bind(ok_is_subtype);
+  __ load_const_optimized(Z_tos, 1);
+
+  // Collect counts on whether this test sees NULLs a lot or not.
+  if (ProfileInterpreter) {
+    __ z_bru(done);
+    __ bind(is_null);
+    __ profile_null_seen(Z_tmp_1);
+  } else {
+    __ bind(is_null);   // same as 'done'
+  }
+
+  __ bind(done);
+  // tos = 0: obj == NULL or  obj is not an instanceof the specified klass
+  // tos = 1: obj != NULL and obj is     an instanceof the specified klass
+  BLOCK_COMMENT("} instanceof");
+}
+
+//-----------------------------------------------------------------------------
+// Breakpoints
+// _breakpoint: executed for a bytecode that was replaced by a breakpoint.
+// Fetches the original bytecode, posts the breakpoint event to the runtime,
+// then dispatches the original bytecode.
+void TemplateTable::_breakpoint() {
+
+  // Note: We get here even if we are single stepping.
+  // Jbug insists on setting breakpoints at every bytecode
+  // even if we are in single step mode.
+
+  transition(vtos, vtos);
+
+  // Get the unpatched byte code.
+  __ get_method(Z_ARG2);
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at),
+             Z_ARG2, Z_bcp);
+  // Save the result to a register that is preserved over C-function calls.
+  __ z_lgr(Z_tmp_1, Z_RET);
+
+  // Post the breakpoint event.
+  __ get_method(Z_ARG2);
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
+             Z_ARG2, Z_bcp);
+
+  // Must restore the bytecode, because call_VM destroys Z_bytecode.
+  __ z_lgr(Z_bytecode, Z_tmp_1);
+
+  // Complete the execution of original bytecode.
+  __ dispatch_only_normal(vtos);
+}
+
+
+// Exceptions
+
+// athrow: throw the exception object in Z_tos by jumping to the
+// interpreter's common throw-exception entry.
+void TemplateTable::athrow() {
+  transition(atos, vtos);
+  __ null_check(Z_tos);  // A NULL exception oop is rejected here (see null_check).
+  __ load_absolute_address(Z_ARG2, Interpreter::throw_exception_entry());
+  __ z_br(Z_ARG2);
+}
+
+// Synchronization
+//
+// Note: monitorenter & exit are symmetric routines; which is reflected
+//       in the assembly code structure as well
+//
+// Stack layout:
+//
+//               callers_sp        <- Z_SP (callers_sp == Z_fp (own fp))
+//               return_pc
+//               [rest of ABI_160]
+//              /slot o:   free
+//             / ...       free
+//       oper. | slot n+1: free    <- Z_esp points to first free slot
+//       stack | slot n:   val                      caches IJAVA_STATE.esp
+//             | ...
+//              \slot 0:   val
+//              /slot m            <- IJAVA_STATE.monitors = monitor block top
+//             | ...
+//     monitors| slot 2
+//             | slot 1
+//              \slot 0
+//              /slot l            <- monitor block bot
+// ijava_state | ...
+//             | slot 2
+//              \slot 0
+//                                 <- Z_fp
+// monitorenter: lock the object in Z_tos, reusing a free slot in the
+// frame's monitor block or growing the block by one entry if none is free.
+// See the stack layout comment above for where the monitor block lives.
+void TemplateTable::monitorenter() {
+  transition(atos, vtos);
+
+  BLOCK_COMMENT("monitorenter {");
+
+  // Check for NULL object.
+  __ null_check(Z_tos);
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+  NearLabel allocated;
+  // Initialize entry pointer.
+  const Register Rfree_slot = Z_tmp_1;
+  __ clear_reg(Rfree_slot, true, false); // Points to free slot or NULL. Don't set CC.
+
+  // Find a free slot in the monitor block from top to bot (result in Rfree_slot).
+  {
+    const Register Rcurr_monitor = Z_ARG2;
+    const Register Rbot = Z_ARG3; // Points to word under bottom of monitor block.
+    const Register Rlocked_obj = Z_ARG4;
+    NearLabel loop, exit, not_free;
+    // Starting with top-most entry.
+    __ get_monitors(Rcurr_monitor); // Rcur_monitor = IJAVA_STATE.monitors
+    __ add2reg(Rbot, -frame::z_ijava_state_size, Z_fp);
+
+#ifdef ASSERT
+    address reentry = NULL;
+    { NearLabel ok;
+      __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotHigh, ok);
+      reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors points below monitor block bottom");
+      __ bind(ok);
+    }
+    { NearLabel ok;
+      __ compareU64_and_branch(Rcurr_monitor, Z_esp, Assembler::bcondHigh, ok);
+      reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors above Z_esp");
+      __ bind(ok);
+    }
+#endif
+
+    // Check if bottom reached, i.e. if there is at least one monitor.
+    __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondEqual, exit);
+
+    __ bind(loop);
+    // Check if current entry is used.
+    __ load_and_test_long(Rlocked_obj, Address(Rcurr_monitor, BasicObjectLock::obj_offset_in_bytes()));
+    __ z_brne(not_free);
+    // If not used then remember entry in Rfree_slot.
+    __ z_lgr(Rfree_slot, Rcurr_monitor);
+    __ bind(not_free);
+    // Exit if current entry is for same object; this guarantees, that new monitor
+    // used for recursive lock is above the older one.
+    __ compareU64_and_branch(Rlocked_obj, Z_tos, Assembler::bcondEqual, exit);
+    // otherwise advance to next entry
+    __ add2reg(Rcurr_monitor, entry_size);
+    // Check if bottom reached, if not at bottom then check this entry.
+    __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotEqual, loop);
+    __ bind(exit);
+  }
+
+  // Rfree_slot != NULL -> found one
+  __ compareU64_and_branch(Rfree_slot, (intptr_t)0L, Assembler::bcondNotEqual, allocated);
+
+  // Allocate one if there's no free slot.
+  __ add_monitor_to_stack(false, Z_ARG3, Z_ARG4, Z_ARG5);
+  __ get_monitors(Rfree_slot);
+
+  // Rfree_slot: points to monitor entry.
+  __ bind(allocated);
+
+  // Increment bcp to point to the next bytecode, so exception
+  // handling for async. exceptions work correctly.
+  // The object has already been popped from the stack, so the
+  // expression stack looks correct.
+  __ add2reg(Z_bcp, 1, Z_bcp);
+
+  // Store object.
+  __ z_stg(Z_tos, BasicObjectLock::obj_offset_in_bytes(), Rfree_slot);
+  __ lock_object(Rfree_slot, Z_tos);
+
+  // Check to make sure this monitor doesn't cause stack overflow after locking.
+  __ save_bcp();  // in case of exception
+  __ generate_stack_overflow_check(0);
+
+  // The bcp has already been incremented. Just need to dispatch to
+  // next instruction.
+  __ dispatch_next(vtos);
+
+  BLOCK_COMMENT("} monitorenter");
+}
+
+
+// monitorexit: unlock the object in Z_tos by searching the frame's monitor
+// block (top to bottom) for the matching entry. Mirrors monitorenter above.
+void TemplateTable::monitorexit() {
+  transition(atos, vtos);
+
+  BLOCK_COMMENT("monitorexit {");
+
+  // Check for NULL object.
+  __ null_check(Z_tos);
+
+  NearLabel found, not_found;
+  const Register Rcurr_monitor = Z_ARG2;
+
+  // Find matching slot.
+  {
+    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+    NearLabel entry, loop;
+
+    const Register Rbot = Z_ARG3; // Points to word under bottom of monitor block.
+    const Register Rlocked_obj = Z_ARG4;
+    // Starting with top-most entry.
+    __ get_monitors(Rcurr_monitor); // Rcur_monitor = IJAVA_STATE.monitors
+    __ add2reg(Rbot, -frame::z_ijava_state_size, Z_fp);
+
+#ifdef ASSERT
+    address reentry = NULL;
+    { NearLabel ok;
+      __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotHigh, ok);
+      reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors points below monitor block bottom");
+      __ bind(ok);
+    }
+    { NearLabel ok;
+      __ compareU64_and_branch(Rcurr_monitor, Z_esp, Assembler::bcondHigh, ok);
+      reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors above Z_esp");
+      __ bind(ok);
+    }
+#endif
+
+    // Check if bottom reached, i.e. if there is at least one monitor.
+    __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondEqual, not_found);
+
+    __ bind(loop);
+    // Check if current entry is for same object.
+    __ z_lg(Rlocked_obj, Address(Rcurr_monitor, BasicObjectLock::obj_offset_in_bytes()));
+    // If same object then stop searching.
+    __ compareU64_and_branch(Rlocked_obj, Z_tos, Assembler::bcondEqual, found);
+    // Otherwise advance to next entry.
+    __ add2reg(Rcurr_monitor, entry_size);
+    // Check if bottom reached, if not at bottom then check this entry.
+    __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotEqual, loop);
+    // Fall through: bottom reached without a match -> not_found.
+  }
+
+  __ bind(not_found);
+  // Error handling. Unlocking was not block-structured.
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                   InterpreterRuntime::throw_illegal_monitor_state_exception));
+  __ should_not_reach_here();
+
+  __ bind(found);
+  __ push_ptr(Z_tos); // Make sure object is on stack (contract with oopMaps).
+  __ unlock_object(Rcurr_monitor, Z_tos);
+  __ pop_ptr(Z_tos); // Discard object.
+  BLOCK_COMMENT("} monitorexit");
+}
+
+// Wide instructions: dispatch through the table of wide entry points
+// (Interpreter::_wentry_point), indexed by the bytecode that follows
+// the 'wide' prefix.
+void TemplateTable::wide() {
+  transition(vtos, vtos);
+
+  __ z_llgc(Z_R1_scratch, at_bcp(1));
+  __ z_sllg(Z_R1_scratch, Z_R1_scratch, LogBytesPerWord);  // Scale index to table entry size.
+  __ load_absolute_address(Z_tmp_1, (address) Interpreter::_wentry_point);
+  __ mem2reg_opt(Z_tmp_1, Address(Z_tmp_1, Z_R1_scratch));
+  __ z_br(Z_tmp_1);
+  // Note: the bcp increment step is part of the individual wide
+  // bytecode implementations.
+}
+
+// Multi arrays
+// multianewarray: allocate a multi-dimensional array via the runtime.
+// The number of dimensions is the unsigned byte at bcp+3; the dimension
+// sizes are on the expression stack.
+void TemplateTable::multianewarray() {
+  transition(vtos, atos);
+
+  __ z_llgc(Z_tmp_1, at_bcp(3)); // Get number of dimensions.
+  // Slot count to byte offset.
+  __ z_sllg(Z_tmp_1, Z_tmp_1, Interpreter::logStackElementSize);
+  // Z_esp points past last_dim, so load the address of first_dim into Z_ARG2.
+  __ load_address(Z_ARG2, Address(Z_esp, Z_tmp_1));
+  call_VM(Z_RET,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
+          Z_ARG2);
+  // Pop dimensions from expression stack.
+  __ z_agr(Z_esp, Z_tmp_1);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/templateTable_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_TEMPLATETABLE_S390_HPP
+#define CPU_S390_VM_TEMPLATETABLE_S390_HPP
+
+  static void prepare_invoke(int byte_no,
+                             Register method,         // linked method (or i-klass)
+                             Register index = noreg,  // itable index, MethodType, etc.
+                             Register recv  = noreg,  // If caller wants to see it.
+                             Register flags = noreg); // If caller wants to test it.
+  static void invokevirtual_helper(Register index, Register recv,
+                                   Register flags);
+
+  // Helpers
+  static void index_check(Register array, Register index, unsigned int shift);
+  static void index_check_without_pop(Register array, Register index);
+
+#endif // CPU_S390_VM_TEMPLATETABLE_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vmStructs_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VMSTRUCTS_S390_HPP
+#define CPU_S390_VM_VMSTRUCTS_S390_HPP
+
+// These are the CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+// All four macros below expand to nothing: the s390 port contributes no
+// CPU-specific entries to the SA tables.
+
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // CPU_S390_VM_VMSTRUCTS_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vm_version_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1182 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "code/compiledIC.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/java.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "vm_version_s390.hpp"
+
+# include <sys/sysinfo.h>
+
+// Definitions of the static VM_Version state declared in vm_version_s390.hpp.
+bool VM_Version::_is_determine_features_test_running  = false;
+
+unsigned long VM_Version::_features[_features_buffer_len]           = {0, 0, 0, 0};
+unsigned long VM_Version::_cipher_features[_features_buffer_len]    = {0, 0, 0, 0};
+unsigned long VM_Version::_msgdigest_features[_features_buffer_len] = {0, 0, 0, 0};
+unsigned int  VM_Version::_nfeatures                                = 0;
+unsigned int  VM_Version::_ncipher_features                         = 0;
+unsigned int  VM_Version::_nmsgdigest_features                      = 0;
+unsigned int  VM_Version::_Dcache_lineSize                          = 256;
+unsigned int  VM_Version::_Icache_lineSize                          = 256;
+
+// Generation tables; all three arrays are indexed by the same generation
+// number (1 = z900 .. 7 = z13). Index 0 and blank entries are placeholders.
+static const char* z_gen[]     = {"  ",   "G1",   "G2", "G3",    "G4",     "G5",      "G6",   "G7"   };
+static const char* z_machine[] = {"  ", "2064", "2084", "2094",  "2097",   "2817",    "  ",   "2964" };
+static const char* z_name[]    = {"  ", "z900", "z990", "z9 EC", "z10 EC", "z196 EC", "ec12", "z13"  };
+
+// Set platform-dependent VM flags and capability switches based on the
+// processor features detected by determine_features().
+void VM_Version::initialize() {
+  determine_features();      // Get processor capabilities.
+  set_features_string();     // Set a descriptive feature indication.
+
+  if (Verbose) {
+    print_features();
+  }
+
+  intx cache_line_size = Dcache_lineSize(0);
+
+  MaxVectorSize = 8;  // NOTE(review): set unconditionally, independent of detected features -- confirm intended.
+
+  if (has_PrefetchRaw()) {
+    if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {  // not preset
+      // 0 = no prefetch.
+      // 1 = Prefetch instructions for each allocation.
+      // 2 = Use TLAB watermark to gate allocation prefetch.
+      AllocatePrefetchStyle = 1;
+    }
+
+    if (AllocatePrefetchStyle > 0) {  // Prefetching turned on at all?
+      // Distance to prefetch ahead of allocation pointer.
+      if (FLAG_IS_DEFAULT(AllocatePrefetchDistance) || (AllocatePrefetchDistance < 0)) {  // not preset
+        AllocatePrefetchDistance = 0;
+      }
+
+      // Number of lines to prefetch ahead of allocation pointer.
+      if (FLAG_IS_DEFAULT(AllocatePrefetchLines) || (AllocatePrefetchLines <= 0)) {      // not preset
+        AllocatePrefetchLines = 3;
+      }
+
+      // Step size in bytes of sequential prefetch instructions.
+      // NOTE(review): all three branches below are identical, so
+      // AllocatePrefetchStepSize is unconditionally forced to cache_line_size
+      // and a user-set value is silently discarded. Either collapse to one
+      // statement or let the last branch keep the user's value.
+      if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) || (AllocatePrefetchStepSize <= 0)) { // not preset
+        FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+      } else if (AllocatePrefetchStepSize < cache_line_size) {
+        FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+      } else {
+        FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+      }
+    } else {
+      FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+      AllocatePrefetchDistance = 0;
+      AllocatePrefetchLines    = 0;
+      // Can't be zero. Will SIGFPE during constraints checking.
+      FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+    }
+
+  } else {
+    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+    AllocatePrefetchDistance = 0;
+    AllocatePrefetchLines    = 0;
+    // Can't be zero. Will SIGFPE during constraints checking.
+    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+  }
+
+  // TODO:
+  // On z/Architecture, cache line size is significantly large (256 bytes). Do we really need
+  // to keep contended members that far apart? Performance tests are required.
+  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) {
+    ContendedPaddingWidth = cache_line_size;
+  }
+
+  // On z/Architecture, the CRC32 intrinsics had to be implemented "by hand".
+  // They cannot be based on the CHECKSUM instruction which has been there
+  // since the very beginning (of z/Architecture). It computes "some kind of" a checksum
+  // which has nothing to do with the CRC32 algorithm.
+  if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
+    FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
+  }
+
+  // On z/Architecture, we take UseAES as the general switch to enable/disable the AES intrinsics.
+  // The specific, and yet to be defined, switches UseAESxxxIntrinsics will then be set
+  // depending on the actual machine capabilities.
+  // Explicitly setting them via CmdLine option takes precedence, of course.
+  // TODO: UseAESIntrinsics must be made keylength specific.
+  // As of March 2015 and Java8, only AES128 is supported by the Java Cryptographic Extensions.
+  // Therefore, UseAESIntrinsics is of minimal use at the moment.
+  if (FLAG_IS_DEFAULT(UseAES) && has_Crypto_AES()) {
+    FLAG_SET_DEFAULT(UseAES, true);
+  }
+  if (UseAES && !has_Crypto_AES()) {
+    warning("AES instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseAES, false);
+  }
+  if (UseAES) {
+    if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+      FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+    }
+  }
+  if (UseAESIntrinsics && !has_Crypto_AES()) {
+    warning("AES intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+  }
+
+  // TODO: implement AES/CTR intrinsics
+  if (UseAESCTRIntrinsics) {
+    warning("AES/CTR intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+  }
+
+  // TODO: implement GHASH intrinsics
+  if (UseGHASHIntrinsics) {
+    warning("GHASH intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
+  if (UseFMA) {
+    warning("FMA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseFMA, false);
+  }
+
+  // On z/Architecture, we take UseSHA as the general switch to enable/disable the SHA intrinsics.
+  // The specific switches UseSHAxxxIntrinsics will then be set depending on the actual
+  // machine capabilities.
+  // Explicitly setting them via CmdLine option takes precedence, of course.
+  if (FLAG_IS_DEFAULT(UseSHA) && has_Crypto_SHA()) {
+    FLAG_SET_DEFAULT(UseSHA, true);
+  }
+  if (UseSHA && !has_Crypto_SHA()) {
+    warning("SHA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+  if (UseSHA && has_Crypto_SHA1()) {
+    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+    }
+  } else if (UseSHA1Intrinsics) {
+    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+  }
+  if (UseSHA && has_Crypto_SHA256()) {
+    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+    }
+  } else if (UseSHA256Intrinsics) {
+    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+  }
+  if (UseSHA && has_Crypto_SHA512()) {
+    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+    }
+  } else if (UseSHA512Intrinsics) {
+    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
+    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+  }
+
+  if (UseAdler32Intrinsics) {
+    warning("Adler32Intrinsics not available on this CPU.");
+    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+  }
+
+  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+  }
+  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
+  }
+  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
+  }
+  if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+    FLAG_SET_DEFAULT(UsePopCountInstruction, true);
+  }
+
+  // z/Architecture supports 8-byte compare-exchange operations
+  // (see Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
+  // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
+  _supports_cx8 = true;
+
+  _supports_atomic_getadd4 = VM_Version::has_LoadAndALUAtomicV1();
+  _supports_atomic_getadd8 = VM_Version::has_LoadAndALUAtomicV1();
+
+  // z/Architecture supports unaligned memory accesses.
+  // Performance penalty is negligible. An additional tick or so
+  // is lost if the accessed data spans a cache line boundary.
+  // Unaligned accesses are not atomic, of course.
+  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+  }
+}
+
+
+// Pick a human-readable description of the detected processor generation.
+// The checks run from newest (z13) down to oldest (z900); each match
+// overwrites _features_string, so if several generations match, the last
+// assignment wins, i.e. the oldest matching generation determines the string.
+// 'ambiguity' counts the matches so multi-matches can be reported below.
+void VM_Version::set_features_string() {
+
+  unsigned int ambiguity = 0;
+  if (is_z13()) {
+    _features_string = "System z G7-z13  (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update, TxM, VectorInstr)";
+    ambiguity++;
+  }
+  if (is_ec12()) {
+    _features_string = "System z G6-EC12 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update, TxM)";
+    ambiguity++;
+  }
+  if (is_z196()) {
+    _features_string = "System z G5-z196 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update)";
+    ambiguity++;
+  }
+  if (is_z10()) {
+    _features_string = "System z G4-z10  (LDISP_fast, ExtImm, PCrel Load/Store, CmpB)";
+    ambiguity++;
+  }
+  if (is_z9()) {
+    _features_string = "System z G3-z9   (LDISP_fast, ExtImm), out-of-support as of 2016-04-01";
+    ambiguity++;
+  }
+  if (is_z990()) {
+    _features_string = "System z G2-z990 (LDISP_fast), out-of-support as of 2014-07-01";
+    ambiguity++;
+  }
+  if (is_z900()) {
+    _features_string = "System z G1-z900 (LDISP), out-of-support as of 2014-07-01";
+    ambiguity++;
+  }
+
+  if (ambiguity == 0) {
+    _features_string = "z/Architecture (unknown generation)";
+  } else if (ambiguity > 1) {
+    tty->print_cr("*** WARNING *** Ambiguous z/Architecture detection, ambiguity = %d", ambiguity);
+    tty->print_cr("                oldest detected generation is %s", _features_string);
+    _features_string = "z/Architecture (ambiguous detection)";
+  }
+}
+
+// featureBuffer - bit array indicating availability of various features
+// featureNum    - bit index of feature to be tested
+//                 Featurenum < 0 requests test for any nonzero bit in featureBuffer.
+// bufLen        - length of featureBuffer in bits
+bool VM_Version::test_feature_bit(unsigned long* featureBuffer, int featureNum, unsigned int bufLen) {
+  assert(bufLen > 0,             "buffer len must be positive");
+  assert((bufLen & 0x0007) == 0, "unaligned buffer len");
+  assert(((intptr_t)featureBuffer&0x0007) == 0, "unaligned feature buffer");
+  if (featureNum < 0) {
+    // Any bit set at all?
+    // Scan the buffer word by word; bufLen is in bits, so divide by the
+    // number of bits per long to get the word count.
+    bool anyBit = false;
+    for (size_t i = 0; i < bufLen/(8*sizeof(long)); i++) {
+      anyBit = anyBit || (featureBuffer[i] != 0);
+    }
+    return anyBit;
+  } else {
+    assert((unsigned int)featureNum < bufLen,    "feature index out of range");
+    unsigned char* byteBuffer = (unsigned char*)featureBuffer;
+    int   byteIndex  = featureNum/(8*sizeof(char));
+    int   bitIndex   = featureNum%(8*sizeof(char));
+    // Indexed bit set?
+    // Bits are numbered MSB-first within each byte (bit 0 is mask 0x80).
+    return (byteBuffer[byteIndex] & (1U<<(7-bitIndex))) != 0;
+  }
+}
+
+// Print the feature summary, prefixed by 'text', to tty: the readable
+// features string plus the raw facility-bit words. With Verbose (or
+// print_anyway) each detected facility is listed individually, grouped
+// by the machine generation that introduced it; indented entries are
+// sub-features of the preceding unindented group. For machines with the
+// Crypto facility, the KM/KMC cipher and KIMD/KLMD message digest
+// capabilities are decoded in detail.
+void VM_Version::print_features_internal(const char* text, bool print_anyway) {
+  tty->print_cr("%s %s",       text, features_string());
+  tty->print("%s", text);
+  for (unsigned int i = 0; i < _nfeatures; i++) {
+    tty->print("  0x%16.16lx", _features[i]);
+  }
+  tty->cr();
+
+  if (Verbose || print_anyway) {
+    // z900
+    if (has_long_displacement()        ) tty->print_cr("available: %s", "LongDispFacility");
+    // z990
+    if (has_long_displacement_fast()   ) tty->print_cr("available: %s", "LongDispFacilityHighPerf");
+    if (has_ETF2() && has_ETF3()       ) tty->print_cr("available: %s", "ETF2 and ETF3");
+    if (has_Crypto()                   ) tty->print_cr("available: %s", "CryptoFacility");
+    // z9
+    if (has_extended_immediate()       ) tty->print_cr("available: %s", "ExtImmedFacility");
+    if (has_StoreFacilityListExtended()) tty->print_cr("available: %s", "StoreFacilityListExtended");
+    if (has_StoreClockFast()           ) tty->print_cr("available: %s", "StoreClockFast");
+    if (has_ETF2Enhancements()         ) tty->print_cr("available: %s", "ETF2 Enhancements");
+    if (has_ETF3Enhancements()         ) tty->print_cr("available: %s", "ETF3 Enhancements");
+    if (has_HFPUnnormalized()          ) tty->print_cr("available: %s", "HFPUnnormalizedFacility");
+    if (has_HFPMultiplyAndAdd()        ) tty->print_cr("available: %s", "HFPMultiplyAndAddFacility");
+    // z10
+    if (has_ParsingEnhancements()      ) tty->print_cr("available: %s", "Parsing Enhancements");
+    if (has_ExtractCPUtime()           ) tty->print_cr("available: %s", "ExtractCPUTime");
+    if (has_CompareSwapStore()         ) tty->print_cr("available: %s", "CompareSwapStore");
+    if (has_GnrlInstrExtensions()      ) tty->print_cr("available: %s", "General Instruction Extensions");
+    if (has_CompareBranch()            ) tty->print_cr("  available: %s", "Compare and Branch");
+    if (has_CompareTrap()              ) tty->print_cr("  available: %s", "Compare and Trap");
+    if (has_RelativeLoadStore()        ) tty->print_cr("  available: %s", "Relative Load/Store");
+    if (has_MultiplySingleImm32()      ) tty->print_cr("  available: %s", "MultiplySingleImm32");
+    if (has_Prefetch()                 ) tty->print_cr("  available: %s", "Prefetch");
+    if (has_MoveImmToMem()             ) tty->print_cr("  available: %s", "Direct Moves Immediate to Memory");
+    if (has_MemWithImmALUOps()         ) tty->print_cr("  available: %s", "Direct ALU Ops Memory .op. Immediate");
+    if (has_ExtractCPUAttributes()     ) tty->print_cr("  available: %s", "Extract CPU Attributes");
+    if (has_ExecuteExtensions()        ) tty->print_cr("available: %s", "ExecuteExtensions");
+    if (has_FPSupportEnhancements()    ) tty->print_cr("available: %s", "FPSupportEnhancements");
+    if (has_DecimalFloatingPoint()     ) tty->print_cr("available: %s", "DecimalFloatingPoint");
+    // z196
+    if (has_DistinctOpnds()            ) tty->print_cr("available: %s", "Distinct Operands");
+    if (has_InterlockedAccessV1()      ) tty->print_cr("  available: %s", "InterlockedAccess V1 (fast)");
+    if (has_PopCount()                 ) tty->print_cr("  available: %s", "PopCount");
+    if (has_LoadStoreConditional()     ) tty->print_cr("  available: %s", "LoadStoreConditional");
+    if (has_HighWordInstr()            ) tty->print_cr("  available: %s", "HighWord Instructions");
+    if (has_FastSync()                 ) tty->print_cr("  available: %s", "FastSync (bcr 14,0)");
+    if (has_AtomicMemWithImmALUOps()   ) tty->print_cr("available: %s", "Atomic Direct ALU Ops Memory .op. Immediate");
+    if (has_FPExtensions()             ) tty->print_cr("available: %s", "Floatingpoint Extensions");
+    if (has_CryptoExt3()               ) tty->print_cr("available: %s", "Crypto Extensions 3");
+    if (has_CryptoExt4()               ) tty->print_cr("available: %s", "Crypto Extensions 4");
+    // EC12
+    if (has_MiscInstrExt()             ) tty->print_cr("available: %s", "Miscellaneous Instruction Extensions");
+    if (has_ExecutionHint()            ) tty->print_cr("  available: %s", "Execution Hints (branch prediction)");
+    if (has_ProcessorAssist()          ) tty->print_cr("  available: %s", "Processor Assists");
+    if (has_LoadAndTrap()              ) tty->print_cr("  available: %s", "Load and Trap");
+    if (has_TxMem()                    ) tty->print_cr("available: %s", "Transactional Memory");
+    if (has_InterlockedAccessV2()      ) tty->print_cr("  available: %s", "InterlockedAccess V2 (fast)");
+    if (has_DFPZonedConversion()       ) tty->print_cr("  available: %s", "DFP Zoned Conversions");
+    // z13
+    if (has_LoadStoreConditional2()    ) tty->print_cr("available: %s", "Load/Store Conditional 2");
+    if (has_CryptoExt5()               ) tty->print_cr("available: %s", "Crypto Extensions 5");
+    if (has_DFPPackedConversion()      ) tty->print_cr("available: %s", "DFP Packed Conversions");
+    if (has_VectorFacility()           ) tty->print_cr("available: %s", "Vector Facility");
+    // test switches
+    if (has_TestFeature1Impl()         ) tty->print_cr("available: %s", "TestFeature1Impl");
+    if (has_TestFeature2Impl()         ) tty->print_cr("available: %s", "TestFeature2Impl");
+    if (has_TestFeature4Impl()         ) tty->print_cr("available: %s", "TestFeature4Impl");
+    if (has_TestFeature8Impl()         ) tty->print_cr("available: %s", "TestFeature8Impl");
+
+    if (has_Crypto()) {
+      tty->cr();
+      tty->print_cr("detailed availability of %s capabilities:", "CryptoFacility");
+      // _cipher_features[0..1] hold the KM query bits, [2..3] the KMC bits.
+      if (test_feature_bit(&_cipher_features[0], -1, 2*Cipher::_featureBits)) {
+        tty->cr();
+        tty->print_cr("  available: %s", "Message Cipher Functions");
+      }
+      if (test_feature_bit(&_cipher_features[0], -1, (int)Cipher::_featureBits)) {
+        tty->print_cr("    available Crypto Features of KM  (Cipher Message):");
+        for (unsigned int i = 0; i < Cipher::_featureBits; i++) {
+          if (test_feature_bit(&_cipher_features[0], i, (int)Cipher::_featureBits)) {
+            switch (i) {
+              case Cipher::_Query:              tty->print_cr("      available: KM   Query");                  break;
+              case Cipher::_DEA:                tty->print_cr("      available: KM   DEA");                    break;
+              case Cipher::_TDEA128:            tty->print_cr("      available: KM   TDEA-128");               break;
+              case Cipher::_TDEA192:            tty->print_cr("      available: KM   TDEA-192");               break;
+              case Cipher::_EncryptedDEA:       tty->print_cr("      available: KM   Encrypted DEA");          break;
+              case Cipher::_EncryptedDEA128:    tty->print_cr("      available: KM   Encrypted DEA-128");      break;
+              case Cipher::_EncryptedDEA192:    tty->print_cr("      available: KM   Encrypted DEA-192");      break;
+              case Cipher::_AES128:             tty->print_cr("      available: KM   AES-128");                break;
+              case Cipher::_AES192:             tty->print_cr("      available: KM   AES-192");                break;
+              case Cipher::_AES256:             tty->print_cr("      available: KM   AES-256");                break;
+              case Cipher::_EnccryptedAES128:   tty->print_cr("      available: KM   Encrypted-AES-128");      break;
+              case Cipher::_EnccryptedAES192:   tty->print_cr("      available: KM   Encrypted-AES-192");      break;
+              case Cipher::_EnccryptedAES256:   tty->print_cr("      available: KM   Encrypted-AES-256");      break;
+              case Cipher::_XTSAES128:          tty->print_cr("      available: KM   XTS-AES-128");            break;
+              case Cipher::_XTSAES256:          tty->print_cr("      available: KM   XTS-AES-256");            break;
+              case Cipher::_EncryptedXTSAES128: tty->print_cr("      available: KM   XTS-Encrypted-AES-128");  break;
+              case Cipher::_EncryptedXTSAES256: tty->print_cr("      available: KM   XTS-Encrypted-AES-256");  break;
+              default: tty->print_cr("      available: unknown KM  code %d", i);      break;
+            }
+          }
+        }
+      }
+      if (test_feature_bit(&_cipher_features[2], -1, (int)Cipher::_featureBits)) {
+        tty->print_cr("    available Crypto Features of KMC (Cipher Message with Chaining):");
+        for (unsigned int i = 0; i < Cipher::_featureBits; i++) {
+          if (test_feature_bit(&_cipher_features[2], i, (int)Cipher::_featureBits)) {
+            switch (i) {
+              case Cipher::_Query:              tty->print_cr("      available: KMC  Query");                  break;
+              case Cipher::_DEA:                tty->print_cr("      available: KMC  DEA");                    break;
+              case Cipher::_TDEA128:            tty->print_cr("      available: KMC  TDEA-128");               break;
+              case Cipher::_TDEA192:            tty->print_cr("      available: KMC  TDEA-192");               break;
+              case Cipher::_EncryptedDEA:       tty->print_cr("      available: KMC  Encrypted DEA");          break;
+              case Cipher::_EncryptedDEA128:    tty->print_cr("      available: KMC  Encrypted DEA-128");      break;
+              case Cipher::_EncryptedDEA192:    tty->print_cr("      available: KMC  Encrypted DEA-192");      break;
+              case Cipher::_AES128:             tty->print_cr("      available: KMC  AES-128");                break;
+              case Cipher::_AES192:             tty->print_cr("      available: KMC  AES-192");                break;
+              case Cipher::_AES256:             tty->print_cr("      available: KMC  AES-256");                break;
+              case Cipher::_EnccryptedAES128:   tty->print_cr("      available: KMC  Encrypted-AES-128");      break;
+              case Cipher::_EnccryptedAES192:   tty->print_cr("      available: KMC  Encrypted-AES-192");      break;
+              case Cipher::_EnccryptedAES256:   tty->print_cr("      available: KMC  Encrypted-AES-256");      break;
+              case Cipher::_PRNG:               tty->print_cr("      available: KMC  PRNG");                   break;
+              default: tty->print_cr("      available: unknown KMC code %d", i);      break;
+            }
+          }
+        }
+      }
+
+      // _msgdigest_features[0..1] hold the KIMD query bits, [2..3] the KLMD bits.
+      if (test_feature_bit(&_msgdigest_features[0], -1, 2*MsgDigest::_featureBits)) {
+        tty->cr();
+        tty->print_cr("  available: %s", "Message Digest Functions for SHA");
+      }
+      if (test_feature_bit(&_msgdigest_features[0], -1, (int)MsgDigest::_featureBits)) {
+        tty->print_cr("    available Features of KIMD (Msg Digest):");
+        for (unsigned int i = 0; i < MsgDigest::_featureBits; i++) {
+          if (test_feature_bit(&_msgdigest_features[0], i, (int)MsgDigest::_featureBits)) {
+            switch (i) {
+              case MsgDigest::_Query:  tty->print_cr("      available: KIMD Query");   break;
+              case MsgDigest::_SHA1:   tty->print_cr("      available: KIMD SHA-1");   break;
+              case MsgDigest::_SHA256: tty->print_cr("      available: KIMD SHA-256"); break;
+              case MsgDigest::_SHA512: tty->print_cr("      available: KIMD SHA-512"); break;
+              case MsgDigest::_GHASH:  tty->print_cr("      available: KIMD GHASH");   break;
+              default: tty->print_cr("      available: unknown code %d", i);  break;
+            }
+          }
+        }
+      }
+      if (test_feature_bit(&_msgdigest_features[2], -1, (int)MsgDigest::_featureBits)) {
+        tty->print_cr("    available Features of KLMD (Msg Digest):");
+        for (unsigned int i = 0; i < MsgDigest::_featureBits; i++) {
+          if (test_feature_bit(&_msgdigest_features[2], i, (int)MsgDigest::_featureBits)) {
+            switch (i) {
+              case MsgDigest::_Query:  tty->print_cr("      available: KLMD Query");   break;
+              case MsgDigest::_SHA1:   tty->print_cr("      available: KLMD SHA-1");   break;
+              case MsgDigest::_SHA256: tty->print_cr("      available: KLMD SHA-256"); break;
+              case MsgDigest::_SHA512: tty->print_cr("      available: KLMD SHA-512"); break;
+              default: tty->print_cr("      available: unknown code %d", i);  break;
+            }
+          }
+        }
+      }
+    }
+    if (ContendedPaddingWidth > 0) {
+      tty->cr();
+      tty->print_cr("ContendedPaddingWidth " INTX_FORMAT, ContendedPaddingWidth);
+    }
+  }
+}
+
+// Print the feature summary under the "Version:" heading.
+// Relies on print_features_internal's default for print_anyway (declared
+// elsewhere; presumably false, so detail output then depends on Verbose
+// only - confirm against the class declaration).
+void VM_Version::print_features() {
+  print_features_internal("Version:");
+}
+
+// Clear the feature bit vector, but only when the caller requests it.
+// Passing reset=false makes this a no-op; the cumulative set_features_*
+// methods use that to preserve already accumulated bits.
+void VM_Version::reset_features(bool reset) {
+  if (!reset) { return; }
+
+  unsigned int i = 0;
+  while (i < _features_buffer_len) {
+    VM_Version::_features[i] = 0;
+    i++;
+  }
+}
+
+
+// Preset the feature vector for a z900 (G1) machine.
+// The set_features_* methods are cumulative: each newer generation first
+// applies its predecessor's set with reset=false so the bits accumulate.
+void VM_Version::set_features_z900(bool reset) {
+  reset_features(reset);
+
+  set_has_long_displacement();
+  set_has_ETF2();
+}
+
+// Preset the feature vector for a z990 (G2) machine:
+// everything z900 offers, plus the facilities added below.
+void VM_Version::set_features_z990(bool reset) {
+  reset_features(reset);
+
+  set_features_z900(false);  // Inherit z900 set; keep accumulated bits (no reset).
+  set_has_ETF3();
+  set_has_long_displacement_fast();
+  set_has_HFPMultiplyAndAdd();
+}
+
+// Preset the feature vector for a z9 (G3) machine:
+// everything z990 offers, plus the facilities added below.
+void VM_Version::set_features_z9(bool reset) {
+  reset_features(reset);
+
+  set_features_z990(false);  // Inherit z990 set; keep accumulated bits (no reset).
+  set_has_StoreFacilityListExtended();
+  // set_has_Crypto();   // Do not set, crypto features must be retrieved separately.
+  set_has_ETF2Enhancements();
+  set_has_ETF3Enhancements();
+  set_has_extended_immediate();
+  set_has_StoreClockFast();
+  set_has_HFPUnnormalized();
+}
+
+// Preset the feature vector for a z10 (G4) machine:
+// everything z9 offers, plus the facilities added below.
+void VM_Version::set_features_z10(bool reset) {
+  reset_features(reset);
+
+  set_features_z9(false);  // Inherit z9 set; keep accumulated bits (no reset).
+  set_has_CompareSwapStore();
+  set_has_RelativeLoadStore();
+  set_has_CompareBranch();
+  set_has_CompareTrap();
+  set_has_MultiplySingleImm32();
+  set_has_Prefetch();
+  set_has_MoveImmToMem();
+  set_has_MemWithImmALUOps();
+  set_has_ExecuteExtensions();
+  set_has_FPSupportEnhancements();
+  set_has_DecimalFloatingPoint();
+  set_has_ExtractCPUtime();
+  set_has_CryptoExt3();
+}
+
+// Preset the feature vector for a z196 (G5) machine:
+// everything z10 offers, plus the facilities added below.
+void VM_Version::set_features_z196(bool reset) {
+  reset_features(reset);
+
+  set_features_z10(false);  // Inherit z10 set; keep accumulated bits (no reset).
+  set_has_InterlockedAccessV1();
+  set_has_PopCount();
+  set_has_LoadStoreConditional();
+  set_has_HighWordInstr();
+  set_has_FastSync();
+  set_has_FPExtensions();
+  set_has_DistinctOpnds();
+  set_has_CryptoExt4();
+}
+
+// Preset the feature vector for an EC12 (G6) machine:
+// everything z196 offers, plus the facilities added below.
+void VM_Version::set_features_ec12(bool reset) {
+  reset_features(reset);
+
+  set_features_z196(false);  // Inherit z196 set; keep accumulated bits (no reset).
+  set_has_MiscInstrExt();
+  set_has_InterlockedAccessV2();
+  set_has_LoadAndALUAtomicV2();
+  set_has_TxMem();
+}
+
+// Preset the feature vector for a z13 (G7) machine:
+// everything EC12 offers, plus the facilities added below.
+void VM_Version::set_features_z13(bool reset) {
+  reset_features(reset);
+
+  set_features_ec12(false);  // Inherit EC12 set; keep accumulated bits (no reset).
+  set_has_LoadStoreConditional2();
+  set_has_CryptoExt5();
+  set_has_VectorFacility();
+}
+
+// Interpret the ProcessorArchitecture command line value (march) and
+// preset the feature vector accordingly. Accepted values are the machine
+// generation names ("z900" .. "z13") and "ztest[<n>]" for the internal
+// test features. An unsupported name leaves the current settings
+// untouched and only prints a warning.
+void VM_Version::set_features_from(const char* march) {
+  bool parse_error = false;
+  bool force_print = false;
+
+  if ((march == NULL) || (march[0] == '\0')) {
+    return;  // Nothing specified, nothing to do.
+  }
+
+  const int buf_len = 16;
+  const int hdr_len =  5;
+  char scratch[buf_len];
+  // Capture the first hdr_len characters so "ztest..." variants can be
+  // recognized by their common prefix.
+  if (strlen(march) >= hdr_len) {
+    memcpy(scratch, march, hdr_len);
+    scratch[hdr_len] = '\00';
+  } else {
+    scratch[0]       = '\00';
+  }
+
+  if (!strcmp(march, "z900")) {
+    set_features_z900();
+  } else if (!strcmp(march, "z990")) {
+    set_features_z990();
+  } else if (!strcmp(march, "z9")) {
+    set_features_z9();
+  } else if (!strcmp(march, "z10")) {
+    set_features_z10();
+  } else if (!strcmp(march, "z196")) {
+    set_features_z196();
+  } else if (!strcmp(march, "ec12")) {
+    set_features_ec12();
+  } else if (!strcmp(march, "z13")) {
+    set_features_z13();
+  } else if (!strcmp(scratch, "ztest")) {
+    assert(!has_TestFeaturesImpl(), "possible facility list flag conflict");
+    if (strlen(march) > hdr_len) {
+      // "ztest<n>": decode the decimal digits after the prefix into a
+      // bit mask (0..15) selecting individual test features.
+      int itest = 0;
+      if ((strlen(march)-hdr_len) >= buf_len) {
+        parse_error = true;  // Suffix would overflow the scratch buffer.
+      }
+      if (!parse_error) {
+        memcpy(scratch, &march[hdr_len], strlen(march)-hdr_len);
+        scratch[strlen(march)-hdr_len] = '\00';
+        for (size_t i = 0; !parse_error && (i < strlen(scratch)); i++) {
+          int digit   = scratch[i] - '0';
+          itest       = itest*10 + digit;
+          parse_error = (digit < 0) || (digit > 9) || (itest > 15);
+        }
+      }
+      if (!parse_error) {
+        force_print = true;
+        if (itest & 0x01) { set_has_TestFeature1Impl(); }
+        if (itest & 0x02) { set_has_TestFeature2Impl(); }
+        if (itest & 0x04) { set_has_TestFeature4Impl(); }
+        if (itest & 0x08) { set_has_TestFeature8Impl(); }
+      }
+    } else {
+      // Plain "ztest" switches all test features on.
+      force_print = true;
+      set_has_TestFeature1Impl();
+      set_has_TestFeature2Impl();
+      set_has_TestFeature4Impl();
+      set_has_TestFeature8Impl();
+    }
+  } else {
+    parse_error = true;  // Not a recognized architecture name.
+  }
+
+  if (!parse_error) {
+    set_features_string();
+    if (force_print || PrintAssembly) {
+      print_features_internal("CPU Version as set by cmdline option:", force_print);
+    }
+  } else {
+    tty->print_cr("***Warning: Unsupported ProcessorArchitecture: %s, internal settings left undisturbed.", march);
+  }
+}
+
+// Entry point of the generated CPU feature detection stub
+// (signature: buffer, buffer length, function code).
+static long (*getFeatures)(unsigned long*, int, int) = NULL;
+
+// Register the stub's entry address. Only the first registration wins;
+// later calls are ignored.
+void VM_Version::set_getFeatures(address entryPoint) {
+  if (getFeatures == NULL) {
+    getFeatures = (long(*)(unsigned long*, int, int))entryPoint;
+  }
+}
+
+// Run the feature detection stub with the given work buffer, buffer
+// length and function code, and return the stub's result.
+// The _is_determine_features_test_running flag brackets the call;
+// presumably it tells the signal handler that illegal-instruction faults
+// from the probe are expected - confirm against the platform signal
+// handling code.
+long VM_Version::call_getFeatures(unsigned long* buffer, int buflen, int functionCode) {
+  VM_Version::_is_determine_features_test_running = true;
+  long functionResult = (*getFeatures)(buffer, buflen, functionCode);
+  VM_Version::_is_determine_features_test_running = false;
+  return functionResult;
+}
+
+// Helper function for "extract cache attribute" instruction.
+// Packs the three indications into one ECAG function code:
+// bits [4..7] attribute indication, bits [1..4] level indication,
+// bit [0] type indication. (OR, not addition, is used deliberately.)
+int VM_Version::calculate_ECAG_functionCode(unsigned int attributeIndication,
+                                            unsigned int levelIndication,
+                                            unsigned int typeIndication) {
+  unsigned int functionCode = attributeIndication << 4;
+  functionCode |= levelIndication << 1;
+  functionCode |= typeIndication;
+  return functionCode;
+}
+
+void VM_Version::determine_features() {
+
+  const int      cbuf_size = _code_buffer_len;
+  const int      buf_len   = _features_buffer_len;
+
+  // Allocate code buffer space for the detection code.
+  ResourceMark    rm;
+  CodeBuffer      cbuf("determine CPU features", cbuf_size, 0);
+  MacroAssembler* a = new MacroAssembler(&cbuf);
+
+  // Emit code.
+  set_getFeatures(a->pc());
+  address   code = a->pc();
+
+  // Try STFLE. Possible INVOP will cause defaults to be used.
+  Label    getFEATURES;
+  Label    getCPUFEATURES;                   // fcode = -1 (cache)
+  Label    getCIPHERFEATURES;                // fcode = -2 (cipher)
+  Label    getMSGDIGESTFEATURES;             // fcode = -3 (SHA)
+  Label    checkLongDispFast;
+  Label    noLongDisp;
+  Label    posDisp, negDisp;
+  Label    errRTN;
+  a->z_ltgfr(Z_R0, Z_ARG2);                  // Buf len to r0 and test.
+  a->z_brl(getFEATURES);                     // negative -> Get machine features.
+  a->z_brz(checkLongDispFast);               // zero -> Check for high-speed Long Displacement Facility.
+  a->z_aghi(Z_R0, -1);
+  a->z_stfle(0, Z_ARG1);
+  a->z_lg(Z_R1, 0, Z_ARG1);                  // Get first DW of facility list.
+  a->z_lgr(Z_RET, Z_R0);                     // Calculate rtn value for success.
+  a->z_la(Z_RET, 1, Z_RET);
+  a->z_brnz(errRTN);                         // Instr failed if non-zero CC.
+  a->z_ltgr(Z_R1, Z_R1);                     // Instr failed if first DW == 0.
+  a->z_bcr(Assembler::bcondNotZero, Z_R14);  // Successful return.
+
+  a->bind(errRTN);
+  a->z_lngr(Z_RET, Z_RET);
+  a->z_ltgr(Z_R1, Z_R1);
+  a->z_bcr(Assembler::bcondNotZero, Z_R14);  // Return "buffer too small".
+  a->z_xgr(Z_RET, Z_RET);
+  a->z_br(Z_R14);                            // Return "operation aborted".
+
+  a->bind(getFEATURES);
+  a->z_cghi(Z_R0, -1);                       // -1: Extract CPU attributes, currently: cache layout only.
+  a->z_bre(getCPUFEATURES);
+  a->z_cghi(Z_R0, -2);                       // -2: Extract detailed crypto capabilities (cipher instructions).
+  a->z_bre(getCIPHERFEATURES);
+  a->z_cghi(Z_R0, -3);                       // -3: Extract detailed crypto capabilities (msg digest instructions).
+  a->z_bre(getMSGDIGESTFEATURES);
+
+  a->z_xgr(Z_RET, Z_RET);                    // Not a valid function code.
+  a->z_br(Z_R14);                            // Return "operation aborted".
+
+  // Try KIMD/KLMD query function to get details about msg digest (secure hash, SHA) instructions.
+  a->bind(getMSGDIGESTFEATURES);
+  a->z_lghi(Z_R0,(int)MsgDigest::_Query);    // query function code
+  a->z_lgr(Z_R1,Z_R2);                       // param block addr, 2*16 bytes min size
+  a->z_kimd(Z_R2,Z_R2);                      // Get available KIMD functions (bit pattern in param blk).
+  a->z_la(Z_R1,16,Z_R1);                     // next param block addr
+  a->z_klmd(Z_R2,Z_R2);                      // Get available KLMD functions (bit pattern in param blk).
+  a->z_lghi(Z_RET,4);
+  a->z_br(Z_R14);
+
+  // Try KM/KMC query function to get details about crypto instructions.
+  a->bind(getCIPHERFEATURES);
+  a->z_lghi(Z_R0,(int)Cipher::_Query);       // query function code
+  a->z_lgr(Z_R1,Z_R2);                       // param block addr, 2*16 bytes min size (KIMD/KLMD output)
+  a->z_km(Z_R2,Z_R2);                        // get available KM functions
+  a->z_la(Z_R1,16,Z_R1);                     // next param block addr
+  a->z_kmc(Z_R2,Z_R2);                       // get available KMC functions
+  a->z_lghi(Z_RET,4);
+  a->z_br(Z_R14);
+
+  // Use EXTRACT CPU ATTRIBUTE instruction to get information about cache layout.
+  a->bind(getCPUFEATURES);
+  a->z_xgr(Z_R0,Z_R0);                       // as recommended in instruction documentation
+  a->z_ecag(Z_RET,Z_R0,0,Z_ARG3);            // Extract information as requested by Z_ARG1 contents.
+  a->z_br(Z_R14);
+
+  // Check the performance of the Long Displacement Facility, i.e. find out if we are running on z900 or newer.
+  a->bind(checkLongDispFast);
+  a->z_llill(Z_R0, 0xffff);                  // preset #iterations
+  a->z_larl(Z_R1, posDisp);
+  a->z_stck(0, Z_ARG1);                      // Get begin timestamp.
+
+  a->bind(posDisp);                          // Positive disp loop.
+  a->z_lg(Z_ARG2, 0, Z_ARG1);
+  a->z_bctgr(Z_R0, Z_R1);
+
+  a->z_stck(0, Z_ARG1);                      // Get end timestamp.
+  a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1);          // Calculate elapsed time.
+  a->z_lcgr(Z_ARG2, Z_ARG2);
+  a->z_srlg(Z_ARG2, Z_ARG2, 12);             // LSB: now microseconds
+  a->z_stg(Z_ARG2, 8, Z_ARG1);               // Store difference in buffer[1].
+
+  a->z_llill(Z_R0, 0xffff);                  // preset #iterations
+  a->z_larl(Z_R1, negDisp);
+  a->z_xgr(Z_ARG2, Z_ARG2);                  // Clear to detect absence of LongDisp facility.
+  a->z_stck(0, Z_ARG1);                      // Get begin timestamp.
+  a->z_la(Z_ARG1, 8, Z_ARG1);
+
+  a->bind(negDisp);                          // Negative disp loop.
+  a->z_lg(Z_ARG2, -8, Z_ARG1);
+  a->z_bctgr(Z_R0, Z_R1);
+
+  a->z_aghi(Z_ARG1, -8);
+  a->z_stck(0, Z_ARG1);                      // Get end timestamp.
+  a->z_ltgr(Z_ARG2, Z_ARG2);                 // Check for absence of LongDisp facility.
+  a->z_brz(noLongDisp);
+  a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1);          // Calc elapsed time.
+  a->z_lcgr(Z_ARG2, Z_ARG2);
+  a->z_srlg(Z_ARG2, Z_ARG2, 12);             // LSB: now microseconds
+  a->z_stg(Z_ARG2, 0, Z_ARG1);               // store difference in buffer[0]
+
+  a->z_llill(Z_RET,0xffff);
+  a->z_br(Z_R14);
+
+  a->bind(noLongDisp);
+  a->z_lghi(Z_RET,-1);
+  a->z_br(Z_R14);
+
+  address code_end = a->pc();
+  a->flush();
+
+  // Print the detection code.
+  bool printVerbose = Verbose || PrintAssembly || PrintStubCode;
+  if (printVerbose) {
+    ttyLocker ttyl;
+    tty->print_cr("Decoding CPU feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
+    tty->print_cr("Stub length is %ld bytes, codebuffer reserves %d bytes, %ld bytes spare.",
+                  code_end-code, cbuf_size, cbuf_size-(code_end-code));
+
+    // Use existing decode function. This enables the [Code] format which is needed to DecodeErrorFile.
+    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
+  }
+
+  // Prepare for detection code execution and clear work buffer.
+  _nfeatures        = 0;
+  _ncipher_features = 0;
+  unsigned long  buffer[buf_len];
+
+  for (int i = 0; i < buf_len; i++) {
+    buffer[i] = 0L;
+  }
+
+  // execute code
+  // Illegal instructions will be replaced by 0 in signal handler.
+  // In case of problems, call_getFeatures will return a not-positive result.
+  long used_len = call_getFeatures(buffer, buf_len, 0);
+
+  bool ok;
+  if (used_len == 1) {
+    ok = true;
+  } else if (used_len > 1) {
+    unsigned int used_lenU = (unsigned int)used_len;
+    ok = true;
+    for (unsigned int i = 1; i < used_lenU; i++) {
+      ok = ok && (buffer[i] == 0L);
+    }
+    if (printVerbose && !ok) {
+      bool compact = false;
+      tty->print_cr("Note: feature list has %d (i.e. more than one) array elements.", used_lenU);
+      if (compact) {
+        tty->print("non-zero feature list elements:");
+        for (unsigned int i = 0; i < used_lenU; i++) {
+          tty->print("  [%d]: 0x%16.16lx", i, buffer[i]);
+        }
+        tty->cr();
+      } else {
+        for (unsigned int i = 0; i < used_lenU; i++) {
+          tty->print_cr("non-zero feature list[%d]: 0x%16.16lx", i, buffer[i]);
+        }
+      }
+
+      if (compact) {
+        tty->print_cr("Active features (compact view):");
+        for (unsigned int k = 0; k < used_lenU; k++) {
+          tty->print_cr("  buffer[%d]:", k);
+          for (unsigned int j = k*sizeof(long); j < (k+1)*sizeof(long); j++) {
+            bool line = false;
+            for (unsigned int i = j*8; i < (j+1)*8; i++) {
+              bool bit  = test_feature_bit(buffer, i, used_lenU*sizeof(long)*8);
+              if (bit) {
+                if (!line) {
+                  tty->print("    byte[%d]:", j);
+                  line = true;
+                }
+                tty->print("  [%3.3d]", i);
+              }
+            }
+            if (line) {
+              tty->cr();
+            }
+          }
+        }
+      } else {
+        tty->print_cr("Active features (full view):");
+        for (unsigned int k = 0; k < used_lenU; k++) {
+          tty->print_cr("  buffer[%d]:", k);
+          for (unsigned int j = k*sizeof(long); j < (k+1)*sizeof(long); j++) {
+            tty->print("    byte[%d]:", j);
+            for (unsigned int i = j*8; i < (j+1)*8; i++) {
+              bool bit  = test_feature_bit(buffer, i, used_lenU*sizeof(long)*8);
+              if (bit) {
+                tty->print("  [%3.3d]", i);
+              } else {
+                tty->print("       ");
+              }
+            }
+            tty->cr();
+          }
+        }
+      }
+    }
+    ok = true;
+  } else {  // No features retrieved if we reach here. Buffer too short or instr not available.
+    if (used_len < 0) {
+      ok = false;
+      if (printVerbose) {
+        tty->print_cr("feature list buffer[%d] too short, required: buffer[%ld]", buf_len, -used_len);
+      }
+    } else {
+      if (printVerbose) {
+        tty->print_cr("feature list could not be retrieved. Running on z900 or z990? Trying to find out...");
+      }
+      used_len = call_getFeatures(buffer, 0, 0);       // Must provide at least two DW buffer elements!!!!
+
+      ok = used_len > 0;
+      if (ok) {
+        if (buffer[1]*10 < buffer[0]) {
+          set_features_z900();
+        } else {
+          set_features_z990();
+        }
+
+        if (printVerbose) {
+          tty->print_cr("Note: high-speed long displacement test used %ld iterations.", used_len);
+          tty->print_cr("      Positive displacement loads took %8.8lu microseconds.", buffer[1]);
+          tty->print_cr("      Negative displacement loads took %8.8lu microseconds.", buffer[0]);
+          if (has_long_displacement_fast()) {
+            tty->print_cr("      assuming high-speed long displacement IS     available.");
+          } else {
+            tty->print_cr("      assuming high-speed long displacement is NOT available.");
+          }
+        }
+      } else {
+        if (printVerbose) {
+          tty->print_cr("Note: high-speed long displacement test was not successful.");
+          tty->print_cr("      assuming long displacement is NOT available.");
+        }
+      }
+      return; // Do not copy buffer to _features, no test for cipher features.
+    }
+  }
+
+  if (ok) {
+    // Fill features buffer.
+    // Clear work buffer.
+    for (int i = 0; i < buf_len; i++) {
+      _features[i]           = buffer[i];
+      _cipher_features[i]    = 0;
+      _msgdigest_features[i] = 0;
+      buffer[i]              = 0L;
+    }
+    _nfeatures = used_len;
+  } else {
+    for (int i = 0; i < buf_len; i++) {
+      _features[i]           = 0;
+      _cipher_features[i]    = 0;
+      _msgdigest_features[i] = 0;
+      buffer[i]              = 0L;
+    }
+    _nfeatures = 0;
+  }
+
+  // Extract Crypto Facility details.
+  if (has_Crypto()) {
+    // Get cipher features.
+    used_len = call_getFeatures(buffer, -2, 0);
+    for (int i = 0; i < buf_len; i++) {
+      _cipher_features[i] = buffer[i];
+    }
+    _ncipher_features = used_len;
+
+    // Get msg digest features.
+    used_len = call_getFeatures(buffer, -3, 0);
+    for (int i = 0; i < buf_len; i++) {
+      _msgdigest_features[i] = buffer[i];
+    }
+    _nmsgdigest_features = used_len;
+  }
+
+  static int   levelProperties[_max_cache_levels];     // All property indications per level.
+  static int   levelScope[_max_cache_levels];          // private/shared
+  static const char* levelScopeText[4] = {"No cache   ",
+                                          "CPU private",
+                                          "shared     ",
+                                          "reserved   "};
+
+  static int   levelType[_max_cache_levels];           // D/I/mixed
+  static const char* levelTypeText[4]  = {"separate D and I caches",
+                                          "I cache only           ",
+                                          "D-cache only           ",
+                                          "combined D/I cache     "};
+
+  static unsigned int levelReserved[_max_cache_levels];    // reserved property bits
+  static unsigned int levelLineSize[_max_cache_levels];
+  static unsigned int levelTotalSize[_max_cache_levels];
+  static unsigned int levelAssociativity[_max_cache_levels];
+
+
+  // Extract Cache Layout details.
+  if (has_ExtractCPUAttributes() && printVerbose) { // For information only, as of now.
+    bool         lineSize_mismatch;
+    bool         print_something;
+    long         functionResult;
+    unsigned int attributeIndication = 0; // 0..15
+    unsigned int levelIndication     = 0; // 0..8
+    unsigned int typeIndication      = 0; // 0..1 (D-Cache, I-Cache)
+    int          functionCode        = calculate_ECAG_functionCode(attributeIndication, levelIndication, typeIndication);
+
+    // Get cache topology.
+    functionResult = call_getFeatures(buffer, -1, functionCode);
+
+    for (unsigned int i = 0; i < _max_cache_levels; i++) {
+      if (functionResult > 0) {
+        int shiftVal          = 8*(_max_cache_levels-(i+1));
+        levelProperties[i]    = (functionResult & (0xffUL<<shiftVal)) >> shiftVal;
+        levelReserved[i]      = (levelProperties[i] & 0xf0) >> 4;
+        levelScope[i]         = (levelProperties[i] & 0x0c) >> 2;
+        levelType[i]          = (levelProperties[i] & 0x03);
+      } else {
+        levelProperties[i]    = 0;
+        levelReserved[i]      = 0;
+        levelScope[i]         = 0;
+        levelType[i]          = 0;
+      }
+      levelLineSize[i]      = 0;
+      levelTotalSize[i]     = 0;
+      levelAssociativity[i] = 0;
+    }
+
+    tty->cr();
+    tty->print_cr("------------------------------------");
+    tty->print_cr("---  Cache Topology Information  ---");
+    tty->print_cr("------------------------------------");
+    for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+      tty->print_cr("  Cache Level %d: <scope>  %s | <type>  %s",
+                    i+1, levelScopeText[levelScope[i]], levelTypeText[levelType[i]]);
+    }
+
+    // Get D-cache details per level.
+    _Dcache_lineSize   = 0;
+    lineSize_mismatch  = false;
+    print_something    = false;
+    typeIndication     = 0; // 0..1 (D-Cache, I-Cache)
+    for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+      if ((levelType[i] == 0) || (levelType[i] == 2)) {
+        print_something     = true;
+
+        // Get cache line size of level i.
+        attributeIndication   = 1;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelLineSize[i]      = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        // Get cache total size of level i.
+        attributeIndication   = 2;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelTotalSize[i]     = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        // Get cache associativity of level i.
+        attributeIndication   = 3;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        _Dcache_lineSize      = _Dcache_lineSize == 0 ? levelLineSize[i] : _Dcache_lineSize;
+        lineSize_mismatch     = lineSize_mismatch || (_Dcache_lineSize != levelLineSize[i]);
+      } else {
+        levelLineSize[i]      = 0;
+      }
+    }
+
+    if (print_something) {
+      tty->cr();
+      tty->print_cr("------------------------------------");
+      tty->print_cr("---  D-Cache Detail Information  ---");
+      tty->print_cr("------------------------------------");
+      if (lineSize_mismatch) {
+        tty->print_cr("WARNING: D-Cache line size mismatch!");
+      }
+      for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+        if (levelLineSize[i] > 0) {
+          tty->print_cr("  D-Cache Level %d: line size = %4d,  total size = %6dKB,  associativity = %2d",
+                        i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]);
+        }
+      }
+    }
+
+    // Get I-cache details per level.
+    _Icache_lineSize   = 0;
+    lineSize_mismatch  = false;
+    print_something    = false;
+    typeIndication     = 1; // 0..1 (D-Cache, I-Cache)
+    for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+      if ((levelType[i] == 0) || (levelType[i] == 1)) {
+        print_something     = true;
+
+        // Get cache line size of level i.
+        attributeIndication   = 1;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelLineSize[i]      = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        // Get cache total size of level i.
+        attributeIndication   = 2;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelTotalSize[i]     = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        // Get cache associativity of level i.
+        attributeIndication   = 3;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        _Icache_lineSize      = _Icache_lineSize == 0 ? levelLineSize[i] : _Icache_lineSize;
+        lineSize_mismatch     = lineSize_mismatch || (_Icache_lineSize != levelLineSize[i]);
+      } else {
+        levelLineSize[i]      = 0;
+      }
+    }
+
+    if (print_something) {
+      tty->cr();
+      tty->print_cr("------------------------------------");
+      tty->print_cr("---  I-Cache Detail Information  ---");
+      tty->print_cr("------------------------------------");
+      if (lineSize_mismatch) {
+        tty->print_cr("WARNING: I-Cache line size mismatch!");
+      }
+      for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+        if (levelLineSize[i] > 0) {
+          tty->print_cr("  I-Cache Level %d: line size = %4d,  total size = %6dKB,  associativity = %2d",
+                        i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]);
+        }
+      }
+    }
+
+    // Get D/I-cache details per level.
+    lineSize_mismatch  = false;
+    print_something    = false;
+    typeIndication     = 0; // 0..1 (D-Cache, I-Cache)
+    for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+      if (levelType[i] == 3) {
+        print_something     = true;
+
+        // Get cache line size of level i.
+        attributeIndication   = 1;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelLineSize[i]      = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        // Get cache total size of level i.
+        attributeIndication   = 2;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelTotalSize[i]     = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        // Get cache associativity of level i.
+        attributeIndication   = 3;
+        functionCode          = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+        levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+        _Dcache_lineSize      = _Dcache_lineSize == 0 ? levelLineSize[i] : _Dcache_lineSize;
+        _Icache_lineSize      = _Icache_lineSize == 0 ? levelLineSize[i] : _Icache_lineSize;
+        lineSize_mismatch     = lineSize_mismatch || (_Dcache_lineSize != levelLineSize[i])
+                                                  || (_Icache_lineSize != levelLineSize[i]);
+      } else {
+        levelLineSize[i]      = 0;
+      }
+    }
+
+    if (print_something) {
+      tty->cr();
+      tty->print_cr("--------------------------------------");
+      tty->print_cr("---  D/I-Cache Detail Information  ---");
+      tty->print_cr("--------------------------------------");
+      if (lineSize_mismatch) {
+        tty->print_cr("WARNING: D/I-Cache line size mismatch!");
+      }
+      for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+        if (levelLineSize[i] > 0) {
+          tty->print_cr("  D/I-Cache Level %d: line size = %4d,  total size = %6dKB,  associativity = %2d",
+                        i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]);
+        }
+      }
+    }
+    tty->cr();
+  }
+  return;
+}
+
+unsigned long VM_Version::z_SIGILL() {   // Deliberately raises SIGILL: branches into an all-zero buffer (a zero opcode is invalid on z/Architecture).
+  unsigned long   ZeroBuffer = 0;        // Zero-filled "instruction" buffer we branch into; also the (unreachable in the normal case) return value.
+  unsigned long   work;                  // Scratch register holding the buffer address.
+  asm(
+    "     LA      %[work],%[buffer]  \n\t"   // Load address of buffer.
+    "     LARL    14,+6              \n\t"   // Load address of faulting instruction.
+    "     BCR     15,%[work]         \n\t"   // Branch into buffer, execute whatever is in there.
+    : [buffer]  "+Q"  (ZeroBuffer)   /* outputs   */
+    , [work]   "=&a"  (work)         /* outputs   */
+    :                                /* inputs    */
+    : "cc"                           /* clobbered */
+ );
+  return ZeroBuffer;                     // Only reached if the signal handler resumes execution past the fault.
+}
+
+unsigned long VM_Version::z_SIGSEGV() {  // Deliberately raises SIGSEGV: stores through address zero.
+  unsigned long   ZeroBuffer = 0;        // Supplies the zero target address; also serves as the function result.
+  unsigned long   work;                  // Scratch register holding the (zero) address.
+  asm(
+    "     LG      %[work],%[buffer]  \n\t"   // Load zero address.
+    "     STG     %[work],0(,%[work])\n\t"   // Store to address zero.
+    : [buffer]  "+Q"  (ZeroBuffer)   /* outputs   */
+    , [work]   "=&a"  (work)         /* outputs   */
+    :                                /* inputs    */
+    : "cc"                           /* clobbered */
+ );
+  return ZeroBuffer;                     // Only reached if the signal handler resumes execution past the fault.
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vm_version_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VM_VERSION_S390_HPP
+#define CPU_S390_VM_VM_VERSION_S390_HPP
+
+
+#include "runtime/globals_extension.hpp"
+#include "runtime/vm_version.hpp"
+
+class VM_Version: public Abstract_VM_Version {
+
+ protected:
+// The following list contains the (approximate) announcement/availability
+// dates of the many System z generations in existence as of now which
+// implement the z/Architecture.
+//   z900: 2000-10
+//   z990: 2003-06
+//   z9:   2005-09
+//   z10:  2007-04
+//   z10:  2008-02
+//   z196: 2010-08
+//   ec12: 2012-09
+//   z13:  2015-03
+//
+// z/Architecture is the name of the 64-bit extension of the 31-bit s390
+// architecture.
+//
+// ----------------------------------------------
+// --- FeatureBitString Bits   0.. 63 (DW[0]) ---
+// ----------------------------------------------
+//                                           11222334445566
+//                                        04826048260482604
+#define  StoreFacilityListExtendedMask  0x0100000000000000UL  // z9
+#define  ETF2Mask                       0x0000800000000000UL  // z900
+#define  CryptoFacilityMask             0x0000400000000000UL  // z990
+#define  LongDispFacilityMask           0x0000200000000000UL  // z900 with microcode update
+#define  LongDispFacilityHighPerfMask   0x0000300000000000UL  // z990
+#define  HFPMultiplyAndAddMask          0x0000080000000000UL  // z990
+#define  ExtImmedFacilityMask           0x0000040000000000UL  // z9
+#define  ETF3Mask                       0x0000020000000000UL  // z990/z9 (?)
+#define  HFPUnnormalizedMask            0x0000010000000000UL  // z9
+#define  ETF2EnhancementMask            0x0000008000000000UL  // z9
+#define  StoreClockFastMask             0x0000004000000000UL  // z9
+#define  ParsingEnhancementsMask        0x0000002000000000UL  // z10(?)
+#define  ETF3EnhancementMask            0x0000000200000000UL  // z9
+#define  ExtractCPUTimeMask             0x0000000100000000UL  // z10
+#define  CompareSwapStoreMask           0x00000000c0000000UL  // z10
+#define  GnrlInstrExtFacilityMask       0x0000000020000000UL  // z10
+#define  ExecuteExtensionsMask          0x0000000010000000UL  // z10
+#define  FPExtensionsMask               0x0000000004000000UL  // z196
+#define  FPSupportEnhancementsMask      0x0000000000400000UL  // z10
+#define  DecimalFloatingPointMask       0x0000000000300000UL  // z10
+// z196 begin
+#define  DistinctOpndsMask              0x0000000000040000UL  // z196
+#define  FastBCRSerializationMask       DistinctOpndsMask
+#define  HighWordMask                   DistinctOpndsMask
+#define  LoadStoreConditionalMask       DistinctOpndsMask
+#define  PopulationCountMask            DistinctOpndsMask
+#define  InterlockedAccess1Mask         DistinctOpndsMask
+// z196 end
+// EC12 begin
+#define  DFPZonedConversionMask         0x0000000000008000UL  // ec12
+#define  MiscInstrExtMask               0x0000000000004000UL  // ec12
+#define  ExecutionHintMask              MiscInstrExtMask
+#define  LoadAndTrapMask                MiscInstrExtMask
+#define  ProcessorAssistMask            MiscInstrExtMask
+#define  ConstrainedTxExecutionMask     0x0000000000002000UL  // ec12
+#define  InterlockedAccess2Mask         0x0000000000000800UL  // ec12
+// EC12 end
+// z13 begin
+#define  LoadStoreConditional2Mask      0x0000000000000400UL  // z13
+#define  CryptoExtension5Mask           0x0000000000000040UL  // z13
+// z13 end
+// Feature-DW[0] starts to fill up. Use of these masks is risky.
+#define  TestFeature1ImplMask           0x0000000000000001UL
+#define  TestFeature2ImplMask           0x0000000000000002UL
+#define  TestFeature4ImplMask           0x0000000000000004UL
+#define  TestFeature8ImplMask           0x0000000000000008UL
+// ----------------------------------------------
+// --- FeatureBitString Bits  64..127 (DW[1]) ---
+// ----------------------------------------------
+//                                                 11111111
+//                                        66778889900011222
+//                                        48260482604826048
+#define  TransactionalExecutionMask     0x0040000000000000UL  // ec12
+#define  CryptoExtension3Mask           0x0008000000000000UL  // z196
+#define  CryptoExtension4Mask           0x0004000000000000UL  // z196
+#define  DFPPackedConversionMask        0x0000800000000000UL  // z13
+// ----------------------------------------------
+// --- FeatureBitString Bits 128..192 (DW[2]) ---
+// ----------------------------------------------
+//                                        11111111111111111
+//                                        23344455666778889
+//                                        82604826048260482
+#define  VectorFacilityMask             0x4000000000000000UL  // z13, not avail in VM guest mode!
+
+  enum {
+    _max_cache_levels = 8,    // As limited by ECAG instruction.
+    _features_buffer_len = 4, // in DW
+    _code_buffer_len = 2*256  // For feature detection code.
+  };
+  static unsigned long _features[_features_buffer_len];
+  static unsigned long _cipher_features[_features_buffer_len];
+  static unsigned long _msgdigest_features[_features_buffer_len];
+  static unsigned int  _nfeatures;
+  static unsigned int  _ncipher_features;
+  static unsigned int  _nmsgdigest_features;
+  static unsigned int  _Dcache_lineSize;
+  static unsigned int  _Icache_lineSize;
+  static bool          _is_determine_features_test_running;
+
+  static bool test_feature_bit(unsigned long* featureBuffer, int featureNum, unsigned int bufLen);
+  static void set_features_string();
+  static void print_features_internal(const char* text, bool print_anyway=false);
+  static void determine_features();
+  static long call_getFeatures(unsigned long* buffer, int buflen, int functionCode);
+  static void set_getFeatures(address entryPoint);
+  static int  calculate_ECAG_functionCode(unsigned int attributeIndication,
+                                          unsigned int levelIndication,
+                                          unsigned int typeIndication);
+
+  // Setting features via march=z900|z990|z9|z10|z196|ec12|z13|ztest commandline option.
+  static void reset_features(bool reset);
+  static void set_features_z900(bool reset = true);
+  static void set_features_z990(bool reset = true);
+  static void set_features_z9(bool reset = true);
+  static void set_features_z10(bool reset = true);
+  static void set_features_z196(bool reset = true);
+  static void set_features_ec12(bool reset = true);
+  static void set_features_z13(bool reset = true);
+  static void set_features_from(const char* march);
+
+  // Get the CPU type from feature bit settings.
+  static bool is_z900() { return has_long_displacement()      && !has_long_displacement_fast(); }
+  static bool is_z990() { return has_long_displacement_fast() && !has_extended_immediate();  }
+  static bool is_z9()   { return has_extended_immediate()     && !has_GnrlInstrExtensions(); }
+  static bool is_z10()  { return has_GnrlInstrExtensions()    && !has_DistinctOpnds(); }
+  static bool is_z196() { return has_DistinctOpnds()          && !has_MiscInstrExt(); }
+  static bool is_ec12() { return has_MiscInstrExt()           && !has_CryptoExt5(); }
+  static bool is_z13()  { return has_CryptoExt5();}
+
+  // Get information about cache line sizes.
+  // As of now and the foreseeable future, line size of all levels will be the same and 256.
+  static unsigned int Dcache_lineSize(unsigned int level = 0) { return _Dcache_lineSize; }
+  static unsigned int Icache_lineSize(unsigned int level = 0) { return _Icache_lineSize; }
+
+ public:
+
+  // Need to use nested class with unscoped enum.
+  // C++11 declaration "enum class Cipher { ... }" is not supported.
+  class CipherMode {
+    public:
+      enum {
+        cipher   = 0x00,
+        decipher = 0x80
+      };
+  };
+  class Cipher {
+   public:
+    enum { // KM only!!! KMC uses different parmBlk sizes.
+      _Query              =   0,
+      _DEA                =   1,
+      _TDEA128            =   2,
+      _TDEA192            =   3,
+      _EncryptedDEA       =   9,
+      _EncryptedDEA128    =  10,
+      _EncryptedDEA192    =  11,
+      _AES128             =  18,
+      _AES192             =  19,
+      _AES256             =  20,
+      _EnccryptedAES128   =  26,
+      _EnccryptedAES192   =  27,
+      _EnccryptedAES256   =  28,
+      _XTSAES128          =  50,
+      _XTSAES256          =  52,
+      _EncryptedXTSAES128 =  58,
+      _EncryptedXTSAES256 =  60,
+      _PRNG               =  67,
+      _featureBits        = 128,
+
+      // Parameter block sizes (in bytes) for KM instruction.
+      _Query_parmBlk              =  16,
+      _DEA_parmBlk                =   8,
+      _TDEA128_parmBlk            =  16,
+      _TDEA192_parmBlk            =  24,
+      _EncryptedDEA_parmBlk       =  32,
+      _EncryptedDEA128_parmBlk    =  40,
+      _EncryptedDEA192_parmBlk    =  48,
+      _AES128_parmBlk             =  16,
+      _AES192_parmBlk             =  24,
+      _AES256_parmBlk             =  32,
+      _EnccryptedAES128_parmBlk   =  48,
+      _EnccryptedAES192_parmBlk   =  56,
+      _EnccryptedAES256_parmBlk   =  64,
+      _XTSAES128_parmBlk          =  32,
+      _XTSAES256_parmBlk          =  48,
+      _EncryptedXTSAES128_parmBlk =  64,
+      _EncryptedXTSAES256_parmBlk =  80,
+
+      // Parameter block sizes (in bytes) for KMC instruction.
+      _Query_parmBlk_C              =  16,
+      _DEA_parmBlk_C                =  16,
+      _TDEA128_parmBlk_C            =  24,
+      _TDEA192_parmBlk_C            =  32,
+      _EncryptedDEA_parmBlk_C       =  40,
+      _EncryptedDEA128_parmBlk_C    =  48,
+      _EncryptedDEA192_parmBlk_C    =  56,
+      _AES128_parmBlk_C             =  32,
+      _AES192_parmBlk_C             =  40,
+      _AES256_parmBlk_C             =  48,
+      _EnccryptedAES128_parmBlk_C   =  64,
+      _EnccryptedAES192_parmBlk_C   =  72,
+      _EnccryptedAES256_parmBlk_C   =  80,
+      _XTSAES128_parmBlk_C          =  32,
+      _XTSAES256_parmBlk_C          =  48,
+      _EncryptedXTSAES128_parmBlk_C =  64,
+      _EncryptedXTSAES256_parmBlk_C =  80,
+      _PRNG_parmBlk_C               =  32,
+
+      // Data block sizes (in bytes).
+      _Query_dataBlk              =   0,
+      _DEA_dataBlk                =   8,
+      _TDEA128_dataBlk            =   8,
+      _TDEA192_dataBlk            =   8,
+      _EncryptedDEA_dataBlk       =   8,
+      _EncryptedDEA128_dataBlk    =   8,
+      _EncryptedDEA192_dataBlk    =   8,
+      _AES128_dataBlk             =  16,
+      _AES192_dataBlk             =  16,
+      _AES256_dataBlk             =  16,
+      _EnccryptedAES128_dataBlk   =  16,
+      _EnccryptedAES192_dataBlk   =  16,
+      _EnccryptedAES256_dataBlk   =  16,
+      _XTSAES128_dataBlk          =  16,
+      _XTSAES256_dataBlk          =  16,
+      _EncryptedXTSAES128_dataBlk =  16,
+      _EncryptedXTSAES256_dataBlk =  16,
+      _PRNG_dataBlk               =   8,
+    };
+  };
+  class MsgDigest {
+    public:
+      enum {
+        _Query            =   0,
+        _SHA1             =   1,
+        _SHA256           =   2,
+        _SHA512           =   3,
+        _GHASH            =  65,
+        _featureBits      = 128,
+
+        // Parameter block sizes (in bytes) for KIMD.
+        _Query_parmBlk_I  =  16,
+        _SHA1_parmBlk_I   =  20,
+        _SHA256_parmBlk_I =  32,
+        _SHA512_parmBlk_I =  64,
+        _GHASH_parmBlk_I  =  32,
+
+        // Parameter block sizes (in bytes) for KLMD.
+        _Query_parmBlk_L  =  16,
+        _SHA1_parmBlk_L   =  28,
+        _SHA256_parmBlk_L =  40,
+        _SHA512_parmBlk_L =  80,
+
+        // Data block sizes (in bytes).
+        _Query_dataBlk    =   0,
+        _SHA1_dataBlk     =  64,
+        _SHA256_dataBlk   =  64,
+        _SHA512_dataBlk   = 128,
+        _GHASH_dataBlk    =  16
+      };
+  };
+  class MsgAuthent {
+    public:
+      enum {
+        _Query              =   0,
+        _DEA                =   1,
+        _TDEA128            =   2,
+        _TDEA192            =   3,
+        _EncryptedDEA       =   9,
+        _EncryptedDEA128    =  10,
+        _EncryptedDEA192    =  11,
+        _AES128             =  18,
+        _AES192             =  19,
+        _AES256             =  20,
+        _EnccryptedAES128   =  26,
+        _EnccryptedAES192   =  27,
+        _EnccryptedAES256   =  28,
+        _featureBits        = 128,
+
+        _Query_parmBlk            =  16,
+        _DEA_parmBlk              =  16,
+        _TDEA128_parmBlk          =  24,
+        _TDEA192_parmBlk          =  32,
+        _EncryptedDEA_parmBlk     =  40,
+        _EncryptedDEA128_parmBlk  =  48,
+        _EncryptedDEA192_parmBlk  =  56,
+        _AES128_parmBlk           =  32,
+        _AES192_parmBlk           =  40,
+        _AES256_parmBlk           =  48,
+        _EnccryptedAES128_parmBlk =  64,
+        _EnccryptedAES192_parmBlk =  72,
+        _EnccryptedAES256_parmBlk =  80,
+
+        _Query_dataBlk            =   0,
+        _DEA_dataBlk              =   8,
+        _TDEA128_dataBlk          =   8,
+        _TDEA192_dataBlk          =   8,
+        _EncryptedDEA_dataBlk     =   8,
+        _EncryptedDEA128_dataBlk  =   8,
+        _EncryptedDEA192_dataBlk  =   8,
+        _AES128_dataBlk           =  16,
+        _AES192_dataBlk           =  16,
+        _AES256_dataBlk           =  16,
+        _EnccryptedAES128_dataBlk =  16,
+        _EnccryptedAES192_dataBlk =  16,
+        _EnccryptedAES256_dataBlk =  16
+      };
+  };
+
+  // Initialization
+  static void initialize();
+  static void print_features();
+  static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
+
+  // CPU feature query functions
+  static bool has_StoreFacilityListExtended() { return  (_features[0] & StoreFacilityListExtendedMask) == StoreFacilityListExtendedMask; }
+  static bool has_Crypto()                    { return  (_features[0] & CryptoFacilityMask)            == CryptoFacilityMask; }
+  static bool has_ETF2()                      { return  (_features[0] & ETF2Mask)                      == ETF2Mask; }
+  static bool has_ETF3()                      { return  (_features[0] & ETF3Mask)                      == ETF3Mask; }
+  static bool has_ETF2Enhancements()          { return  (_features[0] & ETF2EnhancementMask)           == ETF2EnhancementMask; }
+  static bool has_ETF3Enhancements()          { return  (_features[0] & ETF3EnhancementMask)           == ETF3EnhancementMask; }
+  static bool has_ParsingEnhancements()       { return  (_features[0] & ParsingEnhancementsMask)       == ParsingEnhancementsMask; }
+  static bool has_long_displacement()         { return  (_features[0] & LongDispFacilityMask)          == LongDispFacilityMask; }
+  static bool has_long_displacement_fast()    { return  (_features[0] & LongDispFacilityHighPerfMask)  == LongDispFacilityHighPerfMask; }
+  static bool has_extended_immediate()        { return  (_features[0] & ExtImmedFacilityMask)          == ExtImmedFacilityMask; }
+  static bool has_StoreClockFast()            { return  (_features[0] & StoreClockFastMask)            == StoreClockFastMask; }
+  static bool has_ExtractCPUtime()            { return  (_features[0] & ExtractCPUTimeMask)            == ExtractCPUTimeMask; }
+  static bool has_CompareSwapStore()          { return  (_features[0] & CompareSwapStoreMask)          == CompareSwapStoreMask; }
+
+  static bool has_HFPMultiplyAndAdd()         { return  (_features[0] & HFPMultiplyAndAddMask)         == HFPMultiplyAndAddMask; }
+  static bool has_HFPUnnormalized()           { return  (_features[0] & HFPUnnormalizedMask)           == HFPUnnormalizedMask; }
+
+  // Make sure we don't run on older ...
+  static bool has_GnrlInstrExtensions()       { guarantee((_features[0] & GnrlInstrExtFacilityMask)    == GnrlInstrExtFacilityMask, "We no more support older than z10."); return true; }
+  static bool has_CompareBranch()             { return  has_GnrlInstrExtensions() && is_z10(); } // Only z10 benefits from these.
+  static bool has_CompareTrap()               { return  has_GnrlInstrExtensions(); }
+  static bool has_RelativeLoadStore()         { return  has_GnrlInstrExtensions(); }
+  static bool has_MultiplySingleImm32()       { return  has_GnrlInstrExtensions(); }
+  static bool has_Prefetch()                  { return  has_GnrlInstrExtensions() && (AllocatePrefetchStyle > 0); }
+  static bool has_PrefetchRaw()               { return  has_GnrlInstrExtensions(); }
+  static bool has_MoveImmToMem()              { return  has_GnrlInstrExtensions(); }
+  static bool has_ExtractCPUAttributes()      { return  has_GnrlInstrExtensions(); }
+  static bool has_ExecuteExtensions()         { return  (_features[0] & ExecuteExtensionsMask)         == ExecuteExtensionsMask; }
+  // Memory-immediate arithmetic instructions. There is no performance penalty in using them.
+  // Moreover, these memory-immediate instructions are quasi-atomic (>99.99%) on z10
+  // and 100% atomic from z196 onwards, thanks to the specific operand serialization that comes new with z196.
+  static bool has_MemWithImmALUOps()          { return  has_GnrlInstrExtensions(); }
+  static bool has_AtomicMemWithImmALUOps()    { return   has_MemWithImmALUOps() && has_InterlockedAccessV1(); }
+  static bool has_FPExtensions()              { return  (_features[0] & FPExtensionsMask)              == FPExtensionsMask; }
+  static bool has_FPSupportEnhancements()     { return  (_features[0] & FPSupportEnhancementsMask)     == FPSupportEnhancementsMask; }
+  static bool has_DecimalFloatingPoint()      { return  (_features[0] & DecimalFloatingPointMask)      == DecimalFloatingPointMask; }
+  static bool has_InterlockedAccessV1()       { return  (_features[0] & InterlockedAccess1Mask)        == InterlockedAccess1Mask; }
+  static bool has_LoadAndALUAtomicV1()        { return  (_features[0] & InterlockedAccess1Mask)        == InterlockedAccess1Mask; }
+  static bool has_PopCount()                  { return  (_features[0] & PopulationCountMask)           == PopulationCountMask; }
+  static bool has_LoadStoreConditional()      { return  (_features[0] & LoadStoreConditionalMask)      == LoadStoreConditionalMask; }
+  static bool has_HighWordInstr()             { return  (_features[0] & HighWordMask)                  == HighWordMask; }
+  static bool has_FastSync()                  { return  (_features[0] & FastBCRSerializationMask)      == FastBCRSerializationMask; }
+  static bool has_DistinctOpnds()             { return  (_features[0] & DistinctOpndsMask)             == DistinctOpndsMask; }
+  static bool has_CryptoExt3()                { return  (_features[1] & CryptoExtension3Mask)          == CryptoExtension3Mask; }
+  static bool has_CryptoExt4()                { return  (_features[1] & CryptoExtension4Mask)          == CryptoExtension4Mask; }
+  static bool has_DFPZonedConversion()        { return  (_features[0] & DFPZonedConversionMask)        == DFPZonedConversionMask; }
+  static bool has_DFPPackedConversion()       { return  (_features[1] & DFPPackedConversionMask)       == DFPPackedConversionMask; }
+  static bool has_MiscInstrExt()              { return  (_features[0] & MiscInstrExtMask)              == MiscInstrExtMask; }
+  static bool has_ExecutionHint()             { return  (_features[0] & ExecutionHintMask)             == ExecutionHintMask; }
+  static bool has_LoadAndTrap()               { return  (_features[0] & LoadAndTrapMask)               == LoadAndTrapMask; }
+  static bool has_ProcessorAssist()           { return  (_features[0] & ProcessorAssistMask)           == ProcessorAssistMask; }
+  static bool has_InterlockedAccessV2()       { return  (_features[0] & InterlockedAccess2Mask)        == InterlockedAccess2Mask; }
+  // Alias for has_InterlockedAccessV2(): the interlocked-access facility 2 is what
+  // provides the atomic load-and-ALU operations.
+  static bool has_LoadAndALUAtomicV2()        { return  (_features[0] & InterlockedAccess2Mask)        == InterlockedAccess2Mask; }
+  // Transactional memory is only reported as available if both the transactional-
+  // execution facility (feature word 1) and the constrained transactional-execution
+  // facility (feature word 0) are present.
+  static bool has_TxMem()                     { return ((_features[1] & TransactionalExecutionMask)    == TransactionalExecutionMask) &&
+                                                       ((_features[0] & ConstrainedTxExecutionMask)    == ConstrainedTxExecutionMask); }
+  static bool has_CryptoExt5()                { return  (_features[0] & CryptoExtension5Mask)          == CryptoExtension5Mask; }
+  static bool has_LoadStoreConditional2()     { return  (_features[0] & LoadStoreConditional2Mask)     == LoadStoreConditional2Mask; }
+  static bool has_VectorFacility()            { return  (_features[2] & VectorFacilityMask)            == VectorFacilityMask; }
+
+  // Test-feature queries: exercise the feature detection machinery itself.
+  // Test/debugging only; see the matching set_has_TestFeature*Impl() setters below.
+  static bool has_TestFeatureImpl()           { return  (_features[0] & TestFeature1ImplMask)          == TestFeature1ImplMask; }
+  static bool has_TestFeature1Impl()          { return  (_features[0] & TestFeature1ImplMask)          == TestFeature1ImplMask; }
+  static bool has_TestFeature2Impl()          { return  (_features[0] & TestFeature2ImplMask)          == TestFeature2ImplMask; }
+  static bool has_TestFeature4Impl()          { return  (_features[0] & TestFeature4ImplMask)          == TestFeature4ImplMask; }
+  static bool has_TestFeature8Impl()          { return  (_features[0] & TestFeature8ImplMask)          == TestFeature8ImplMask; }
+  static bool has_TestFeaturesImpl()          { return  has_TestFeature1Impl() || has_TestFeature2Impl() || has_TestFeature4Impl() || has_TestFeature8Impl(); }
+
+  // Crypto features query functions.
+  // A cipher/digest is only usable if the base crypto facility is present AND the
+  // corresponding bit in the cipher/message-digest feature block is set.
+  static bool has_Crypto_AES128()             { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES128, Cipher::_featureBits); }
+  static bool has_Crypto_AES192()             { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES192, Cipher::_featureBits); }
+  static bool has_Crypto_AES256()             { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES256, Cipher::_featureBits); }
+  static bool has_Crypto_AES()                { return has_Crypto_AES128() || has_Crypto_AES192() || has_Crypto_AES256(); }
+
+  static bool has_Crypto_SHA1()               { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA1,   MsgDigest::_featureBits); }
+  static bool has_Crypto_SHA256()             { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA256, MsgDigest::_featureBits); }
+  static bool has_Crypto_SHA512()             { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA512, MsgDigest::_featureBits); }
+  static bool has_Crypto_GHASH()              { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_GHASH,  MsgDigest::_featureBits); }
+  static bool has_Crypto_SHA()                { return has_Crypto_SHA1() || has_Crypto_SHA256() || has_Crypto_SHA512() || has_Crypto_GHASH(); }
+
+  // CPU feature setters (to force model-specific behaviour). Test/debugging only.
+  // Note: several distinct setters OR in the same facility mask on purpose — one
+  // hardware facility (e.g. the general-instruction-extension facility) implies
+  // several of the named features.
+  static void set_has_TestFeature1Impl()          { _features[0] |= TestFeature1ImplMask; }
+  static void set_has_TestFeature2Impl()          { _features[0] |= TestFeature2ImplMask; }
+  static void set_has_TestFeature4Impl()          { _features[0] |= TestFeature4ImplMask; }
+  static void set_has_TestFeature8Impl()          { _features[0] |= TestFeature8ImplMask; }
+  static void set_has_DecimalFloatingPoint()      { _features[0] |= DecimalFloatingPointMask; }
+  static void set_has_FPSupportEnhancements()     { _features[0] |= FPSupportEnhancementsMask; }
+  static void set_has_ExecuteExtensions()         { _features[0] |= ExecuteExtensionsMask; }
+  static void set_has_MemWithImmALUOps()          { _features[0] |= GnrlInstrExtFacilityMask; }
+  static void set_has_MoveImmToMem()              { _features[0] |= GnrlInstrExtFacilityMask; }
+  static void set_has_Prefetch()                  { _features[0] |= GnrlInstrExtFacilityMask; }
+  static void set_has_MultiplySingleImm32()       { _features[0] |= GnrlInstrExtFacilityMask; }
+  static void set_has_CompareBranch()             { _features[0] |= GnrlInstrExtFacilityMask; }
+  static void set_has_CompareTrap()               { _features[0] |= GnrlInstrExtFacilityMask; }
+  static void set_has_RelativeLoadStore()         { _features[0] |= GnrlInstrExtFacilityMask; }
+  static void set_has_GnrlInstrExtensions()       { _features[0] |= GnrlInstrExtFacilityMask; }
+  static void set_has_CompareSwapStore()          { _features[0] |= CompareSwapStoreMask; }
+  static void set_has_HFPMultiplyAndAdd()         { _features[0] |= HFPMultiplyAndAddMask; }
+  static void set_has_HFPUnnormalized()           { _features[0] |= HFPUnnormalizedMask; }
+  static void set_has_ExtractCPUtime()            { _features[0] |= ExtractCPUTimeMask; }
+  static void set_has_StoreClockFast()            { _features[0] |= StoreClockFastMask; }
+  static void set_has_extended_immediate()        { _features[0] |= ExtImmedFacilityMask; }
+  static void set_has_long_displacement_fast()    { _features[0] |= LongDispFacilityHighPerfMask; }
+  static void set_has_long_displacement()         { _features[0] |= LongDispFacilityMask; }
+  static void set_has_ETF2()                      { _features[0] |= ETF2Mask; }
+  static void set_has_ETF3()                      { _features[0] |= ETF3Mask; }
+  static void set_has_ETF2Enhancements()          { _features[0] |= ETF2EnhancementMask; }
+  static void set_has_ETF3Enhancements()          { _features[0] |= ETF3EnhancementMask; }
+  static void set_has_Crypto()                    { _features[0] |= CryptoFacilityMask; }
+  static void set_has_StoreFacilityListExtended() { _features[0] |= StoreFacilityListExtendedMask; }
+
+  static void set_has_InterlockedAccessV1()       { _features[0] |= InterlockedAccess1Mask; }
+  static void set_has_PopCount()                  { _features[0] |= PopulationCountMask; }
+  static void set_has_LoadStoreConditional()      { _features[0] |= LoadStoreConditionalMask; }
+  static void set_has_HighWordInstr()             { _features[0] |= HighWordMask; }
+  static void set_has_FastSync()                  { _features[0] |= FastBCRSerializationMask; }
+  static void set_has_DistinctOpnds()             { _features[0] |= DistinctOpndsMask; }
+  static void set_has_FPExtensions()              { _features[0] |= FPExtensionsMask; }
+  static void set_has_MiscInstrExt()              { _features[0] |= MiscInstrExtMask; }
+  static void set_has_ProcessorAssist()           { _features[0] |= ProcessorAssistMask; }
+  static void set_has_InterlockedAccessV2()       { _features[0] |= InterlockedAccess2Mask; }
+  static void set_has_LoadAndALUAtomicV2()        { _features[0] |= InterlockedAccess2Mask; }
+  // Sets both bits that has_TxMem() tests (words 0 and 1).
+  static void set_has_TxMem()                     { _features[0] |= ConstrainedTxExecutionMask; _features[1] |= TransactionalExecutionMask; }
+  static void set_has_CryptoExt3()                { _features[1] |= CryptoExtension3Mask; }
+  static void set_has_CryptoExt4()                { _features[1] |= CryptoExtension4Mask; }
+  static void set_has_LoadStoreConditional2()     { _features[0] |= LoadStoreConditional2Mask; }
+  static void set_has_CryptoExt5()                { _features[0] |= CryptoExtension5Mask; }
+  static void set_has_VectorFacility()            { _features[2] |= VectorFacilityMask; }
+
+  // Assembler testing.
+  static void allow_all();
+  static void revert();
+
+  // Generate trapping instructions into C-code.
+  // Sometimes helpful for debugging.
+  static unsigned long z_SIGILL();
+  static unsigned long z_SIGSEGV();
+};
+
+#endif // CPU_S390_VM_VM_VERSION_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vmreg_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "code/vmreg.hpp"
+
+// Fill in the VMReg name table.
+// Every concrete register occupies two consecutive VMReg slots (the encoding
+// is mapped via encoding() << 1 in vmreg_s390.inline.hpp), which is why each
+// register name is entered twice. Trailing slots get a placeholder name.
+void VMRegImpl::set_regName() {
+  int i = 0;
+
+  // General purpose registers: two slots per register.
+  for (Register reg = ::as_Register(0); i < ConcreteRegisterImpl::max_gpr; reg = reg->successor()) {
+    regName[i++] = reg->name();
+    regName[i++] = reg->name();
+  }
+
+  // Float registers follow immediately after the GPR slots.
+  for (FloatRegister freg = ::as_FloatRegister(0); i < ConcreteRegisterImpl::max_fpr; freg = freg->successor()) {
+    regName[i++] = freg->name();
+    regName[i++] = freg->name();
+  }
+
+  // Remaining slots (e.g. condition register) get a placeholder name.
+  while (i < ConcreteRegisterImpl::number_of_registers) {
+    regName[i++] = "NON-GPR-XMM";
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vmreg_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VMREG_S390_HPP
+#define CPU_S390_VM_VMREG_S390_HPP
+
+// VMReg name-space layout on s390: each concrete register occupies two
+// consecutive slots (see the encoding() << 1 mapping in vmreg_s390.inline.hpp);
+// GPR slots come first, then FPR slots. Only the even slot of each pair is
+// "concrete" (see is_concrete() below).
+
+inline bool is_Register() {
+  // The unsigned compare also rejects negative values in a single test.
+  return (unsigned int)value() < (unsigned int)ConcreteRegisterImpl::max_gpr;
+}
+
+inline bool is_FloatRegister() {
+  // FPR slots follow immediately after the GPR slots.
+  return value() >= ConcreteRegisterImpl::max_gpr &&
+         value() < ConcreteRegisterImpl::max_fpr;
+}
+
+inline Register as_Register() {
+  // Each GPR covers two slots; the even slot maps back to the machine register.
+  assert(is_Register() && is_even(value()), "even-aligned GPR name");
+  return ::as_Register(value() >> 1);
+}
+
+inline FloatRegister as_FloatRegister() {
+  // Rebase into the FPR slot range before undoing the two-slots-per-register mapping.
+  assert(is_FloatRegister() && is_even(value()), "must be");
+  return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1);
+}
+
+inline bool is_concrete() {
+  // Only the even slot of each two-slot pair names a concrete register.
+  assert(is_reg(), "must be");
+  return is_even(value());
+}
+
+#endif // CPU_S390_VM_VMREG_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vmreg_s390.inline.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VMREG_S390_INLINE_HPP
+#define CPU_S390_VM_VMREG_S390_INLINE_HPP
+
+// Map machine registers to their VMReg names. Each register takes two
+// consecutive VMReg slots, hence the encoding is shifted left by one.
+// Name-space layout: [GPRs][FPRs][condition register].
+
+inline VMReg RegisterImpl::as_VMReg() {
+  // noreg has no VMReg name; map it to Bad().
+  if (this == noreg) {
+    return VMRegImpl::Bad();
+  }
+  return VMRegImpl::as_VMReg(encoding() << 1);
+}
+
+inline VMReg FloatRegisterImpl::as_VMReg() {
+  // FPR names start right after the GPR names.
+  return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr);
+}
+
+inline VMReg ConditionRegisterImpl::as_VMReg() {
+  // The condition register name starts after the FPR names.
+  return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr);
+}
+
+#endif // CPU_S390_VM_VMREG_S390_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vtableStubs_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/vtableStubs.hpp"
+#include "interp_masm_s390.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/instanceKlass.hpp"
+#include "oops/klassVtable.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_s390.inline.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+// Machine-dependent part of VtableStubs: create vtableStub of correct
+// size and initialize its code.
+
+#define __ masm->
+
+#ifndef PRODUCT
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
+#endif
+
+// Used by compiler only; may use only caller saved, non-argument registers.
+// Create a vtable dispatch stub for the given vtable index:
+// load the receiver's klass, fetch the Method* from the klass' vtable slot,
+// and jump to the method's from_compiled entry point.
+// Returns NULL on code cache exhaustion.
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
+
+  // code_length is a worst-case estimate; unused bytes are tracked in padding_bytes.
+  const int   code_length = VtableStub::pd_code_size_limit(true);
+  VtableStub *s = new(code_length) VtableStub(true, vtable_index);
+  if (s == NULL) { // Indicates OOM in the code cache.
+    return NULL;
+  }
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), code_length);
+  MacroAssembler *masm = new MacroAssembler(&cb);
+  address start_pc;
+  int     padding_bytes = 0;
+
+#if (!defined(PRODUCT) && defined(COMPILER2))
+  if (CountCompiledCalls) {
+    // Count unused bytes
+    //                  worst case             actual size
+    padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
+
+    // Use generic emitter for direct memory increment.
+    // Abuse Z_method as scratch register for generic emitter.
+    // It is loaded further down anyway before it is first used.
+    __ add2mem_32(Address(Z_R1_scratch), 1, Z_method);
+  }
+#endif
+
+  assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1");
+
+  // Get receiver klass.
+  // Must do an explicit check if implicit checks are disabled.
+  address npe_addr = __ pc(); // npe == NULL ptr exception
+  __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
+  const Register rcvr_klass = Z_R1_scratch;
+  __ load_klass(rcvr_klass, Z_ARG1);
+
+  // Set method (in case of interpreted method), and destination address.
+  int entry_offset = in_bytes(InstanceKlass::vtable_start_offset()) +
+                     vtable_index * vtableEntry::size_in_bytes();
+
+#ifndef PRODUCT
+  if (DebugVtables) {
+    Label L;
+    // Check offset vs vtable length.
+    const Register vtable_idx = Z_R0_scratch;
+
+    // Count unused bytes.
+    //                  worst case             actual size
+    padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size_in_bytes(), true);
+
+    assert(Immediate::is_uimm12(in_bytes(InstanceKlass::vtable_length_offset())), "disp to large");
+    __ z_cl(vtable_idx, in_bytes(InstanceKlass::vtable_length_offset()), rcvr_klass);
+    __ z_brl(L);
+    __ z_lghi(Z_ARG3, vtable_index);  // Debug code, don't optimize.
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), Z_ARG1, Z_ARG3, false);
+    // Count unused bytes (assume worst case here).
+    padding_bytes += 12;
+    __ bind(L);
+  }
+#endif
+
+  int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
+
+  // Duplicate safety code from enc_class Java_Dynamic_Call_dynTOC.
+  if (Displacement::is_validDisp(v_off)) {
+    // Offset fits into the instruction's displacement field; single load.
+    __ z_lg(Z_method/*method oop*/, v_off, rcvr_klass/*class oop*/);
+    // Account for the load_const in the else path.
+    padding_bytes += __ load_const_size();
+  } else {
+    // Worst case: offset does not fit in displacement field.
+    __ load_const(Z_method, v_off); // Z_method temporarily holds the offset value.
+    __ z_lg(Z_method/*method oop*/, 0, Z_method/*method offset*/, rcvr_klass/*class oop*/);
+  }
+
+#ifndef PRODUCT
+  if (DebugVtables) {
+    Label L;
+    __ z_ltgr(Z_method, Z_method);
+    __ z_brne(L);
+    __ stop("Vtable entry is ZERO",102);
+    __ bind(L);
+  }
+#endif
+
+  address ame_addr = __ pc(); // ame = abstract method error
+
+  // Must do an explicit check if implicit checks are disabled.
+  __ null_check(Z_method, Z_R1_scratch, in_bytes(Method::from_compiled_offset()));
+  __ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method);
+  __ z_br(Z_R1_scratch);
+
+  masm->flush();
+
+  s->set_exception_points(npe_addr, ame_addr);
+
+  return s;
+}
+
+// Create an itable dispatch stub for the given itable index:
+// walk the receiver klass' itable to find the entry for the interface
+// (passed in Z_method), then load the target Method* and jump to its
+// from_compiled entry point. If the interface is not found, the call is
+// re-resolved via the "handle wrong method" stub (which will end up
+// throwing IncompatibleClassChangeError).
+// Returns NULL on code cache exhaustion.
+VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
+  const int   code_length = VtableStub::pd_code_size_limit(false);
+  VtableStub *s = new(code_length) VtableStub(false, vtable_index);
+  if (s == NULL) { // Indicates OOM in the code cache.
+    return NULL;
+  }
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), code_length);
+  MacroAssembler *masm = new MacroAssembler(&cb);
+  address start_pc;
+  int     padding_bytes = 0;
+
+#if (!defined(PRODUCT) && defined(COMPILER2))
+  if (CountCompiledCalls) {
+    // Count unused bytes
+    //                  worst case             actual size
+    padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
+
+    // Use generic emitter for direct memory increment.
+    // Use Z_tmp_1 as scratch register for generic emitter.
+    // Wrap the base register explicitly in an Address, consistent with
+    // the identical code in create_vtable_stub().
+    __ add2mem_32(Address(Z_R1_scratch), 1, Z_tmp_1);
+  }
+#endif
+
+  assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1");
+
+  // Entry arguments:
+  //  Z_method: Interface
+  //  Z_ARG1:   Receiver
+  const Register rcvr_klass = Z_tmp_1;    // Used to compute itable_entry_addr.
+                                          // Use extra reg to avoid re-load.
+  const Register vtable_len = Z_tmp_2;    // Used to compute itable_entry_addr.
+  const Register itable_entry_addr = Z_R1_scratch;
+  const Register itable_interface  = Z_R0_scratch;
+
+  // Get receiver klass.
+  // Must do an explicit check if implicit checks are disabled.
+  address npe_addr = __ pc(); // npe == NULL ptr exception
+  __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
+  __ load_klass(rcvr_klass, Z_ARG1);
+
+  // Load start of itable entries into itable_entry.
+  // The itable starts right after the klass' vtable.
+  __ z_llgf(vtable_len, Address(rcvr_klass, InstanceKlass::vtable_length_offset()));
+  __ z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes()));
+
+  // Loop over all itable entries until desired interfaceOop(Rinterface) found.
+  const int vtable_base_offset = in_bytes(InstanceKlass::vtable_start_offset());
+  // Count unused bytes.
+  start_pc = __ pc();
+  __ add2reg_with_index(itable_entry_addr, vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(), rcvr_klass, vtable_len);
+  padding_bytes += 20 - (__ pc() - start_pc);
+
+  const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize;
+  Label search;
+  __ bind(search);
+
+  // Handle IncompatibleClassChangeError in itable stubs.
+  // If the entry is NULL then we've reached the end of the table
+  // without finding the expected interface, so throw an exception.
+  NearLabel   throw_icce;
+  __ load_and_test_long(itable_interface, Address(itable_entry_addr));
+  __ z_bre(throw_icce); // Throw the exception out-of-line.
+  // Count unused bytes.
+  start_pc = __ pc();
+  __ add2reg(itable_entry_addr, itable_offset_search_inc);
+  padding_bytes += 20 - (__ pc() - start_pc);
+  __ z_cgr(itable_interface, Z_method);
+  __ z_brne(search);
+
+  // Entry found. Itable_entry_addr points to the subsequent entry (itable_offset_search_inc too far).
+  // Get offset of vtable for interface.
+
+  const Register vtable_offset = Z_R1_scratch;
+  const Register itable_method = rcvr_klass;   // Calculated before.
+
+  // Compensate for the pre-increment of itable_entry_addr in the search loop.
+  const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() -
+                                    itableOffsetEntry::interface_offset_in_bytes()) -
+                                   itable_offset_search_inc;
+  __ z_llgf(vtable_offset, vtable_offset_offset, itable_entry_addr);
+
+  // Compute itableMethodEntry and get method and entry point for compiler.
+  const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) +
+                            itableMethodEntry::method_offset_in_bytes();
+
+  __ z_lg(Z_method, method_offset, vtable_offset, itable_method);
+
+#ifndef PRODUCT
+  if (DebugVtables) {
+    Label ok1;
+    __ z_ltgr(Z_method, Z_method);
+    __ z_brne(ok1);
+    __ stop("method is null",103);
+    __ bind(ok1);
+  }
+#endif
+
+  address ame_addr = __ pc();
+  // Must do an explicit check if implicit checks are disabled.
+  if (!ImplicitNullChecks) {
+    __ compare64_and_branch(Z_method, (intptr_t) 0, Assembler::bcondEqual, throw_icce);
+  }
+  __ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method);
+  __ z_br(Z_R1_scratch);
+
+  // Handle IncompatibleClassChangeError in itable stubs.
+  __ bind(throw_icce);
+  // Count unused bytes
+  //                  worst case          actual size
+  // We force resolving of the call site by jumping to
+  // the "handle wrong method" stub, and so let the
+  // interpreter runtime do all the dirty work.
+  padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true);
+  __ z_br(Z_R1_scratch);
+
+  masm->flush();
+
+  s->set_exception_points(npe_addr, ame_addr);
+  return s;
+}
+
+// In order to tune these parameters, run the JVM with VM options
+// +PrintMiscellaneous and +WizardMode to see information about
+// actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops.
+// Worst-case code size estimate for a vtable/itable stub.
+// The constants are tuned estimates (see the comment above about
+// +PrintMiscellaneous/+WizardMode); emitters track actual slack in
+// padding_bytes.
+int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
+  int size = DebugVtables ? 216 : 0;  // Extra bytes for the DebugVtables checks.
+  if (CountCompiledCalls) {
+    size += 6 * 4;                    // Counter increment sequence.
+  }
+  // Itable stubs contain the itable search loop and are therefore longer.
+  if (is_vtable_stub) {
+    size += 52;
+  } else {
+    size += 104;
+  }
+  // Extra code for decoding compressed klass pointers with a non-NULL base.
+  if (Universe::narrow_klass_base() != NULL) {
+    size += 16; // A guess.
+  }
+  return size;
+}
+
+// Align stubs on icache line boundaries (line size assumed to be 32 bytes here).
+int VtableStub::pd_code_alignment() {
+  const unsigned int icache_line_size = 32;
+  return icache_line_size;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/atomic_linux_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
+
+#include "runtime/atomic.hpp"
+#include "runtime/os.hpp"
+#include "vm_version_s390.hpp"
+
+// Note that the compare-and-swap instructions on System z perform
+// a serialization function before the storage operand is fetched
+// and again after the operation is completed.
+//
+// Used constraint modifiers:
+// = write-only access: Value on entry to inline-assembler code irrelevant.
+// + read/write access: Value on entry is used; on exit value is changed.
+//   read-only  access: Value on entry is used and never changed.
+// & early-clobber access: Might be modified before all read-only operands
+//                         have been used.
+// a address register operand (not GR0).
+// d general register operand (including GR0)
+// Q memory operand w/o index register.
+// 0..9 operand reference (by operand position).
+//      Used for operands that fill multiple roles. One example would be a
+//      write-only operand receiving its initial value from a read-only operand.
+//      Refer to cmpxchg(..) operand #0 and variable cmp_val for a real-life example.
+//
+
+// On System z, all store operations are atomic if the address where the data is stored into
+// is an integer multiple of the data length. Furthermore, all stores are ordered:
+// a store which occurs conceptually before another store becomes visible to other CPUs
+// before the other store becomes visible.
+// Hence plain stores suffice here; this includes the 64-bit (jlong/intptr_t/void*)
+// variants, since this is a 64-bit-only port (cf. the 64-bit add_ptr below).
+inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
+inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
+inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
+inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
+inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
+
+// Same as above, for volatile-qualified destinations.
+inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
+inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
+inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
+inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
+inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
+
+
+//------------
+// Atomic::add
+//------------
+// These methods force the value in memory to be augmented by the passed increment.
+// Both, memory value and increment, are treated as 32bit signed binary integers.
+// No overflow exceptions are recognized, and the condition code does not hold
+// information about the value in memory.
+//
+// The value in memory is updated by using a compare-and-swap instruction. The
+// instruction is retried as often as required.
+//
+// The return value of the method is the value that was successfully stored. At the
+// time the caller receives back control, the value in memory may have changed already.
+
+// Atomically add 'inc' to '*dest' (32 bit) and return the updated value.
+// Fast path: LAA (interlocked-access facility 1) emitted as raw bytes —
+// NOTE(review): presumably hand-assembled so the code builds with assemblers
+// that do not know the LAA mnemonic; confirm. LAA yields the OLD memory value,
+// so the new value is recomputed in a register afterwards (AR below).
+// Slow path: classic LLGF + CS (compare-and-swap) retry loop.
+inline jint Atomic::add(jint inc, volatile jint*dest) {
+  unsigned int old, upd;
+
+  if (VM_Version::has_LoadAndALUAtomicV1()) {
+    __asm__ __volatile__ (
+      "   LGFR     0,%[inc]                \n\t" // save increment
+      "   LA       3,%[mem]                \n\t" // force data address into ARG2
+//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
+//    "   LAA      2,0,0(3)                \n\t" // actually coded instruction
+      "   .byte    0xeb                    \n\t" // LAA main opcode
+      "   .byte    0x20                    \n\t" // R1,R3
+      "   .byte    0x30                    \n\t" // R2,disp1
+      "   .byte    0x00                    \n\t" // disp2,disp3
+      "   .byte    0x00                    \n\t" // disp4,disp5
+      "   .byte    0xf8                    \n\t" // LAA minor opcode
+      "   AR       2,0                     \n\t" // calc new value in register
+      "   LR       %[upd],2                \n\t" // move to result register
+      //---<  outputs  >---
+      : [upd]  "=&d" (upd)    // write-only, updated counter value
+      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      : [inc]  "a"   (inc)    // read-only.
+      //---<  clobbered  >---
+      : "cc", "r0", "r2", "r3"
+    );
+  } else {
+    __asm__ __volatile__ (
+      "   LLGF     %[old],%[mem]           \n\t" // get old value
+      "0: LA       %[upd],0(%[inc],%[old]) \n\t" // calc result
+      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
+      "   JNE      0b                      \n\t" // no success? -> retry
+      //---<  outputs  >---
+      : [old] "=&a" (old)    // write-only, old counter value
+      , [upd] "=&d" (upd)    // write-only, updated counter value
+      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      : [inc] "a"   (inc)    // read-only.
+      //---<  clobbered  >---
+      : "cc"
+    );
+  }
+
+  return (jint)upd;
+}
+
+
+// Atomically add 'inc' to '*dest' (64 bit) and return the updated value.
+// Same structure as the 32-bit Atomic::add() above, but uses the 64-bit
+// instructions LAAG (raw-byte encoded) and CSG.
+inline intptr_t Atomic::add_ptr(intptr_t inc, volatile intptr_t* dest) {
+  unsigned long old, upd;
+
+  if (VM_Version::has_LoadAndALUAtomicV1()) {
+    __asm__ __volatile__ (
+      "   LGR      0,%[inc]                \n\t" // save increment
+      "   LA       3,%[mem]                \n\t" // force data address into ARG2
+//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
+//    "   LAAG     2,0,0(3)                \n\t" // actually coded instruction
+      "   .byte    0xeb                    \n\t" // LAAG main opcode
+      "   .byte    0x20                    \n\t" // R1,R3
+      "   .byte    0x30                    \n\t" // R2,disp1
+      "   .byte    0x00                    \n\t" // disp2,disp3
+      "   .byte    0x00                    \n\t" // disp4,disp5
+      "   .byte    0xe8                    \n\t" // LAAG minor opcode (0xe8; LAA would be 0xf8)
+      "   AGR      2,0                     \n\t" // calc new value in register
+      "   LGR      %[upd],2                \n\t" // move to result register
+      //---<  outputs  >---
+      : [upd]  "=&d" (upd)    // write-only, updated counter value
+      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      : [inc]  "a"   (inc)    // read-only.
+      //---<  clobbered  >---
+      : "cc", "r0", "r2", "r3"
+    );
+  } else {
+    __asm__ __volatile__ (
+      "   LG       %[old],%[mem]           \n\t" // get old value
+      "0: LA       %[upd],0(%[inc],%[old]) \n\t" // calc result
+      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
+      "   JNE      0b                      \n\t" // no success? -> retry
+      //---<  outputs  >---
+      : [old] "=&a" (old)    // write-only, old counter value
+      , [upd] "=&d" (upd)    // write-only, updated counter value
+      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      : [inc] "a"   (inc)    // read-only.
+      //---<  clobbered  >---
+      : "cc"
+    );
+  }
+
+  return (intptr_t)upd;
+}
+
+// Pointer-typed convenience wrapper around add_ptr(intptr_t, volatile intptr_t*).
+inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest) {
+  return (void*)add_ptr(add_value, (volatile intptr_t*)dest);
+}
+
+
+//------------
+// Atomic::inc
+//------------
+// These methods force the value in memory to be incremented (augmented by 1).
+// Memory value and increment are treated as signed binary integers (32-bit
+// for inc(), 64-bit for inc_ptr()). No overflow exceptions are recognized,
+// and the condition code does not hold information about the value in memory.
+//
+// The value in memory is updated atomically: with a single interlocked
+// load-and-add where available, else with a compare-and-swap loop, retried as often as required.
+
+inline void Atomic::inc(volatile jint* dest) {
+  unsigned int old, upd;  // old: only used by the CS fallback path below.
+
+  if (VM_Version::has_LoadAndALUAtomicV1()) {
+//  tty->print_cr("Atomic::inc     called... dest @%p", dest);
+    __asm__ __volatile__ (
+      "   LGHI     2,1                     \n\t" // load increment (+1) into r2
+      "   LA       3,%[mem]                \n\t" // force data address into ARG2 (r3)
+//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
+//    "   LAA      2,2,0(3)                \n\t" // actually coded instruction
+      "   .byte    0xeb                    \n\t" // LAA main opcode
+      "   .byte    0x22                    \n\t" // R1,R3
+      "   .byte    0x30                    \n\t" // R2,disp1
+      "   .byte    0x00                    \n\t" // disp2,disp3
+      "   .byte    0x00                    \n\t" // disp4,disp5
+      "   .byte    0xf8                    \n\t" // LAA minor opcode
+      "   AGHI     2,1                     \n\t" // LAA left the OLD value in r2; compute the new value
+      "   LR       %[upd],2                \n\t" // move 32-bit result to output register
+      //---<  outputs  >---
+      : [upd]  "=&d" (upd)    // write-only, updated counter value
+      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      :
+//    : [inc]  "a"   (inc)    // read-only.
+      //---<  clobbered  >---
+      : "cc", "r2", "r3"
+    );
+  } else {
+    __asm__ __volatile__ (
+      "   LLGF     %[old],%[mem]           \n\t" // get old value (zero-extending 32-bit load)
+      "0: LA       %[upd],1(,%[old])       \n\t" // calc result (LA used as adder: upd = old + 1)
+      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
+      "   JNE      0b                      \n\t" // no success? -> retry
+      //---<  outputs  >---
+      : [old] "=&a" (old)    // write-only, old counter value
+      , [upd] "=&d" (upd)    // write-only, updated counter value
+      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      :
+      //---<  clobbered  >---
+      : "cc"
+    );
+  }
+}
+
+inline void Atomic::inc_ptr(volatile intptr_t* dest) {
+  unsigned long old, upd;  // old: only used by the CSG fallback path below.
+
+  if (VM_Version::has_LoadAndALUAtomicV1()) {
+    __asm__ __volatile__ (
+      "   LGHI     2,1                     \n\t" // load increment (+1) into r2
+      "   LA       3,%[mem]                \n\t" // force data address into ARG2 (r3)
+//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
+//    "   LAAG     2,2,0(3)                \n\t" // actually coded instruction
+      "   .byte    0xeb                    \n\t" // LAAG main opcode
+      "   .byte    0x22                    \n\t" // R1,R3
+      "   .byte    0x30                    \n\t" // R2,disp1
+      "   .byte    0x00                    \n\t" // disp2,disp3
+      "   .byte    0x00                    \n\t" // disp4,disp5
+      "   .byte    0xe8                    \n\t" // LAAG minor opcode
+      "   AGHI     2,1                     \n\t" // LAAG left the OLD value in r2; compute the new value
+      "   LGR      %[upd],2                \n\t" // move 64-bit result (LR would truncate to 32 bits)
+      //---<  outputs  >---
+      : [upd]  "=&d" (upd)    // write-only, updated counter value
+      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      :
+//    : [inc]  "a"   (inc)    // read-only.
+      //---<  clobbered  >---
+      : "cc", "r2", "r3"
+    );
+  } else {
+    __asm__ __volatile__ (
+      "   LG       %[old],%[mem]           \n\t" // get old value
+      "0: LA       %[upd],1(,%[old])       \n\t" // calc result (LA used as adder: upd = old + 1)
+      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
+      "   JNE      0b                      \n\t" // no success? -> retry
+      //---<  outputs  >---
+      : [old] "=&a" (old)    // write-only, old counter value
+      , [upd] "=&d" (upd)    // write-only, updated counter value
+      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      :
+      //---<  clobbered  >---
+      : "cc"
+    );
+  }
+}
+
+inline void Atomic::inc_ptr(volatile void* dest) {
+  inc_ptr((volatile intptr_t*)dest);  // Delegate to the intptr_t overload above.
+}
+
+//------------
+// Atomic::dec
+//------------
+// These methods force the value in memory to be decremented (by 1).
+// Memory value and decrement are treated as signed binary integers (32-bit
+// for dec(), 64-bit for dec_ptr()). No overflow exceptions are recognized,
+// and the condition code does not hold information about the value in memory.
+//
+// The value in memory is updated atomically: with a single interlocked
+// load-and-add of -1 where available, else with a compare-and-swap loop, retried as often as required.
+
+inline void Atomic::dec(volatile jint* dest) {
+  unsigned int old, upd;  // old: only used by the CS fallback path below.
+
+  if (VM_Version::has_LoadAndALUAtomicV1()) {
+    __asm__ __volatile__ (
+      "   LGHI     2,-1                    \n\t" // load decrement (-1) into r2
+      "   LA       3,%[mem]                \n\t" // force data address into ARG2 (r3)
+//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
+//    "   LAA      2,2,0(3)                \n\t" // actually coded instruction
+      "   .byte    0xeb                    \n\t" // LAA main opcode
+      "   .byte    0x22                    \n\t" // R1,R3
+      "   .byte    0x30                    \n\t" // R2,disp1
+      "   .byte    0x00                    \n\t" // disp2,disp3
+      "   .byte    0x00                    \n\t" // disp4,disp5
+      "   .byte    0xf8                    \n\t" // LAA minor opcode
+      "   AGHI     2,-1                    \n\t" // LAA left the OLD value in r2; compute the new value
+      "   LR       %[upd],2                \n\t" // move 32-bit result to output register
+      //---<  outputs  >---
+      : [upd]  "=&d" (upd)    // write-only, updated counter value
+      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      :
+//    : [inc]  "a"   (inc)    // read-only.
+      //---<  clobbered  >---
+      : "cc", "r2", "r3"
+    );
+  } else {
+    __asm__ __volatile__ (
+      "   LLGF     %[old],%[mem]           \n\t" // get old value (zero-extending 32-bit load)
+  // LAY not supported by inline assembler
+  //  "0: LAY      %[upd],-1(,%[old])      \n\t" // calc result
+      "0: LR       %[upd],%[old]           \n\t" // calc result
+      "   AHI      %[upd],-1               \n\t" // upd = old - 1
+      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
+      "   JNE      0b                      \n\t" // no success? -> retry
+      //---<  outputs  >---
+      : [old] "=&a" (old)    // write-only, old counter value
+      , [upd] "=&d" (upd)    // write-only, updated counter value
+      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      :
+      //---<  clobbered  >---
+      : "cc"
+    );
+  }
+}
+
+inline void Atomic::dec_ptr(volatile intptr_t* dest) {
+  unsigned long old, upd;  // old: only used by the CSG fallback path below.
+
+  if (VM_Version::has_LoadAndALUAtomicV1()) {
+    __asm__ __volatile__ (
+      "   LGHI     2,-1                    \n\t" // load decrement (-1) into r2
+      "   LA       3,%[mem]                \n\t" // force data address into ARG2 (r3)
+//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
+//    "   LAAG     2,2,0(3)                \n\t" // actually coded instruction
+      "   .byte    0xeb                    \n\t" // LAAG main opcode
+      "   .byte    0x22                    \n\t" // R1,R3
+      "   .byte    0x30                    \n\t" // R2,disp1
+      "   .byte    0x00                    \n\t" // disp2,disp3
+      "   .byte    0x00                    \n\t" // disp4,disp5
+      "   .byte    0xe8                    \n\t" // LAAG minor opcode
+      "   AGHI     2,-1                    \n\t" // LAAG left the OLD value in r2; compute the new value
+      "   LGR      %[upd],2                \n\t" // move 64-bit result (LR would truncate to 32 bits)
+      //---<  outputs  >---
+      : [upd]  "=&d" (upd)    // write-only, updated counter value
+      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      :
+//    : [inc]  "a"   (inc)    // read-only.
+      //---<  clobbered  >---
+      : "cc", "r2", "r3"
+    );
+  } else {
+    __asm__ __volatile__ (
+      "   LG       %[old],%[mem]           \n\t" // get old value
+//    LAY not supported by inline assembler
+//    "0: LAY      %[upd],-1(,%[old])      \n\t" // calc result
+      "0: LGR      %[upd],%[old]           \n\t" // calc result
+      "   AGHI     %[upd],-1               \n\t" // upd = old - 1
+      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
+      "   JNE      0b                      \n\t" // no success? -> retry
+      //---<  outputs  >---
+      : [old] "=&a" (old)    // write-only, old counter value
+      , [upd] "=&d" (upd)    // write-only, updated counter value
+      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
+      //---<  inputs  >---
+      :
+      //---<  clobbered  >---
+      : "cc"
+    );
+  }
+}
+
+inline void Atomic::dec_ptr(volatile void* dest) {
+  dec_ptr((volatile intptr_t*)dest);  // Delegate to the intptr_t overload above.
+}
+
+//-------------
+// Atomic::xchg
+//-------------
+// These methods force the value in memory to be replaced by the new value passed
+// in as argument.
+//
+// The value in memory is replaced by using a compare-and-swap instruction. The
+// instruction is retried as often as required. This makes sure that the new
+// value can be seen, at least for a very short period of time, by other CPUs.
+//
+// If we would use a normal "load(old value) store(new value)" sequence,
+// the new value could be lost unnoticed, due to a store(new value) from
+// another thread.
+//
+// The return value is the (unchanged) value from memory as it was when the
+// replacement succeeded.
+inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
+  unsigned int  old;
+
+  __asm__ __volatile__ (
+    "   LLGF     %[old],%[mem]           \n\t" // get old value (zero-extending 32-bit load)
+    "0: CS       %[old],%[upd],%[mem]    \n\t" // try to xchg upd with mem
+    "   JNE      0b                      \n\t" // no success? -> retry (CS refreshed [old] from memory)
+    //---<  outputs  >---
+    : [old] "=&d" (old)      // write-only, prev value irrelevant
+    , [mem] "+Q"  (*dest)    // read/write, memory to be updated atomically
+    //---<  inputs  >---
+    : [upd] "d"   (xchg_val) // read-only, value to be written to memory
+    //---<  clobbered  >---
+    : "cc"
+  );
+
+  return (jint)old;  // value that was in memory when the exchange succeeded
+}
+
+inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
+  unsigned long old;
+
+  __asm__ __volatile__ (
+    "   LG       %[old],%[mem]           \n\t" // get old value (64-bit load)
+    "0: CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg upd with mem
+    "   JNE      0b                      \n\t" // no success? -> retry (CSG refreshed [old] from memory)
+    //---<  outputs  >---
+    : [old] "=&d" (old)      // write-only, init from memory
+    , [mem] "+Q"  (*dest)    // read/write, memory to be updated atomically
+    //---<  inputs  >---
+    : [upd] "d"   (xchg_val) // read-only, value to be written to memory
+    //---<  clobbered  >---
+    : "cc"
+  );
+
+  return (intptr_t)old;  // value that was in memory when the exchange succeeded
+}
+
+inline void *Atomic::xchg_ptr(void *exchange_value, volatile void *dest) {
+  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);  // Delegate to the intptr_t overload above.
+}
+
+//----------------
+// Atomic::cmpxchg
+//----------------
+// These methods compare the value in memory with a given compare value.
+// If both values compare equal, the value in memory is replaced with
+// the exchange value.
+//
+// The value in memory is compared and replaced by using a compare-and-swap
+// instruction. The instruction is NOT retried (one shot only).
+//
+// The return value is the (unchanged) value from memory as it was when the
+// compare-and-swap instruction completed. A successful exchange operation
+// is indicated by (return value == compare_value). If unsuccessful, a new
+// exchange value can be calculated based on the return value which is the
+// latest contents of the memory location.
+//
+// Inspecting the return value is the only way for the caller to determine
+// if the compare-and-swap instruction was successful:
+// - If return value and compare value compare equal, the compare-and-swap
+//   instruction was successful and the value in memory was replaced by the
+//   exchange value.
+// - If return value and compare value compare unequal, the compare-and-swap
+//   instruction was not successful. The value in memory was left unchanged.
+//
+// The s390 processors always fence before and after the CS/CSG instruction.
+// Thus we ignore the memory ordering argument. The documentation states:
+// "A serialization function is performed before the operand is fetched
+// and again after the operation is completed."
+
+inline jint Atomic::cmpxchg(jint xchg_val, volatile jint* dest, jint cmp_val, cmpxchg_memory_order unused) {
+  unsigned long old;  // Initialized from cmp_val via the "0" constraint; CS updates the low 32 bits.
+
+  __asm__ __volatile__ (
+    "   CS       %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem (one shot, no retry).
+    // outputs
+    : [old] "=&d" (old)      // Write-only; holds the memory value after the attempt.
+    , [mem] "+Q"  (*dest)    // Read/write, memory to be updated atomically.
+    // inputs
+    : [upd] "d"   (xchg_val)
+    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (operand #0).
+    // clobbered
+    : "cc"
+  );
+
+  return (jint)old;  // == cmp_val iff the exchange succeeded.
+}
+
+inline jlong Atomic::cmpxchg(jlong xchg_val, volatile jlong* dest, jlong cmp_val, cmpxchg_memory_order unused) {
+  unsigned long old;  // Initialized from cmp_val via the "0" constraint.
+
+  __asm__ __volatile__ (
+    "   CSG      %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem (one shot, no retry).
+    // outputs
+    : [old] "=&d" (old)      // Write-only; holds the memory value after the attempt.
+    , [mem] "+Q"  (*dest)    // Read/write, memory to be updated atomically.
+    // inputs
+    : [upd] "d"   (xchg_val)
+    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (operand #0).
+    // clobbered
+    : "cc"
+  );
+
+  return (jlong)old;  // == cmp_val iff the exchange succeeded.
+}
+
+inline void* Atomic::cmpxchg_ptr(void *xchg_val, volatile void* dest, void* cmp_val, cmpxchg_memory_order unused) {
+  return (void*)cmpxchg((jlong)xchg_val, (volatile jlong*)dest, (jlong)cmp_val, unused);  // 64-bit pointers: use the jlong variant.
+}
+
+inline intptr_t Atomic::cmpxchg_ptr(intptr_t xchg_val, volatile intptr_t* dest, intptr_t cmp_val, cmpxchg_memory_order unused) {
+  return (intptr_t)cmpxchg((jlong)xchg_val, (volatile jlong*)dest, (jlong)cmp_val, unused);  // 64-bit pointers: use the jlong variant.
+}
+
+inline jlong Atomic::load(volatile jlong* src) { return *src; }  // Plain load; assumes aligned 64-bit loads are atomic on this platform -- confirm against z/Architecture docs.
+
+#endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/bytes_linux_s390.inline.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+
+#include <byteswap.h>
+
+inline u2 swap_u2(u2 x) {
+  return bswap_16(x);  // glibc byte swap: 0xAABB -> 0xBBAA
+}
+
+inline u4 swap_u4(u4 x) {
+  return bswap_32(x);  // glibc byte swap: 0xAABBCCDD -> 0xDDCCBBAA
+}
+
+inline u8 swap_u8(u8 x) {
+  return bswap_64(x);  // glibc byte swap, full 64-bit reversal
+}
+
+#endif // OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/globals_linux_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Sorted according to linux_x86.
+
+#ifndef OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP
+
+// Sets the default values for platform dependent flags used by the
+// runtime system (see globals.hpp).
+
+define_pd_global(bool, DontYieldALot,            false);
+define_pd_global(intx, ThreadStackSize,          1024); // 0 => Use system default.
+define_pd_global(intx, VMThreadStackSize,        1024); // Presumably in KB like on other platforms -- verify against globals.hpp.
+// Some jck tests in lang/fp/fpl038 run out of compile thread stack.
+// Observed in pure dbg build, running with -Xcomp -Xbatch on z990.
+// We also increase the stack size for opt builds to be on the safe side.
+#ifdef ASSERT
+define_pd_global(intx, CompilerThreadStackSize,   4096);
+#else
+define_pd_global(intx, CompilerThreadStackSize,   2048);
+#endif
+
+// Allow extra space in DEBUG builds for asserts.
+define_pd_global(size_t, JVMInvokeMethodSlack,    8192);
+
+// Only used on 64 bit platforms.
+define_pd_global(size_t, HeapBaseMinAddress,      2*G);
+
+#endif // OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/orderAccess_linux_s390.inline.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
+
+#include "runtime/orderAccess.hpp"
+#include "vm_version_s390.hpp"
+
+// Implementation of class OrderAccess.
+
+//
+// machine barrier instructions:
+//
+//   - z_sync            two-way memory barrier, aka fence
+//
+// semantic barrier instructions:
+// (as defined in orderAccess.hpp)
+//
+//   - z_release         orders Store|Store,    (maps to compiler barrier)
+//                               Load|Store
+//   - z_acquire         orders  Load|Store,    (maps to compiler barrier)
+//                               Load|Load
+//   - z_fence           orders Store|Store,    (maps to z_sync)
+//                               Load|Store,
+//                               Load|Load,
+//                              Store|Load
+//
+
+
+// Only load-after-store-order is not guaranteed on z/Architecture, i.e. only 'fence'
+// is needed.
+
+// A compiler barrier, forcing the C++ compiler to invalidate all memory assumptions.
+#define inlasm_compiler_barrier() __asm__ volatile ("" : : : "memory");
+// "bcr 15, 0" is used as a two-way memory barrier; it also acts as a compiler
+// barrier via the "memory" clobber.
+#define inlasm_zarch_sync() __asm__ __volatile__ ("bcr 15, 0" : : : "memory");
+
+// Release and acquire are empty on z/Architecture, but potential
+// optimizations of gcc must be forbidden by OrderAccess::release and
+// OrderAccess::acquire.
+#define inlasm_zarch_release() inlasm_compiler_barrier()
+#define inlasm_zarch_acquire() inlasm_compiler_barrier()
+#define inlasm_zarch_fence()   inlasm_zarch_sync()
+
+inline void OrderAccess::loadload()   { inlasm_compiler_barrier(); } // hardware already orders load-load (see note above)
+inline void OrderAccess::storestore() { inlasm_compiler_barrier(); } // hardware already orders store-store
+inline void OrderAccess::loadstore()  { inlasm_compiler_barrier(); } // hardware already orders load-store
+inline void OrderAccess::storeload()  { inlasm_zarch_sync(); }       // store-load needs a real fence (bcr 15,0)
+
+inline void OrderAccess::acquire()    { inlasm_zarch_acquire(); }    // compiler barrier only
+inline void OrderAccess::release()    { inlasm_zarch_release(); }    // compiler barrier only
+inline void OrderAccess::fence()      { inlasm_zarch_sync(); }       // full two-way fence
+
+template<> inline jbyte  OrderAccess::specialized_load_acquire<jbyte> (volatile jbyte*  p) { register jbyte  t = *p; inlasm_zarch_acquire(); return t; } // load, then acquire barrier
+template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(volatile jshort* p) { register jshort t = *p; inlasm_zarch_acquire(); return t; } // load, then acquire barrier
+template<> inline jint   OrderAccess::specialized_load_acquire<jint>  (volatile jint*   p) { register jint   t = *p; inlasm_zarch_acquire(); return t; } // load, then acquire barrier
+template<> inline jlong  OrderAccess::specialized_load_acquire<jlong> (volatile jlong*  p) { register jlong  t = *p; inlasm_zarch_acquire(); return t; } // load, then acquire barrier
+
+#undef inlasm_compiler_barrier
+#undef inlasm_zarch_sync
+#undef inlasm_zarch_release
+#undef inlasm_zarch_acquire
+#undef inlasm_zarch_fence
+
+#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
+
+#endif // OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,640 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// This file is organized as os_linux_x86.cpp.
+
+// no precompiled headers
+#include "asm/assembler.inline.hpp"
+#include "classfile/classLoader.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "classfile/vmSymbols.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nativeInst.hpp"
+#include "code/vtableStubs.hpp"
+#include "compiler/disassembler.hpp"
+#include "interpreter/interpreter.hpp"
+#include "jvm_linux.h"
+#include "memory/allocation.inline.hpp"
+#include "nativeInst_s390.hpp"
+#include "os_share_linux.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm.h"
+#include "prims/jvm_misc.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/extendedPC.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/java.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/osThread.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+#include "runtime/timer.hpp"
+#include "utilities/events.hpp"
+#include "utilities/vmError.hpp"
+
+// put OS-includes here
+# include <sys/types.h>
+# include <sys/mman.h>
+# include <pthread.h>
+# include <signal.h>
+# include <errno.h>
+# include <dlfcn.h>
+# include <stdlib.h>
+# include <stdio.h>
+# include <unistd.h>
+# include <sys/resource.h>
+# include <pthread.h>
+# include <sys/stat.h>
+# include <sys/time.h>
+# include <sys/utsname.h>
+# include <sys/socket.h>
+# include <sys/wait.h>
+# include <pwd.h>
+# include <poll.h>
+# include <ucontext.h>
+
+address os::current_stack_pointer() {
+  intptr_t* csp;
+
+  // Inline assembly for `z_lgr regno(csp), Z_SP' (Z_SP = Z_R15):
+  __asm__ __volatile__ ("lgr %0, 15":"=r"(csp):);  // copy r15 (the stack pointer) into csp
+
+  assert(((uint64_t)csp & (frame::alignment_in_bytes-1)) == 0, "SP must be aligned");
+  return (address) csp;
+}
+
+char* os::non_memory_address_word() {
+  // Must never look like an address returned by reserve_memory,
+  // even in its subfields (as defined by the CPU immediate fields,
+  // if the CPU splits constants across multiple instructions).
+  return (char*) -1;  // all-ones bit pattern: never a valid mapped address
+}
+
+// OS specific thread initialization.
+void os::initialize_thread(Thread* thread) { }  // Nothing to do on Linux/s390.
+
+// Frame information (pc, sp, fp) retrieved via ucontext
+// always looks like a C-frame according to the frame
+// conventions in frame_s390.hpp.
+address os::Linux::ucontext_get_pc(const ucontext_t * uc) {
+  return (address)uc->uc_mcontext.psw.addr;  // instruction address field of the saved PSW
+}
+
+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) {
+  uc->uc_mcontext.psw.addr = (unsigned long)pc;  // resume execution at pc after the handler returns
+}
+
+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) {
+  return (intptr_t*)uc->uc_mcontext.gregs[15/*REG_SP*/];  // r15 holds the stack pointer
+}
+
+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) {
+  return NULL;  // no dedicated frame pointer here; callers ignore fp (see fetch_frame_from_context)
+}
+
+ExtendedPC os::fetch_frame_from_context(const void* ucVoid,
+                    intptr_t** ret_sp, intptr_t** ret_fp) {
+
+  ExtendedPC  epc;
+  const ucontext_t* uc = (const ucontext_t*)ucVoid;
+
+  if (uc != NULL) {  // ucVoid may legitimately be NULL
+    epc = ExtendedPC(os::Linux::ucontext_get_pc(uc));
+    if (ret_sp) { *ret_sp = os::Linux::ucontext_get_sp(uc); }
+    if (ret_fp) { *ret_fp = os::Linux::ucontext_get_fp(uc); }  // always NULL, see ucontext_get_fp
+  } else {
+    // Construct empty ExtendedPC for return value checking.
+    epc = ExtendedPC(NULL);
+    if (ret_sp) { *ret_sp = (intptr_t *)NULL; }
+    if (ret_fp) { *ret_fp = (intptr_t *)NULL; }
+  }
+
+  return epc;
+}
+
+frame os::fetch_frame_from_context(const void* ucVoid) {
+  intptr_t* sp;
+  intptr_t* fp;
+  ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp);
+  return frame(sp, epc.pc());  // fp is unused; frames carry only sp and pc here
+}
+
+frame os::get_sender_for_C_frame(frame* fr) {
+  if (*fr->sp() == 0) {  // a zero back chain terminates the stack walk
+    // fr is the last C frame.
+    return frame();
+  }
+
+  // If its not one of our frames, the return pc is saved at gpr14
+  // stack slot. The call_stub stores the return_pc to the stack slot
+  // of gpr10.
+  if ((Interpreter::code() != NULL && Interpreter::contains(fr->pc())) ||
+      (CodeCache::contains(fr->pc()) && !StubRoutines::contains(fr->pc()))) {
+    return frame(fr->sender_sp(), fr->sender_pc());  // VM-generated frame
+  } else {
+    if (StubRoutines::contains(fr->pc())) {
+      StubCodeDesc* desc = StubCodeDesc::desc_for(fr->pc());
+      if (desc && !strcmp(desc->name(),"call_stub")) {
+        return frame(fr->sender_sp(), fr->callstub_sender_pc());  // return pc from the gpr10 slot
+      } else {
+        return frame(fr->sender_sp(), fr->sender_pc());
+      }
+    } else {
+      return frame(fr->sender_sp(), fr->native_sender_pc());  // plain C frame: return pc from the gpr14 slot
+    }
+  }
+}
+
+frame os::current_frame() {
+  intptr_t* csp = (intptr_t*) *((intptr_t*) os::current_stack_pointer());
+  assert (csp != NULL, "sp should not be NULL");
+  // Pass a dummy pc. This way we don't have to load it from the
+  // stack, since we don't know in which slot we can find it.
+  frame topframe(csp, (address)0x8);
+  if (os::is_first_C_frame(&topframe)) {
+    // Stack is not walkable.
+    return frame();
+  } else {
+    frame senderFrame = os::get_sender_for_C_frame(&topframe);
+    assert(senderFrame.pc() != NULL, "Sender pc should not be NULL");
+    // Return sender of sender of current topframe which hopefully
+    // both have pc != NULL. Reuse senderFrame instead of recomputing
+    // the sender of topframe a second time (same deterministic walk,
+    // half the work).
+    return os::get_sender_for_C_frame(&senderFrame);
+  }
+}
+
+// Utility functions
+
+extern "C" JNIEXPORT int
+JVM_handle_linux_signal(int sig,
+                        siginfo_t* info,
+                        void* ucVoid,
+                        int abort_if_unrecognized) {
+  ucontext_t* uc = (ucontext_t*) ucVoid;
+
+  Thread* t = Thread::current_or_null_safe();
+
+  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+  // (no destructors can be run).
+  os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
+  SignalHandlerMark shm(t);
+
+  // Note: it's not uncommon that JNI code uses signal/sigset to install
+  // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
+  // or have a SIGILL handler when detecting CPU type). When that happens,
+  // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
+  // avoid unnecessary crash when libjsig is not preloaded, try handle signals
+  // that do not require siginfo/ucontext first.
+
+  if (sig == SIGPIPE) {
+    if (os::Linux::chained_handler(sig, info, ucVoid)) {
+      return true;
+    } else {
+      if (PrintMiscellaneous && (WizardMode || Verbose)) {
+        warning("Ignoring SIGPIPE - see bug 4229104");
+      }
+      return true;
+    }
+  }
+
+  JavaThread* thread = NULL;
+  VMThread* vmthread = NULL;
+  if (os::Linux::signal_handlers_are_installed) {
+    if (t != NULL) {
+      if(t->is_Java_thread()) {
+        thread = (JavaThread*)t;
+      } else if(t->is_VM_thread()) {
+        vmthread = (VMThread *)t;
+      }
+    }
+  }
+
+  // Moved SafeFetch32 handling outside thread!=NULL conditional block to make
+  // it work if no associated JavaThread object exists.
+  if (uc) {
+    address const pc = os::Linux::ucontext_get_pc(uc);
+    if (pc && StubRoutines::is_safefetch_fault(pc)) {
+      os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
+      return true;
+    }
+  }
+
+  // Decide if this trap can be handled by a stub.
+  address stub    = NULL;
+  address pc      = NULL;  // Pc as retrieved from PSW. Usually points past failing instruction.
+  address trap_pc = NULL;  // Pc of the instruction causing the trap.
+
+  //%note os_trap_1
+  if (info != NULL && uc != NULL && thread != NULL) {
+    pc = os::Linux::ucontext_get_pc(uc);
+    if (TraceTraps) {
+      tty->print_cr("     pc at " INTPTR_FORMAT, p2i(pc));
+    }
+    if ((unsigned long)(pc - (address)info->si_addr) <= (unsigned long)Assembler::instr_maxlen() ) {
+      trap_pc = (address)info->si_addr;
+      if (TraceTraps) {
+        tty->print_cr("trap_pc at " INTPTR_FORMAT, p2i(trap_pc));
+      }
+    }
+
+    // Handle ALL stack overflow variations here
+    if (sig == SIGSEGV) {
+      address addr = (address)info->si_addr; // Address causing SIGSEGV, usually mem ref target.
+
+      // Check if fault address is within thread stack.
+      if (thread->on_local_stack(addr)) {
+        // stack overflow
+        if (thread->in_stack_yellow_reserved_zone(addr)) {
+          thread->disable_stack_yellow_reserved_zone();
+          if (thread->thread_state() == _thread_in_Java) {
+            // Throw a stack overflow exception.
+            // Guard pages will be reenabled while unwinding the stack.
+            stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
+          } else {
+            // Thread was in the vm or native code. Return and try to finish.
+            return 1;
+          }
+        } else if (thread->in_stack_red_zone(addr)) {
+          // Fatal red zone violation.  Disable the guard pages and fall through
+          // to handle_unexpected_exception way down below.
+          thread->disable_stack_red_zone();
+          tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
+
+          // This is a likely cause, but hard to verify. Let's just print
+          // it as a hint.
+          tty->print_raw_cr("Please check if any of your loaded .so files has "
+                            "enabled executable stack (see man page execstack(8))");
+        } else {
+          // Accessing stack address below sp may cause SEGV if current
+          // thread has MAP_GROWSDOWN stack. This should only happen when
+          // current thread was created by user code with MAP_GROWSDOWN flag
+          // and then attached to VM. See notes in os_linux.cpp.
+          if (thread->osthread()->expanding_stack() == 0) {
+             thread->osthread()->set_expanding_stack();
+             if (os::Linux::manually_expand_stack(thread, addr)) {
+               thread->osthread()->clear_expanding_stack();
+               return 1;
+             }
+             thread->osthread()->clear_expanding_stack();
+          } else {
+             fatal("recursive segv. expanding stack.");
+          }
+        }
+      }
+    }
+
+    if (thread->thread_state() == _thread_in_Java) {
+      // Java thread running in Java code => find exception handler if any
+      // a fault inside compiled code, the interpreter, or a stub
+
+      // Handle signal from NativeJump::patch_verified_entry().
+      if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
+        if (TraceTraps) {
+          tty->print_cr("trap: zombie_not_entrant (SIGILL)");
+        }
+        stub = SharedRuntime::get_handle_wrong_method_stub();
+      }
+
+      else if (sig == SIGSEGV &&
+               os::is_poll_address((address)info->si_addr)) {
+        if (TraceTraps) {
+          tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc));
+        }
+        stub = SharedRuntime::get_poll_stub(pc);
+
+        // Info->si_addr only points to the page base address, so we
+        // must extract the real si_addr from the instruction and the
+        // ucontext.
+        assert(((NativeInstruction*)pc)->is_safepoint_poll(), "must be safepoint poll");
+        const address real_si_addr = ((NativeInstruction*)pc)->get_poll_address(uc);
+      }
+
+      // SIGTRAP-based implicit null check in compiled code.
+      else if ((sig == SIGFPE) &&
+               TrapBasedNullChecks &&
+               (trap_pc != NULL) &&
+               Assembler::is_sigtrap_zero_check(trap_pc)) {
+        if (TraceTraps) {
+          tty->print_cr("trap: NULL_CHECK at " INTPTR_FORMAT " (SIGFPE)", p2i(trap_pc));
+        }
+        stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_NULL);
+      }
+
+      else if (sig == SIGSEGV && ImplicitNullChecks &&
+               CodeCache::contains((void*) pc) &&
+               !MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) {
+        if (TraceTraps) {
+          tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc));
+        }
+        stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
+      }
+
+      // SIGTRAP-based implicit range check in compiled code.
+      else if (sig == SIGFPE && TrapBasedRangeChecks &&
+               (trap_pc != NULL) &&
+               Assembler::is_sigtrap_range_check(trap_pc)) {
+        if (TraceTraps) {
+          tty->print_cr("trap: RANGE_CHECK at " INTPTR_FORMAT " (SIGFPE)", p2i(trap_pc));
+        }
+        stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_NULL);
+      }
+
+      else if (sig == SIGFPE && info->si_code == FPE_INTDIV) {
+        stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO);
+      }
+
+      else if (sig == SIGBUS) {
+        // BugId 4454115: A read from a MappedByteBuffer can fault here if the
+        // underlying file has been truncated. Do not crash the VM in such a case.
+        CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
+        nmethod* nm = (cb != NULL && cb->is_nmethod()) ? (nmethod*)cb : NULL;
+        if (nm != NULL && nm->has_unsafe_access()) {
+          // We don't really need a stub here! Just set the pending exeption and
+          // continue at the next instruction after the faulting read. Returning
+          // garbage from this read is ok.
+          thread->set_pending_unsafe_access_error();
+          uc->uc_mcontext.psw.addr = ((unsigned long)pc) + Assembler::instr_len(pc);
+          return true;
+        }
+      }
+    }
+
+    else { // thread->thread_state() != _thread_in_Java
+      if (sig == SIGILL && VM_Version::is_determine_features_test_running()) {
+        // SIGILL must be caused by VM_Version::determine_features().
+        //*(int *) (pc-6)=0; // Patch instruction to 0 to indicate that it causes a SIGILL.
+                             // Flushing of icache is not necessary.
+        stub = pc; // Continue with next instruction.
+      } else if (thread->thread_state() == _thread_in_vm &&
+                 sig == SIGBUS && thread->doing_unsafe_access()) {
+        // We don't really need a stub here! Just set the pending exeption and
+        // continue at the next instruction after the faulting read. Returning
+        // garbage from this read is ok.
+        thread->set_pending_unsafe_access_error();
+        os::Linux::ucontext_set_pc(uc, pc + Assembler::instr_len(pc));
+        return true;
+      }
+    }
+
+    // Check to see if we caught the safepoint code in the
+    // process of write protecting the memory serialization page.
+    // It write enables the page immediately after protecting it
+    // so we can just return to retry the write.
+    // Info->si_addr need not be the exact address, it is only
+    // guaranteed to be on the same page as the address that caused
+    // the SIGSEGV.
+    if ((sig == SIGSEGV) &&
+        (os::get_memory_serialize_page() ==
+         (address)((uintptr_t)info->si_addr & ~(os::vm_page_size()-1)))) {
+      return true;
+    }
+  }
+
+  if (stub != NULL) {
+    // Save all thread context in case we need to restore it.
+    if (thread != NULL) thread->set_saved_exception_pc(pc);
+    os::Linux::ucontext_set_pc(uc, stub);
+    return true;
+  }
+
+  // signal-chaining
+  if (os::Linux::chained_handler(sig, info, ucVoid)) {
+    return true;
+  }
+
+  if (!abort_if_unrecognized) {
+    // caller wants another chance, so give it to him
+    return false;
+  }
+
+  if (pc == NULL && uc != NULL) {
+    pc = os::Linux::ucontext_get_pc(uc);
+  }
+
+  // unmask current signal
+  sigset_t newset;
+  sigemptyset(&newset);
+  sigaddset(&newset, sig);
+  sigprocmask(SIG_UNBLOCK, &newset, NULL);
+
+  VMError::report_and_die(t, sig, pc, info, ucVoid);
+
+  ShouldNotReachHere();
+  return false;
+}
+
+// Per-thread FPU state initialization hook, called during thread startup.
+void os::Linux::init_thread_fpu_state(void) {
+  // Nothing to do on z/Architecture.
+}
+
+// Return the FPU control word. No explicit FPU control word management is
+// performed on z/Architecture, so a dummy value is returned.
+int os::Linux::get_fpu_control_word(void) {
+  // Nothing to do on z/Architecture.
+  return 0;
+}
+
+// Restore a previously retrieved FPU control word; no-op on z/Architecture
+// (matches get_fpu_control_word(), which never saves real state).
+void os::Linux::set_fpu_control_word(int fpu_control) {
+  // Nothing to do on z/Architecture.
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// thread stack
+
+// Minimum usable stack sizes, per thread type (uniformly 128K on this port).
+size_t os::Posix::_compiler_thread_min_stack_allowed = 128 * K;
+size_t os::Posix::_java_thread_min_stack_allowed = 128 * K;
+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 128 * K;
+
+// Return the default stack size for the given thread type.
+size_t os::Posix::default_stack_size(os::ThreadType thr_type) {
+  // Default stack size: compiler threads need a larger stack (4M vs 1M).
+  size_t s = (thr_type == os::compiler_thread ? 4 * M : 1024 * K);
+  return s;
+}
+
+// Size of the glibc guard area requested for new threads.
+// Note: thr_type is unused here; all thread types get the same guard size.
+size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
+  // z/Architecture: put 2 guard pages right in the middle of thread stack. This value
+  // should be consistent with the value used by register stack handling code.
+  return 2 * page_size();
+}
+
+// Java thread:
+//
+//   Low memory addresses
+//    +------------------------+
+//    |                        |\
+//    |    glibc guard page    | - Right in the middle of stack, 2 pages
+//    |                        |/
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+//    |                        |\
+//    |  HotSpot Guard Pages   | - red and yellow pages
+//    |                        |/
+//    +------------------------+ JavaThread::stack_yellow_zone_base()
+//    |                        |\
+//    |      Normal Stack      | -
+//    |                        |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// Non-Java thread:
+//
+//   Low memory addresses
+//    +------------------------+
+//    |                        |\
+//    |    glibc guard page    | - Right in the middle of stack, 2 pages
+//    |                        |/
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+//    |                        |\
+//    |      Normal Stack      | -
+//    |                        |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// ** P2 is the address returned from pthread_attr_getstackaddr(), P2 - P1
+//    is the stack size returned by pthread_attr_getstacksize().
+
+
+// Compute the extent of the current thread's stack: on return, *bottom is
+// the lowest address of the region and *size its total size, so the stack
+// base (highest address) is *bottom + *size. See the layout diagrams above.
+static void current_stack_region(address * bottom, size_t * size) {
+  if (os::Linux::is_initial_thread()) {
+    // Initial thread needs special handling because pthread_getattr_np()
+    // may return bogus value.
+    *bottom = os::Linux::initial_thread_stack_bottom();
+    *size   = os::Linux::initial_thread_stack_size();
+  } else {
+    pthread_attr_t attr;
+
+    int rslt = pthread_getattr_np(pthread_self(), &attr);
+
+    // JVM needs to know exact stack location, abort if it fails
+    if (rslt != 0) {
+      if (rslt == ENOMEM) {
+        vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
+      } else {
+        fatal("pthread_getattr_np failed with errno = %d", rslt);
+      }
+    }
+
+    if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) {
+      fatal("Can not locate current stack attributes!");
+    }
+
+    // Release resources allocated by pthread_getattr_np().
+    pthread_attr_destroy(&attr);
+
+  }
+  // Sanity check: the current stack pointer must lie within the region.
+  assert(os::current_stack_pointer() >= *bottom &&
+         os::current_stack_pointer() < *bottom + *size, "just checking");
+}
+
+// Return the base (highest address) of the current thread's stack.
+address os::current_stack_base() {
+  address bottom;
+  size_t size;
+  current_stack_region(&bottom, &size);
+  return (bottom + size);
+}
+
+// Return the total size of the current thread's stack.
+size_t os::current_stack_size() {
+  // stack size includes normal stack and HotSpot guard pages
+  address bottom;
+  size_t size;
+  current_stack_region(&bottom, &size);
+  return size;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// helper functions for fatal error handler
+
+// Dump the processor state captured in 'context' (a ucontext_t*) to 'st'.
+// Called from the fatal error handler, so it must make no assumptions about
+// VM state and should tolerate a context referencing partially valid data.
+void os::print_context(outputStream *st, const void *context) {
+  if (context == NULL) return;
+
+  const ucontext_t* uc = (const ucontext_t*)context;
+
+  st->print_cr("Processor state:");
+  st->print_cr("----------------");
+  st->print_cr("        ip = " INTPTR_FORMAT " ", uc->uc_mcontext.psw.addr);
+  st->print_cr(" proc mask = " INTPTR_FORMAT " ", uc->uc_mcontext.psw.mask);
+  st->print_cr("   fpc reg = 0x%8.8x "          , uc->uc_mcontext.fpregs.fpc);
+  st->cr();
+
+  // Two registers per row; each register is printed twice, first as hex
+  // and then as a signed decimal value.
+  st->print_cr("General Purpose Registers:");
+  st->print_cr("--------------------------");
+  for( int i = 0; i < 16; i+=2 ) {
+    st->print("  r%-2d = " INTPTR_FORMAT "  " ,  i,   uc->uc_mcontext.gregs[i]);
+    st->print("  r%-2d = " INTPTR_FORMAT "  |",  i+1, uc->uc_mcontext.gregs[i+1]);
+    st->print("  r%-2d = %23.1ld  "           ,  i,   uc->uc_mcontext.gregs[i]);
+    st->print("  r%-2d = %23.1ld  "           ,  i+1, uc->uc_mcontext.gregs[i+1]);
+    st->cr();
+  }
+  st->cr();
+
+  st->print_cr("Access Registers:");
+  st->print_cr("-----------------");
+  for( int i = 0; i < 16; i+=2 ) {
+    st->print("  ar%-2d = 0x%8.8x  ", i,   uc->uc_mcontext.aregs[i]);
+    st->print("  ar%-2d = 0x%8.8x  ", i+1, uc->uc_mcontext.aregs[i+1]);
+    st->cr();
+  }
+  st->cr();
+
+  // NOTE(review): the INTPTR_FORMAT columns print the double value converted
+  // to int64_t (numeric truncation), not the register's raw bit pattern —
+  // confirm this presentation is intended.
+  st->print_cr("Float Registers:");
+  st->print_cr("----------------");
+  for (int i = 0; i < 16; i += 2) {
+    st->print("  fr%-2d = " INTPTR_FORMAT "  " , i,   (int64_t)(uc->uc_mcontext.fpregs.fprs[i].d));
+    st->print("  fr%-2d = " INTPTR_FORMAT "  |", i+1, (int64_t)(uc->uc_mcontext.fpregs.fprs[i+1].d));
+    st->print("  fr%-2d = %23.15e  "           , i,   (uc->uc_mcontext.fpregs.fprs[i].d));
+    st->print("  fr%-2d = %23.15e  "           , i+1, (uc->uc_mcontext.fpregs.fprs[i+1].d));
+    st->cr();
+  }
+  st->cr();
+  st->cr();
+
+  // Hex dump of the 128 words at the top of the stack.
+  intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
+  st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp));
+  print_hex_dump(st, (address)sp, (address)(sp + 128), sizeof(intptr_t));
+  st->cr();
+
+  // Note: it may be unsafe to inspect memory near pc. For example, pc may
+  // point to garbage if entry point in an nmethod is corrupted. Leave
+  // this at the end, and hope for the best.
+  address pc = os::Linux::ucontext_get_pc(uc);
+  if (Verbose) { st->print_cr("pc at " PTR_FORMAT, p2i(pc)); }
+  st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc));
+  print_hex_dump(st, pc-64, pc+64, /*intrsize=*/4);
+  st->cr();
+}
+
+// Per-register detail output for the error report; not implemented on s390.
+void os::print_register_info(outputStream *st, const void *context) {
+  st->print("Not ported\n");
+}
+
+#ifndef PRODUCT
+// Debug-only hook; no stack alignment checks are performed on this platform.
+void os::verify_stack_alignment() {
+}
+#endif
+
+// Extra space (in bytes) the compiled-code stack bang needs to cover.
+int os::extra_bang_size_in_bytes() {
+  // z/Architecture does not require the additional stack bang.
+  return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP
+
+  // No FPU setup is required on z/Architecture.
+  static void setup_fpu() {}
+
+  // Used to register dynamic code cache area with the OS.
+  // Nothing to do here; returning true reports success to the caller.
+  static bool register_code_area(char *low, char *high) { return true; }
+
+#endif // OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/prefetch_linux_s390.inline.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+// Prefetch hints compile to nothing on this platform; both arguments are
+// intentionally ignored.
+inline void Prefetch::read(void* loc, intx interval) {
+  // No prefetch instructions on z/Architecture -> implement trivially.
+}
+
+inline void Prefetch::write(void* loc, intx interval) {
+  // No prefetch instructions on z/Architecture -> implement trivially.
+}
+
+#endif // OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.cpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/thread.hpp"
+
+// Forte Analyzer AsyncGetCallTrace profiling support is not implemented on Linux/S390x.
+// Calls Unimplemented() (fatal) if ever invoked; the return is never reached
+// in that case.
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava) {
+  Unimplemented();
+  return false;
+}
+
+// No platform-global variables need caching on linux-s390.
+void JavaThread::cache_global_variables() { }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
+
+ private:
+
+  // Platform-specific thread-state initialization: clears the Java frame
+  // anchor and the saved interpreter frame pointer.
+  void pd_initialize() {
+    _anchor.clear();
+    _last_interpreter_fp = NULL;
+  }
+
+  // The `last' frame is the youngest Java frame on the thread's stack.
+  frame pd_last_frame() {
+    assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
+
+    intptr_t* sp = last_Java_sp();
+    address pc = _anchor.last_Java_pc();
+
+    // Last_Java_pc is not set if we come here from compiled code.
+    // Recover the pc from slot 14 of the frame — presumably the
+    // z/Architecture ABI save slot for r14 (the return address register);
+    // confirm against the frame layout in frame_s390.hpp.
+    if (pc == NULL) {
+      pc = (address) *(sp + 14);
+    }
+
+    return frame(sp, pc);
+  }
+
+ public:
+  // No separate "base of stack pointer" is maintained on this platform;
+  // these are no-ops kept for the shared Thread interface.
+  void set_base_of_stack_pointer(intptr_t* base_sp) {}
+  intptr_t* base_of_stack_pointer() { return NULL; }
+  void record_base_of_stack_pointer() {}
+
+  // These routines are only used on cpu architectures that
+  // have separate register stacks (Itanium).
+  static bool register_stack_overflow() { return false; }
+  static void enable_register_stack_guard() {}
+  static void disable_register_stack_guard() {}
+
+  // Not implemented on this platform; see thread_linux_s390.cpp.
+  bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava);
+
+ protected:
+
+  // -Xprof support
+  //
+  // In order to find the last Java fp from an async profile
+  // tick, we store the current interpreter fp in the thread.
+  // This value is only valid while we are in the C++ interpreter
+  // and profiling.
+  intptr_t *_last_interpreter_fp;
+
+ public:
+
+  // Byte offset of _last_interpreter_fp within JavaThread, for use by
+  // generated code.
+  static ByteSize last_interpreter_fp_offset() {
+    return byte_offset_of(JavaThread, _last_interpreter_fp);
+  }
+
+  // Accessor for the saved interpreter frame pointer (may be NULL).
+  intptr_t* last_interpreter_fp() { return _last_interpreter_fp; }
+
+#endif // OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/vmStructs_linux_s390.hpp	Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+// OS/CPU-specific VM structure fields exposed to the Serviceability Agent.
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+                                                                                                                                     \
+  /******************************/                                                                                                   \
+  /* Threads (NOTE: incomplete) */                                                                                                   \
+  /******************************/                                                                                                   \
+  nonstatic_field(OSThread,                      _thread_id,                                      pid_t)                             \
+  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)
+
+
+// OS/CPU-specific types exposed to the Serviceability Agent.
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+                                                                          \
+  /**********************/                                                \
+  /* Posix Thread IDs   */                                                \
+  /**********************/                                                \
+                                                                          \
+  declare_integer_type(pid_t)                                             \
+  declare_unsigned_integer_type(pthread_t)
+
+// No OS/CPU-specific integer or long constants are needed on linux-s390.
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP
+