8167673: [s390] The s390 port.
Summary: template interpreter, C1, C2
Reviewed-by: kvn, simonis
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/abstractInterpreter_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/constMethod.hpp"
+#include "oops/method.hpp"
+#include "runtime/frame.inline.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/macros.hpp"
+
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
+  int i = 0; // Index for the given basic type; checked against number_of_result_handlers below.
+  switch (type) {
+    case T_BOOLEAN: i = 0; break;
+    case T_CHAR   : i = 1; break;
+    case T_BYTE   : i = 2; break;
+    case T_SHORT  : i = 3; break;
+    case T_INT    : i = 4; break;
+    case T_LONG   : i = 5; break;
+    case T_VOID   : i = 6; break;
+    case T_FLOAT  : i = 7; break;
+    case T_DOUBLE : i = 8; break;
+    case T_OBJECT : // Objects and arrays share one index.
+    case T_ARRAY  : i = 9; break;
+    default       : ShouldNotReachHere();
+  }
+  assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
+  return i;
+}
+
+bool AbstractInterpreter::can_be_compiled(methodHandle m) {
+ // Always answers true: there are no special entry points that preclude compilation.
+ return true; // The argument m is not inspected.
+}
+
+// Computes how much stack (in words) the top interpreter activation of a method needs.
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
+
+  // Two frames have to be accounted for:
+  //
+  //   [TOP_IJAVA_FRAME_ABI]
+  //   [ENTRY_FRAME]
+  //
+  // Spelled out (see frame_s390.hpp), that is:
+  //
+  //   [TOP_IJAVA_FRAME_ABI]
+  //   [operand stack]                      > stack_bytes
+  //   [monitors]      (optional)           > monitor_bytes
+  //   [IJAVA_STATE]                        > frame::z_ijava_state_size
+  //   [PARENT_IJAVA_FRAME_ABI]
+  //   [callee's locals w/o arguments]      \ locals_bytes
+  //   [outgoing arguments]                 /
+  //   [ENTRY_FRAME_LOCALS]
+
+  // Sections whose size depends on the method, in bytes.
+  const int stack_bytes   = method->max_stack() * BytesPerWord;
+  const int monitor_bytes = method->is_synchronized() ? frame::interpreter_frame_monitor_size_in_bytes() : 0;
+  const int locals_bytes  = method->max_locals() * BytesPerWord;
+
+  // Add up all sections top-down, in the order of the layout sketched above.
+  int total_bytes = frame::z_top_ijava_frame_abi_size;
+  total_bytes += stack_bytes;
+  total_bytes += monitor_bytes;
+  total_bytes += frame::z_ijava_state_size;
+  total_bytes += frame::z_parent_ijava_frame_abi_size;
+  total_bytes += locals_bytes;
+  total_bytes += frame::z_entry_frame_locals_size;
+
+  // The caller expects words, not bytes.
+  return (total_bytes/BytesPerWord);
+}
+
+// Computes the number of stackElementWords occupied by an interpreter frame
+// that is made up of the given sections.
+// In case of alignments, the result overestimates the stack by one slot.
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Note: Keep this computation exactly in step with the frame setup
+  // in AbstractInterpreterGenerator::generate_method_entry.
+
+  assert((Interpreter::stackElementSize == frame::alignment_in_bytes), "must align frame size");
+  // All contributions are counted in stack slots. A top frame uses the top ABI area, any other frame the parent ABI area.
+  int abi_scratch = frame::z_parent_ijava_frame_abi_size / Interpreter::stackElementSize;
+  if (is_top_frame) {
+    abi_scratch = frame::z_top_ijava_frame_abi_size / Interpreter::stackElementSize;
+  }
+  const int state_slots   = frame::z_ijava_state_size / Interpreter::stackElementSize;
+  const int monitor_slots = monitors * frame::interpreter_frame_monitor_size();
+  const int extra_locals  = callee_locals - callee_params; // Original note: already counted in max_stack().
+
+  // The parameters temps and extra_args do not enter the computation.
+  // Fixed size of an interpreter frame.
+  return max_stack + extra_locals + monitor_slots + abi_scratch + state_slots;
+}
+
+// Fills a skeletal interpreter frame generated during deoptimizations.
+//
+// Parameters:
+//
+// interpreter_frame != NULL:
+// set up the method, locals, and monitors.
+// The frame interpreter_frame, if not NULL, is guaranteed to be the
+// right size, as determined beforehand (presumably by size_activation(); this method returns void).
+// It is also guaranteed to be walkable even though it is in a skeletal state
+//
+// is_top_frame == true:
+// We're processing the *oldest* interpreter frame!
+//
+// popframe_extra_args:
+// If this is != 0 we are returning to a deoptimized frame by popping
+// off the callee frame. We want to re-execute the call that called the
+// callee interpreted, but since the return to the interpreter would pop
+// the arguments off advance the esp by dummy popframe_extra_args slots.
+// Popping off those will establish the stack layout as it was before the call.
+//
+
+void AbstractInterpreter::layout_activation(Method* method,
+ int tempcount,
+ int popframe_extra_args,
+ int moncount,
+ int caller_actual_parameters,
+ int callee_param_count,
+ int callee_locals_count,
+ frame* caller,
+ frame* interpreter_frame,
+ bool is_top_frame,
+ bool is_bottom_frame) {
+ // TOP_IJAVA_FRAME:
+ //
+ // 0 [TOP_IJAVA_FRAME_ABI] -+
+ // 16 [operand stack] | size
+ // [monitors] (optional) |
+ // [IJAVA_STATE] -+
+ // Note: own locals are located in the caller frame.
+ //
+ // PARENT_IJAVA_FRAME:
+ //
+ // 0 [PARENT_IJAVA_FRAME_ABI] -+
+ // [callee's locals w/o arguments] |
+ // [outgoing arguments] | size
+ // [used part of operand stack w/o arguments] |
+ // [monitors] (optional) |
+ // [IJAVA_STATE] -+
+ //
+
+ // Now we know our caller, calc the exact frame layout and size
+ // z_ijava_state->locals - i*BytesPerWord points to i-th Java local (i starts at 0).
+ intptr_t* locals_base = (caller->is_interpreted_frame())
+ ? (caller->interpreter_frame_tos_address() + caller_actual_parameters - 1)
+ : (caller->sp() + method->max_locals() - 1 +
+ frame::z_parent_ijava_frame_abi_size / Interpreter::stackElementSize);
+
+ intptr_t* monitor_base = (intptr_t*)((address)interpreter_frame->fp() - frame::z_ijava_state_size); // fp minus the size of the IJAVA_STATE.
+ intptr_t* monitor = monitor_base - (moncount * frame::interpreter_frame_monitor_size()); // Room for moncount monitors below monitor_base.
+ intptr_t* operand_stack_base = monitor; // The operand stack begins where the monitors end.
+ intptr_t* tos = operand_stack_base - tempcount - popframe_extra_args; // Below tempcount slots plus the popframe extras.
+ intptr_t* top_frame_sp =
+ operand_stack_base - method->max_stack() - frame::z_top_ijava_frame_abi_size / Interpreter::stackElementSize; // Below a max_stack-sized operand stack and the top frame ABI.
+ intptr_t* sender_sp;
+ if (caller->is_interpreted_frame()) {
+ sender_sp = caller->interpreter_frame_top_frame_sp();
+ } else if (caller->is_compiled_frame()) {
+ sender_sp = caller->fp() - caller->cb()->frame_size();
+ // The bottom frame's sender_sp is its caller's unextended_sp.
+ // It was already set when its skeleton was pushed (see push_skeleton_frames()).
+ // Note: the unextended_sp is required by nmethod::orig_pc_addr().
+ assert(is_bottom_frame && (sender_sp == caller->unextended_sp()),
+ "must initialize sender_sp of bottom skeleton frame when pushing it");
+ } else {
+ assert(caller->is_entry_frame(), "is there a new frame type??");
+ sender_sp = caller->sp(); // Call_stub only uses its fp.
+ }
+
+ interpreter_frame->interpreter_frame_set_method(method); // From here on: store the computed values into the frame.
+ interpreter_frame->interpreter_frame_set_mirror(method->method_holder()->java_mirror());
+ interpreter_frame->interpreter_frame_set_locals(locals_base);
+ interpreter_frame->interpreter_frame_set_monitor_end((BasicObjectLock *)monitor);
+ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache();
+ interpreter_frame->interpreter_frame_set_tos_address(tos);
+ interpreter_frame->interpreter_frame_set_sender_sp(sender_sp);
+ interpreter_frame->interpreter_frame_set_top_frame_sp(top_frame_sp);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/assembler_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif
+
+// Convention: Use Z_R0 and Z_R1 instead of Z_scratch_* in all
+// assembler_s390.* files.
+
+// Translates the raw operand encoding into the form the Address
+// constructor expects. adlc generated code calls this.
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
+  assert(scale == 0, "Scale should not be used on z/Architecture. The call to make_raw is "
+                     "generated by adlc and this must mirror all features of Operands from machnode.hpp.");
+  assert(disp_reloc == relocInfo::none, "not implemented on z/Architecture.");
+
+  // base and index are turned into registers, disp into a byte size.
+  return Address(as_Register(base), as_Register(index), in_ByteSize(disp));
+}
+
+int AbstractAssembler::code_fill_byte() {
+ return 0x00; // Fill byte is zero; a word of all zeros (0x00000000) is an illegal instruction.
+}
+
+// Condition code masks. Details see enum branch_condition.
+// Although this method is meant for INT CCs, the Overflow/Ordered
+// bit in the masks has to be considered. The CC might have been set
+// by a float operation, but is evaluated while calculating an integer
+// result. See elementary test TestFloat.isNotEqual(FF)Z for example.
+Assembler::branch_condition Assembler::inverse_condition(Assembler::branch_condition cc) {
+ Assembler::branch_condition unordered_bit = (Assembler::branch_condition)(cc & bcondNotOrdered); // Non-zero iff cc includes the not-ordered bit.
+ Assembler::branch_condition inverse_cc;
+
+ // Some are commented out to avoid duplicate labels.
+ switch (cc) {
+ case bcondNever : inverse_cc = bcondAlways; break; // 0 -> 15
+ case bcondAlways : inverse_cc = bcondNever; break; // 15 -> 0
+
+ case bcondOverflow : inverse_cc = bcondNotOverflow; break; // 1 -> 14
+ case bcondNotOverflow : inverse_cc = bcondOverflow; break; // 14 -> 1
+
+ default :
+ switch ((Assembler::branch_condition)(cc & bcondOrdered)) { // Look up the condition with the not-ordered bit masked out.
+ case bcondEqual : inverse_cc = bcondNotEqual; break; // 8 -> 6
+ // case bcondZero :
+ // case bcondAllZero :
+
+ case bcondNotEqual : inverse_cc = bcondEqual; break; // 6 -> 8
+ // case bcondNotZero :
+ // case bcondMixed :
+
+ case bcondLow : inverse_cc = bcondNotLow; break; // 4 -> 10
+ // case bcondNegative :
+
+ case bcondNotLow : inverse_cc = bcondLow; break; // 10 -> 4
+ // case bcondNotNegative :
+
+ case bcondHigh : inverse_cc = bcondNotHigh; break; // 2 -> 12
+ // case bcondPositive :
+
+ case bcondNotHigh : inverse_cc = bcondHigh; break; // 12 -> 2
+ // case bcondNotPositive :
+
+ default :
+ fprintf(stderr, "inverse_condition(%d)\n", (int)cc);
+ fflush(stderr);
+ ShouldNotReachHere();
+ return bcondNever;
+ }
+ // If cc is even, inverse_cc must be odd.
+ if (!unordered_bit) {
+ inverse_cc = (Assembler::branch_condition)(inverse_cc | bcondNotOrdered); // The arrows above omit this bit: with it, e.g. 8 -> 7, whereas 9 -> 6.
+ }
+ break;
+ }
+ return inverse_cc;
+}
+
+Assembler::branch_condition Assembler::inverse_float_condition(Assembler::branch_condition cc) {
+ Assembler::branch_condition inverse_cc; // Set by every case that returns normally. Trailing numbers give the value of the case label.
+
+ switch (cc) {
+ case bcondNever : inverse_cc = bcondAlways; break; // 0
+ case bcondAlways : inverse_cc = bcondNever; break; // 15
+
+ case bcondNotOrdered : inverse_cc = bcondOrdered; break; // 1
+ case bcondOrdered : inverse_cc = bcondNotOrdered; break; // 14
+
+ case bcondEqual : inverse_cc = (branch_condition)(bcondNotEqual + bcondNotOrdered); break; // 8
+ case bcondNotEqual + bcondNotOrdered : inverse_cc = bcondEqual; break; // 7
+
+ case bcondLow + bcondNotOrdered : inverse_cc = (branch_condition)(bcondHigh + bcondEqual); break; // 5
+ case bcondNotLow : inverse_cc = (branch_condition)(bcondLow + bcondNotOrdered); break; // 10
+
+ case bcondHigh : inverse_cc = (branch_condition)(bcondLow + bcondNotOrdered + bcondEqual); break; // 2
+ case bcondNotHigh + bcondNotOrdered : inverse_cc = bcondHigh; break; // 13
+
+ default : // Any other mask is not supported: report it and give up.
+ fprintf(stderr, "inverse_float_condition(%d)\n", (int)cc);
+ fflush(stderr);
+ ShouldNotReachHere();
+ return bcondNever;
+ }
+ return inverse_cc;
+}
+
+#ifdef ASSERT
+void Assembler::print_dbg_msg(outputStream* out, unsigned long inst, const char* msg, int ilen) {
+ out->flush(); // Prints inst as hex, with two digits per unit of ilen (presumably bytes), followed by msg.
+ switch (ilen) {
+ case 2: out->print_cr("inst = %4.4x, %s", (unsigned short)inst, msg); break; // NOTE(review): no embedded "\n" here, unlike the cases below - confirm which form is intended.
+ case 4: out->print_cr("inst = %8.8x, %s\n", (unsigned int)inst, msg); break;
+ case 6: out->print_cr("inst = %12.12lx, %s\n", inst, msg); break;
+ default: out->print_cr("inst = %16.16lx, %s\n", inst, msg); break; // Any other length is printed with 16 digits.
+ }
+ out->flush();
+}
+
+void Assembler::dump_code_range(outputStream* out, address pc, const unsigned int range, const char* msg) {
+  out->cr();  // Debug aid: prints a banner carrying msg, then a hex dump and a disassembly near pc.
+  out->print_cr("-------------------------------");
+  out->print_cr("-- %s", msg);
+  out->print_cr("-------------------------------");
+  out->print_cr("Hex dump of +/-%u bytes around %p, interval [%p,%p)", range, pc, pc-range, pc+range);
+  os::print_hex_dump(out, pc-range, pc+range, 2);
+
+  out->cr();  // Only [pc, pc+range) is decoded below, so the header announces exactly that interval.
+  out->print_cr("Disassembly of %u bytes starting at %p, interval [%p,%p)", range, pc, pc, pc+range);
+  Disassembler::decode(pc, pc + range, out);
+}
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/assembler_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,2530 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_ASSEMBLER_S390_HPP
+#define CPU_S390_VM_ASSEMBLER_S390_HPP
+
+#undef LUCY_DBG
+
+#define NearLabel Label
+
+// Immediate abstracts the various kinds of immediate operands which
+// exist on z/Architecture. Neither the class nor its instances carry
+// any state; it is a collection of static test methods only.
+class Immediate VALUE_OBJ_CLASS_SPEC {
+
+ public:
+  // Test if x is within signed immediate range for nbits.
+  static bool is_simm(int64_t x, unsigned int nbits) {
+    // nbits < 2   --> false
+    // nbits >= 64 --> true
+    assert(2 <= nbits && nbits < 64, "Don't call, use statically known result.");
+    const int64_t limit = (1L << (nbits-1)); // First value beyond the positive end of the range.
+    return -limit <= x && x < limit;
+  }
+  static bool is_simm32(int64_t x) {
+    return is_simm(x, 32);
+  }
+  static bool is_simm20(int64_t x) {
+    return is_simm(x, 20);
+  }
+  static bool is_simm16(int64_t x) {
+    return is_simm(x, 16);
+  }
+  static bool is_simm8(int64_t x) {
+    return is_simm(x, 8);
+  }
+
+  // Test if x is within unsigned immediate range for nbits.
+  static bool is_uimm(int64_t x, unsigned int nbits) {
+    // nbits == 0  --> false
+    // nbits >= 64 --> true
+    assert(1 <= nbits && nbits < 64, "don't call, use statically known result");
+    const uint64_t bound = 1UL << nbits; // First value beyond the range.
+    // Unsigned comparison. Negative inputs appear to be very large.
+    return (uint64_t)(unsigned long)x < bound;
+  }
+  static bool is_uimm32(int64_t x) {
+    return is_uimm(x, 32);
+  }
+  static bool is_uimm16(int64_t x) {
+    return is_uimm(x, 16);
+  }
+  static bool is_uimm12(int64_t x) {
+    return is_uimm(x, 12);
+  }
+  static bool is_uimm8(int64_t x) {
+    return is_uimm(x, 8);
+  }
+};
+
+// Displacement is an abstraction to represent the various
+// displacements which exist with addresses on z/Architecture.
+// Neither this class nor instances hereof have an own state. It
+// consists of methods only.
+class Displacement VALUE_OBJ_CLASS_SPEC {
+
+ public: // These tests are used outside the (Macro)Assembler world, e.g. in ad-file.
+
+ static bool is_longDisp(int64_t x) { // Fits in a 20-bit displacement field.
+ return Immediate::is_simm20(x); // Long displacements are signed.
+ }
+ static bool is_shortDisp(int64_t x) { // Fits in a 12-bit displacement field.
+ return Immediate::is_uimm12(x); // Short displacements are unsigned.
+ }
+ static bool is_validDisp(int64_t x) { // Is a valid displacement, regardless of length constraints.
+ return is_longDisp(x); // Same test as for the long form.
+ }
+};
+
+// RelAddr is an abstraction to represent relative addresses in the
+// form they are used on z/Architecture for instructions which access
+// their operand with pc-relative addresses. Neither this class nor
+// instances hereof have an own state. It consists of methods only.
+class RelAddr VALUE_OBJ_CLASS_SPEC {
+
+ private: // No public use at all. Solely for (Macro)Assembler.
+
+ static bool is_in_range_of_RelAddr(address target, address pc, bool shortForm) {
+ // Guard against illegal branch targets, e.g. -1. Occurrences in
+ // CompiledStaticCall and ad-file. Do not assert (it's a test
+ // function!). Just return false in case of illegal operands.
+ if ((((uint64_t)target) & 0x0001L) != 0) return false; // Odd target address.
+ if ((((uint64_t)pc) & 0x0001L) != 0) return false; // Odd origin address.
+
+ if (shortForm) {
+ return Immediate::is_simm((int64_t)(target-pc), 17); // Relative short addresses can reach +/- 2**16 bytes.
+ } else {
+ return Immediate::is_simm((int64_t)(target-pc), 33); // Relative long addresses can reach +/- 2**32 bytes.
+ }
+ }
+
+ static bool is_in_range_of_RelAddr16(address target, address pc) {
+ return is_in_range_of_RelAddr(target, pc, true);
+ }
+ static bool is_in_range_of_RelAddr16(ptrdiff_t distance) {
+ return is_in_range_of_RelAddr((address)distance, 0, true); // The distance is tested as a target relative to pc == 0.
+ }
+
+ static bool is_in_range_of_RelAddr32(address target, address pc) {
+ return is_in_range_of_RelAddr(target, pc, false);
+ }
+ static bool is_in_range_of_RelAddr32(ptrdiff_t distance) {
+ return is_in_range_of_RelAddr((address)distance, 0, false); // The distance is tested as a target relative to pc == 0.
+ }
+
+ static int pcrel_off(address target, address pc, bool shortForm) { // Offset from pc to target, in units of 2 bytes.
+ assert(((uint64_t)target & 0x0001L) == 0, "target of a relative address must be aligned");
+ assert(((uint64_t)pc & 0x0001L) == 0, "origin of a relative address must be aligned");
+
+ if ((target == NULL) || (target == pc)) {
+ return 0; // Yet unknown branch destination.
+ } else {
+ guarantee(is_in_range_of_RelAddr(target, pc, shortForm), "target not within reach");
+ return (int)((target - pc)>>1); // Halve the byte distance.
+ }
+ }
+
+ static int pcrel_off16(address target, address pc) {
+ return pcrel_off(target, pc, true);
+ }
+ static int pcrel_off16(ptrdiff_t distance) {
+ return pcrel_off((address)distance, 0, true);
+ }
+
+ static int pcrel_off32(address target, address pc) {
+ return pcrel_off(target, pc, false);
+ }
+ static int pcrel_off32(ptrdiff_t distance) {
+ return pcrel_off((address)distance, 0, false);
+ }
+
+ static ptrdiff_t inv_pcrel_off16(int offset) { // Reverses the halving done by pcrel_off.
+ return ((ptrdiff_t)offset)<<1;
+ }
+
+ static ptrdiff_t inv_pcrel_off32(int offset) { // Same computation as inv_pcrel_off16.
+ return ((ptrdiff_t)offset)<<1;
+ }
+
+ friend class Assembler;
+ friend class MacroAssembler;
+ friend class NativeGeneralJump;
+};
+
+// Address is an abstraction used to represent a memory location
+// as passed to Z assembler instructions: an optional base register,
+// an optional index register, and a constant displacement.
+// Note: A register location is represented via a Register, not
+// via an address for efficiency & simplicity reasons.
+class Address VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _base;  // Base register.
+  Register _index; // Index register
+  intptr_t _disp;  // Constant displacement.
+
+ public:
+  Address() :
+    _base(noreg),
+    _index(noreg),
+    _disp(0) {}
+
+  Address(Register base, Register index, intptr_t disp = 0) :
+    _base(base),
+    _index(index),
+    _disp(disp) {}
+
+  Address(Register base, intptr_t disp = 0) :
+    _base(base),
+    _index(noreg),
+    _disp(disp) {}
+
+  Address(Register base, RegisterOrConstant roc, intptr_t disp = 0) :
+    _base(base),
+    _index(noreg),
+    _disp(disp) { // A constant roc is folded into the displacement, a register roc becomes the index.
+    if (roc.is_constant()) _disp += roc.as_constant(); else _index = roc.as_register();
+  }
+
+#ifdef ASSERT
+  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+  Address(Register base, ByteSize disp) :
+    _base(base),
+    _index(noreg),
+    _disp(in_bytes(disp)) {}
+
+  Address(Register base, Register index, ByteSize disp) :
+    _base(base),
+    _index(index),
+    _disp(in_bytes(disp)) {}
+#endif
+
+  // Aborts if disp is a register and base and index are set already.
+  Address plus_disp(RegisterOrConstant disp) const {
+    Address a = (*this);
+    a._disp += disp.constant_or_zero();
+    if (disp.is_register()) {
+      if (a._index == noreg) {
+        a._index = disp.as_register(); // Prefer the free index slot.
+      } else {
+        guarantee(_base == noreg, "can not encode"); a._base = disp.as_register();
+      }
+    }
+    return a;
+  }
+
+  // A call to this is generated by adlc for replacement variable $xxx$$Address.
+  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
+
+  bool is_same_address(Address a) const {
+    return _base == a._base && _index == a._index && _disp == a._disp;
+  }
+
+  // testers (all of them only read state, hence const)
+  bool has_base()  const { return _base != noreg; }
+  bool has_index() const { return _index != noreg; }
+  bool has_disp()  const { return true; } // There is no "invalid" value.
+
+  bool is_disp12()  const { return Immediate::is_uimm12(disp()); }
+  bool is_disp20()  const { return Immediate::is_simm20(disp()); }
+  bool is_RSform()  const { return has_base() && !has_index() && is_disp12(); }
+  bool is_RSYform() const { return has_base() && !has_index() && is_disp20(); }
+  bool is_RXform()  const { return has_base() &&  has_index() && is_disp12(); }
+  bool is_RXEform() const { return has_base() &&  has_index() && is_disp12(); }
+  bool is_RXYform() const { return has_base() &&  has_index() && is_disp20(); }
+
+  bool uses(Register r) const { return _base == r || _index == r; }
+
+  // accessors
+  Register base()      const { return _base; }
+  Register baseOrR0()  const { assert(_base != Z_R0, ""); return _base == noreg ? Z_R0 : _base; }
+  Register index()     const { return _index; }
+  Register indexOrR0() const { assert(_index != Z_R0, ""); return _index == noreg ? Z_R0 : _index; }
+  intptr_t disp()      const { return _disp; }
+  // Specific version for short displacement instructions.
+  int disp12() const {
+    assert(is_disp12(), "displacement out of range for uimm12");
+    return (int)_disp; // Narrowing is explicit; the range is asserted above.
+  }
+  // Specific version for long displacement instructions.
+  int disp20() const {
+    assert(is_disp20(), "displacement out of range for simm20");
+    return (int)_disp; // Narrowing is explicit; the range is asserted above.
+  }
+  intptr_t value() const { return _disp; }
+
+  friend class Assembler;
+};
+
+class AddressLiteral VALUE_OBJ_CLASS_SPEC { // Pairs an address value with a relocation spec.
+ private:
+ address _address;
+ RelocationHolder _rspec;
+
+ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { // Builds the spec matching rtype; only the two word types make use of addr.
+ switch (rtype) {
+ case relocInfo::external_word_type:
+ return external_word_Relocation::spec(addr);
+ case relocInfo::internal_word_type:
+ return internal_word_Relocation::spec(addr);
+ case relocInfo::opt_virtual_call_type:
+ return opt_virtual_call_Relocation::spec();
+ case relocInfo::static_call_type:
+ return static_call_Relocation::spec();
+ case relocInfo::runtime_call_w_cp_type:
+ return runtime_call_w_cp_Relocation::spec();
+ case relocInfo::none:
+ return RelocationHolder();
+ default: // Any other relocation type is not handled here.
+ ShouldNotReachHere();
+ return RelocationHolder();
+ }
+ }
+
+ protected:
+ // creation
+ AddressLiteral() : _address(NULL), _rspec(NULL) {}
+
+ public:
+ AddressLiteral(address addr, RelocationHolder const& rspec)
+ : _address(addr),
+ _rspec(rspec) {}
+
+ // Some constructors to avoid casting at the call site.
+ AddressLiteral(jobject obj, RelocationHolder const& rspec)
+ : _address((address) obj),
+ _rspec(rspec) {}
+
+ AddressLiteral(intptr_t value, RelocationHolder const& rspec)
+ : _address((address) value),
+ _rspec(rspec) {}
+
+ AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ // Some constructors to avoid casting at the call site.
+ AddressLiteral(address* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(bool* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(const bool* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(signed char* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(int* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(intptr_t addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(oop addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(oop* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(float* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ AddressLiteral(double* addr, relocInfo::relocType rtype = relocInfo::none)
+ : _address((address) addr),
+ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+ intptr_t value() const { return (intptr_t) _address; } // The stored address as an integer.
+
+ const relocInfo::relocType rtype() const { return _rspec.type(); }
+ const RelocationHolder& rspec() const { return _rspec; }
+
+ RelocationHolder rspec(int offset) const { // For offset 0 the stored spec itself, otherwise the result of its plus(offset).
+ return offset == 0 ? _rspec : _rspec.plus(offset);
+ }
+};
+
+// Convenience classes
+class ExternalAddress: public AddressLiteral { // An AddressLiteral whose relocation type is chosen from the target.
+ private:
+ static relocInfo::relocType reloc_for_target(address target) {
+ // Sometimes ExternalAddress is used for values which aren't
+ // exactly addresses, like the card table base.
+ // External_word_type can't be used for values in the first page
+ // so just skip the reloc in that case.
+ return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
+ }
+
+ public:
+ ExternalAddress(address target) : AddressLiteral(target, reloc_for_target( target)) {}
+ ExternalAddress(oop* target) : AddressLiteral(target, reloc_for_target((address) target)) {} // Selects the oop* overload of the base class constructor.
+};
+
+// Argument is an abstraction used to represent an outgoing actual
+// argument or an incoming formal parameter, whether it resides in
+// memory or in a register, in a manner consistent with the
+// z/Architecture Application Binary Interface, or ABI. This is often
+// referred to as the native or C calling convention.
+class Argument VALUE_OBJ_CLASS_SPEC {
+ private:
+  int  _number; // Zero-based position of the argument.
+  bool _is_in;  // Presumably: incoming (true) vs. outgoing (false) - no reader is visible in this file.
+
+ public:
+  enum {
+    // Only 5 registers may contain integer parameters.
+    n_register_parameters = 5,
+    // Can have up to 4 floating registers.
+    n_float_register_parameters = 4
+  };
+
+  // creation (the one-argument form sets _is_in to false instead of leaving it uninitialized)
+  Argument(int number, bool is_in) : _number(number), _is_in(is_in) {}
+  Argument(int number) : _number(number), _is_in(false) {}
+
+  int number() const { return _number; }
+
+  Argument successor() const { return Argument(number() + 1, _is_in); } // Next position, same direction flag.
+
+  // Locating register-based arguments:
+  bool is_register() const { return _number < n_register_parameters; }
+
+  // Locating Floating Point register-based arguments:
+  bool is_float_register() const { return _number < n_float_register_parameters; }
+
+  FloatRegister as_float_register() const {
+    assert(is_float_register(), "must be a register argument");
+    return as_FloatRegister((number() *2) + 1); // NOTE(review): yields the odd-numbered register - confirm this is intended.
+  }
+
+  FloatRegister as_double_register() const {
+    assert(is_float_register(), "must be a register argument");
+    return as_FloatRegister((number() *2)); // Even-numbered register.
+  }
+
+  Register as_register() const {
+    assert(is_register(), "must be a register argument");
+    return as_Register(number() + Z_ARG1->encoding()); // Argument registers are numbered upwards from Z_ARG1.
+  }
+
+  // debugging
+  const char* name() const;
+
+  friend class Assembler;
+};
+
+
+// The z/Architecture Assembler: Pure assembler doing NO optimizations
+// on the instruction level; i.e., what you write is what you get. The
+// Assembler is generating code into a CodeBuffer.
+class Assembler : public AbstractAssembler {
+ protected:
+
+ friend class AbstractAssembler;
+ friend class AddressLiteral;
+
+ // Code patchers need various routines like inv_wdisp().
+ friend class NativeInstruction;
+#ifndef COMPILER2
+ friend class NativeGeneralJump;
+#endif
+ friend class Relocation;
+
+ public:
+
+// Addressing
+
+// address calculation
+#define LA_ZOPC (unsigned int)(0x41 << 24)
+#define LAY_ZOPC (unsigned long)(0xe3L << 40 | 0x71L)
+#define LARL_ZOPC (unsigned long)(0xc0L << 40 | 0x00L << 32)
+
+
+// Data Transfer
+
+// register to register transfer
+// Layout as written below: `<< 8` forms keep the opcode byte in bits 15..8 of
+// a 16-bit image; `<< 16` / `<< 24` forms fill the upper half of a 32-bit image.
+// Decimal and hex spellings are mixed: 185 == 0xb9 and 179 == 0xb3, so e.g.
+// (185 << 24 | 23 << 16) is the same value as (0xb917 << 16).
+#define LR_ZOPC (unsigned int)(24 << 8)
+#define LBR_ZOPC (unsigned int)(0xb926 << 16)
+#define LHR_ZOPC (unsigned int)(0xb927 << 16)
+#define LGBR_ZOPC (unsigned int)(0xb906 << 16)
+#define LGHR_ZOPC (unsigned int)(0xb907 << 16)
+#define LGFR_ZOPC (unsigned int)(0xb914 << 16)
+#define LGR_ZOPC (unsigned int)(0xb904 << 16)
+
+#define LLHR_ZOPC (unsigned int)(0xb995 << 16)
+#define LLGCR_ZOPC (unsigned int)(0xb984 << 16)
+#define LLGHR_ZOPC (unsigned int)(0xb985 << 16)
+#define LLGTR_ZOPC (unsigned int)(185 << 24 | 23 << 16)
+#define LLGFR_ZOPC (unsigned int)(185 << 24 | 22 << 16)
+
+#define LTR_ZOPC (unsigned int)(18 << 8)
+#define LTGFR_ZOPC (unsigned int)(185 << 24 | 18 << 16)
+#define LTGR_ZOPC (unsigned int)(185 << 24 | 2 << 16)
+
+#define LER_ZOPC (unsigned int)(56 << 8)
+#define LEDBR_ZOPC (unsigned int)(179 << 24 | 68 << 16)
+#define LEXBR_ZOPC (unsigned int)(179 << 24 | 70 << 16)
+#define LDEBR_ZOPC (unsigned int)(179 << 24 | 4 << 16)
+#define LDR_ZOPC (unsigned int)(40 << 8)
+#define LDXBR_ZOPC (unsigned int)(179 << 24 | 69 << 16)
+#define LXEBR_ZOPC (unsigned int)(179 << 24 | 6 << 16)
+#define LXDBR_ZOPC (unsigned int)(179 << 24 | 5 << 16)
+#define LXR_ZOPC (unsigned int)(179 << 24 | 101 << 16)
+#define LTEBR_ZOPC (unsigned int)(179 << 24 | 2 << 16)
+#define LTDBR_ZOPC (unsigned int)(179 << 24 | 18 << 16)
+#define LTXBR_ZOPC (unsigned int)(179 << 24 | 66 << 16)
+
+#define LRVR_ZOPC (unsigned int)(0xb91f << 16)
+#define LRVGR_ZOPC (unsigned int)(0xb90f << 16)
+
+// The trailing "// z10" / "// z196" tags are the original author's annotations.
+#define LDGR_ZOPC (unsigned int)(0xb3c1 << 16) // z10
+#define LGDR_ZOPC (unsigned int)(0xb3cd << 16) // z10
+
+#define LOCR_ZOPC (unsigned int)(0xb9f2 << 16) // z196
+#define LOCGR_ZOPC (unsigned int)(0xb9e2 << 16) // z196
+
+// immediate to register transfer
+// 165 == 0xa5 and 167 == 0xa7. In the 32-bit forms the second opcode part is a
+// small value (0..15 here) at bit 16; in the (unsigned long) forms it sits at
+// bit 32 below the 0xc0 opcode byte at bits 47..40.
+#define IIHH_ZOPC (unsigned int)(165 << 24)
+#define IIHL_ZOPC (unsigned int)(165 << 24 | 1 << 16)
+#define IILH_ZOPC (unsigned int)(165 << 24 | 2 << 16)
+#define IILL_ZOPC (unsigned int)(165 << 24 | 3 << 16)
+#define IIHF_ZOPC (unsigned long)(0xc0L << 40 | 8L << 32)
+#define IILF_ZOPC (unsigned long)(0xc0L << 40 | 9L << 32)
+#define LLIHH_ZOPC (unsigned int)(165 << 24 | 12 << 16)
+#define LLIHL_ZOPC (unsigned int)(165 << 24 | 13 << 16)
+#define LLILH_ZOPC (unsigned int)(165 << 24 | 14 << 16)
+#define LLILL_ZOPC (unsigned int)(165 << 24 | 15 << 16)
+#define LLIHF_ZOPC (unsigned long)(0xc0L << 40 | 14L << 32)
+#define LLILF_ZOPC (unsigned long)(0xc0L << 40 | 15L << 32)
+#define LHI_ZOPC (unsigned int)(167 << 24 | 8 << 16)
+#define LGHI_ZOPC (unsigned int)(167 << 24 | 9 << 16)
+#define LGFI_ZOPC (unsigned long)(0xc0L << 40 | 1L << 32)
+
+#define LZER_ZOPC (unsigned int)(0xb374 << 16)
+#define LZDR_ZOPC (unsigned int)(0xb375 << 16)
+
+// LOAD: memory to register transfer
+// 227 == 0xe3, so the decimal and hex spellings below name the same leading
+// opcode byte (bits 47..40); the low byte carries the second opcode byte.
+// (unsigned int) entries are 32-bit images with the opcode byte at bits 31..24.
+#define LB_ZOPC (unsigned long)(227L << 40 | 118L)
+#define LH_ZOPC (unsigned int)(72 << 24)
+#define LHY_ZOPC (unsigned long)(227L << 40 | 120L)
+#define L_ZOPC (unsigned int)(88 << 24)
+#define LY_ZOPC (unsigned long)(227L << 40 | 88L)
+#define LT_ZOPC (unsigned long)(0xe3L << 40 | 0x12L)
+#define LGB_ZOPC (unsigned long)(227L << 40 | 119L)
+#define LGH_ZOPC (unsigned long)(227L << 40 | 21L)
+#define LGF_ZOPC (unsigned long)(227L << 40 | 20L)
+#define LG_ZOPC (unsigned long)(227L << 40 | 4L)
+#define LTG_ZOPC (unsigned long)(0xe3L << 40 | 0x02L)
+#define LTGF_ZOPC (unsigned long)(0xe3L << 40 | 0x32L)
+
+#define LLC_ZOPC (unsigned long)(0xe3L << 40 | 0x94L)
+#define LLH_ZOPC (unsigned long)(0xe3L << 40 | 0x95L)
+#define LLGT_ZOPC (unsigned long)(227L << 40 | 23L)
+#define LLGC_ZOPC (unsigned long)(227L << 40 | 144L)
+#define LLGH_ZOPC (unsigned long)(227L << 40 | 145L)
+#define LLGF_ZOPC (unsigned long)(227L << 40 | 22L)
+
+#define IC_ZOPC (unsigned int)(0x43 << 24)
+#define ICY_ZOPC (unsigned long)(0xe3L << 40 | 0x73L)
+#define ICM_ZOPC (unsigned int)(0xbf << 24)
+#define ICMY_ZOPC (unsigned long)(0xebL << 40 | 0x81L)
+#define ICMH_ZOPC (unsigned long)(0xebL << 40 | 0x80L)
+
+#define LRVH_ZOPC (unsigned long)(0xe3L << 40 | 0x1fL)
+#define LRV_ZOPC (unsigned long)(0xe3L << 40 | 0x1eL)
+#define LRVG_ZOPC (unsigned long)(0xe3L << 40 | 0x0fL)
+
+
+// LOAD relative: memory to register transfer
+// The relative forms share the 0xc4 opcode byte at bits 47..40 and differ only
+// in the value placed at bit 32.
+#define LHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x05L << 32) // z10
+#define LRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0dL << 32) // z10
+#define LGHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x04L << 32) // z10
+#define LGFRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0cL << 32) // z10
+#define LGRL_ZOPC (unsigned long)(0xc4L << 40 | 0x08L << 32) // z10
+
+#define LLHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x02L << 32) // z10
+#define LLGHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x06L << 32) // z10
+#define LLGFRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0eL << 32) // z10
+
+#define LOC_ZOPC (unsigned long)(0xebL << 40 | 0xf2L) // z196
+#define LOCG_ZOPC (unsigned long)(0xebL << 40 | 0xe2L) // z196
+
+// 235 == 0xeb.
+#define LMG_ZOPC (unsigned long)(235L << 40 | 4L)
+
+// 237 == 0xed. Some low-byte operands below lack the L suffix; they are
+// promoted to long by the `|` with the long left operand, so values are unaffected.
+#define LE_ZOPC (unsigned int)(0x78 << 24)
+#define LEY_ZOPC (unsigned long)(237L << 40 | 100L)
+#define LDEB_ZOPC (unsigned long)(237L << 40 | 4)
+#define LD_ZOPC (unsigned int)(0x68 << 24)
+#define LDY_ZOPC (unsigned long)(237L << 40 | 101L)
+#define LXEB_ZOPC (unsigned long)(237L << 40 | 6)
+#define LXDB_ZOPC (unsigned long)(237L << 40 | 5)
+
+// STORE: register to memory transfer
+// 227 == 0xe3, 235 == 0xeb, 237 == 0xed (leading opcode byte at bits 47..40 of
+// the (unsigned long) forms; second opcode byte in the low byte).
+#define STC_ZOPC (unsigned int)(0x42 << 24)
+#define STCY_ZOPC (unsigned long)(227L << 40 | 114L)
+#define STH_ZOPC (unsigned int)(64 << 24)
+#define STHY_ZOPC (unsigned long)(227L << 40 | 112L)
+#define ST_ZOPC (unsigned int)(80 << 24)
+#define STY_ZOPC (unsigned long)(227L << 40 | 80L)
+#define STG_ZOPC (unsigned long)(227L << 40 | 36L)
+
+// NOTE(review): STCM_ZOPC is shifted by 24 like the 32-bit opcodes around it
+// but is cast to (unsigned long); the value (0xbe000000) is unaffected.
+#define STCM_ZOPC (unsigned long)(0xbeL << 24)
+#define STCMY_ZOPC (unsigned long)(0xebL << 40 | 0x2dL)
+#define STCMH_ZOPC (unsigned long)(0xebL << 40 | 0x2cL)
+
+// STORE relative: register to memory transfer
+#define STHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x07L << 32) // z10
+#define STRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0fL << 32) // z10
+#define STGRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0bL << 32) // z10
+
+#define STOC_ZOPC (unsigned long)(0xebL << 40 | 0xf3L) // z196
+#define STOCG_ZOPC (unsigned long)(0xebL << 40 | 0xe3L) // z196
+
+#define STMG_ZOPC (unsigned long)(235L << 40 | 36L)
+
+#define STE_ZOPC (unsigned int)(0x70 << 24)
+#define STEY_ZOPC (unsigned long)(237L << 40 | 102L)
+#define STD_ZOPC (unsigned int)(0x60 << 24)
+#define STDY_ZOPC (unsigned long)(237L << 40 | 103L)
+
+// MOVE: immediate to memory transfer
+// Both opcode bytes are adjacent here: 0xe5 at bits 47..40, second byte at bits 39..32.
+#define MVHHI_ZOPC (unsigned long)(0xe5L << 40 | 0x44L << 32) // z10
+#define MVHI_ZOPC (unsigned long)(0xe5L << 40 | 0x4cL << 32) // z10
+#define MVGHI_ZOPC (unsigned long)(0xe5L << 40 | 0x48L << 32) // z10
+
+
+// ALU operations
+
+// All entries in the three groups below are (unsigned int): `<< 8` forms hold
+// the opcode byte in bits 15..8; the others hold a two-byte opcode in bits
+// 31..16 (185 == 0xb9, 179 == 0xb3; a missing `| x << 16` term means the
+// second byte is 0x00).
+
+// Load Positive
+#define LPR_ZOPC (unsigned int)(16 << 8)
+#define LPGFR_ZOPC (unsigned int)(185 << 24 | 16 << 16)
+#define LPGR_ZOPC (unsigned int)(185 << 24)
+#define LPEBR_ZOPC (unsigned int)(179 << 24)
+#define LPDBR_ZOPC (unsigned int)(179 << 24 | 16 << 16)
+#define LPXBR_ZOPC (unsigned int)(179 << 24 | 64 << 16)
+
+// Load Negative
+#define LNR_ZOPC (unsigned int)(17 << 8)
+#define LNGFR_ZOPC (unsigned int)(185 << 24 | 17 << 16)
+#define LNGR_ZOPC (unsigned int)(185 << 24 | 1 << 16)
+#define LNEBR_ZOPC (unsigned int)(179 << 24 | 1 << 16)
+#define LNDBR_ZOPC (unsigned int)(179 << 24 | 17 << 16)
+#define LNXBR_ZOPC (unsigned int)(179 << 24 | 65 << 16)
+
+// Load Complement
+#define LCR_ZOPC (unsigned int)(19 << 8)
+#define LCGFR_ZOPC (unsigned int)(185 << 24 | 19 << 16)
+#define LCGR_ZOPC (unsigned int)(185 << 24 | 3 << 16)
+#define LCEBR_ZOPC (unsigned int)(179 << 24 | 3 << 16)
+#define LCDBR_ZOPC (unsigned int)(179 << 24 | 19 << 16)
+#define LCXBR_ZOPC (unsigned int)(179 << 24 | 67 << 16)
+
+// Add
+// The sub-headings (RR, RRF, RI, RIE, RM, MI) are the original author's
+// grouping labels. 167 == 0xa7, 179 == 0xb3, 185 == 0xb9, 227 == 0xe3, 237 == 0xed.
+// RR, signed
+#define AR_ZOPC (unsigned int)(26 << 8)
+#define AGFR_ZOPC (unsigned int)(0xb9 << 24 | 0x18 << 16)
+#define AGR_ZOPC (unsigned int)(0xb9 << 24 | 0x08 << 16)
+// RRF, signed
+// (0x00f8 << 16 contributes bits 23..16 only; the leading zeros are padding.)
+#define ARK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f8 << 16)
+#define AGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e8 << 16)
+// RI, signed
+#define AHI_ZOPC (unsigned int)(167 << 24 | 10 << 16)
+#define AFI_ZOPC (unsigned long)(0xc2L << 40 | 9L << 32)
+#define AGHI_ZOPC (unsigned int)(167 << 24 | 11 << 16)
+#define AGFI_ZOPC (unsigned long)(0xc2L << 40 | 8L << 32)
+// RIE, signed
+#define AHIK_ZOPC (unsigned long)(0xecL << 40 | 0x00d8L)
+#define AGHIK_ZOPC (unsigned long)(0xecL << 40 | 0x00d9L)
+#define AIH_ZOPC (unsigned long)(0xccL << 40 | 0x08L << 32)
+// RM, signed
+#define AHY_ZOPC (unsigned long)(227L << 40 | 122L)
+#define A_ZOPC (unsigned int)(90 << 24)
+#define AY_ZOPC (unsigned long)(227L << 40 | 90L)
+#define AGF_ZOPC (unsigned long)(227L << 40 | 24L)
+#define AG_ZOPC (unsigned long)(227L << 40 | 8L)
+// In-memory arithmetic (add signed, add logical with signed immediate).
+// MI, signed
+#define ASI_ZOPC (unsigned long)(0xebL << 40 | 0x6aL)
+#define AGSI_ZOPC (unsigned long)(0xebL << 40 | 0x7aL)
+
+// RR, Logical
+#define ALR_ZOPC (unsigned int)(30 << 8)
+#define ALGFR_ZOPC (unsigned int)(185 << 24 | 26 << 16)
+#define ALGR_ZOPC (unsigned int)(185 << 24 | 10 << 16)
+#define ALCGR_ZOPC (unsigned int)(185 << 24 | 136 << 16)
+// RRF, Logical
+#define ALRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00fa << 16)
+#define ALGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00ea << 16)
+// RI, Logical
+#define ALFI_ZOPC (unsigned long)(0xc2L << 40 | 0x0bL << 32)
+#define ALGFI_ZOPC (unsigned long)(0xc2L << 40 | 0x0aL << 32)
+// RIE, Logical
+#define ALHSIK_ZOPC (unsigned long)(0xecL << 40 | 0x00daL)
+#define ALGHSIK_ZOPC (unsigned long)(0xecL << 40 | 0x00dbL)
+// RM, Logical
+#define AL_ZOPC (unsigned int)(0x5e << 24)
+#define ALY_ZOPC (unsigned long)(227L << 40 | 94L)
+#define ALGF_ZOPC (unsigned long)(227L << 40 | 26L)
+#define ALG_ZOPC (unsigned long)(227L << 40 | 10L)
+// In-memory arithmetic (add signed, add logical with signed immediate).
+// MI, Logical
+#define ALSI_ZOPC (unsigned long)(0xebL << 40 | 0x6eL)
+#define ALGSI_ZOPC (unsigned long)(0xebL << 40 | 0x7eL)
+
+// RR, BFP
+#define AEBR_ZOPC (unsigned int)(179 << 24 | 10 << 16)
+#define ADBR_ZOPC (unsigned int)(179 << 24 | 26 << 16)
+#define AXBR_ZOPC (unsigned int)(179 << 24 | 74 << 16)
+// RM, BFP
+#define AEB_ZOPC (unsigned long)(237L << 40 | 10)
+#define ADB_ZOPC (unsigned long)(237L << 40 | 26)
+
+// Subtract
+// Same layout conventions as written throughout this table: (unsigned int)
+// values are 16/32-bit images, (unsigned long) values carry the first opcode
+// byte at bits 47..40. 179 == 0xb3, 185 == 0xb9, 227 == 0xe3, 237 == 0xed.
+// RR, signed
+#define SR_ZOPC (unsigned int)(27 << 8)
+#define SGFR_ZOPC (unsigned int)(185 << 24 | 25 << 16)
+#define SGR_ZOPC (unsigned int)(185 << 24 | 9 << 16)
+// RRF, signed
+#define SRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f9 << 16)
+#define SGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e9 << 16)
+// RM, signed
+#define SH_ZOPC (unsigned int)(0x4b << 24)
+#define SHY_ZOPC (unsigned long)(227L << 40 | 123L)
+#define S_ZOPC (unsigned int)(0x5B << 24)
+#define SY_ZOPC (unsigned long)(227L << 40 | 91L)
+#define SGF_ZOPC (unsigned long)(227L << 40 | 25)
+#define SG_ZOPC (unsigned long)(227L << 40 | 9)
+// RR, Logical
+#define SLR_ZOPC (unsigned int)(31 << 8)
+#define SLGFR_ZOPC (unsigned int)(185 << 24 | 27 << 16)
+#define SLGR_ZOPC (unsigned int)(185 << 24 | 11 << 16)
+// RIL, Logical
+#define SLFI_ZOPC (unsigned long)(0xc2L << 40 | 0x05L << 32)
+#define SLGFI_ZOPC (unsigned long)(0xc2L << 40 | 0x04L << 32)
+// RRF, Logical
+#define SLRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00fb << 16)
+#define SLGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00eb << 16)
+// RM, Logical
+#define SLY_ZOPC (unsigned long)(227L << 40 | 95L)
+#define SLGF_ZOPC (unsigned long)(227L << 40 | 27L)
+#define SLG_ZOPC (unsigned long)(227L << 40 | 11L)
+
+// RR, BFP
+#define SEBR_ZOPC (unsigned int)(179 << 24 | 11 << 16)
+#define SDBR_ZOPC (unsigned int)(179 << 24 | 27 << 16)
+#define SXBR_ZOPC (unsigned int)(179 << 24 | 75 << 16)
+// RM, BFP
+#define SEB_ZOPC (unsigned long)(237L << 40 | 11)
+#define SDB_ZOPC (unsigned long)(237L << 40 | 27)
+
+// Multiply
+// 167 == 0xa7, 178 == 0xb2, 179 == 0xb3, 185 == 0xb9, 227 == 0xe3, 237 == 0xed.
+// RR, signed
+#define MR_ZOPC (unsigned int)(28 << 8)
+#define MSR_ZOPC (unsigned int)(178 << 24 | 82 << 16)
+#define MSGFR_ZOPC (unsigned int)(185 << 24 | 28 << 16)
+#define MSGR_ZOPC (unsigned int)(185 << 24 | 12 << 16)
+// RI, signed
+#define MHI_ZOPC (unsigned int)(167 << 24 | 12 << 16)
+#define MGHI_ZOPC (unsigned int)(167 << 24 | 13 << 16)
+#define MSFI_ZOPC (unsigned long)(0xc2L << 40 | 0x01L << 32) // z10
+#define MSGFI_ZOPC (unsigned long)(0xc2L << 40 | 0x00L << 32) // z10
+// RM, signed
+#define M_ZOPC (unsigned int)(92 << 24)
+#define MS_ZOPC (unsigned int)(0x71 << 24)
+#define MHY_ZOPC (unsigned long)(0xe3L<< 40 | 0x7cL)
+#define MSY_ZOPC (unsigned long)(227L << 40 | 81L)
+#define MSGF_ZOPC (unsigned long)(227L << 40 | 28L)
+#define MSG_ZOPC (unsigned long)(227L << 40 | 12L)
+// RR, unsigned
+#define MLR_ZOPC (unsigned int)(185 << 24 | 150 << 16)
+#define MLGR_ZOPC (unsigned int)(185 << 24 | 134 << 16)
+// RM, unsigned
+#define ML_ZOPC (unsigned long)(227L << 40 | 150L)
+#define MLG_ZOPC (unsigned long)(227L << 40 | 134L)
+
+// RR, BFP
+#define MEEBR_ZOPC (unsigned int)(179 << 24 | 23 << 16)
+#define MDEBR_ZOPC (unsigned int)(179 << 24 | 12 << 16)
+#define MDBR_ZOPC (unsigned int)(179 << 24 | 28 << 16)
+#define MXDBR_ZOPC (unsigned int)(179 << 24 | 7 << 16)
+#define MXBR_ZOPC (unsigned int)(179 << 24 | 76 << 16)
+// RM, BFP
+#define MEEB_ZOPC (unsigned long)(237L << 40 | 23)
+#define MDEB_ZOPC (unsigned long)(237L << 40 | 12)
+#define MDB_ZOPC (unsigned long)(237L << 40 | 28)
+#define MXDB_ZOPC (unsigned long)(237L << 40 | 7)
+
+// Divide
+// 179 == 0xb3, 185 == 0xb9, 227 == 0xe3, 237 == 0xed; e.g. DLR_ZOPC
+// (185 << 24 | 151 << 16) equals 0xb997 << 16.
+// RR, signed
+#define DSGFR_ZOPC (unsigned int)(0xb91d << 16)
+#define DSGR_ZOPC (unsigned int)(0xb90d << 16)
+// RM, signed
+#define D_ZOPC (unsigned int)(93 << 24)
+#define DSGF_ZOPC (unsigned long)(227L << 40 | 29L)
+#define DSG_ZOPC (unsigned long)(227L << 40 | 13L)
+// RR, unsigned
+#define DLR_ZOPC (unsigned int)(185 << 24 | 151 << 16)
+#define DLGR_ZOPC (unsigned int)(185 << 24 | 135 << 16)
+// RM, unsigned
+#define DL_ZOPC (unsigned long)(227L << 40 | 151L)
+#define DLG_ZOPC (unsigned long)(227L << 40 | 135L)
+
+// RR, BFP
+#define DEBR_ZOPC (unsigned int)(179 << 24 | 13 << 16)
+#define DDBR_ZOPC (unsigned int)(179 << 24 | 29 << 16)
+#define DXBR_ZOPC (unsigned int)(179 << 24 | 77 << 16)
+// RM, BFP
+#define DEB_ZOPC (unsigned long)(237L << 40 | 13)
+#define DDB_ZOPC (unsigned long)(237L << 40 | 29)
+
+// Square Root
+// Register forms are 32-bit images with a two-byte opcode in bits 31..16;
+// memory forms use 237 == 0xed at bits 47..40 with the second byte in the low byte.
+// RR, BFP
+#define SQEBR_ZOPC (unsigned int)(0xb314 << 16)
+#define SQDBR_ZOPC (unsigned int)(0xb315 << 16)
+#define SQXBR_ZOPC (unsigned int)(0xb316 << 16)
+// RM, BFP
+#define SQEB_ZOPC (unsigned long)(237L << 40 | 20)
+#define SQDB_ZOPC (unsigned long)(237L << 40 | 21)
+
+// Compare and Test
+// 167 == 0xa7, 179 == 0xb3, 185 == 0xb9, 227 == 0xe3, 237 == 0xed.
+// RR, signed
+#define CR_ZOPC (unsigned int)(25 << 8)
+#define CGFR_ZOPC (unsigned int)(185 << 24 | 48 << 16)
+#define CGR_ZOPC (unsigned int)(185 << 24 | 32 << 16)
+// RI, signed
+#define CHI_ZOPC (unsigned int)(167 << 24 | 14 << 16)
+#define CFI_ZOPC (unsigned long)(0xc2L << 40 | 0xdL << 32)
+#define CGHI_ZOPC (unsigned int)(167 << 24 | 15 << 16)
+#define CGFI_ZOPC (unsigned long)(0xc2L << 40 | 0xcL << 32)
+// RM, signed
+#define CH_ZOPC (unsigned int)(0x49 << 24)
+#define CHY_ZOPC (unsigned long)(227L << 40 | 121L)
+#define C_ZOPC (unsigned int)(0x59 << 24)
+#define CY_ZOPC (unsigned long)(227L << 40 | 89L)
+#define CGF_ZOPC (unsigned long)(227L << 40 | 48L)
+#define CG_ZOPC (unsigned long)(227L << 40 | 32L)
+// RR, unsigned
+#define CLR_ZOPC (unsigned int)(21 << 8)
+#define CLGFR_ZOPC (unsigned int)(185 << 24 | 49 << 16)
+#define CLGR_ZOPC (unsigned int)(185 << 24 | 33 << 16)
+// RIL, unsigned
+#define CLFI_ZOPC (unsigned long)(0xc2L << 40 | 0xfL << 32)
+#define CLGFI_ZOPC (unsigned long)(0xc2L << 40 | 0xeL << 32)
+// RM, unsigned
+#define CL_ZOPC (unsigned int)(0x55 << 24)
+#define CLY_ZOPC (unsigned long)(227L << 40 | 85L)
+#define CLGF_ZOPC (unsigned long)(227L << 40 | 49L)
+#define CLG_ZOPC (unsigned long)(227L << 40 | 33L)
+// RI, unsigned
+// (TMLH has no `| x << 16` term, i.e. its second opcode part is 0.)
+#define TMHH_ZOPC (unsigned int)(167 << 24 | 2 << 16)
+#define TMHL_ZOPC (unsigned int)(167 << 24 | 3 << 16)
+#define TMLH_ZOPC (unsigned int)(167 << 24)
+#define TMLL_ZOPC (unsigned int)(167 << 24 | 1 << 16)
+
+// RR, BFP
+#define CEBR_ZOPC (unsigned int)(179 << 24 | 9 << 16)
+#define CDBR_ZOPC (unsigned int)(179 << 24 | 25 << 16)
+#define CXBR_ZOPC (unsigned int)(179 << 24 | 73 << 16)
+// RM, BFP
+#define CEB_ZOPC (unsigned long)(237L << 40 | 9)
+#define CDB_ZOPC (unsigned long)(237L << 40 | 25)
+
+// Shift
+// 136..139 == 0x88..0x8b (32-bit images, opcode byte at bits 31..24);
+// 235 == 0xeb (first opcode byte at bits 47..40, second byte in the low byte).
+// arithmetic
+#define SLA_ZOPC (unsigned int)(139 << 24)
+#define SLAG_ZOPC (unsigned long)(235L << 40 | 11L)
+#define SRA_ZOPC (unsigned int)(138 << 24)
+#define SRAG_ZOPC (unsigned long)(235L << 40 | 10L)
+// logical
+#define SLL_ZOPC (unsigned int)(137 << 24)
+#define SLLG_ZOPC (unsigned long)(235L << 40 | 13L)
+#define SRL_ZOPC (unsigned int)(136 << 24)
+#define SRLG_ZOPC (unsigned long)(235L << 40 | 12L)
+
+// Rotate, then AND/XOR/OR/insert
+// rotate
+#define RLL_ZOPC (unsigned long)(0xebL << 40 | 0x1dL) // z10
+#define RLLG_ZOPC (unsigned long)(0xebL << 40 | 0x1cL) // z10
+// rotate and {AND|XOR|OR|INS}
+#define RNSBG_ZOPC (unsigned long)(0xecL << 40 | 0x54L) // z196
+#define RXSBG_ZOPC (unsigned long)(0xecL << 40 | 0x57L) // z196
+#define ROSBG_ZOPC (unsigned long)(0xecL << 40 | 0x56L) // z196
+#define RISBG_ZOPC (unsigned long)(0xecL << 40 | 0x55L) // z196
+
+// AND
+// The three bitwise groups (AND, OR, XOR) are laid out in parallel.
+// 165 == 0xa5, 185 == 0xb9, 227 == 0xe3.
+// NOTE(review): the "signed" in the sub-headings appears to be carried over
+// from the arithmetic sections; these are bitwise operations.
+// RR, signed
+#define NR_ZOPC (unsigned int)(20 << 8)
+#define NGR_ZOPC (unsigned int)(185 << 24 | 128 << 16)
+// RRF, signed
+#define NRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f4 << 16)
+#define NGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e4 << 16)
+// RI, signed
+#define NIHH_ZOPC (unsigned int)(165 << 24 | 4 << 16)
+#define NIHL_ZOPC (unsigned int)(165 << 24 | 5 << 16)
+#define NILH_ZOPC (unsigned int)(165 << 24 | 6 << 16)
+#define NILL_ZOPC (unsigned int)(165 << 24 | 7 << 16)
+#define NIHF_ZOPC (unsigned long)(0xc0L << 40 | 10L << 32)
+#define NILF_ZOPC (unsigned long)(0xc0L << 40 | 11L << 32)
+// RM, signed
+#define N_ZOPC (unsigned int)(0x54 << 24)
+#define NY_ZOPC (unsigned long)(227L << 40 | 84L)
+#define NG_ZOPC (unsigned long)(227L << 40 | 128L)
+
+// OR
+// RR, signed
+#define OR_ZOPC (unsigned int)(22 << 8)
+#define OGR_ZOPC (unsigned int)(185 << 24 | 129 << 16)
+// RRF, signed
+#define ORK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f6 << 16)
+#define OGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e6 << 16)
+// RI, signed
+#define OIHH_ZOPC (unsigned int)(165 << 24 | 8 << 16)
+#define OIHL_ZOPC (unsigned int)(165 << 24 | 9 << 16)
+#define OILH_ZOPC (unsigned int)(165 << 24 | 10 << 16)
+#define OILL_ZOPC (unsigned int)(165 << 24 | 11 << 16)
+#define OIHF_ZOPC (unsigned long)(0xc0L << 40 | 12L << 32)
+#define OILF_ZOPC (unsigned long)(0xc0L << 40 | 13L << 32)
+// RM, signed
+#define O_ZOPC (unsigned int)(0x56 << 24)
+#define OY_ZOPC (unsigned long)(227L << 40 | 86L)
+#define OG_ZOPC (unsigned long)(227L << 40 | 129L)
+
+// XOR
+// RR, signed
+#define XR_ZOPC (unsigned int)(23 << 8)
+#define XGR_ZOPC (unsigned int)(185 << 24 | 130 << 16)
+// RRF, signed
+#define XRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f7 << 16)
+#define XGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e7 << 16)
+// RI, signed
+#define XIHF_ZOPC (unsigned long)(0xc0L << 40 | 6L << 32)
+#define XILF_ZOPC (unsigned long)(0xc0L << 40 | 7L << 32)
+// RM, signed
+#define X_ZOPC (unsigned int)(0x57 << 24)
+#define XY_ZOPC (unsigned long)(227L << 40 | 87L)
+#define XG_ZOPC (unsigned long)(227L << 40 | 130L)
+
+
+// Data Conversion
+
+// 179 == 0xb3; the second opcode byte runs 148..150 / 164..166 (0x94..0x96 /
+// 0xa4..0xa6) for INT to BFP and 152..154 / 168..170 (0x98..0x9a / 0xa8..0xaa)
+// for BFP to INT.
+// INT to BFP
+#define CEFBR_ZOPC (unsigned int)(179 << 24 | 148 << 16)
+#define CDFBR_ZOPC (unsigned int)(179 << 24 | 149 << 16)
+#define CXFBR_ZOPC (unsigned int)(179 << 24 | 150 << 16)
+#define CEGBR_ZOPC (unsigned int)(179 << 24 | 164 << 16)
+#define CDGBR_ZOPC (unsigned int)(179 << 24 | 165 << 16)
+#define CXGBR_ZOPC (unsigned int)(179 << 24 | 166 << 16)
+// BFP to INT
+#define CFEBR_ZOPC (unsigned int)(179 << 24 | 152 << 16)
+#define CFDBR_ZOPC (unsigned int)(179 << 24 | 153 << 16)
+#define CFXBR_ZOPC (unsigned int)(179 << 24 | 154 << 16)
+#define CGEBR_ZOPC (unsigned int)(179 << 24 | 168 << 16)
+#define CGDBR_ZOPC (unsigned int)(179 << 24 | 169 << 16)
+#define CGXBR_ZOPC (unsigned int)(179 << 24 | 170 << 16)
+// INT to DEC
+#define CVD_ZOPC (unsigned int)(0x4e << 24)
+#define CVDY_ZOPC (unsigned long)(0xe3L << 40 | 0x26L)
+#define CVDG_ZOPC (unsigned long)(0xe3L << 40 | 0x2eL)
+
+
+// BFP Control
+
+// All 32-bit images with a two-byte opcode in bits 31..16
+// (178 == 0xb2, 179 == 0xb3).
+#define SRNM_ZOPC (unsigned int)(178 << 24 | 153 << 16)
+#define EFPC_ZOPC (unsigned int)(179 << 24 | 140 << 16)
+#define SFPC_ZOPC (unsigned int)(179 << 24 | 132 << 16)
+#define STFPC_ZOPC (unsigned int)(178 << 24 | 156 << 16)
+#define LFPC_ZOPC (unsigned int)(178 << 24 | 157 << 16)
+
+
+// Branch Instructions
+
+// Layout as written: `<< 8` forms hold the opcode byte in bits 15..8,
+// `<< 24` / `<< 16` forms are 32-bit images, (unsigned long) forms with
+// `<< 40` carry the first opcode byte at bits 47..40.
+// 167 == 0xa7, 192 == 0xc0, 235 == 0xeb, 236 == 0xec.
+// Register
+#define BCR_ZOPC (unsigned int)(7 << 8)
+#define BALR_ZOPC (unsigned int)(5 << 8)
+#define BASR_ZOPC (unsigned int)(13 << 8)
+// The shift operand must be long here: (0xb946 << 16) evaluated as int is
+// negative and would be sign-extended to 0xffffffffb9460000 by the cast to
+// (unsigned long). With a long operand the value is 0xb9460000 as intended.
+#define BCTGR_ZOPC (unsigned long)(0xb946L << 16)
+// Absolute
+#define BC_ZOPC (unsigned int)(71 << 24)
+#define BAL_ZOPC (unsigned int)(69 << 24)
+#define BAS_ZOPC (unsigned int)(77 << 24)
+#define BXH_ZOPC (unsigned int)(134 << 24)
+#define BXHG_ZOPC (unsigned long)(235L << 40 | 68)
+// Relative
+#define BRC_ZOPC (unsigned int)(167 << 24 | 4 << 16)
+#define BRCL_ZOPC (unsigned long)(192L << 40 | 4L << 32)
+#define BRAS_ZOPC (unsigned int)(167 << 24 | 5 << 16)
+#define BRASL_ZOPC (unsigned long)(192L << 40 | 5L << 32)
+#define BRCT_ZOPC (unsigned int)(167 << 24 | 6 << 16)
+#define BRCTG_ZOPC (unsigned int)(167 << 24 | 7 << 16)
+#define BRXH_ZOPC (unsigned int)(132 << 24)
+#define BRXHG_ZOPC (unsigned long)(236L << 40 | 68)
+#define BRXLE_ZOPC (unsigned int)(133 << 24)
+#define BRXLG_ZOPC (unsigned long)(236L << 40 | 69)
+
+
+// Compare and Branch Instructions
+
+// All (unsigned long) entries share the 0xec opcode byte at bits 47..40 and
+// are told apart by the second opcode byte in the low byte. The trap forms on
+// registers are 32-bit images with a two-byte opcode in bits 31..16.
+// signed comp reg/reg, branch Absolute
+#define CRB_ZOPC (unsigned long)(0xecL << 40 | 0xf6L) // z10
+#define CGRB_ZOPC (unsigned long)(0xecL << 40 | 0xe4L) // z10
+// signed comp reg/reg, branch Relative
+#define CRJ_ZOPC (unsigned long)(0xecL << 40 | 0x76L) // z10
+#define CGRJ_ZOPC (unsigned long)(0xecL << 40 | 0x64L) // z10
+// signed comp reg/imm, branch absolute
+#define CIB_ZOPC (unsigned long)(0xecL << 40 | 0xfeL) // z10
+#define CGIB_ZOPC (unsigned long)(0xecL << 40 | 0xfcL) // z10
+// signed comp reg/imm, branch relative
+#define CIJ_ZOPC (unsigned long)(0xecL << 40 | 0x7eL) // z10
+#define CGIJ_ZOPC (unsigned long)(0xecL << 40 | 0x7cL) // z10
+
+// unsigned comp reg/reg, branch Absolute
+#define CLRB_ZOPC (unsigned long)(0xecL << 40 | 0xf7L) // z10
+#define CLGRB_ZOPC (unsigned long)(0xecL << 40 | 0xe5L) // z10
+// unsigned comp reg/reg, branch Relative
+#define CLRJ_ZOPC (unsigned long)(0xecL << 40 | 0x77L) // z10
+#define CLGRJ_ZOPC (unsigned long)(0xecL << 40 | 0x65L) // z10
+// unsigned comp reg/imm, branch absolute
+#define CLIB_ZOPC (unsigned long)(0xecL << 40 | 0xffL) // z10
+#define CLGIB_ZOPC (unsigned long)(0xecL << 40 | 0xfdL) // z10
+// unsigned comp reg/imm, branch relative
+#define CLIJ_ZOPC (unsigned long)(0xecL << 40 | 0x7fL) // z10
+#define CLGIJ_ZOPC (unsigned long)(0xecL << 40 | 0x7dL) // z10
+
+// comp reg/reg, trap
+#define CRT_ZOPC (unsigned int)(0xb972 << 16) // z10
+#define CGRT_ZOPC (unsigned int)(0xb960 << 16) // z10
+#define CLRT_ZOPC (unsigned int)(0xb973 << 16) // z10
+#define CLGRT_ZOPC (unsigned int)(0xb961 << 16) // z10
+// comp reg/imm, trap
+#define CIT_ZOPC (unsigned long)(0xecL << 40 | 0x72L) // z10
+#define CGIT_ZOPC (unsigned long)(0xecL << 40 | 0x70L) // z10
+#define CLFIT_ZOPC (unsigned long)(0xecL << 40 | 0x73L) // z10
+#define CLGIT_ZOPC (unsigned long)(0xecL << 40 | 0x71L) // z10
+
+
+// Direct Memory Operations
+
+// Four layouts occur below: `<< 8` (opcode byte in bits 15..8), `<< 24`
+// (32-bit image), `<< 40` alone (opcode byte at bits 47..40, no second opcode
+// part), and `<< 40 | low byte` (two-part opcode).
+// 178 == 0xb2, 185 == 0xb9.
+// Compare
+#define CLI_ZOPC (unsigned int)(0x95 << 24)
+#define CLIY_ZOPC (unsigned long)(0xebL << 40 | 0x55L)
+#define CLC_ZOPC (unsigned long)(0xd5L << 40)
+#define CLCL_ZOPC (unsigned int)(0x0f << 8)
+#define CLCLE_ZOPC (unsigned int)(0xa9 << 24)
+#define CLCLU_ZOPC (unsigned long)(0xebL << 40 | 0x8fL)
+
+// Move
+#define MVI_ZOPC (unsigned int)(0x92 << 24)
+#define MVIY_ZOPC (unsigned long)(0xebL << 40 | 0x52L)
+#define MVC_ZOPC (unsigned long)(0xd2L << 40)
+#define MVCL_ZOPC (unsigned int)(0x0e << 8)
+#define MVCLE_ZOPC (unsigned int)(0xa8 << 24)
+
+// Test
+#define TM_ZOPC (unsigned int)(0x91 << 24)
+#define TMY_ZOPC (unsigned long)(0xebL << 40 | 0x51L)
+
+// AND
+#define NI_ZOPC (unsigned int)(0x94 << 24)
+#define NIY_ZOPC (unsigned long)(0xebL << 40 | 0x54L)
+#define NC_ZOPC (unsigned long)(0xd4L << 40)
+
+// OR
+#define OI_ZOPC (unsigned int)(0x96 << 24)
+#define OIY_ZOPC (unsigned long)(0xebL << 40 | 0x56L)
+#define OC_ZOPC (unsigned long)(0xd6L << 40)
+
+// XOR
+#define XI_ZOPC (unsigned int)(0x97 << 24)
+#define XIY_ZOPC (unsigned long)(0xebL << 40 | 0x57L)
+#define XC_ZOPC (unsigned long)(0xd7L << 40)
+
+// Search String
+#define SRST_ZOPC (unsigned int)(178 << 24 | 94 << 16)
+#define SRSTU_ZOPC (unsigned int)(185 << 24 | 190 << 16)
+
+// Translate characters
+#define TROO_ZOPC (unsigned int)(0xb9 << 24 | 0x93 << 16)
+#define TROT_ZOPC (unsigned int)(0xb9 << 24 | 0x92 << 16)
+#define TRTO_ZOPC (unsigned int)(0xb9 << 24 | 0x91 << 16)
+#define TRTT_ZOPC (unsigned int)(0xb9 << 24 | 0x90 << 16)
+
+
+// Miscellaneous Operations
+
+// Layout conventions as written: (unsigned int) values are 16/32-bit images,
+// (unsigned long) values carry the first opcode byte at bits 47..40 with the
+// second opcode part in the low byte or at bit 32.
+// Execute
+// (68 == 0x44; the long shift operand is narrowed by the cast, value 0x44000000.)
+#define EX_ZOPC (unsigned int)(68L << 24)
+#define EXRL_ZOPC (unsigned long)(0xc6L << 40 | 0x00L << 32) // z10
+
+// Compare and Swap
+#define CS_ZOPC (unsigned int)(0xba << 24)
+#define CSY_ZOPC (unsigned long)(0xebL << 40 | 0x14L)
+#define CSG_ZOPC (unsigned long)(0xebL << 40 | 0x30L)
+
+// Interlocked-Update
+#define LAA_ZOPC (unsigned long)(0xebL << 40 | 0xf8L) // z196
+#define LAAG_ZOPC (unsigned long)(0xebL << 40 | 0xe8L) // z196
+#define LAAL_ZOPC (unsigned long)(0xebL << 40 | 0xfaL) // z196
+#define LAALG_ZOPC (unsigned long)(0xebL << 40 | 0xeaL) // z196
+#define LAN_ZOPC (unsigned long)(0xebL << 40 | 0xf4L) // z196
+#define LANG_ZOPC (unsigned long)(0xebL << 40 | 0xe4L) // z196
+#define LAX_ZOPC (unsigned long)(0xebL << 40 | 0xf7L) // z196
+#define LAXG_ZOPC (unsigned long)(0xebL << 40 | 0xe7L) // z196
+#define LAO_ZOPC (unsigned long)(0xebL << 40 | 0xf6L) // z196
+#define LAOG_ZOPC (unsigned long)(0xebL << 40 | 0xe6L) // z196
+
+// System Functions
+#define STCK_ZOPC (unsigned int)(0xb2 << 24 | 0x05 << 16)
+#define STCKF_ZOPC (unsigned int)(0xb2 << 24 | 0x7c << 16)
+#define STFLE_ZOPC (unsigned int)(0xb2 << 24 | 0xb0 << 16)
+#define ECTG_ZOPC (unsigned long)(0xc8L <<40 | 0x01L << 32) // z10
+#define ECAG_ZOPC (unsigned long)(0xebL <<40 | 0x4cL) // z10
+
+// Execution Prediction
+#define PFD_ZOPC (unsigned long)(0xe3L <<40 | 0x36L) // z10
+#define PFDRL_ZOPC (unsigned long)(0xc6L <<40 | 0x02L << 32) // z10
+#define BPP_ZOPC (unsigned long)(0xc7L <<40) // branch prediction preload -- EC12
+#define BPRP_ZOPC (unsigned long)(0xc5L <<40) // branch prediction relative preload -- EC12
+
+// Transaction Control
+#define TBEGIN_ZOPC (unsigned long)(0xe560L << 32) // tx begin -- EC12
+#define TBEGINC_ZOPC (unsigned long)(0xe561L << 32) // tx begin (constrained) -- EC12
+#define TEND_ZOPC (unsigned int)(0xb2f8 << 16) // tx end -- EC12
+#define TABORT_ZOPC (unsigned int)(0xb2fc << 16) // tx abort -- EC12
+#define ETND_ZOPC (unsigned int)(0xb2ec << 16) // tx nesting depth -- EC12
+#define PPA_ZOPC (unsigned int)(0xb2e8 << 16) // tx processor assist -- EC12
+
+// Crypto and Checksum
+#define CKSM_ZOPC (unsigned int)(0xb2 << 24 | 0x41 << 16) // checksum. This is NOT CRC32
+#define KM_ZOPC (unsigned int)(0xb9 << 24 | 0x2e << 16) // cipher
+#define KMC_ZOPC (unsigned int)(0xb9 << 24 | 0x2f << 16) // cipher
+#define KIMD_ZOPC (unsigned int)(0xb9 << 24 | 0x3e << 16) // SHA (msg digest)
+#define KLMD_ZOPC (unsigned int)(0xb9 << 24 | 0x3f << 16) // SHA (msg digest)
+#define KMAC_ZOPC (unsigned int)(0xb9 << 24 | 0x1e << 16) // Message Authentication Code
+
+// Various
+// (237 == 0xed; TAM_ZOPC is an unshifted value, 267 == 0x010b.)
+#define TCEB_ZOPC (unsigned long)(237L << 40 | 16)
+#define TCDB_ZOPC (unsigned long)(237L << 40 | 17)
+#define TAM_ZOPC (unsigned long)(267)
+
+#define FLOGR_ZOPC (unsigned int)(0xb9 << 24 | 0x83 << 16)
+#define POPCNT_ZOPC (unsigned int)(0xb9e1 << 16)
+#define AHHHR_ZOPC (unsigned int)(0xb9c8 << 16)
+#define AHHLR_ZOPC (unsigned int)(0xb9d8 << 16)
+
+
+// OpCode field masks
+
+// Each mask has ones exactly at the bit positions where the *_ZOPC values of
+// the matching layout place their opcode parts:
+//   0xff << 24                -> bits 31..24 (one opcode byte, 32-bit image)
+//   0xff << 24 | 0x0f << 16   -> bits 31..24 and 19..16
+//   0xff << 24 | 0xff << 16   -> bits 31..16
+//   0xff << 8                 -> bits 15..8 (16-bit image)
+//   0xffL << 40 | 0xffL       -> bits 47..40 and 7..0
+//   0xffL << 40 | 0x0fL << 32 -> bits 47..40 and 35..32 (same value as 0xff0fL << 32)
+// NOTE(review): presumably ANDed with a right-justified instruction image
+// before comparing against a *_ZOPC value; the users are not part of this section.
+
+// Generic per-format masks.
+#define RI_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define RRE_MASK (unsigned int)(0xff << 24 | 0xff << 16)
+#define RSI_MASK (unsigned int)(0xff << 24)
+#define RIE_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define RIL_MASK (unsigned long)(0xffL << 40 | 0x0fL << 32)
+
+// Per-instruction masks.
+#define BASR_MASK (unsigned int)(0xff << 8)
+#define BCR_MASK (unsigned int)(0xff << 8)
+#define BRC_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define LGHI_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define LLI_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define II_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define LLIF_MASK (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define IIF_MASK (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define BRASL_MASK (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define TM_MASK (unsigned int)(0xff << 24)
+#define TMY_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LB_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LH_MASK (unsigned int)(0xff << 24)
+#define L_MASK (unsigned int)(0xff << 24)
+#define LY_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LG_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LLGH_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LLGF_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define SLAG_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LARL_MASK (unsigned long)(0xff0fL << 32)
+#define LGRL_MASK (unsigned long)(0xff0fL << 32)
+#define LE_MASK (unsigned int)(0xff << 24)
+#define LD_MASK (unsigned int)(0xff << 24)
+#define ST_MASK (unsigned int)(0xff << 24)
+#define STC_MASK (unsigned int)(0xff << 24)
+#define STG_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define STH_MASK (unsigned int)(0xff << 24)
+#define STE_MASK (unsigned int)(0xff << 24)
+#define STD_MASK (unsigned int)(0xff << 24)
+#define CMPBRANCH_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define REL_LONG_MASK (unsigned long)(0xff0fL << 32)
+
+ public:
+ // Branch masks for the condition code.
+ // Mask bits are counted from the most significant of the four bits:
+ // bit#0 == 0x8, bit#1 == 0x4, bit#2 == 0x2, bit#3 == 0x1 (cf. bcondOverflow).
+ // - Compare and branch/trap instructions: mask bit#3 must be zero to ensure
+ //   future compatibility.
+ // - Arithmetic instructions which set the condition code: mask bit#3
+ //   indicates overflow ("unordered" in float operations).
+ // - "unordered" float comparison results have to be treated as low.
+ // - When overflow/unordered is detected, none of the branch conditions is
+ //   true, except for bcondOverflow/bcondNotOrdered and bcondAlways.
+ // - Inverse of a condition: (14-cond) for INT comparisons,
+ //   (15-cond) for FLOAT comparisons.
+ enum branch_condition {
+   bcondNever               = 0x0,
+   bcondAlways              = 0xf,
+
+   // Names used for sync operations (full and lightweight) and nop.
+   bcondFullSync            = 0xf,
+   bcondLightSync           = 0xe,
+   bcondNop                 = 0x0,
+
+   // Arithmetic compare instructions,
+   // arithmetic load and test, insert instructions.
+   // Mask bit#3 must be zero for future compatibility.
+   bcondEqual               = 0x8,
+   bcondNotEqual            = 0x6,
+   bcondLow                 = 0x4,
+   bcondNotLow              = 0xa,
+   bcondHigh                = 0x2,
+   bcondNotHigh             = 0xc,
+
+   // Arithmetic calculation instructions.
+   // Mask bit#3 indicates overflow if detected by instr.
+   // Mask bit#3 = 0 (overflow is not handled by compiler).
+   bcondOverflow            = 0x1,
+   bcondNotOverflow         = 0xe,
+   bcondZero                = bcondEqual,
+   bcondNotZero             = bcondNotEqual,
+   bcondNegative            = bcondLow,
+   bcondNotNegative         = bcondNotLow,
+   bcondPositive            = bcondHigh,
+   bcondNotPositive         = bcondNotHigh,
+
+   // Float comparisons.
+   bcondNotOrdered          = 0x1,
+   bcondOrdered             = 0xe,
+   bcondLowOrNotOrdered     = bcondLow|bcondNotOrdered,
+   bcondHighOrNotOrdered    = bcondHigh|bcondNotOrdered,
+
+   // Unsigned arithmetic calculation instructions.
+   // Mask bit#0 is not used by these instructions.
+   // There is no indication of overflow for these instr.
+   bcondLogZero             = 0x2,
+   bcondLogNotZero          = 0x5,
+   bcondLogNotZero_Borrow   = 0x4,
+   bcondLogNotZero_NoBorrow = 0x1,
+
+   // String search instructions.
+   bcondFound               = 0x4,
+   bcondNotFound            = 0x2,
+   bcondInterrupted         = 0x1,
+
+   // Bit test instructions.
+   bcondAllZero             = 0x8,
+   bcondMixed               = 0x6,
+   bcondAllOne              = 0x1,
+   bcondNotAllZero          = 0x7  // for tmll
+ };
+
+ // Condition names (z/Architecture). Each value has two synonymous names:
+ // 0 = negative/less, 1 = positive/greater, 2 = zero/equal; 3 = summary_overflow.
+ enum Condition {
+   negative         = 0,
+   less             = negative,
+   positive         = 1,
+   greater          = positive,
+   zero             = 2,
+   equal            = zero,
+   summary_overflow = 3
+ };
+
+ // Rounding mode selector for float-2-int conversions.
+ // Note that the values are not contiguous (2 and 3 are not defined here).
+ enum RoundingMode {
+   current_mode      = 0,  // Mode taken from FPC register.
+   biased_to_nearest = 1,
+   to_nearest        = 4,
+   to_zero           = 5,
+   to_plus_infinity  = 6,
+   to_minus_infinity = 7
+ };
+
+ // Inverse branch condition for a given condition cc. Per the note preceding enum branch_condition: (14 - cc) for INT, (15 - cc) for FLOAT comparisons.
+ static branch_condition inverse_condition(branch_condition cc); // presumably the INT variant - definition not in this section.
+ static branch_condition inverse_float_condition(branch_condition cc); // presumably the FLOAT variant - definition not in this section.
+
+
+ //-----------------------------------------------
+ // instruction property getter methods
+ //-----------------------------------------------
+
+ // Calculate length of the instruction starting at instr (presumably in bytes, like get_instruction() - confirm).
+ static int instr_len(unsigned char *instr);
+
+ // Upper bound of the instruction length: no z/Architecture instruction exceeds 6 bytes.
+ static int instr_maxlen() {
+   const int longest_instr_bytes = 6;
+   return longest_instr_bytes;
+ }
+
+ // Estimated mean instruction length on z/Architecture: 4 bytes (just a guess).
+ static int instr_avglen() {
+   const int average_instr_bytes = 4;
+   return average_instr_bytes;
+ }
+
+ // Lower bound of the instruction length: no z/Architecture instruction is shorter than 2 bytes.
+ static int instr_minlen() {
+   const int shortest_instr_bytes = 2;
+   return shortest_instr_bytes;
+ }
+
+ // Copy the instruction at pc into *instr, right-justified (i.e. into the low-order bytes of the long int).
+ // Returns the instruction length in bytes. set_instruction() stores such an image back.
+ static unsigned int get_instruction(unsigned char *pc, unsigned long *instr);
+
+ // Store an instruction image at pc: the last len bytes (in memory order) of
+ // the passed long int are copied to pc[0..len-1].
+ // This code is _NOT_ MT-safe!!
+ static void set_instruction(unsigned char *pc, unsigned long instr, unsigned int len) {
+   // One past the last byte of the local copy; the image ends there.
+   const unsigned char* const image_end = (const unsigned char *)(&instr + 1);
+   memcpy(pc, image_end - len, len);
+ }
+
+
+ //------------------------------------------
+ // instruction field test methods
+ //------------------------------------------
+
+ // Forwards to RelAddr::is_in_range_of_RelAddr16(target, origin).
+ // Only used once in s390.ad to implement Matcher::is_short_branch_offset().
+ static bool is_within_range_of_RelAddr16(address target, address origin) {
+   const bool in_range = RelAddr::is_in_range_of_RelAddr16(target, origin);
+   return in_range;
+ }
+
+
+ //----------------------------------
+ // diagnostic output (NOTE(review): PRODUCT_RETURN presumably turns these into empty functions in product builds - confirm)
+ //----------------------------------
+
+ static void print_dbg_msg(outputStream* out, unsigned long inst, const char* msg, int ilen) PRODUCT_RETURN; // inst/ilen: presumably instruction image and its length - confirm.
+ static void dump_code_range(outputStream* out, address pc, const unsigned int range, const char* msg = " ") PRODUCT_RETURN; // msg defaults to " ".
+
+ protected:
+
+ //-------------------------------------------------------
+ // instruction field helper methods (internal)
+ //-------------------------------------------------------
+
+ // Build a bit mask with ones in bit positions lo_bit..hi_bit (both inclusive,
+ // bit 0 being the least significant bit) and zeros everywhere else.
+ static long fmask(unsigned int hi_bit, unsigned int lo_bit) {
+   assert(hi_bit >= lo_bit && hi_bit < 48, "bad bits");
+   const unsigned int width = hi_bit - lo_bit + 1;
+   const long right_justified_ones = (1L << width) - 1;
+   return right_justified_ones << lo_bit;
+ }
+
+ // Extract the bit field hi_bit..lo_bit of x as an unsigned value:
+ // isolate the field with fmask(), then right-justify it.
+ static long inv_u_field(long x, int hi_bit, int lo_bit) {
+   const long field_in_place = x & fmask(hi_bit, lo_bit);
+   return field_in_place >> lo_bit;
+ }
+
+ // Extract the bit field hi_bit..lo_bit of x as a signed (two's complement) value.
+ // The field is extracted unsigned first and sign-extended if its highest bit is set.
+ static long inv_s_field(long x, int hi_bit, int lo_bit) {
+   // Weight of the highest bit of the extracted, right-justified field.
+   const long sign_bit = 1L << (hi_bit - lo_bit);
+   x = inv_u_field(x, hi_bit, lo_bit);
+   // (x ^ sign_bit) - sign_bit leaves values below sign_bit unchanged and maps
+   // values with the highest bit set to x - 2*sign_bit. That is the same result
+   // as or-ing in a left-shifted -1L, but without left-shifting a negative
+   // value, which is undefined behavior before C++20.
+   return (x ^ sign_bit) - sign_bit;
+ }
+
+ // Extract the primary opcode byte from an instruction image.
+ // int overload: the opcode is read from bits 31..24.
+ static int z_inv_op(int x) {
+   const long opcode = inv_u_field(x, 31, 24);
+   return opcode;
+ }
+ // long overload: the opcode is read from bits 47..40.
+ static int z_inv_op(long x) {
+   const long opcode = inv_u_field(x, 47, 40);
+   return opcode;
+ }
+
+ // Extract a register number: the 4-bit field starting s bits from the left of a len-bit image.
+ static int inv_reg(long x, int s, int len) {
+   const int hi_bit = (len-s)-1;
+   return inv_u_field(x, hi_bit, hi_bit-3);  // Regs are encoded in 4 bits.
+ }
+ // Extract a mask: the 8-bit field starting s bits from the left of a len-bit image.
+ static int inv_mask(long x, int s, int len) {
+   const int hi_bit = (len-s)-1;
+   return inv_u_field(x, hi_bit, hi_bit-7);  // Mask is 8 bits long.
+ }
+ // Signed 16-bit immediate in bits 31..16 (6-byte instructions only).
+ static int inv_simm16_48(long x) {
+   const long imm16 = inv_s_field(x, 31, 16);
+   return imm16;
+ }
+ // Signed 16-bit immediate in bits 15..0 (4-byte instructions only).
+ static int inv_simm16(long x) {
+   const long imm16 = inv_s_field(x, 15, 0);
+   return imm16;
+ }
+ // Extract the signed 20-bit displacement (6-byte instructions only).
+ // The low 12 bits (DL) are read unsigned from bits 27..16, the high 8 bits (DH)
+ // are read signed from bits 15..8; the result is DH * 4096 + DL.
+ static int inv_simm20(long x) {
+   const long dl = inv_u_field(x, 27, 16);
+   const long dh = inv_s_field(x, 15, 8);
+   // dh may be negative: scale it by multiplication, since left-shifting a
+   // negative value is undefined behavior before C++20. Adding dl is equivalent
+   // to or-ing it in because the low 12 bits of the scaled dh are zero.
+   return dh * (1L << 12) + dl;
+ }
+ // Signed 32-bit immediate in bits 31..0 (6-byte instructions only).
+ static int inv_simm32(long x) {
+   const long imm32 = inv_s_field(x, 31, 0);
+   return imm32;
+ }
+ // Unsigned 12-bit immediate in bits 11..0 (4-byte instructions only).
+ static int inv_uimm12(long x) {
+   const long imm12 = inv_u_field(x, 11, 0);
+   return imm12;
+ }
+
+ // Place the unsigned value x into the bit field hi_bit..lo_bit.
+ // Asserts that the shifted value does not spill outside the field and that
+ // extracting the field again yields x.
+ static long u_field(long x, int hi_bit, int lo_bit) {
+   const long positioned = x << lo_bit;
+   assert((positioned & ~fmask(hi_bit, lo_bit)) == 0, "value out of range");
+   assert(inv_u_field(positioned, hi_bit, lo_bit) == x, "just checking");
+   return positioned;
+ }
+
+ public:
+
+ //--------------------------------------------------
+ // instruction field construction methods
+ //--------------------------------------------------
+
+ // Relative address (32 bit) of dest as seen from pc, for use in a branch.
+ // Forwards to RelAddr::pcrel_off32(). Only used once in nativeInst_s390.cpp.
+ static intptr_t z_pcrel_off(address dest, address pc) {
+   const intptr_t offset = RelAddr::pcrel_off32(dest, pc);
+   return offset;
+ }
+
+ // Extract the 20-bit signed displacement of the instruction located at iLoc.
+ // The instruction is fetched with get_instruction() and decoded with inv_simm20().
+ // Only used in disassembler_s390.cpp for temp enhancements.
+ static int inv_simm20_xx(address iLoc) {
+   unsigned long instr = 0;
+   // Only the instruction image is needed; the returned length is deliberately ignored.
+   (void)get_instruction(iLoc, &instr);
+   return inv_simm20(instr);
+ }
+
+ // Unsigned immediate of nbits bits, returned in the low bits of the result.
+ // Asserts that x passes Immediate::is_uimm(x, nbits).
+ static long uimm(long x, int nbits) {
+   assert(Immediate::is_uimm(x, nbits), "unsigned constant out of range");
+   const long low_bits = fmask(nbits - 1, 0);
+   return x & low_bits;
+ }
+
+ // Signed immediate of nbits bits, returned in the low bits of the result;
+ // all bits above the low nbits are cleared.
+ // Asserts that x passes Immediate::is_simm(x, nbits).
+ static long simm(long x, int nbits) {
+   assert(Immediate::is_simm(x, nbits), "value out of range");
+   const long low_bits = fmask(nbits - 1, 0);
+   return x & low_bits;
+ }
+
+ // Immediate of nbits bits that may be given either signed or unsigned,
+ // returned in the low bits of the result.
+ static long imm(int64_t x, int nbits) {
+   // x fits if everything above the low nbits is plain sign/zero extension,
+   // i.e. the bits shifted down from there are all 0 or all 1.
+   assert((x >> nbits) == 0 || (x >> nbits) == -1, "value out of range");
+   const long low_bits = fmask(nbits-1, 0);
+   return x & low_bits;
+ }
+
+ // Encode a signed 20-bit displacement for a 48-bit instruction image.
+ // Only instructions of the RSY, RXY, or SIY format have a 20-bit
+ // displacement. Their D field is split into a DL (low) field in bit
+ // positions 20-31 and a DH (high) field in bit positions 32-39; the
+ // displacement value is DH appended to the left of DL.
+ static long simm20(int64_t ui20) {
+   assert(Immediate::is_simm(ui20, 20), "value out of range");
+   const long dl = ui20 & 0xfffL;         // Low 12 bits of the displacement.
+   const long dh = (ui20 >> 12) & 0xffL;  // High 8 bits of the displacement.
+   return (dl << (48-32)) | (dh << (48-40));
+ }
+
+ // Encode a register into the 4-bit field that starts s bits from the left of a len-bit image.
+ static long reg(Register r, int s, int len) {
+   const int hi_bit = (len-s)-1;
+   return u_field(r->encoding(), hi_bit, hi_bit-3);
+ }
+ // Same, with the register given as a plain number.
+ static long reg(int r, int s, int len) {
+   const int hi_bit = (len-s)-1;
+   return u_field(r, hi_bit, hi_bit-3);
+ }
+ // Same encoding as reg(Register, int, int).
+ static long regt(Register r, int s, int len) {
+   return reg(r, s, len);
+ }
+ // Like reg(), but asserts that r is not Z_R0.
+ static long regz(Register r, int s, int len) {
+   assert(r != Z_R0, "cannot use register R0 in memory access");
+   return reg(r, s, len);
+ }
+
+ // Unsigned immediates of N bits, moved to the field starting s bits from the left of a len-bit image.
+ static long uimm4( int64_t ui4, int s, int len) { const int shift = len-s-4; return uimm(ui4, 4) << shift; }
+ static long uimm6( int64_t ui6, int s, int len) { const int shift = len-s-6; return uimm(ui6, 6) << shift; }
+ static long uimm8( int64_t ui8, int s, int len) { const int shift = len-s-8; return uimm(ui8, 8) << shift; }
+ static long uimm12(int64_t ui12, int s, int len) { const int shift = len-s-12; return uimm(ui12, 12) << shift; }
+ static long uimm16(int64_t ui16, int s, int len) { const int shift = len-s-16; return uimm(ui16, 16) << shift; }
+ // The value is cast to unsigned first to prevent sign extension.
+ static long uimm32(int64_t ui32, int s, int len) { const int shift = len-s-32; return uimm((unsigned)ui32, 32) << shift; }
+
+ // Signed immediates of N bits, moved to the field starting s bits from the left of a len-bit image.
+ static long simm8( int64_t si8, int s, int len) { const int shift = len-s-8; return simm(si8, 8) << shift; }
+ static long simm12(int64_t si12, int s, int len) { const int shift = len-s-12; return simm(si12, 12) << shift; }
+ static long simm16(int64_t si16, int s, int len) { const int shift = len-s-16; return simm(si16, 16) << shift; }
+ static long simm24(int64_t si24, int s, int len) { const int shift = len-s-24; return simm(si24, 24) << shift; }
+ static long simm32(int64_t si32, int s, int len) { const int shift = len-s-32; return simm(si32, 32) << shift; }
+
+ // Immediates of N bits (signed or unsigned, see imm()), moved to the field starting s bits from the left of a len-bit image.
+ static long imm8( int64_t i8, int s, int len) { const int shift = len-s-8; return imm(i8, 8) << shift; }
+ static long imm12(int64_t i12, int s, int len) { const int shift = len-s-12; return imm(i12, 12) << shift; }
+ static long imm16(int64_t i16, int s, int len) { const int shift = len-s-16; return imm(i16, 16) << shift; }
+ static long imm24(int64_t i24, int s, int len) { const int shift = len-s-24; return imm(i24, 24) << shift; }
+ static long imm32(int64_t i32, int s, int len) { const int shift = len-s-32; return imm(i32, 32) << shift; }
+
+ // Same encoding as freg().
+ static long fregt(FloatRegister r, int s, int len) {
+   return freg(r,s,len);
+ }
+ // Encode a float register into the 4-bit field that starts s bits from the left of a len-bit image.
+ static long freg( FloatRegister r, int s, int len) {
+   const int hi_bit = (len-s)-1;
+   return u_field(r->encoding(), hi_bit, hi_bit-3);
+ }
+
+ // Encode the rounding mode for float-2-int conversions as a 4-bit field
+ // starting s bits from the left of a len-bit image. Values 2 and 3 are rejected.
+ static long rounding_mode(RoundingMode m, int s, int len) {
+   assert(m != 2 && m != 3, "invalid mode");
+   const int shift = len-s-4;
+   return uimm(m, 4) << shift;
+ }
+
+ //--------------------------------------------
+ // instruction field getter methods
+ //--------------------------------------------
+
+ // Read the int stored 2 bytes into the instruction_number-th 6-byte slot starting at a.
+ static int get_imm32(address a, int instruction_number) {
+   const int* imm_addr = (const int *)(a + 2 + 6 * instruction_number);
+   return *imm_addr;
+ }
+
+ // Read the second short of the instruction_number-th slot of two shorts starting at a.
+ static short get_imm16(address a, int instruction_number) {
+   const short* shorts = (const short *)a;
+   return shorts[2 * instruction_number + 1];
+ }
+
+
+ //--------------------------------------------
+ // instruction field setter methods
+ //--------------------------------------------
+
+ // Overwrite the int located 2 bytes past a with s.
+ // s must be representable as signed or as unsigned 32-bit value.
+ static void set_imm32(address a, int64_t s) {
+   assert(Immediate::is_simm32(s) || Immediate::is_uimm32(s), "to big");
+   int* const imm_addr = (int *)(a + 2);
+   imm_addr[0] = s;
+ }
+
+ // Overwrite the second short of the int at instr with s.
+ // s must be representable as signed or as unsigned 16-bit value.
+ static void set_imm16(int* instr, int64_t s) {
+   assert(Immediate::is_simm16(s) || Immediate::is_uimm16(s), "to big");
+   short* const shorts = (short *)instr;
+   shorts[1] = s;
+ }
+
+ public:
+
+ // Round x up to a multiple of a by adding a-1 and clearing the bits of a-1.
+ static unsigned int align(unsigned int x, unsigned int a) {
+   const unsigned int low_bits = a - 1;
+   return (x + low_bits) & ~low_bits;
+ }
+ // True iff x is a multiple of a.
+ static bool is_aligned(unsigned int x, unsigned int a) {
+   const unsigned int remainder = x % a;
+   return remainder == 0;
+ }
+
+ inline void emit_16(int x); // NOTE(review): presumably emits x as a 2-byte instruction; defined outside this section - confirm.
+ inline void emit_32(int x); // NOTE(review): presumably emits x as a 4-byte instruction - confirm.
+ inline void emit_48(long x); // NOTE(review): presumably emits the low 48 bits of x as a 6-byte instruction - confirm.
+
+ // Compare and control flow instructions
+ // =====================================
+
+ // See also commodity routines compare64_and_branch(), compare32_and_branch().
+
+ // compare instructions (signed operands; see "compare logical" for unsigned)
+ // compare register
+ inline void z_cr( Register r1, Register r2); // compare (r1, r2) ; int32
+ inline void z_cgr( Register r1, Register r2); // compare (r1, r2) ; int64
+ inline void z_cgfr(Register r1, Register r2); // compare (r1, r2) ; int64 <--> int32
+ // compare immediate
+ inline void z_chi( Register r1, int64_t i2); // compare (r1, i2_imm16) ; int32
+ inline void z_cfi( Register r1, int64_t i2); // compare (r1, i2_imm32) ; int32
+ inline void z_cghi(Register r1, int64_t i2); // compare (r1, i2_imm16) ; int64
+ inline void z_cgfi(Register r1, int64_t i2); // compare (r1, i2_imm32) ; int64
+ // compare memory
+ inline void z_ch( Register r1, const Address &a); // compare (r1, *(a)) ; int32 <--> int16
+ inline void z_ch( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm12+x2+b2)) ; int32 <--> int16
+ inline void z_c( Register r1, const Address &a); // compare (r1, *(a)) ; int32
+ inline void z_c( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm12+x2+b2)) ; int32
+ inline void z_cy( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm20+x2+b2)) ; int32
+ inline void z_cy( Register r1, int64_t d2, Register b2); // compare (r1, *(d2_imm20+b2)) ; int32 (no index register)
+ inline void z_cy( Register r1, const Address& a); // compare (r1, *(a)) ; int32
+ //inline void z_cgf(Register r1,const Address &a); // compare (r1, *(a)) ; int64 <--> int32
+ //inline void z_cgf(Register r1,int64_t d2, Register x2, Register b2);// compare (r1, *(d2_imm20+x2+b2)) ; int64 <--> int32
+ inline void z_cg( Register r1, const Address &a); // compare (r1, *(a)) ; int64
+ inline void z_cg( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm20+x2+b2)) ; int64
+
+ // compare logical instructions (unsigned operands)
+ // compare register
+ inline void z_clr( Register r1, Register r2); // compare (r1, r2) ; uint32
+ inline void z_clgr( Register r1, Register r2); // compare (r1, r2) ; uint64
+ // compare immediate
+ inline void z_clfi( Register r1, int64_t i2); // compare (r1, i2_uimm32) ; uint32
+ inline void z_clgfi(Register r1, int64_t i2); // compare (r1, i2_uimm32) ; uint64
+ inline void z_cl( Register r1, const Address &a); // compare (r1, *(a)) ; uint32
+ inline void z_cl( Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_uimm12+x2+b2)) ; uint32
+ inline void z_cly( Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_imm20+x2+b2)) ; uint32
+ inline void z_cly( Register r1, int64_t d2, Register b2); // compare (r1, *(d2_imm20+b2)) ; uint32 (no index register)
+ inline void z_cly( Register r1, const Address& a); // compare (r1, *(a)) ; uint32
+ inline void z_clg( Register r1, const Address &a); // compare (r1, *(a)) ; uint64
+ inline void z_clg( Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_imm20+x2+b2)) ; uint64
+
+ // test under mask (i2 is a 16-bit mask; results map to bcondAllZero/bcondMixed/bcondAllOne, see branch_condition)
+ inline void z_tmll(Register r1, int64_t i2); // test r1[48-63] under mask i2_uimm16
+ inline void z_tmlh(Register r1, int64_t i2); // test r1[32-47] under mask i2_uimm16
+ inline void z_tmhl(Register r1, int64_t i2); // test r1[16-31] under mask i2_uimm16
+ inline void z_tmhh(Register r1, int64_t i2); // test r1[ 0-15] under mask i2_uimm16
+
+ // branch instructions (m1/i1 is a 4-bit branch_condition mask)
+ inline void z_bc( branch_condition m1, int64_t d2, Register x2, Register b2);// branch m1 ? pc = (d2_uimm12+x2+b2)
+ inline void z_bcr( branch_condition m1, Register r2); // branch (m1 && r2!=R0) ? pc = r2
+ inline void z_brc( branch_condition i1, int64_t i2); // branch i1 ? pc = pc + i2_imm16
+ inline void z_brc( branch_condition i1, address a); // branch i1 ? pc = a
+ inline void z_brc( branch_condition i1, Label& L); // branch i1 ? pc = Label
+ //inline void z_brcl(branch_condition i1, int64_t i2); // branch i1 ? pc = pc + i2_imm32
+ inline void z_brcl(branch_condition i1, address a); // branch i1 ? pc = a
+ inline void z_brcl(branch_condition i1, Label& L); // branch i1 ? pc = Label
+ inline void z_bctgr(Register r1, Register r2); // branch on count r1 -= 1; (r1!=0) ? pc = r2 ; r1 is int64
+
+ // branch unconditional / always
+ inline void z_br(Register r2); // branch to r2, nop if r2 == Z_R0
+
+
+ // See also commodity routines compare64_and_branch(), compare32_and_branch().
+ // signed comparison and branch (m3 selects the comparison outcome that takes the branch)
+ inline void z_crb( Register r1, Register r2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 r2) ? goto b4+d4 ; int32 -- z10
+ inline void z_cgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 r2) ? goto b4+d4 ; int64 -- z10
+ inline void z_crj( Register r1, Register r2, branch_condition m3, Label& L); // (r1 m3 r2) ? goto L ; int32 -- z10
+ inline void z_crj( Register r1, Register r2, branch_condition m3, address a4); // (r1 m3 r2) ? goto (pc+a4<<1) ; int32 -- z10
+ inline void z_cgrj(Register r1, Register r2, branch_condition m3, Label& L); // (r1 m3 r2) ? goto L ; int64 -- z10
+ inline void z_cgrj(Register r1, Register r2, branch_condition m3, address a4); // (r1 m3 r2) ? goto (pc+a4<<1) ; int64 -- z10
+ inline void z_cib( Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_imm8) ? goto b4+d4 ; int32 -- z10
+ inline void z_cgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_imm8) ? goto b4+d4 ; int64 -- z10
+ inline void z_cij( Register r1, int64_t i2, branch_condition m3, Label& L); // (r1 m3 i2_imm8) ? goto L ; int32 -- z10
+ inline void z_cij( Register r1, int64_t i2, branch_condition m3, address a4); // (r1 m3 i2_imm8) ? goto (pc+a4<<1) ; int32 -- z10
+ inline void z_cgij(Register r1, int64_t i2, branch_condition m3, Label& L); // (r1 m3 i2_imm8) ? goto L ; int64 -- z10
+ inline void z_cgij(Register r1, int64_t i2, branch_condition m3, address a4); // (r1 m3 i2_imm8) ? goto (pc+a4<<1) ; int64 -- z10
+ // unsigned comparison and branch (m3 selects the comparison outcome that takes the branch)
+ inline void z_clrb( Register r1, Register r2, branch_condition m3, int64_t d4, Register b4);// (r1 m3 r2) ? goto b4+d4 ; uint32 -- z10
+ inline void z_clgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4);// (r1 m3 r2) ? goto b4+d4 ; uint64 -- z10
+ inline void z_clrj( Register r1, Register r2, branch_condition m3, Label& L); // (r1 m3 r2) ? goto L ; uint32 -- z10
+ inline void z_clrj( Register r1, Register r2, branch_condition m3, address a4); // (r1 m3 r2) ? goto (pc+a4<<1) ; uint32 -- z10
+ inline void z_clgrj(Register r1, Register r2, branch_condition m3, Label& L); // (r1 m3 r2) ? goto L ; uint64 -- z10
+ inline void z_clgrj(Register r1, Register r2, branch_condition m3, address a4); // (r1 m3 r2) ? goto (pc+a4<<1) ; uint64 -- z10
+ inline void z_clib( Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_uimm8) ? goto b4+d4 ; uint32 -- z10
+ inline void z_clgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_uimm8) ? goto b4+d4 ; uint64 -- z10
+ inline void z_clij( Register r1, int64_t i2, branch_condition m3, Label& L); // (r1 m3 i2_uimm8) ? goto L ; uint32 -- z10
+ inline void z_clij( Register r1, int64_t i2, branch_condition m3, address a4); // (r1 m3 i2_uimm8) ? goto (pc+a4<<1) ; uint32 -- z10
+ inline void z_clgij(Register r1, int64_t i2, branch_condition m3, Label& L); // (r1 m3 i2_uimm8) ? goto L ; uint64 -- z10
+ inline void z_clgij(Register r1, int64_t i2, branch_condition m3, address a4); // (r1 m3 i2_uimm8) ? goto (pc+a4<<1) ; uint64 -- z10
+
+ // Compare and trap instructions (m3 is passed as plain int64_t here, not as branch_condition).
+ // signed comparison
+ inline void z_crt(Register r1, Register r2, int64_t m3); // (r1 m3 r2) ? trap ; int32 -- z10
+ inline void z_cgrt(Register r1, Register r2, int64_t m3); // (r1 m3 r2) ? trap ; int64 -- z10
+ inline void z_cit(Register r1, int64_t i2, int64_t m3); // (r1 m3 i2_imm16) ? trap ; int32 -- z10
+ inline void z_cgit(Register r1, int64_t i2, int64_t m3); // (r1 m3 i2_imm16) ? trap ; int64 -- z10
+ // unsigned comparison
+ inline void z_clrt(Register r1, Register r2, int64_t m3); // (r1 m3 r2) ? trap ; uint32 -- z10
+ inline void z_clgrt(Register r1, Register r2, int64_t m3); // (r1 m3 r2) ? trap ; uint64 -- z10
+ inline void z_clfit(Register r1, int64_t i2, int64_t m3); // (r1 m3 i2_uimm16) ? trap ; uint32 -- z10
+ inline void z_clgit(Register r1, int64_t i2, int64_t m3); // (r1 m3 i2_uimm16) ? trap ; uint64 -- z10
+
+ inline void z_illtrap(); // NOTE(review): presumably emits an illegal (trapping) instruction - confirm.
+ inline void z_illtrap(int id); // NOTE(review): presumably as above, tagged with id - confirm.
+ inline void z_illtrap_eyecatcher(unsigned short xpattern, unsigned short pattern); // NOTE(review): presumably emits recognizable marker patterns - confirm.
+
+
+ // load address, add for addresses
+ // ===============================
+
+ // The versions without suffix z assert that the base reg is != Z_R0.
+ // Z_R0 as base reg is interpreted as constant '0'. The variants with Address operand
+ // check this automatically, so no two versions are needed.
+ inline void z_layz(Register r1, int64_t d2, Register x2, Register b2); // Special version. Allows Z_R0 as base reg.
+ inline void z_lay(Register r1, const Address &a); // r1 = a
+ inline void z_lay(Register r1, int64_t d2, Register x2, Register b2); // r1 = d2_imm20+x2+b2
+ inline void z_laz(Register r1, int64_t d2, Register x2, Register b2); // Special version. Allows Z_R0 as base reg.
+ inline void z_la(Register r1, const Address &a); // r1 = a ; unsigned immediate!
+ inline void z_la(Register r1, int64_t d2, Register x2, Register b2); // r1 = d2_uimm12+x2+b2 ; unsigned immediate!
+ inline void z_larl(Register r1, int64_t i2); // r1 = pc + i2_imm32<<1;
+ inline void z_larl(Register r1, address a2); // r1 = a2 (presumably encoded pc-relative as pc + i2_imm32<<1 - confirm)
+
+ // Load instructions for integers
+ // ==============================
+
+ // Address as base + index + offset (sign-extending loads)
+ inline void z_lb( Register r1, const Address &a); // load r1 = *(a) ; int32 <- int8
+ inline void z_lb( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32 <- int8
+ inline void z_lh( Register r1, const Address &a); // load r1 = *(a) ; int32 <- int16
+ inline void z_lh( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2); int32 <- int16
+ inline void z_lhy(Register r1, const Address &a); // load r1 = *(a) ; int32 <- int16
+ inline void z_lhy(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32 <- int16
+ inline void z_l( Register r1, const Address& a); // load r1 = *(a) ; int32
+ inline void z_l( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2); int32
+ inline void z_ly( Register r1, const Address& a); // load r1 = *(a) ; int32
+ inline void z_ly( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32
+
+ inline void z_lgb(Register r1, const Address &a); // load r1 = *(a) ; int64 <- int8
+ inline void z_lgb(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int8
+ inline void z_lgh(Register r1, const Address &a); // load r1 = *(a) ; int64 <- int16
+ inline void z_lgh(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int16
+ inline void z_lgf(Register r1, const Address &a); // load r1 = *(a) ; int64 <- int32
+ inline void z_lgf(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int32
+ inline void z_lg( Register r1, const Address& a); // load r1 = *(a) ; int64 <- int64
+ inline void z_lg( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int64
+
+ // load and test
+ inline void z_lt( Register r1, const Address &a); // load and test r1 = *(a) ; int32
+ inline void z_lt( Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int32
+ inline void z_ltg( Register r1, const Address &a); // load and test r1 = *(a) ; int64
+ inline void z_ltg( Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int64
+ inline void z_ltgf(Register r1, const Address &a); // load and test r1 = *(a) ; int64 <- int32
+ inline void z_ltgf(Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int64 <- int32
+
+ // load unsigned integer - zero extended
+ inline void z_llc( Register r1, const Address& a); // load r1 = *(a) ; uint32 <- uint8
+ inline void z_llc( Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint32 <- uint8
+ inline void z_llh( Register r1, const Address& a); // load r1 = *(a) ; uint32 <- uint16
+ inline void z_llh( Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint32 <- uint16
+ inline void z_llgc(Register r1, const Address& a); // load r1 = *(a) ; uint64 <- uint8
+ inline void z_llgc(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint8
+ inline void z_llgc( Register r1, int64_t d2, Register b2); // load r1 = *(d2_imm20+b2) ; uint64 <- uint8
+ inline void z_llgh(Register r1, const Address& a); // load r1 = *(a) ; uint64 <- uint16
+ inline void z_llgh(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint16
+ inline void z_llgf(Register r1, const Address& a); // load r1 = *(a) ; uint64 <- uint32
+ inline void z_llgf(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint32
+
+ // pc relative addressing (i2 is a signed 32-bit offset counted in halfwords, hence the <<1)
+ inline void z_lhrl( Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int32 <- int16 -- z10
+ inline void z_lrl( Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int32 -- z10
+ inline void z_lghrl(Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int64 <- int16 -- z10
+ inline void z_lgfrl(Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int64 <- int32 -- z10
+ inline void z_lgrl( Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int64 -- z10
+
+ inline void z_llhrl( Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; uint32 <- uint16 -- z10
+ inline void z_llghrl(Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; uint64 <- uint16 -- z10
+ inline void z_llgfrl(Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; uint64 <- uint32 -- z10
+
+ // Store instructions for integers
+ // ===============================
+
+ // Address as base + index + offset
+ inline void z_stc( Register r1, const Address &d); // store *(d) = r1 ; int8
+ inline void z_stc( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; int8
+ inline void z_stcy(Register r1, const Address &d); // store *(d) = r1 ; int8
+ inline void z_stcy(Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; int8
+ inline void z_sth( Register r1, const Address &d); // store *(d) = r1 ; int16
+ inline void z_sth( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; int16
+ inline void z_sthy(Register r1, const Address &d); // store *(d) = r1 ; int16
+ inline void z_sthy(Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; int16
+ inline void z_st( Register r1, const Address &d); // store *(d) = r1 ; int32
+ inline void z_st( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; int32
+ inline void z_sty( Register r1, const Address &d); // store *(d) = r1 ; int32
+ inline void z_sty( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; int32
+ inline void z_stg( Register r1, const Address &d); // store *(d) = r1 ; int64
+ inline void z_stg( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; int64
+
+ inline void z_stcm( Register r1, int64_t m3, int64_t d2, Register b2); // store character under mask
+ inline void z_stcmy(Register r1, int64_t m3, int64_t d2, Register b2); // store character under mask
+ inline void z_stcmh(Register r1, int64_t m3, int64_t d2, Register b2); // store character under mask
+
+ // pc relative addressing (i2 is a signed 32-bit offset counted in halfwords, hence the <<1)
+ inline void z_sthrl(Register r1, int64_t i2); // store *(pc + i2_imm32<<1) = r1 ; int16 -- z10
+ inline void z_strl( Register r1, int64_t i2); // store *(pc + i2_imm32<<1) = r1 ; int32 -- z10
+ inline void z_stgrl(Register r1, int64_t i2); // store *(pc + i2_imm32<<1) = r1 ; int64 -- z10
+
+
+ // Load and store immediates
+ // =========================
+
+ // load immediate
+ inline void z_lhi( Register r1, int64_t i2); // r1 = i2_imm16 ; int32 <- int16
+ inline void z_lghi(Register r1, int64_t i2); // r1 = i2_imm16 ; int64 <- int16
+ inline void z_lgfi(Register r1, int64_t i2); // r1 = i2_imm32 ; int64 <- int32
+
+ inline void z_llihf(Register r1, int64_t i2); // r1 = i2_imm32 ; uint64 <- (uint32<<32)
+ inline void z_llilf(Register r1, int64_t i2); // r1 = i2_imm32 ; uint64 <- uint32
+ inline void z_llihh(Register r1, int64_t i2); // r1 = i2_imm16 ; uint64 <- (uint16<<48)
+ inline void z_llihl(Register r1, int64_t i2); // r1 = i2_imm16 ; uint64 <- (uint16<<32)
+ inline void z_llilh(Register r1, int64_t i2); // r1 = i2_imm16 ; uint64 <- (uint16<<16)
+ inline void z_llill(Register r1, int64_t i2); // r1 = i2_imm16 ; uint64 <- uint16
+
+ // insert character (from memory) and insert immediate
+ inline void z_ic( Register r1, int64_t d2, Register x2, Register b2); // insert character
+ inline void z_icy( Register r1, int64_t d2, Register x2, Register b2); // insert character
+ inline void z_icm( Register r1, int64_t m3, int64_t d2, Register b2); // insert character under mask
+ inline void z_icmy(Register r1, int64_t m3, int64_t d2, Register b2); // insert character under mask
+ inline void z_icmh(Register r1, int64_t m3, int64_t d2, Register b2); // insert character under mask
+
+ inline void z_iihh(Register r1, int64_t i2); // insert immediate r1[ 0-15] = i2_imm16
+ inline void z_iihl(Register r1, int64_t i2); // insert immediate r1[16-31] = i2_imm16
+ inline void z_iilh(Register r1, int64_t i2); // insert immediate r1[32-47] = i2_imm16
+ inline void z_iill(Register r1, int64_t i2); // insert immediate r1[48-63] = i2_imm16
+ inline void z_iihf(Register r1, int64_t i2); // insert immediate r1[ 0-31] = i2_imm32
+ inline void z_iilf(Register r1, int64_t i2); // insert immediate r1[32-63] = i2_imm32
+
+ // store immediate
+ inline void z_mvhhi(const Address &d, int64_t i2); // store *(d) = i2_imm16 ; int16
+ inline void z_mvhhi(int64_t d1, Register b1, int64_t i2); // store *(d1_uimm12+b1) = i2_imm16 ; int16
+ inline void z_mvhi( const Address &d, int64_t i2); // store *(d) = i2_imm16 ; int32
+ inline void z_mvhi( int64_t d1, Register b1, int64_t i2); // store *(d1_uimm12+b1) = i2_imm16 ; int32
+ inline void z_mvghi(const Address &d, int64_t i2); // store *(d) = i2_imm16 ; int64
+ inline void z_mvghi(int64_t d1, Register b1, int64_t i2); // store *(d1_uimm12+b1) = i2_imm16 ; int64
+
+ // Move and Convert instructions
+ // =============================
+
+ // move, sign extend
+ inline void z_lbr(Register r1, Register r2); // move r1 = r2 ; int32 <- int8
+ inline void z_lhr( Register r1, Register r2); // move r1 = r2 ; int32 <- int16
+ inline void z_lr(Register r1, Register r2); // move r1 = r2 ; int32, no sign extension
+ inline void z_lgbr(Register r1, Register r2); // move r1 = r2 ; int64 <- int8
+ inline void z_lghr(Register r1, Register r2); // move r1 = r2 ; int64 <- int16
+ inline void z_lgfr(Register r1, Register r2); // move r1 = r2 ; int64 <- int32
+ inline void z_lgr(Register r1, Register r2); // move r1 = r2 ; int64
+ // move, zero extend
+ inline void z_llhr( Register r1, Register r2); // move r1 = r2 ; uint32 <- uint16
+ inline void z_llgcr(Register r1, Register r2); // move r1 = r2 ; uint64 <- uint8
+ inline void z_llghr(Register r1, Register r2); // move r1 = r2 ; uint64 <- uint16
+ inline void z_llgfr(Register r1, Register r2); // move r1 = r2 ; uint64 <- uint32
+
+ // move and test register (NOTE(review): "test" presumably means the condition code is set from the moved value - confirm)
+ inline void z_ltr(Register r1, Register r2); // load/move and test r1 = r2; int32
+ inline void z_ltgr(Register r1, Register r2); // load/move and test r1 = r2; int64
+ inline void z_ltgfr(Register r1, Register r2); // load/move and test r1 = r2; int64 <-- int32
+
+ // move and byte-reverse
+ inline void z_lrvr( Register r1, Register r2); // move and reverse byte order r1 = r2; int32
+ inline void z_lrvgr(Register r1, Register r2); // move and reverse byte order r1 = r2; int64
+
+
+ // Arithmetic instructions (Integer only)
+ // ======================================
+ // For float arithmetic instructions scroll further down
+ // Add logical differs in the condition codes set!
+
+ // add registers
+ inline void z_ar( Register r1, Register r2); // add r1 = r1 + r2 ; int32
+ inline void z_agr( Register r1, Register r2); // add r1 = r1 + r2 ; int64
+ inline void z_agfr( Register r1, Register r2); // add r1 = r1 + r2 ; int64 <- int32
+ inline void z_ark( Register r1, Register r2, Register r3); // add r1 = r2 + r3 ; int32
+ inline void z_agrk( Register r1, Register r2, Register r3); // add r1 = r2 + r3 ; int64
+
+ inline void z_alr( Register r1, Register r2); // add logical r1 = r1 + r2 ; int32
+ inline void z_algr( Register r1, Register r2); // add logical r1 = r1 + r2 ; int64
+ inline void z_algfr(Register r1, Register r2); // add logical r1 = r1 + r2 ; int64 <- int32
+ inline void z_alrk( Register r1, Register r2, Register r3); // add logical r1 = r2 + r3 ; int32
+ inline void z_algrk(Register r1, Register r2, Register r3); // add logical r1 = r2 + r3 ; int64
+ inline void z_alcgr(Register r1, Register r2); // add logical with carry r1 = r1 + r2 + c ; int64
+
+ // add immediate
+ inline void z_ahi( Register r1, int64_t i2); // add r1 = r1 + i2_imm16 ; int32
+ inline void z_afi( Register r1, int64_t i2); // add r1 = r1 + i2_imm32 ; int32
+ inline void z_alfi( Register r1, int64_t i2); // add r1 = r1 + i2_imm32 ; int32
+ inline void z_aghi( Register r1, int64_t i2); // add logical r1 = r1 + i2_imm16 ; int64
+ inline void z_agfi( Register r1, int64_t i2); // add r1 = r1 + i2_imm32 ; int64
+ inline void z_algfi(Register r1, int64_t i2); // add logical r1 = r1 + i2_imm32 ; int64
+ inline void z_ahik( Register r1, Register r3, int64_t i2); // add r1 = r3 + i2_imm16 ; int32
+ inline void z_aghik(Register r1, Register r3, int64_t i2); // add r1 = r3 + i2_imm16 ; int64
+ inline void z_aih( Register r1, int64_t i2); // add r1 = r1 + i2_imm32 ; int32 (HiWord)
+
+ // add memory
+ inline void z_a( Register r1, int64_t d2, Register x2, Register b2); // add r1 = r1 + *(d2_uimm12+x2+b2) ; int32
+ inline void z_ay( Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2) ; int32
+ inline void z_ag( Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2) ; int64
+ inline void z_agf( Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2) ; int64 <- int32
+ inline void z_al( Register r1, int64_t d2, Register x2, Register b2);// add logical r1 = r1 + *(d2_uimm12+x2+b2) ; int32
+ inline void z_aly( Register r1, int64_t d2, Register x2, Register b2);// add logical r1 = r1 + *(d2_imm20+x2+b2) ; int32
+ inline void z_alg( Register r1, int64_t d2, Register x2, Register b2);// add logical r1 = r1 + *(d2_imm20+x2+b2) ; int64
+ inline void z_algf(Register r1, int64_t d2, Register x2, Register b2);// add logical r1 = r1 + *(d2_imm20+x2+b2) ; int64 <- int32
+ inline void z_a( Register r1, const Address& a); // add r1 = r1 + *(a) ; int32
+ inline void z_ay( Register r1, const Address& a); // add r1 = r1 + *(a) ; int32
+ inline void z_al( Register r1, const Address& a); // add logical r1 = r1 + *(a) ; int32
+ inline void z_aly( Register r1, const Address& a); // add logical r1 = r1 + *(a) ; int32
+ inline void z_ag( Register r1, const Address& a); // add r1 = r1 + *(a) ; int64
+ inline void z_agf( Register r1, const Address& a); // add r1 = r1 + *(a) ; int64 <- int32
+ inline void z_alg( Register r1, const Address& a); // add logical r1 = r1 + *(a) ; int64
+ inline void z_algf(Register r1, const Address& a); // add logical r1 = r1 + *(a) ; int64 <- int32
+
+
+ inline void z_alhsik( Register r1, Register r3, int64_t i2); // add logical r1 = r3 + i2_imm16 ; int32
+ inline void z_alghsik(Register r1, Register r3, int64_t i2); // add logical r1 = r3 + i2_imm16 ; int64
+
+ inline void z_asi( int64_t d1, Register b1, int64_t i2); // add *(d1_imm20+b1) += i2_imm8 ; int32 -- z10
+ inline void z_agsi( int64_t d1, Register b1, int64_t i2); // add *(d1_imm20+b1) += i2_imm8 ; int64 -- z10
+ inline void z_alsi( int64_t d1, Register b1, int64_t i2); // add logical *(d1_imm20+b1) += i2_imm8 ; uint32 -- z10
+ inline void z_algsi(int64_t d1, Register b1, int64_t i2); // add logical *(d1_imm20+b1) += i2_imm8 ; uint64 -- z10
+ inline void z_asi( const Address& d, int64_t i2); // add *(d) += i2_imm8 ; int32 -- z10
+ inline void z_agsi( const Address& d, int64_t i2); // add *(d) += i2_imm8 ; int64 -- z10
+ inline void z_alsi( const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint32 -- z10
+ inline void z_algsi(const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint64 -- z10
+
+ // negate
+ inline void z_lcr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int32
+ inline void z_lcgr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int64
+ inline void z_lcgfr(Register r1, Register r2); // neg r1 = -r2 ; int64 <- int32
+ inline void z_lnr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int32
+ inline void z_lngr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int64
+ inline void z_lngfr(Register r1, Register r2); // neg r1 = -|r2| ; int64 <- int32
+
+ // subtract instructions
+ // sub registers
+ inline void z_sr( Register r1, Register r2); // sub r1 = r1 - r2 ; int32
+ inline void z_sgr( Register r1, Register r2); // sub r1 = r1 - r2 ; int64
+ inline void z_sgfr( Register r1, Register r2); // sub r1 = r1 - r2 ; int64 <- int32
+ inline void z_srk( Register r1, Register r2, Register r3); // sub r1 = r2 - r3 ; int32
+ inline void z_sgrk( Register r1, Register r2, Register r3); // sub r1 = r2 - r3 ; int64
+
+ inline void z_slr( Register r1, Register r2); // sub logical r1 = r1 - r2 ; int32
+ inline void z_slgr( Register r1, Register r2); // sub logical r1 = r1 - r2 ; int64
+ inline void z_slgfr(Register r1, Register r2); // sub logical r1 = r1 - r2 ; int64 <- int32
+ inline void z_slrk( Register r1, Register r2, Register r3); // sub logical r1 = r2 - r3 ; int32
+ inline void z_slgrk(Register r1, Register r2, Register r3); // sub logical r1 = r2 - r3 ; int64
+ inline void z_slfi( Register r1, int64_t i2); // sub logical r1 = r1 - i2_uimm32 ; int32
+ inline void z_slgfi(Register r1, int64_t i2); // sub logical r1 = r1 - i2_uimm32 ; int64
+
+ // sub memory
+ inline void z_s( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_uimm12+x2+b2) ; int32
+ inline void z_sy( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int32
+ inline void z_sg( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int64
+ inline void z_sgf( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int64 - int32
+ inline void z_slg( Register r1, int64_t d2, Register x2, Register b2); // sub logical r1 = r1 - *(d2_imm20+x2+b2) ; uint64
+ inline void z_slgf(Register r1, int64_t d2, Register x2, Register b2); // sub logical r1 = r1 - *(d2_imm20+x2+b2) ; uint64 - uint32
+ inline void z_s( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int32
+ inline void z_sy( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int32
+ inline void z_sg( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int64
+ inline void z_sgf( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int64 - int32
+ inline void z_slg( Register r1, const Address& a); // sub logical r1 = r1 - *(a) ; uint64
+ inline void z_slgf(Register r1, const Address& a); // sub logical r1 = r1 - *(a) ; uint64 - uint32
+
+ inline void z_sh( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_uimm12+x2+b2) ; int32 - int16
+ inline void z_shy( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int32 - int16
+ inline void z_sh( Register r1, const Address &a); // sub r1 = r1 - *(a) ; int32 - int16
+ inline void z_shy( Register r1, const Address &a); // sub r1 = r1 - *(a) ; int32 - int16
+
+ // Multiplication instructions
+ // mul registers
+ inline void z_msr( Register r1, Register r2); // mul r1 = r1 * r2 ; int32
+ inline void z_msgr( Register r1, Register r2); // mul r1 = r1 * r2 ; int64
+ inline void z_msgfr(Register r1, Register r2); // mul r1 = r1 * r2 ; int64 <- int32
+ inline void z_mlr( Register r1, Register r2); // mul logical (r1,r1+1) = (r1+1) * r2 ; uint64 <- uint32, needs even/odd reg pair!
+ inline void z_mlgr( Register r1, Register r2); // mul logical (r1,r1+1) = (r1+1) * r2 ; uint128 <- uint64, needs even/odd reg pair!
+ // mul register - memory
+ inline void z_mhy( Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2_imm20+x2+b2) ; int32 <- int16
+ inline void z_msy( Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2_imm20+x2+b2) ; int32
+ inline void z_msg( Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2_imm20+x2+b2) ; int64
+ inline void z_msgf(Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2_imm20+x2+b2) ; int64 <- int32
+ inline void z_ml( Register r1, int64_t d2, Register x2, Register b2); // mul logical (r1,r1+1) = (r1+1) * *(d2_imm20+x2+b2) ; uint64 <- uint32, needs even/odd reg pair!
+ inline void z_mlg( Register r1, int64_t d2, Register x2, Register b2); // mul logical (r1,r1+1) = (r1+1) * *(d2_imm20+x2+b2) ; uint128 <- uint64, needs even/odd reg pair!
+ inline void z_mhy( Register r1, const Address& a); // mul r1 = r1 * *(a) ; int32 <- int16
+ inline void z_msy( Register r1, const Address& a); // mul r1 = r1 * *(a) ; int32
+ inline void z_msg( Register r1, const Address& a); // mul r1 = r1 * *(a) ; int64
+ inline void z_msgf(Register r1, const Address& a); // mul r1 = r1 * *(a) ; int64 <- int32
+ inline void z_ml( Register r1, const Address& a); // mul logical (r1,r1+1) = (r1+1) * *(a) ; uint64 <- uint32, needs even/odd reg pair!
+ inline void z_mlg( Register r1, const Address& a); // mul logical (r1,r1+1) = (r1+1) * *(a) ; uint128 <- uint64, needs even/odd reg pair!
+
+ inline void z_msfi( Register r1, int64_t i2); // mult r1 = r1 * i2_imm32; int32 -- z10
+ inline void z_msgfi(Register r1, int64_t i2); // mult r1 = r1 * i2_imm32; int64 -- z10
+ inline void z_mhi( Register r1, int64_t i2); // mult r1 = r1 * i2_imm16; int32
+ inline void z_mghi( Register r1, int64_t i2); // mult r1 = r1 * i2_imm16; int64
+
+ // Division instructions
+ inline void z_dsgr( Register r1, Register r2); // div r1+1 = (r1+1) / r2, r1 = remainder ; int64/int64 needs even/odd reg pair!
+ inline void z_dsgfr(Register r1, Register r2); // div r1+1 = (r1+1) / r2, r1 = remainder ; int64/int32 needs even/odd reg pair!
+
+
+ // Logic instructions
+ // ===================
+
+ // and
+ inline void z_n( Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_uimm12+x2+b2) ; int32
+ inline void z_ny( Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_imm20+x2+b2) ; int32
+ inline void z_ng( Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_imm20+x2+b2) ; int64
+ inline void z_n( Register r1, const Address& a); // and r1 = r1 & *(a) ; int32
+ inline void z_ny( Register r1, const Address& a); // and r1 = r1 & *(a) ; int32
+ inline void z_ng( Register r1, const Address& a); // and r1 = r1 & *(a) ; int64
+
+ inline void z_nr( Register r1, Register r2); // and r1 = r1 & r2 ; int32
+ inline void z_ngr( Register r1, Register r2); // and r1 = r1 & r2 ; int64
+ inline void z_nrk( Register r1, Register r2, Register r3); // and r1 = r2 & r3 ; int32
+ inline void z_ngrk(Register r1, Register r2, Register r3); // and r1 = r2 & r3 ; int64
+
+ inline void z_nihh(Register r1, int64_t i2); // and r1 = r1 & i2_imm16 ; and only for bits 0-15
+ inline void z_nihl(Register r1, int64_t i2); // and r1 = r1 & i2_imm16 ; and only for bits 16-31
+ inline void z_nilh(Register r1, int64_t i2); // and r1 = r1 & i2_imm16 ; and only for bits 32-47
+ inline void z_nill(Register r1, int64_t i2); // and r1 = r1 & i2_imm16 ; and only for bits 48-63
+ inline void z_nihf(Register r1, int64_t i2); // and r1 = r1 & i2_imm32 ; and only for bits 0-31
+ inline void z_nilf(Register r1, int64_t i2); // and r1 = r1 & i2_imm32 ; and only for bits 32-63 see also MacroAssembler::nilf.
+
+ // or
+ inline void z_o( Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_uimm12+x2+b2) ; int32
+ inline void z_oy( Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_imm20+x2+b2) ; int32
+ inline void z_og( Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_imm20+x2+b2) ; int64
+ inline void z_o( Register r1, const Address& a); // or r1 = r1 | *(a) ; int32
+ inline void z_oy( Register r1, const Address& a); // or r1 = r1 | *(a) ; int32
+ inline void z_og( Register r1, const Address& a); // or r1 = r1 | *(a) ; int64
+
+ inline void z_or( Register r1, Register r2); // or r1 = r1 | r2; int32
+ inline void z_ogr( Register r1, Register r2); // or r1 = r1 | r2; int64
+ inline void z_ork( Register r1, Register r2, Register r3); // or r1 = r2 | r3 ; int32
+ inline void z_ogrk(Register r1, Register r2, Register r3); // or r1 = r2 | r3 ; int64
+
+ inline void z_oihh(Register r1, int64_t i2); // or r1 = r1 | i2_imm16 ; or only for bits 0-15
+ inline void z_oihl(Register r1, int64_t i2); // or r1 = r1 | i2_imm16 ; or only for bits 16-31
+ inline void z_oilh(Register r1, int64_t i2); // or r1 = r1 | i2_imm16 ; or only for bits 32-47
+ inline void z_oill(Register r1, int64_t i2); // or r1 = r1 | i2_imm16 ; or only for bits 48-63
+ inline void z_oihf(Register r1, int64_t i2); // or r1 = r1 | i2_imm32 ; or only for bits 0-31
+ inline void z_oilf(Register r1, int64_t i2); // or r1 = r1 | i2_imm32 ; or only for bits 32-63
+
+ // xor
+ inline void z_x( Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_uimm12+x2+b2) ; int32
+ inline void z_xy( Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_imm20+x2+b2) ; int32
+ inline void z_xg( Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_imm20+x2+b2) ; int64
+ inline void z_x( Register r1, const Address& a); // xor r1 = r1 ^ *(a) ; int32
+ inline void z_xy( Register r1, const Address& a); // xor r1 = r1 ^ *(a) ; int32
+ inline void z_xg( Register r1, const Address& a); // xor r1 = r1 ^ *(a) ; int64
+
+ inline void z_xr( Register r1, Register r2); // xor r1 = r1 ^ r2 ; int32
+ inline void z_xgr( Register r1, Register r2); // xor r1 = r1 ^ r2 ; int64
+ inline void z_xrk( Register r1, Register r2, Register r3); // xor r1 = r2 ^ r3 ; int32
+ inline void z_xgrk(Register r1, Register r2, Register r3); // xor r1 = r2 ^ r3 ; int64
+
+ inline void z_xihf(Register r1, int64_t i2); // xor r1 = r1 ^ i2_imm32 ; xor only for bits 0-31
+ inline void z_xilf(Register r1, int64_t i2); // xor r1 = r1 ^ i2_imm32 ; xor only for bits 32-63
+
+ // shift
+ inline void z_sla( Register r1, int64_t d2, Register b2=Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved!
+ inline void z_slag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, only 63 bits shifted, sign preserved!
+ inline void z_sra( Register r1, int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, sign extended
+ inline void z_srag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, sign extended
+ inline void z_sll( Register r1, int64_t d2, Register b2=Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, zeros added
+ inline void z_sllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, zeros added
+ inline void z_srl( Register r1, int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, zero extended
+ inline void z_srlg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, zero extended
+
+ // rotate
+ inline void z_rll( Register r1, Register r3, int64_t d2, Register b2=Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int32 -- z10
+ inline void z_rllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int64 -- z10
+
+ // rotate then AND/XOR/OR/insert
+ inline void z_rnsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then AND selected bits -- z196
+ inline void z_rxsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then XOR selected bits -- z196
+ inline void z_rosbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then OR selected bits -- z196
+ inline void z_risbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool zero_rest = false); // rotate then INS selected bits -- z196
+
+
+ // memory-immediate instructions (8-bit immediate)
+ // ===============================================
+
+ inline void z_cli( int64_t d1, Register b1, int64_t i2); // compare *(d1_uimm12+b1) with i2_imm8 ; int8
+ inline void z_mvi( int64_t d1, Register b1, int64_t i2); // store *(d1_uimm12+b1) = i2_imm8 ; int8
+ inline void z_tm( int64_t d1, Register b1, int64_t i2); // test *(d1_uimm12+b1) against mask i2_imm8 ; int8
+ inline void z_ni( int64_t d1, Register b1, int64_t i2); // and *(d1_uimm12+b1) &= i2_imm8 ; int8
+ inline void z_oi( int64_t d1, Register b1, int64_t i2); // or *(d1_uimm12+b1) |= i2_imm8 ; int8
+ inline void z_xi( int64_t d1, Register b1, int64_t i2); // xor *(d1_uimm12+b1) ^= i2_imm8 ; int8
+ inline void z_cliy(int64_t d1, Register b1, int64_t i2); // compare *(d1_imm20+b1) with i2_imm8 ; int8
+ inline void z_mviy(int64_t d1, Register b1, int64_t i2); // store *(d1_imm20+b1) = i2_imm8 ; int8
+ inline void z_tmy( int64_t d1, Register b1, int64_t i2); // test *(d1_imm20+b1) against mask i2_imm8 ; int8
+ inline void z_niy( int64_t d1, Register b1, int64_t i2); // and *(d1_imm20+b1) &= i2_imm8 ; int8
+ inline void z_oiy( int64_t d1, Register b1, int64_t i2); // or *(d1_imm20+b1) |= i2_imm8 ; int8
+ inline void z_xiy( int64_t d1, Register b1, int64_t i2); // xor *(d1_imm20+b1) ^= i2_imm8 ; int8
+ inline void z_cli( const Address& a, int64_t imm8); // compare *(a) with imm8 ; int8
+ inline void z_mvi( const Address& a, int64_t imm8); // store *(a) = imm8 ; int8
+ inline void z_tm( const Address& a, int64_t imm8); // test *(a) against mask imm8 ; int8
+ inline void z_ni( const Address& a, int64_t imm8); // and *(a) &= imm8 ; int8
+ inline void z_oi( const Address& a, int64_t imm8); // or *(a) |= imm8 ; int8
+ inline void z_xi( const Address& a, int64_t imm8); // xor *(a) ^= imm8 ; int8
+ inline void z_cliy(const Address& a, int64_t imm8); // compare *(a) with imm8 ; int8
+ inline void z_mviy(const Address& a, int64_t imm8); // store *(a) = imm8 ; int8
+ inline void z_tmy( const Address& a, int64_t imm8); // test *(a) against mask imm8 ; int8
+ inline void z_niy( const Address& a, int64_t imm8); // and *(a) &= imm8 ; int8
+ inline void z_oiy( const Address& a, int64_t imm8); // or *(a) |= imm8 ; int8
+ inline void z_xiy( const Address& a, int64_t imm8); // xor *(a) ^= imm8 ; int8
+
+
+ //------------------------------
+ // Interlocked-Update
+ //------------------------------
+ inline void z_laa( Register r1, Register r3, int64_t d2, Register b2); // load and add int32, signed -- z196
+ inline void z_laag( Register r1, Register r3, int64_t d2, Register b2); // load and add int64, signed -- z196
+ inline void z_laal( Register r1, Register r3, int64_t d2, Register b2); // load and add int32, unsigned -- z196
+ inline void z_laalg(Register r1, Register r3, int64_t d2, Register b2); // load and add int64, unsigned -- z196
+ inline void z_lan( Register r1, Register r3, int64_t d2, Register b2); // load and and int32 -- z196
+ inline void z_lang( Register r1, Register r3, int64_t d2, Register b2); // load and and int64 -- z196
+ inline void z_lax( Register r1, Register r3, int64_t d2, Register b2); // load and xor int32 -- z196
+ inline void z_laxg( Register r1, Register r3, int64_t d2, Register b2); // load and xor int64 -- z196
+ inline void z_lao( Register r1, Register r3, int64_t d2, Register b2); // load and or int32 -- z196
+ inline void z_laog( Register r1, Register r3, int64_t d2, Register b2); // load and or int64 -- z196
+
+ inline void z_laa( Register r1, Register r3, const Address& a); // load and add int32, signed -- z196
+ inline void z_laag( Register r1, Register r3, const Address& a); // load and add int64, signed -- z196
+ inline void z_laal( Register r1, Register r3, const Address& a); // load and add int32, unsigned -- z196
+ inline void z_laalg(Register r1, Register r3, const Address& a); // load and add int64, unsigned -- z196
+ inline void z_lan( Register r1, Register r3, const Address& a); // load and and int32 -- z196
+ inline void z_lang( Register r1, Register r3, const Address& a); // load and and int64 -- z196
+ inline void z_lax( Register r1, Register r3, const Address& a); // load and xor int32 -- z196
+ inline void z_laxg( Register r1, Register r3, const Address& a); // load and xor int64 -- z196
+ inline void z_lao( Register r1, Register r3, const Address& a); // load and or int32 -- z196
+ inline void z_laog( Register r1, Register r3, const Address& a); // load and or int64 -- z196
+
+ //--------------------------------
+ // Execution Prediction
+ //--------------------------------
+ inline void z_pfd( int64_t m1, int64_t d2, Register x2, Register b2); // prefetch data at d2+x2+b2, m1 selects the prefetch kind
+ inline void z_pfd( int64_t m1, Address a); // prefetch data at a, m1 selects the prefetch kind
+ inline void z_pfdrl(int64_t m1, int64_t i2); // prefetch data, pc-relative (i2), m1 selects the prefetch kind
+ inline void z_bpp( int64_t m1, int64_t i2, int64_t d3, Register b3); // branch prediction -- EC12
+ inline void z_bprp( int64_t m1, int64_t i2, int64_t i3); // branch prediction -- EC12
+
+ //-------------------------------
+ // Transaction Control
+ //-------------------------------
+ inline void z_tbegin(int64_t d1, Register b1, int64_t i2); // begin transaction -- EC12
+ inline void z_tbeginc(int64_t d1, Register b1, int64_t i2); // begin transaction (constrained) -- EC12
+ inline void z_tend(); // end transaction -- EC12
+ inline void z_tabort(int64_t d2, Register b2); // abort transaction -- EC12
+ inline void z_etnd(Register r1); // extract tx nesting depth -- EC12
+ inline void z_ppa(Register r1, Register r2, int64_t m3); // perform processor assist -- EC12
+
+ //---------------------------------
+ // Conditional Execution
+ //---------------------------------
+ inline void z_locr( Register r1, Register r2, branch_condition cc); // if (cc) load r1 = r2 ; int32 -- z196
+ inline void z_locgr(Register r1, Register r2, branch_condition cc); // if (cc) load r1 = r2 ; int64 -- z196
+ inline void z_loc( Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) load r1 = *(d2_simm20+b2) ; int32 -- z196
+ inline void z_locg( Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) load r1 = *(d2_simm20+b2) ; int64 -- z196
+ inline void z_loc( Register r1, const Address& a, branch_condition cc); // if (cc) load r1 = *(a) ; int32 -- z196
+ inline void z_locg( Register r1, const Address& a, branch_condition cc); // if (cc) load r1 = *(a) ; int64 -- z196
+ inline void z_stoc( Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) store *(d2_simm20+b2) = r1 ; int32 -- z196
+ inline void z_stocg(Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) store *(d2_simm20+b2) = r1 ; int64 -- z196
+
+
+ // Complex CISC instructions
+ // ==========================
+
+ inline void z_cksm(Register r1, Register r2); // checksum. This is NOT CRC32
+ inline void z_km( Register r1, Register r2); // cipher message
+ inline void z_kmc( Register r1, Register r2); // cipher message with chaining
+ inline void z_kimd(Register r1, Register r2); // compute intermediate msg digest (SHA)
+ inline void z_klmd(Register r1, Register r2); // compute last msg digest (SHA)
+ inline void z_kmac(Register r1, Register r2); // compute msg authentication code
+
+ inline void z_ex(Register r1, int64_t d2, Register x2, Register b2);// execute
+ inline void z_exrl(Register r1, int64_t i2); // execute relative long -- z10
+ inline void z_exrl(Register r1, address a2); // execute relative long -- z10
+
+ inline void z_ectg(int64_t d1, Register b1, int64_t d2, Register b2, Register r3); // extract cpu time
+ inline void z_ecag(Register r1, Register r3, int64_t d2, Register b2); // extract CPU attribute
+
+ inline void z_srst(Register r1, Register r2); // search string
+ inline void z_srstu(Register r1, Register r2); // search string unicode
+
+ inline void z_mvc(const Address& d, const Address& s, int64_t l); // move l bytes
+ inline void z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // move l+1 bytes
+ inline void z_mvcle(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // move region of memory
+
+ inline void z_stfle(int64_t d2, Register b2); // store facility list extended
+
+ inline void z_nc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);// and *(d1+b1) = *(d1+b1) & *(d2+b2) ; d1, d2: uimm12, ands l+1 bytes
+ inline void z_oc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);// or *(d1+b1) = *(d1+b1) | *(d2+b2) ; d1, d2: uimm12, ors l+1 bytes
+ inline void z_xc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);// xor *(d1+b1) = *(d1+b1) ^ *(d2+b2) ; d1, d2: uimm12, xors l+1 bytes
+ inline void z_nc(Address dst, int64_t len, Address src2); // and *dst = *dst & *src2, ands len bytes in memory
+ inline void z_oc(Address dst, int64_t len, Address src2); // or *dst = *dst | *src2, ors len bytes in memory
+ inline void z_xc(Address dst, int64_t len, Address src2); // xor *dst = *dst ^ *src2, xors len bytes in memory
+
+ // compare instructions
+ inline void z_clc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // compare (*(d1_uimm12+b1), *(d2_uimm12+b2)) ; compare l bytes (NOTE(review): z_mvc/z_nc with this signature say l+1 bytes -- confirm)
+ inline void z_clcle(Register r1, Register r3, int64_t d2, Register b2); // compare logical long extended, see docu
+ inline void z_clclu(Register r1, Register r3, int64_t d2, Register b2); // compare logical long unicode, see docu
+
+ // Translate characters
+ inline void z_troo(Register r1, Register r2, int64_t m3); // translate one to one (1-byte source chars -> 1-byte result chars)
+ inline void z_trot(Register r1, Register r2, int64_t m3); // translate one to two (1-byte source chars -> 2-byte result chars)
+ inline void z_trto(Register r1, Register r2, int64_t m3); // translate two to one (2-byte source chars -> 1-byte result chars)
+ inline void z_trtt(Register r1, Register r2, int64_t m3); // translate two to two (2-byte source chars -> 2-byte result chars)
+
+
+ // Floatingpoint instructions
+ // ==========================
+
+ // compare instructions
+ inline void z_cebr(FloatRegister r1, FloatRegister r2); // compare (r1, r2) ; float
+ inline void z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm12+x2+b2)) ; float
+ inline void z_ceb(FloatRegister r1, const Address &a); // compare (r1, *(a)) ; float
+ inline void z_cdbr(FloatRegister r1, FloatRegister r2); // compare (r1, r2) ; double
+ inline void z_cdb(FloatRegister r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm12+x2+b2)) ; double
+ inline void z_cdb(FloatRegister r1, const Address &a); // compare (r1, *(a)) ; double
+
+ // load instructions
+ inline void z_le( FloatRegister r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2) ; float
+ inline void z_ley(FloatRegister r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; float
+ inline void z_ld( FloatRegister r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2) ; double
+ inline void z_ldy(FloatRegister r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; double
+ inline void z_le( FloatRegister r1, const Address &a); // load r1 = *(a) ; float
+ inline void z_ley(FloatRegister r1, const Address &a); // load r1 = *(a) ; float
+ inline void z_ld( FloatRegister r1, const Address &a); // load r1 = *(a) ; double
+ inline void z_ldy(FloatRegister r1, const Address &a); // load r1 = *(a) ; double
+
+ // store instructions
+ inline void z_ste( FloatRegister r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; float
+ inline void z_stey(FloatRegister r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; float
+ inline void z_std( FloatRegister r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; double
+ inline void z_stdy(FloatRegister r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; double
+ inline void z_ste( FloatRegister r1, const Address &a); // store *(a) = r1 ; float
+ inline void z_stey(FloatRegister r1, const Address &a); // store *(a) = r1 ; float
+ inline void z_std( FloatRegister r1, const Address &a); // store *(a) = r1 ; double
+ inline void z_stdy(FloatRegister r1, const Address &a); // store *(a) = r1 ; double
+
+ // load and store immediates
+ inline void z_lzer(FloatRegister r1); // r1 = 0 ; single
+ inline void z_lzdr(FloatRegister r1); // r1 = 0 ; double
+
+ // Move and Convert instructions
+ inline void z_ler(FloatRegister r1, FloatRegister r2); // move r1 = r2 ; float
+ inline void z_ldr(FloatRegister r1, FloatRegister r2); // move r1 = r2 ; double
+ inline void z_ledbr(FloatRegister r1, FloatRegister r2); // conv / round r1 = r2 ; float <- double
+ inline void z_ldebr(FloatRegister r1, FloatRegister r2); // conv r1 = r2 ; double <- float
+
+ // convert integer (general register) to float/double (float register)
+ inline void z_cefbr( FloatRegister r1, Register r2); // r1 = r2; float <-- int32
+ inline void z_cdfbr( FloatRegister r1, Register r2); // r1 = r2; double <-- int32
+ inline void z_cegbr( FloatRegister r1, Register r2); // r1 = r2; float <-- int64
+ inline void z_cdgbr( FloatRegister r1, Register r2); // r1 = r2; double <-- int64
+
+ // rounding mode for float-2-int conversions
+ inline void z_cfebr(Register r1, FloatRegister r2, RoundingMode m); // conv r1 = r2 ; int32 <-- float
+ inline void z_cfdbr(Register r1, FloatRegister r2, RoundingMode m); // conv r1 = r2 ; int32 <-- double
+ inline void z_cgebr(Register r1, FloatRegister r2, RoundingMode m); // conv r1 = r2 ; int64 <-- float
+ inline void z_cgdbr(Register r1, FloatRegister r2, RoundingMode m); // conv r1 = r2 ; int64 <-- double
+
+ inline void z_ldgr(FloatRegister r1, Register r2); // fr1 = r2 ; raw 64-bit copy, no conversion -- z10
+ inline void z_lgdr(Register r1, FloatRegister r2); // r1 = fr2 ; raw 64-bit copy, no conversion -- z10
+
+
+ // ADD
+ inline void z_aebr(FloatRegister f1, FloatRegister f2); // f1 = f1 + f2 ; float
+ inline void z_adbr(FloatRegister f1, FloatRegister f2); // f1 = f1 + f2 ; double
+ inline void z_aeb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 + *(d2+x2+b2) ; float
+ inline void z_adb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 + *(d2+x2+b2) ; double
+ inline void z_aeb( FloatRegister f1, const Address& a); // f1 = f1 + *(a) ; float
+ inline void z_adb( FloatRegister f1, const Address& a); // f1 = f1 + *(a) ; double
+
+ // SUB
+ inline void z_sebr(FloatRegister f1, FloatRegister f2); // f1 = f1 - f2 ; float
+ inline void z_sdbr(FloatRegister f1, FloatRegister f2); // f1 = f1 - f2 ; double
+ inline void z_seb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 - *(d2+x2+b2) ; float
+ inline void z_sdb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 - *(d2+x2+b2) ; double
+ inline void z_seb( FloatRegister f1, const Address& a); // f1 = f1 - *(a) ; float
+ inline void z_sdb( FloatRegister f1, const Address& a); // f1 = f1 - *(a) ; double
+ // negate
+ inline void z_lcebr(FloatRegister r1, FloatRegister r2); // neg r1 = -r2 ; float
+ inline void z_lcdbr(FloatRegister r1, FloatRegister r2); // neg r1 = -r2 ; double
+
+ // Absolute value, monadic if fr2 == noreg.
+ inline void z_lpdbr( FloatRegister fr1, FloatRegister fr2 = fnoreg); // fr1 = |fr2|
+
+
+ // MUL
+ inline void z_meebr(FloatRegister f1, FloatRegister f2); // f1 = f1 * f2 ; float
+ inline void z_mdbr( FloatRegister f1, FloatRegister f2); // f1 = f1 * f2 ; double
+ inline void z_meeb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 * *(d2+x2+b2) ; float
+ inline void z_mdb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 * *(d2+x2+b2) ; double
+ inline void z_meeb( FloatRegister f1, const Address& a); // f1 = f1 * *(a) ; float
+ inline void z_mdb( FloatRegister f1, const Address& a); // f1 = f1 * *(a) ; double
+
+ // DIV
+ inline void z_debr( FloatRegister f1, FloatRegister f2); // f1 = f1 / f2 ; float
+ inline void z_ddbr( FloatRegister f1, FloatRegister f2); // f1 = f1 / f2 ; double
+ inline void z_deb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 / *(d2+x2+b2) ; float
+ inline void z_ddb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 / *(d2+x2+b2) ; double
+ inline void z_deb( FloatRegister f1, const Address& a); // f1 = f1 / *(a) ; float
+ inline void z_ddb( FloatRegister f1, const Address& a); // f1 = f1 / *(a) ; double
+
+ // square root
+ inline void z_sqdbr(FloatRegister fr1, FloatRegister fr2); // fr1 = sqrt(fr2) ; double
+ inline void z_sqdb( FloatRegister fr1, int64_t d2, Register x2, Register b2); // fr1 = sqrt(*(d2+x2+b2)) ; double
+ inline void z_sqdb( FloatRegister fr1, int64_t d2, Register b2); // fr1 = sqrt(*(d2+b2)) ; double
+
+ // Nop instruction
+ // ===============
+
+ // branch never (nop)
+ inline void z_nop();
+
+ // ===============================================================================================
+
+ // Simplified emitters:
+ // ====================
+
+
+ // Some memory instructions without index register (just convenience).
+ inline void z_layz(Register r1, int64_t d2, Register b2 = Z_R0);
+ inline void z_lay(Register r1, int64_t d2, Register b2);
+ inline void z_laz(Register r1, int64_t d2, Register b2);
+ inline void z_la(Register r1, int64_t d2, Register b2);
+ inline void z_l(Register r1, int64_t d2, Register b2);
+ inline void z_ly(Register r1, int64_t d2, Register b2);
+ inline void z_lg(Register r1, int64_t d2, Register b2);
+ inline void z_st(Register r1, int64_t d2, Register b2);
+ inline void z_sty(Register r1, int64_t d2, Register b2);
+ inline void z_stg(Register r1, int64_t d2, Register b2);
+ inline void z_lgf(Register r1, int64_t d2, Register b2);
+ inline void z_lgh(Register r1, int64_t d2, Register b2);
+ inline void z_llgh(Register r1, int64_t d2, Register b2);
+ inline void z_llgf(Register r1, int64_t d2, Register b2);
+ inline void z_lgb(Register r1, int64_t d2, Register b2);
+ inline void z_cl( Register r1, int64_t d2, Register b2);
+ inline void z_c(Register r1, int64_t d2, Register b2);
+ inline void z_cg(Register r1, int64_t d2, Register b2);
+ inline void z_sh(Register r1, int64_t d2, Register b2);
+ inline void z_shy(Register r1, int64_t d2, Register b2);
+ inline void z_ste(FloatRegister r1, int64_t d2, Register b2);
+ inline void z_std(FloatRegister r1, int64_t d2, Register b2);
+ inline void z_stdy(FloatRegister r1, int64_t d2, Register b2);
+ inline void z_stey(FloatRegister r1, int64_t d2, Register b2);
+ inline void z_ld(FloatRegister r1, int64_t d2, Register b2);
+ inline void z_ldy(FloatRegister r1, int64_t d2, Register b2);
+ inline void z_le(FloatRegister r1, int64_t d2, Register b2);
+ inline void z_ley(FloatRegister r1, int64_t d2, Register b2);
+
+ inline void z_agf(Register r1, int64_t d2, Register b2);
+
+ inline void z_exrl(Register r1, Label& L);
+ inline void z_larl(Register r1, Label& L);
+ inline void z_bru( Label& L);
+ inline void z_brul(Label& L);
+ inline void z_brul(address a);
+ inline void z_brh( Label& L);
+ inline void z_brl( Label& L);
+ inline void z_bre( Label& L);
+ inline void z_brnh(Label& L);
+ inline void z_brnl(Label& L);
+ inline void z_brne(Label& L);
+ inline void z_brz( Label& L);
+ inline void z_brnz(Label& L);
+ inline void z_brnaz(Label& L);
+ inline void z_braz(Label& L);
+ inline void z_brnp(Label& L);
+
+ inline void z_btrue( Label& L);
+ inline void z_bfalse(Label& L);
+
+ inline void z_brno( Label& L);
+
+
+ inline void z_basr(Register r1, Register r2);
+ inline void z_brasl(Register r1, address a);
+ inline void z_brct(Register r1, address a);
+ inline void z_brct(Register r1, Label& L);
+
+ inline void z_brxh(Register r1, Register r3, address a);
+ inline void z_brxh(Register r1, Register r3, Label& L);
+
+ inline void z_brxle(Register r1, Register r3, address a);
+ inline void z_brxle(Register r1, Register r3, Label& L);
+
+ inline void z_brxhg(Register r1, Register r3, address a);
+ inline void z_brxhg(Register r1, Register r3, Label& L);
+
+ inline void z_brxlg(Register r1, Register r3, address a);
+ inline void z_brxlg(Register r1, Register r3, Label& L);
+
+ // Population count intrinsics.
+ inline void z_flogr(Register r1, Register r2); // find leftmost one
+ inline void z_popcnt(Register r1, Register r2); // population count
+ inline void z_ahhhr(Register r1, Register r2, Register r3); // ADD halfword high high
+ inline void z_ahhlr(Register r1, Register r2, Register r3); // ADD halfword high low
+
+ inline void z_tam();
+ inline void z_stck(int64_t d2, Register b2);
+ inline void z_stckf(int64_t d2, Register b2);
+ inline void z_stmg(Register r1, Register r3, int64_t d2, Register b2);
+ inline void z_lmg(Register r1, Register r3, int64_t d2, Register b2);
+
+ inline void z_cs( Register r1, Register r3, int64_t d2, Register b2);
+ inline void z_csy(Register r1, Register r3, int64_t d2, Register b2);
+ inline void z_csg(Register r1, Register r3, int64_t d2, Register b2);
+ inline void z_cs( Register r1, Register r3, const Address& a);
+ inline void z_csy(Register r1, Register r3, const Address& a);
+ inline void z_csg(Register r1, Register r3, const Address& a);
+
+ inline void z_cvd(Register r1, int64_t d2, Register x2, Register b2);
+ inline void z_cvdg(Register r1, int64_t d2, Register x2, Register b2);
+ inline void z_cvd(Register r1, int64_t d2, Register b2);
+ inline void z_cvdg(Register r1, int64_t d2, Register b2);
+
+ // Instruction queries:
+ // instruction properties and recognize emitted instructions
+ // ===========================================================
+
+ static int nop_size() { return 2; } // Always 2; presumably the nop length in bytes (is_z_nop(address) inspects one short) -- confirm.
+
+ static int z_brul_size() { return 6; } // Always 6; presumably the byte length of the instruction emitted by z_brul -- confirm.
+
+ static bool is_z_basr(short x) {  // x matches BASR_ZOPC under BASR_MASK.
+   return (x & BASR_MASK) == BASR_ZOPC;
+ }
+ static bool is_z_algr(long x) {  // x matches ALGR_ZOPC under RRE_MASK.
+   return (x & RRE_MASK) == ALGR_ZOPC;
+ }
+ static bool is_z_lb(long x) {  // x matches LB_ZOPC under LB_MASK.
+   return (x & LB_MASK) == LB_ZOPC;
+ }
+ static bool is_z_lh(int x) {  // x matches LH_ZOPC under LH_MASK.
+   return (x & LH_MASK) == LH_ZOPC;
+ }
+ static bool is_z_l(int x) {  // x matches L_ZOPC under L_MASK.
+   return (x & L_MASK) == L_ZOPC;
+ }
+ static bool is_z_lgr(long x) {  // x matches LGR_ZOPC under RRE_MASK.
+   return (x & RRE_MASK) == LGR_ZOPC;
+ }
+ static bool is_z_ly(long x) {  // x matches LY_ZOPC under LY_MASK.
+   return (x & LY_MASK) == LY_ZOPC;
+ }
+ static bool is_z_lg(long x) {  // x matches LG_ZOPC under LG_MASK.
+   return (x & LG_MASK) == LG_ZOPC;
+ }
+ static bool is_z_llgh(long x) {  // x matches LLGH_ZOPC under LLGH_MASK.
+   return (x & LLGH_MASK) == LLGH_ZOPC;
+ }
+ static bool is_z_llgf(long x) {  // x matches LLGF_ZOPC under LLGF_MASK.
+   return (x & LLGF_MASK) == LLGF_ZOPC;
+ }
+ static bool is_z_le(int x) {  // x matches LE_ZOPC under LE_MASK.
+   return (x & LE_MASK) == LE_ZOPC;
+ }
+ static bool is_z_ld(int x) {  // x matches LD_ZOPC under LD_MASK.
+   return (x & LD_MASK) == LD_ZOPC;
+ }
+ static bool is_z_st(int x) {  // x matches ST_ZOPC under ST_MASK.
+   return (x & ST_MASK) == ST_ZOPC;
+ }
+ static bool is_z_stc(int x) {  // x matches STC_ZOPC under STC_MASK.
+   return (x & STC_MASK) == STC_ZOPC;
+ }
+ static bool is_z_stg(long x) {  // x matches STG_ZOPC under STG_MASK.
+   return (x & STG_MASK) == STG_ZOPC;
+ }
+ static bool is_z_sth(int x) {  // x matches STH_ZOPC under STH_MASK.
+   return (x & STH_MASK) == STH_ZOPC;
+ }
+ static bool is_z_ste(int x) {  // x matches STE_ZOPC under STE_MASK.
+   return (x & STE_MASK) == STE_ZOPC;
+ }
+ static bool is_z_std(int x) {  // x matches STD_ZOPC under STD_MASK.
+   return (x & STD_MASK) == STD_ZOPC;
+ }
+ static bool is_z_slag(long x) {  // x matches SLAG_ZOPC under SLAG_MASK.
+   return (x & SLAG_MASK) == SLAG_ZOPC;
+ }
+ static bool is_z_tmy(long x) {  // x matches TMY_ZOPC under TMY_MASK.
+   return (x & TMY_MASK) == TMY_ZOPC;
+ }
+ static bool is_z_tm(long x) {  // x matches TM_ZOPC under TM_MASK (both taken as unsigned int).
+   return (x & (unsigned int)TM_MASK) == (unsigned int)TM_ZOPC;
+ }
+ static bool is_z_bcr(long x) {  // x matches BCR_ZOPC under BCR_MASK.
+   return (x & BCR_MASK) == BCR_ZOPC;
+ }
+ static bool is_z_nop(long x) {  // A BCR whose low byte (0x00ff) is entirely zero.
+   return is_z_bcr(x) ? (x & 0x00ff) == 0 : false;
+ }
+ static bool is_z_nop(address x) {  // Reads one short at x and applies the is_z_nop(long) test to it.
+   return is_z_nop(static_cast<long>(*(short*)x));
+ }
+ static bool is_z_br(long x) {  // A BCR with all four bits of the 0x00f0 nibble set.
+   return is_z_bcr(x) ? (x & 0x00f0) == 0x00f0 : false;
+ }
+ static bool is_z_brc(long x, int cond) {  // BRC opcode match, and the 0x00f00000 nibble equals cond.
+   return ((x & BRC_MASK) == (unsigned int)BRC_ZOPC) && ((x & 0x00f00000U) == (cond<<20));
+ }
+ // Make use of lightweight sync.
+ static bool is_z_sync_full(long x) {  // BCR with low nibble 0 and the 0x00f0 nibble equal to bcondFullSync.
+   return is_z_bcr(x) && ((x & 0x000f) == 0x0000) && (((x & 0x00f0) >> 4) == bcondFullSync);
+ }
+ static bool is_z_sync_light(long x) {  // BCR with low nibble 0 and the 0x00f0 nibble equal to bcondLightSync.
+   return is_z_bcr(x) && ((x & 0x000f) == 0x0000) && (((x & 0x00f0) >> 4) == bcondLightSync);
+ }
+ static bool is_z_sync(long x) {  // Either sync flavor (light or full) is accepted.
+   return is_z_sync_light(x) || is_z_sync_full(x);
+ }
+
+ static bool is_z_brasl(long x) {  // x matches BRASL_ZOPC under BRASL_MASK.
+   return (x & BRASL_MASK) == BRASL_ZOPC;
+ }
+ static bool is_z_brasl(address a) {  // Loads a long at a, shifts out its low 16 bits, then tests the rest.
+   const long insn = *(long*)a >> 16;
+   return is_z_brasl(insn);
+ }
+ static bool is_z_larl(long x) {  // x matches LARL_ZOPC under LARL_MASK.
+   return (x & LARL_MASK) == LARL_ZOPC;
+ }
+ static bool is_z_lgrl(long x) {  // x matches LGRL_ZOPC under LGRL_MASK.
+   return (x & LGRL_MASK) == LGRL_ZOPC;
+ }
+ static bool is_z_lgrl(address a) {  // Loads a long at a, shifts out its low 16 bits, then tests the rest.
+   const long insn = *(long*)a >> 16;
+   return is_z_lgrl(insn);
+ }
+
+ static bool is_z_lghi(unsigned long x) {  // x matches LGHI_ZOPC under LGHI_MASK.
+   return (x & (unsigned int)LGHI_MASK) == (unsigned int)LGHI_ZOPC;
+ }
+
+ static bool is_z_llill(unsigned long x) {  // x matches LLILL_ZOPC under LLI_MASK.
+   return (x & (unsigned int)LLI_MASK) == (unsigned int)LLILL_ZOPC;
+ }
+ static bool is_z_llilh(unsigned long x) {  // x matches LLILH_ZOPC under LLI_MASK.
+   return (x & (unsigned int)LLI_MASK) == (unsigned int)LLILH_ZOPC;
+ }
+ static bool is_z_llihl(unsigned long x) {  // x matches LLIHL_ZOPC under LLI_MASK.
+   return (x & (unsigned int)LLI_MASK) == (unsigned int)LLIHL_ZOPC;
+ }
+ static bool is_z_llihh(unsigned long x) {  // x matches LLIHH_ZOPC under LLI_MASK.
+   return (x & (unsigned int)LLI_MASK) == (unsigned int)LLIHH_ZOPC;
+ }
+ static bool is_z_llilf(unsigned long x) {  // x matches LLILF_ZOPC under LLIF_MASK.
+   return (x & LLIF_MASK) == LLILF_ZOPC;
+ }
+ static bool is_z_llihf(unsigned long x) {  // x matches LLIHF_ZOPC under LLIF_MASK.
+   return (x & LLIF_MASK) == LLIHF_ZOPC;
+ }
+
+ static bool is_z_iill(unsigned long x) {  // x matches IILL_ZOPC under II_MASK.
+   return (x & (unsigned int)II_MASK) == (unsigned int)IILL_ZOPC;
+ }
+ static bool is_z_iilh(unsigned long x) {  // x matches IILH_ZOPC under II_MASK.
+   return (x & (unsigned int)II_MASK) == (unsigned int)IILH_ZOPC;
+ }
+ static bool is_z_iihl(unsigned long x) {  // x matches IIHL_ZOPC under II_MASK.
+   return (x & (unsigned int)II_MASK) == (unsigned int)IIHL_ZOPC;
+ }
+ static bool is_z_iihh(unsigned long x) {  // x matches IIHH_ZOPC under II_MASK.
+   return (x & (unsigned int)II_MASK) == (unsigned int)IIHH_ZOPC;
+ }
+ static bool is_z_iilf(unsigned long x) {  // x matches IILF_ZOPC under IIF_MASK.
+   return (x & IIF_MASK) == IILF_ZOPC;
+ }
+ static bool is_z_iihf(unsigned long x) {  // x matches IIHF_ZOPC under IIF_MASK.
+   return (x & IIF_MASK) == IIHF_ZOPC;
+ }
+
+ static inline bool is_equal(unsigned long inst, unsigned long idef);
+ static inline bool is_equal(unsigned long inst, unsigned long idef, unsigned long imask);
+ static inline bool is_equal(address iloc, unsigned long idef);
+ static inline bool is_equal(address iloc, unsigned long idef, unsigned long imask);
+
+ static inline bool is_sigtrap_range_check(address pc);
+ static inline bool is_sigtrap_zero_check(address pc);
+
+ //-----------------
+ // memory barriers
+ //-----------------
+ // machine barrier instructions:
+ //
+ // - z_sync Two-way memory barrier, aka fence.
+ // Only load-after-store-order is not guaranteed in the
+ // z/Architecture memory model, i.e. only 'fence' is needed.
+ //
+ // semantic barrier instructions:
+ // (as defined in orderAccess.hpp)
+ //
+ // - z_release orders Store|Store, empty implementation
+ // Load|Store
+ // - z_acquire orders Load|Store, empty implementation
+ // Load|Load
+ // - z_fence orders Store|Store, implemented as z_sync.
+ // Load|Store,
+ // Load|Load,
+ // Store|Load
+ //
+ // For this implementation to be correct, we need H/W fixes on (very) old H/W:
+ // For z990, it is Driver-55: MCL232 in the J13484 (i390/ML) Stream.
+ // For z9, it is Driver-67: MCL065 in the G40963 (i390/ML) Stream.
+ // These drivers are a prereq. Otherwise, memory synchronization will not work.
+
+ inline void z_sync();
+ inline void z_release();
+ inline void z_acquire();
+ inline void z_fence();
+
+ // Creation
+ Assembler(CodeBuffer* code) : AbstractAssembler(code) { } // Hands the CodeBuffer to AbstractAssembler; the body itself does nothing.
+
+};
+
+#endif // CPU_S390_VM_ASSEMBLER_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/assembler_s390.inline.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP
+#define CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+
+// Convention: Use Z_R0 and Z_R1 instead of Z_scratch_* in all
+// assembler_s390.* files.
+
+// Local implementation of byte emitters to help inlining.
+inline void Assembler::emit_16(int x) {  // Writes the low 16 bits of x at pc() and moves the section end past them.
+  CodeSection* section = code_section();
+  address insn_start = pc();
+  *(unsigned short*)insn_start = (unsigned short)x;
+  section->set_end(insn_start + sizeof(unsigned short));
+}
+
+inline void Assembler::emit_32(int x) {  // Writes x as one jint at pc() and moves the section end past it.
+  CodeSection* section = code_section();
+  address insn_start = pc();
+  *(jint*)insn_start = (jint)x;
+  section->set_end(insn_start + sizeof(jint));
+}
+
+inline void Assembler::emit_48(long x) {  // Writes bits 47..32 of x as a halfword at pc(), then the low 32 bits directly behind it.
+  CodeSection* section = code_section();
+  address insn_start = pc();
+  *(unsigned short*)insn_start = (unsigned short)(x >> 32);
+  *(jint*)(insn_start + sizeof(unsigned short)) = (jint)x;
+  section->set_end(insn_start + sizeof(jint) + sizeof(unsigned short));
+}
+
+// Support lightweight sync (from z196). Experimental as of now. For explanation see *.hpp file.
+inline void Assembler::z_sync() {
+  // Full sync is the fallback; the light variant is emitted only when VM_Version reports FastSync.
+  if (!VM_Version::has_FastSync()) {
+    z_bcr(bcondFullSync, Z_R0); return;
+  }
+  z_bcr(bcondLightSync, Z_R0);
+}
+inline void Assembler::z_release() { } // Empty body: no instruction is emitted.
+inline void Assembler::z_acquire() { } // Empty body: no instruction is emitted.
+inline void Assembler::z_fence() { z_sync(); } // Delegates entirely to z_sync().
+
+inline void Assembler::z_illtrap() {  // Emits a single all-zero halfword.
+  emit_16(0x0000);
+}
+inline void Assembler::z_illtrap(int id) {  // Emits one halfword holding only the low 8 bits of id.
+  emit_16(id & 0xff);
+}
+inline void Assembler::z_illtrap_eyecatcher(unsigned short xpattern, unsigned short pattern) {
+  z_llill(Z_R0, xpattern);  // Both patterns go into Z_R0 (LLILL, then IILH) ahead of the trap halfword.
+  z_iilh(Z_R0, pattern);
+  z_illtrap(static_cast<unsigned int>(xpattern));  // z_illtrap(int) emits only the low 8 bits of this value.
+}
+
+inline void Assembler::z_lhrl(Register r1, int64_t i2) { emit_48( LHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lrl(Register r1, int64_t i2) { emit_48( LRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lghrl(Register r1, int64_t i2) { emit_48( LGHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lgfrl(Register r1, int64_t i2) { emit_48( LGFRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lgrl(Register r1, int64_t i2) { emit_48( LGRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_llhrl(Register r1, int64_t i2) { emit_48( LLHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_llghrl(Register r1, int64_t i2){ emit_48( LLGHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_llgfrl(Register r1, int64_t i2){ emit_48( LLGFRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+
+inline void Assembler::z_sthrl(Register r1, int64_t i2) { emit_48( STHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_strl(Register r1, int64_t i2) { emit_48( STRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_stgrl(Register r1, int64_t i2) { emit_48( STGRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+
+inline void Assembler::z_cksm(Register r1, Register r2) { emit_32( CKSM_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_km( Register r1, Register r2) { emit_32( KM_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_kmc( Register r1, Register r2) { emit_32( KMC_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_kimd(Register r1, Register r2) { emit_32( KIMD_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_klmd(Register r1, Register r2) { emit_32( KLMD_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+inline void Assembler::z_kmac(Register r1, Register r2) { emit_32( KMAC_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); }
+
+inline void Assembler::z_exrl(Register r1, int64_t i2) { emit_48( EXRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } // z10
+inline void Assembler::z_exrl(Register r1, address a2) { emit_48( EXRL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a2, pc()), 16, 48)); } // z10
+
+inline void Assembler::z_ectg(int64_t d1, Register b1, int64_t d2, Register b2, Register r3) { emit_48( ECTG_ZOPC | reg(r3, 8, 48) | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm12(d2, 36, 48) | reg(b2, 32, 48)); }
+inline void Assembler::z_ecag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( ECAG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+
+
+//------------------------------
+// Interlocked-Update
+//------------------------------
+inline void Assembler::z_laa( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAA_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laag( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laal( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAL_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laalg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAALG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_lan( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAN_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_lang( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LANG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_lax( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAX_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laxg( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAXG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_lao( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAO_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laog( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAOG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+
+inline void Assembler::z_laa( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laa( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laag( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laag( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laal( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laal( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laalg(Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laalg(r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_lan( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lan( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_lang( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lang( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_lax( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lax( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laxg( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laxg( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_lao( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lao( r1, r3, a.disp12(), a.base()); }
+inline void Assembler::z_laog( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laog( r1, r3, a.disp12(), a.base()); }
+
+//--------------------------------
+// Execution Prediction
+//--------------------------------
+inline void Assembler::z_pfd( int64_t m1, int64_t d2, Register x2, Register b2) { emit_48( PFD_ZOPC | uimm4(m1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_pfd( int64_t m1, Address a) { z_pfd(m1, a.disp(), a.indexOrR0(), a.base()); }
+inline void Assembler::z_pfdrl(int64_t m1, int64_t i2) { emit_48( PFDRL_ZOPC | uimm4(m1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_bpp( int64_t m1, int64_t i2, int64_t d3, Register b3) { emit_48( BPP_ZOPC | uimm4(m1, 8, 48) | uimm12(d3, 20, 48) | reg(b3, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_bprp( int64_t m1, int64_t i2, int64_t i3) { emit_48( BPRP_ZOPC | uimm4(m1, 8, 48) | simm12(i2, 12, 48) | simm24(i3, 24, 48)); }
+
+//-------------------------------
+// Transaction Control
+//-------------------------------
+inline void Assembler::z_tbegin( int64_t d1, Register b1, int64_t i2) { emit_48( TBEGIN_ZOPC | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm16(i2, 32, 48)); }
+inline void Assembler::z_tbeginc(int64_t d1, Register b1, int64_t i2) { emit_48( TBEGINC_ZOPC | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm16(i2, 32, 48)); }
+inline void Assembler::z_tend() { emit_32( TEND_ZOPC); }
+inline void Assembler::z_tabort( int64_t d2, Register b2) { emit_32( TABORT_ZOPC | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_etnd(Register r1) { emit_32( ETND_ZOPC | regt(r1, 24, 32)); }
+inline void Assembler::z_ppa(Register r1, Register r2, int64_t m3) { emit_32( PPA_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+
+//---------------------------------
+// Conditional Execution
+//---------------------------------
+inline void Assembler::z_locr( Register r1, Register r2, branch_condition cc) { emit_32( LOCR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); } // z196
+inline void Assembler::z_locgr( Register r1, Register r2, branch_condition cc) { emit_32( LOCGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); } // z196
+inline void Assembler::z_loc( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOC_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_locg( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOCG_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_loc( Register r1, const Address &a, branch_condition cc) { z_loc(r1, a.disp(), a.base(), cc); }
+inline void Assembler::z_locg( Register r1, const Address &a, branch_condition cc) { z_locg(r1, a.disp(), a.base(), cc); }
+inline void Assembler::z_stoc( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOC_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_stocg( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOCG_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+
+inline void Assembler::z_srst( Register r1, Register r2) { emit_32( SRST_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_srstu(Register r1, Register r2) { emit_32( SRSTU_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+//---------------------------------
+// Address calculation
+//---------------------------------
+inline void Assembler::z_layz(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | reg(b2, 16, 48)); }
+inline void Assembler::z_lay( Register r1, const Address &a) { z_layz(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_laz( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_la( Register r1, const Address &a) { z_laz(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_la( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32));}
+inline void Assembler::z_larl(Register r1, int64_t i2) { emit_48( LARL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_larl(Register r1, address a) { emit_48( LARL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
+
+inline void Assembler::z_lr(Register r1, Register r2) { emit_16( LR_ZOPC | regt(r1,8,16) | reg(r2,12,16)); }
+inline void Assembler::z_lgr(Register r1, Register r2) { emit_32( LGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lh(Register r1, int64_t d2, Register x2, Register b2) { emit_32( LH_ZOPC | 0 << 16 | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_lh(Register r1, const Address &a) { z_lh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_l(Register r1, int64_t d2, Register x2, Register b2) { emit_32( L_ZOPC | 0 << 16 | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_l(Register r1, const Address &a) { z_l(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lg(Register r1, const Address &a) { z_lg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_lbr( Register r1, Register r2) { emit_32( LBR_ZOPC | regt(r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_lhr( Register r1, Register r2) { emit_32( LHR_ZOPC | regt(r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_lgbr( Register r1, Register r2) { emit_32( LGBR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lghr( Register r1, Register r2) { emit_32( LGHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lgfr( Register r1, Register r2) { emit_32( LGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_llhr( Register r1, Register r2) { emit_32( LLHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_llgcr(Register r1, Register r2) { emit_32( LLGCR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_llghr(Register r1, Register r2) { emit_32( LLGHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_llgfr(Register r1, Register r2) { emit_32( LLGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_sth(Register r1, const Address &a) { z_sth(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sth(Register r1, int64_t d2, Register x2, Register b2) { emit_32( STH_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_st( Register r1, const Address& d) { z_st(r1, d.disp(), d.indexOrR0(), d.base()); }
+inline void Assembler::z_st( Register r1, int64_t d2, Register x2, Register b2) { emit_32( ST_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stg(Register r1, const Address& d) { z_stg(r1, d.disp(), d.indexOrR0(), d.base()); }
+inline void Assembler::z_stg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( STG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+
+inline void Assembler::z_stcm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( STCM_ZOPC | regt(r1, 8, 32) | uimm4(m3, 12, 32) | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stcmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_stcmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+
+// memory-immediate instructions (8-bit immediate)
+inline void Assembler::z_cli( int64_t d1, Register b1, int64_t i2) { emit_32( CLI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | uimm8(i2, 8, 32)); }
+inline void Assembler::z_mvi( int64_t d1, Register b1, int64_t i2) { emit_32( MVI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_tm( int64_t d1, Register b1, int64_t i2) { emit_32( TM_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_ni( int64_t d1, Register b1, int64_t i2) { emit_32( NI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_oi( int64_t d1, Register b1, int64_t i2) { emit_32( OI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_xi( int64_t d1, Register b1, int64_t i2) { emit_32( XI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
+inline void Assembler::z_cliy(int64_t d1, Register b1, int64_t i2) { emit_48( CLIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | uimm8(i2, 8, 48)); }
+inline void Assembler::z_mviy(int64_t d1, Register b1, int64_t i2) { emit_48( MVIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_tmy( int64_t d1, Register b1, int64_t i2) { emit_48( TMY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_niy( int64_t d1, Register b1, int64_t i2) { emit_48( NIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_oiy( int64_t d1, Register b1, int64_t i2) { emit_48( OIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_xiy( int64_t d1, Register b1, int64_t i2) { emit_48( XIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+
+inline void Assembler::z_cli( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLI"); z_cli( a.disp12(), a.base(), imm); }
+inline void Assembler::z_mvi( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in MVI"); z_mvi( a.disp12(), a.base(), imm); } // Address form of MVI: rejects an index register, forwards disp12()/base().
+inline void Assembler::z_tm( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in TM"); z_tm( a.disp12(), a.base(), imm); } // Address form of TM: rejects an index register, forwards disp12()/base().
+inline void Assembler::z_ni( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in NI"); z_ni( a.disp12(), a.base(), imm); } // Address form of NI: rejects an index register, forwards disp12()/base().
+inline void Assembler::z_oi( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in OI"); z_oi( a.disp12(), a.base(), imm); } // Address form of OI: rejects an index register, forwards disp12()/base().
+inline void Assembler::z_xi( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in XI"); z_xi( a.disp12(), a.base(), imm); } // Address form of XI: rejects an index register, forwards disp12()/base().
+inline void Assembler::z_cliy(const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLIY"); z_cliy(a.disp20(), a.base(), imm); }
+inline void Assembler::z_mviy(const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in MVIY"); z_mviy(a.disp20(), a.base(), imm); }
+inline void Assembler::z_tmy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in TMY"); z_tmy( a.disp20(), a.base(), imm); }
+inline void Assembler::z_niy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in NIY"); z_niy( a.disp20(), a.base(), imm); }
+inline void Assembler::z_oiy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in OIY"); z_oiy( a.disp20(), a.base(), imm); }
+inline void Assembler::z_xiy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in XIY"); z_xiy( a.disp20(), a.base(), imm); }
+
+
+inline void Assembler::z_mvc(const Address& d, const Address& s, int64_t l) {
+  assert(!(d.has_index() || s.has_index()), "Address operand can not be encoded.");
+  z_mvc(d.disp(), l-1, d.base(), s.disp(), s.base());  // The raw encoder is handed l-1 as its length operand.
+}
+inline void Assembler::z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( MVC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_mvcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( MVCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+
+inline void Assembler::z_mvhhi( int64_t d1, Register b1, int64_t i2) { emit_48( MVHHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_mvhi ( int64_t d1, Register b1, int64_t i2) { emit_48( MVHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_mvghi( int64_t d1, Register b1, int64_t i2) { emit_48( MVGHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_mvhhi( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVHHI"); z_mvhhi( d.disp(), d.baseOrR0(), i2); } // Address form of MVHHI: forwards disp()/baseOrR0() to the z_mvhhi(d1, b1, i2) encoder.
+inline void Assembler::z_mvhi ( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVHI"); z_mvhi( d.disp(), d.baseOrR0(), i2); } // Address form of MVHI: forwards disp()/baseOrR0() to the z_mvhi(d1, b1, i2) encoder.
+inline void Assembler::z_mvghi( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVGHI"); z_mvghi( d.disp(), d.baseOrR0(), i2); }
+
+inline void Assembler::z_ex(Register r1, int64_t d2, Register x2, Register b2) { emit_32( EX_ZOPC | regz(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+
+inline void Assembler::z_ic (Register r1, int64_t d2, Register x2, Register b2) { emit_32( IC_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_icy (Register r1, int64_t d2, Register x2, Register b2) { emit_48( ICY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_icm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( ICM_ZOPC | regt(r1, 8, 32) | uimm4(m3, 12, 32) | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_icmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_icmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_iihh(Register r1, int64_t i2) { emit_32( IIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_iihl(Register r1, int64_t i2) { emit_32( IIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_iilh(Register r1, int64_t i2) { emit_32( IILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_iill(Register r1, int64_t i2) { emit_32( IILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_iihf(Register r1, int64_t i2) { emit_48( IIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_iilf(Register r1, int64_t i2) { emit_48( IILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_lgf(Register r1, const Address& a) { z_lgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lhy(Register r1, const Address &a) { z_lhy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lhy(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lgh(Register r1, const Address &a) { z_lgh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lgh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lt(Register r1, const Address &a) { z_lt(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lt (Register r1, int64_t d2, Register x2, Register b2) { emit_48( LT_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ltg(Register r1, const Address &a) { z_ltg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ltg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ltgf(Register r1, const Address &a) { z_ltgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ltgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTGF_ZOPC| regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lb(Register r1, const Address &a) { z_lb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lb (Register r1, int64_t d2, Register x2, Register b2) { emit_48( LB_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lgb(Register r1, const Address &a) { z_lgb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lgb(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGB_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ly(Register r1, const Address &a) { z_ly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ly(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llc(Register r1, const Address& a) { z_llc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llc(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLC_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llh(Register r1, const Address &a) { z_llh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llgf(Register r1, const Address &a) { z_llgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llgh(Register r1, const Address &a) { z_llgh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llgh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llgc(Register r1, const Address &a) { z_llgc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llgc(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGC_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_llgc(Register r1, int64_t d2, Register b2) { z_llgc( r1, d2, Z_R0, b2); }
+inline void Assembler::z_lhi(Register r1, int64_t i2) { emit_32( LHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_lghi(Register r1, int64_t i2) { emit_32( LGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_lgfi(Register r1, int64_t i2) { emit_48( LGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_llihf(Register r1, int64_t i2) { emit_48( LLIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_llilf(Register r1, int64_t i2) { emit_48( LLILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_llihh(Register r1, int64_t i2) { emit_32( LLIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_llihl(Register r1, int64_t i2) { emit_32( LLIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_llilh(Register r1, int64_t i2) { emit_32( LLILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_llill(Register r1, int64_t i2) { emit_32( LLILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+
+// allow "monadic" use
+inline void Assembler::z_lcr( Register r1, Register r2) { emit_16( LCR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
+inline void Assembler::z_lcgr( Register r1, Register r2) { emit_32( LCGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lcgfr(Register r1, Register r2) { emit_32( LCGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lnr( Register r1, Register r2) { emit_16( LNR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
+inline void Assembler::z_lngr( Register r1, Register r2) { emit_32( LNGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lngfr(Register r1, Register r2) { emit_32( LNGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+
+inline void Assembler::z_lrvr( Register r1, Register r2) { emit_32( LRVR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lrvgr(Register r1, Register r2) { emit_32( LRVGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_ltr( Register r1, Register r2) { emit_16( LTR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_ltgr( Register r1, Register r2) { emit_32( LTGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ltgfr(Register r1, Register r2) { emit_32( LTGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_stc( Register r1, const Address &a) { z_stc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stc( Register r1, int64_t d2, Register x2, Register b2) { emit_32( STC_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stcy( Register r1, const Address &a) { z_stcy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stcy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STCY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sthy( Register r1, const Address &a) { z_sthy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sthy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sty( Register r1, const Address &a) { z_sty(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sty( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_stfle(int64_t d2, Register b2) { emit_32(STFLE_ZOPC | uimm12(d2,20,32) | regz(b2,16,32)); }
+
+
+//-----------------------------------
+// SHIFT/ROTATE OPERATIONS
+//-----------------------------------
+inline void Assembler::z_sla( Register r1, int64_t d2, Register b2) { emit_32( SLA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_slag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_sra( Register r1, int64_t d2, Register b2) { emit_32( SRA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_srag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_sll( Register r1, int64_t d2, Register b2) { emit_32( SLL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_sllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_srl( Register r1, int64_t d2, Register b2) { emit_32( SRL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_srlg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+
+// rotate left
+inline void Assembler::z_rll( Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLL_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+inline void Assembler::z_rllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLLG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+
+// Rotate then AND/XOR/OR/insert
+inline void Assembler::z_rnsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // ROTATE THEN AND SELECTED BITS -- z196
+ const int64_t insn_len = 48; // Instruction length in bits, handed to every field helper below.
+ assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Strict check; trimming to 6 bits would be an alternative.
+ assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Strict check; trimming to 6 bits would be an alternative.
+ assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could be passed through unchecked: the instruction ignores the leftmost 2 bits.
+ emit_48( RNSBG_ZOPC | regt(r1, 8, insn_len) | regt(r2, 12, insn_len) | uimm6(spos3, 16+2, insn_len) | uimm6(epos4, 24+2, insn_len) | uimm6(nrot5, 32+2, insn_len) | u_field(test_only ? 1 : 0, insn_len-16-1, insn_len-16-1));
+}
+inline void Assembler::z_rxsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // ROTATE THEN XOR SELECTED BITS -- z196
+ const int64_t insn_len = 48; // Instruction length in bits, handed to every field helper below.
+ assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Strict check; trimming to 6 bits would be an alternative.
+ assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Strict check; trimming to 6 bits would be an alternative.
+ assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could be passed through unchecked: the instruction ignores the leftmost 2 bits.
+ emit_48( RXSBG_ZOPC | regt(r1, 8, insn_len) | regt(r2, 12, insn_len) | uimm6(spos3, 16+2, insn_len) | uimm6(epos4, 24+2, insn_len) | uimm6(nrot5, 32+2, insn_len) | u_field(test_only ? 1 : 0, insn_len-16-1, insn_len-16-1));
+}
+inline void Assembler::z_rosbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // ROTATE THEN OR SELECTED BITS -- z196
+ const int64_t insn_len = 48; // Instruction length in bits, handed to every field helper below.
+ assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Strict check; trimming to 6 bits would be an alternative.
+ assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Strict check; trimming to 6 bits would be an alternative.
+ assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could be passed through unchecked: the instruction ignores the leftmost 2 bits.
+ emit_48( ROSBG_ZOPC | regt(r1, 8, insn_len) | regt(r2, 12, insn_len) | uimm6(spos3, 16+2, insn_len) | uimm6(epos4, 24+2, insn_len) | uimm6(nrot5, 32+2, insn_len) | u_field(test_only ? 1 : 0, insn_len-16-1, insn_len-16-1));
+}
+inline void Assembler::z_risbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool zero_rest) { // ROTATE THEN INSERT SELECTED BITS -- z196
+ const int64_t insn_len = 48; // Instruction length in bits, handed to every field helper below.
+ assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Strict check; trimming to 6 bits would be an alternative.
+ assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Strict check; trimming to 6 bits would be an alternative.
+ assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could be passed through unchecked: the instruction ignores the leftmost 2 bits.
+ emit_48( RISBG_ZOPC | regt(r1, 8, insn_len) | regt(r2, 12, insn_len) | uimm6(spos3, 16+2, insn_len) | uimm6(epos4, 24+2, insn_len) | uimm6(nrot5, 32+2, insn_len) | u_field(zero_rest ? 1 : 0, insn_len-24-1, insn_len-24-1));
+}
+
+
+//------------------------------
+// LOGICAL OPERATIONS
+//------------------------------
+inline void Assembler::z_n( Register r1, int64_t d2, Register x2, Register b2) { emit_32( N_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_ny( Register r1, int64_t d2, Register x2, Register b2) { emit_48( NY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ng( Register r1, int64_t d2, Register x2, Register b2) { emit_48( NG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_n( Register r1, const Address& a) { z_n( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ny( Register r1, const Address& a) { z_ny(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ng( Register r1, const Address& a) { z_ng(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_nr( Register r1, Register r2) { emit_16( NR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_ngr( Register r1, Register r2) { emit_32( NGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_nrk( Register r1, Register r2, Register r3) { emit_32( NRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_ngrk(Register r1, Register r2, Register r3) { emit_32( NGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_nihh(Register r1, int64_t i2) { emit_32( NIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nihl(Register r1, int64_t i2) { emit_32( NIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nilh(Register r1, int64_t i2) { emit_32( NILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nill(Register r1, int64_t i2) { emit_32( NILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nihf(Register r1, int64_t i2) { emit_48( NIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_nilf(Register r1, int64_t i2) { emit_48( NILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+inline void Assembler::z_o( Register r1, int64_t d2, Register x2, Register b2) { emit_32( O_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_oy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( OY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_og( Register r1, int64_t d2, Register x2, Register b2) { emit_48( OG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_o( Register r1, const Address& a) { z_o( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_oy( Register r1, const Address& a) { z_oy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_og( Register r1, const Address& a) { z_og(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_or( Register r1, Register r2) { emit_16( OR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_ogr( Register r1, Register r2) { emit_32( OGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ork( Register r1, Register r2, Register r3) { emit_32( ORK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_ogrk(Register r1, Register r2, Register r3) { emit_32( OGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_oihh(Register r1, int64_t i2) { emit_32( OIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oihl(Register r1, int64_t i2) { emit_32( OIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oilh(Register r1, int64_t i2) { emit_32( OILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oill(Register r1, int64_t i2) { emit_32( OILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oihf(Register r1, int64_t i2) { emit_48( OIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_oilf(Register r1, int64_t i2) { emit_48( OILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+inline void Assembler::z_x( Register r1, int64_t d2, Register x2, Register b2) { emit_32( X_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_xy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( XY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_xg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( XG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_x( Register r1, const Address& a) { z_x( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_xy( Register r1, const Address& a) { z_xy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_xg( Register r1, const Address& a) { z_xg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_xr( Register r1, Register r2) { emit_16( XR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_xgr( Register r1, Register r2) { emit_32( XGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_xrk( Register r1, Register r2, Register r3) { emit_32( XRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_xgrk(Register r1, Register r2, Register r3) { emit_32( XGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_xihf(Register r1, int64_t i2) { emit_48( XIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_xilf(Register r1, int64_t i2) { emit_48( XILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+inline void Assembler::z_nc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( NC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_oc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( OC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_xc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( XC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_nc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_nc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
+inline void Assembler::z_oc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_oc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
+inline void Assembler::z_xc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_xc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
+
+
+//---------------
+// ADD
+//---------------
+inline void Assembler::z_a( Register r1, int64_t d2, Register x2, Register b2) { emit_32( A_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_ay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_al( Register r1, int64_t d2, Register x2, Register b2) { emit_32( AL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_aly( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ag( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_agf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_alg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_algf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_a( Register r1, const Address& a) { z_a( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ay( Register r1, const Address& a) { z_ay( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_al( Register r1, const Address& a) { z_al( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_aly( Register r1, const Address& a) { z_aly( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ag( Register r1, const Address& a) { z_ag( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_agf( Register r1, const Address& a) { z_agf( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_alg( Register r1, const Address& a) { z_alg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_algf(Register r1, const Address& a) { z_algf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_ar( Register r1, Register r2) { emit_16( AR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_agr( Register r1, Register r2) { emit_32( AGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_agfr(Register r1, Register r2) { emit_32( AGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ark( Register r1, Register r2, Register r3) { emit_32( ARK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_agrk(Register r1, Register r2, Register r3) { emit_32( AGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_ahi( Register r1, int64_t i2) { emit_32( AHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_afi( Register r1, int64_t i2) { emit_48( AFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_aghi( Register r1, int64_t i2) { emit_32( AGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_agfi( Register r1, int64_t i2) { emit_48( AGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_aih( Register r1, int64_t i2) { emit_48( AIH_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_ahik( Register r1, Register r3, int64_t i2) { emit_48( AHIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+inline void Assembler::z_aghik(Register r1, Register r3, int64_t i2) { emit_48( AGHIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+
+
+//-----------------------
+// ADD LOGICAL
+//-----------------------
+inline void Assembler::z_alr( Register r1, Register r2) { emit_16( ALR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_algr( Register r1, Register r2) { emit_32( ALGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_algfr(Register r1, Register r2) { emit_32( ALGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_alrk( Register r1, Register r2, Register r3) { emit_32( ALRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_algrk(Register r1, Register r2, Register r3) { emit_32( ALGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_alcgr(Register r1, Register r2) { emit_32( ALCGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_alfi( Register r1, int64_t i2) { emit_48( ALFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_algfi(Register r1, int64_t i2) { emit_48( ALGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+
+inline void Assembler::z_alhsik( Register r1, Register r3, int64_t i2) { emit_48( ALHSIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+inline void Assembler::z_alghsik(Register r1, Register r3, int64_t i2) { emit_48( ALGHSIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+
+// In-memory arithmetic (add signed, add logical with signed immediate)
+inline void Assembler::z_asi( int64_t d1, Register b1, int64_t i2) { emit_48( ASI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_agsi( int64_t d1, Register b1, int64_t i2) { emit_48( AGSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_alsi( int64_t d1, Register b1, int64_t i2) { emit_48( ALSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_algsi(int64_t d1, Register b1, int64_t i2) { emit_48( ALGSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_asi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ASI"); z_asi( d.disp(), d.base(), i2); }
+inline void Assembler::z_agsi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in AGSI"); z_agsi( d.disp(), d.base(), i2); }
+inline void Assembler::z_alsi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ALSI"); z_alsi( d.disp(), d.base(), i2); }
+inline void Assembler::z_algsi(const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ALGSI"); z_algsi(d.disp(), d.base(), i2); }
+
+
+//--------------------
+// SUBTRACT
+//--------------------
+inline void Assembler::z_s( Register r1, int64_t d2, Register x2, Register b2) { emit_32( S_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_sy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sgf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_slg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_slgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_s( Register r1, const Address& a) { z_s( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sy( Register r1, const Address& a) { z_sy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sg( Register r1, const Address& a) { z_sg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sgf( Register r1, const Address& a) { z_sgf( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_slg( Register r1, const Address& a) { z_slg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_slgf(Register r1, const Address& a) { z_slgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_sr( Register r1, Register r2) { emit_16( SR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_sgr( Register r1, Register r2) { emit_32( SGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_sgfr(Register r1, Register r2) { emit_32( SGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_srk( Register r1, Register r2, Register r3) { emit_32( SRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_sgrk(Register r1, Register r2, Register r3) { emit_32( SGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_sh( Register r1, int64_t d2, Register x2, Register b2) { emit_32( SH_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_shy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sh( Register r1, const Address &a) { z_sh( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address form: forwards disp/index/base to the raw SH encoder, using baseOrR0() like the other RX/RXY Address overloads.
+inline void Assembler::z_shy( Register r1, const Address &a) { z_shy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address form: forwards disp/index/base to the raw SHY encoder, using baseOrR0() like the other RX/RXY Address overloads.
+
+
+//----------------------------
+// SUBTRACT LOGICAL
+//----------------------------
+inline void Assembler::z_slr( Register r1, Register r2) { emit_16( SLR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_slgr( Register r1, Register r2) { emit_32( SLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_slgfr(Register r1, Register r2) { emit_32( SLGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_slrk( Register r1, Register r2, Register r3) { emit_32(SLRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_slgrk(Register r1, Register r2, Register r3) { emit_32(SLGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_slfi( Register r1, int64_t i2) { emit_48( SLFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_slgfi(Register r1, int64_t i2) { emit_48( SLGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+
+
+//--------------------
+// MULTIPLY
+//--------------------
+inline void Assembler::z_msr(Register r1, Register r2) { emit_32(MSR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } // register-register multiplies: all five share the r1@24 / r2@28 layout
+inline void Assembler::z_msgr(Register r1, Register r2) { emit_32(MSGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_msgfr(Register r1, Register r2) { emit_32(MSGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_mlr(Register r1, Register r2) { emit_32(MLR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_mlgr(Register r1, Register r2) { emit_32(MLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_mhy(Register r1, int64_t d2, Register x2, Register b2) { emit_48(MHY_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); } // memory-operand multiplies: 48-bit encodings, d2 via simm20
+inline void Assembler::z_msy(Register r1, int64_t d2, Register x2, Register b2) { emit_48(MSY_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_msg(Register r1, int64_t d2, Register x2, Register b2) { emit_48(MSG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_msgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48(MSGF_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_ml(Register r1, int64_t d2, Register x2, Register b2) { emit_48(ML_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_mlg(Register r1, int64_t d2, Register x2, Register b2) { emit_48(MLG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+
+inline void Assembler::z_mhy(Register r1, const Address& a) { z_mhy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads: unpack disp/index/base and forward to the explicit-operand form
+inline void Assembler::z_msy(Register r1, const Address& a) { z_msy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msg(Register r1, const Address& a) { z_msg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msgf(Register r1, const Address& a) { z_msgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ml(Register r1, const Address& a) { z_ml(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_mlg(Register r1, const Address& a) { z_mlg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_msfi(Register r1, int64_t i2) { emit_48(MSFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } // 48-bit encodings: i2 goes through simm32
+inline void Assembler::z_msgfi(Register r1, int64_t i2) { emit_48(MSGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_mhi(Register r1, int64_t i2) { emit_32(MHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); } // 32-bit encodings: i2 goes through simm16
+inline void Assembler::z_mghi(Register r1, int64_t i2) { emit_32(MGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+
+
+//------------------
+// DIVIDE
+//------------------
+inline void Assembler::z_dsgr(Register r1, Register r2) { emit_32(DSGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } // register-register divides: r1 at position 24, r2 at 28
+inline void Assembler::z_dsgfr(Register r1, Register r2) { emit_32(DSGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+
+//-------------------
+// COMPARE
+//-------------------
+inline void Assembler::z_cr(Register r1, Register r2) { emit_16(CR_ZOPC | reg(r1, 8, 16) | reg(r2, 12, 16)); } // 16-bit register-register compare
+inline void Assembler::z_cgr(Register r1, Register r2) { emit_32(CGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_cgfr(Register r1, Register r2) { emit_32(CGFR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_chi(Register r1, int64_t i2) { emit_32(CHI_ZOPC | reg(r1, 8, 32) | simm16(i2, 16, 32)); } // 16-bit immediates go through simm16
+inline void Assembler::z_cghi(Register r1, int64_t i2) { emit_32(CGHI_ZOPC | reg(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_cfi(Register r1, int64_t i2) { emit_48(CFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); } // NOTE(review): i2 goes through uimm32 here although z_chi/z_cghi use the signed simm16 - confirm simm32 is not intended
+inline void Assembler::z_cgfi(Register r1, int64_t i2) { emit_48(CGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); } // NOTE(review): same uimm32 vs. simm32 question as z_cfi
+inline void Assembler::z_ch(Register r1, const Address &a) { z_ch(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads in this group forward to the explicit-operand form that follows them
+inline void Assembler::z_ch(Register r1, int64_t d2, Register x2, Register b2) { emit_32(CH_ZOPC | reg(r1, 8, 32) | reg(x2, 12, 32) | regz(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_c(Register r1, const Address &a) { z_c(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_c(Register r1, int64_t d2, Register x2, Register b2) { emit_32(C_ZOPC | reg(r1, 8, 32) | reg(x2, 12, 32) | regz(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_cy(Register r1, const Address &a) { z_cy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cy(Register r1, int64_t d2, Register x2, Register b2) { emit_48(CY_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); } // 48-bit encoding: d2 via simm20
+inline void Assembler::z_cy(Register r1, int64_t d2, Register b2) { z_cy(r1, d2, Z_R0, b2); } // no index register: Z_R0 is passed as x2
+inline void Assembler::z_cg(Register r1, const Address &a) { z_cg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cg(Register r1, int64_t d2, Register x2, Register b2) { emit_48(CG_ZOPC | reg(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_clr(Register r1, Register r2) { emit_16(CLR_ZOPC | reg(r1, 8, 16) | reg(r2, 12, 16)); } // 16-bit register-register form
+inline void Assembler::z_clgr(Register r1, Register r2) { emit_32(CLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+
+inline void Assembler::z_clfi(Register r1, int64_t i2) { emit_48(CLFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); } // immediate forms: i2 goes through uimm32
+inline void Assembler::z_clgfi(Register r1, int64_t i2) { emit_48(CLGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_cl(Register r1, const Address &a) { z_cl(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads in this group forward to the explicit-operand form that follows them
+inline void Assembler::z_cl(Register r1, int64_t d2, Register x2, Register b2) { emit_32(CL_ZOPC | regt(r1, 8, 32) | reg(x2, 12, 32) | regz(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_cly(Register r1, const Address &a) { z_cly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cly(Register r1, int64_t d2, Register x2, Register b2) { emit_48(CLY_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_cly(Register r1, int64_t d2, Register b2) { z_cly(r1, d2, Z_R0, b2); } // no index register: Z_R0 is passed as x2
+inline void Assembler::z_clg(Register r1, const Address &a) { z_clg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_clg(Register r1, int64_t d2, Register x2, Register b2) { emit_48(CLG_ZOPC | reg(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_clc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48(CLC_ZOPC | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d1, 20, 48) | regz(b2, 32, 48) | uimm12(d2, 36, 48)); } // two base+displacement operands plus an 8-bit l field
+inline void Assembler::z_clcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32(CLCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_clclu(Register r1, Register r3, int64_t d2, Register b2) { emit_48(CLCLU_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | uimm12(d2, 20, 48)); }
+
+inline void Assembler::z_tmll(Register r1, int64_t i2) { emit_32(TMLL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } // all four variants: r1 at position 8, 16-bit i2 via imm16
+inline void Assembler::z_tmlh(Register r1, int64_t i2) { emit_32(TMLH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_tmhl(Register r1, int64_t i2) { emit_32(TMHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_tmhh(Register r1, int64_t i2) { emit_32(TMHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+
+// translate characters
+inline void Assembler::z_troo(Register r1, Register r2, int64_t m3) { emit_32(TROO_ZOPC | uimm4(m3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); } // all four variants: 4-bit m3 at position 16, r1/r2 at 24/28
+inline void Assembler::z_trot(Register r1, Register r2, int64_t m3) { emit_32(TROT_ZOPC | uimm4(m3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_trto(Register r1, Register r2, int64_t m3) { emit_32(TRTO_ZOPC | uimm4(m3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_trtt(Register r1, Register r2, int64_t m3) { emit_32(TRTT_ZOPC | uimm4(m3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+
+// signed comparison
+inline void Assembler::z_crb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48(CRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | reg(b4, 16, 48) | uimm12(d4, 20, 48) | uimm4(m3, 32, 48)); } // *b variants: target given as base b4 + displacement d4
+inline void Assembler::z_cgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48(CGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | reg(b4, 16, 48) | uimm12(d4, 20, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_crj(Register r1, Register r2, branch_condition m3, address a4) { emit_48(CRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); } // *j variants: target a4 is turned into a 16-bit offset from pc()
+inline void Assembler::z_cgrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48(CGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48(CIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | reg(b4, 16, 48) | uimm12(d4, 20, 48) | simm8(i2, 32, 48)); } // immediate variants: i2 goes through simm8
+inline void Assembler::z_cgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48(CGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | reg(b4, 16, 48) | uimm12(d4, 20, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48(CIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cgij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48(CGIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
+// unsigned comparison
+inline void Assembler::z_clrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48(CLRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | reg(b4, 16, 48) | uimm12(d4, 20, 48) | uimm4(m3, 32, 48)); } // *b variants: target given as base b4 + displacement d4
+inline void Assembler::z_clgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48(CLGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | reg(b4, 16, 48) | uimm12(d4, 20, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48(CLRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); } // *j variants: target a4 is turned into a 16-bit offset from pc()
+inline void Assembler::z_clgrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48(CLGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48(CLIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | reg(b4, 16, 48) | uimm12(d4, 20, 48) | uimm8(i2, 32, 48)); } // immediate variants: i2 goes through uimm8
+inline void Assembler::z_clgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48(CLGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | reg(b4, 16, 48) | uimm12(d4, 20, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48(CLIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clgij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48(CLGIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
+
+// Compare and trap instructions (signed).
+inline void Assembler::z_crt(Register r1, Register r2, int64_t m3) { emit_32(CRT_ZOPC | uimm4(m3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); } // register forms: 4-bit m3 at position 16
+inline void Assembler::z_cgrt(Register r1, Register r2, int64_t m3) { emit_32(CGRT_ZOPC | uimm4(m3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_cit(Register r1, int64_t i2, int64_t m3) { emit_48(CIT_ZOPC | reg(r1, 8, 48) | simm16(i2, 16, 48) | uimm4(m3, 32, 48)); } // immediate forms: i2 via simm16, m3 at position 32
+inline void Assembler::z_cgit(Register r1, int64_t i2, int64_t m3) { emit_48(CGIT_ZOPC | reg(r1, 8, 48) | simm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
+
+// Compare and trap instructions (unsigned).
+inline void Assembler::z_clrt(Register r1, Register r2, int64_t m3) { emit_32(CLRT_ZOPC | uimm4(m3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); } // register forms: 4-bit m3 at position 16
+inline void Assembler::z_clgrt(Register r1, Register r2, int64_t m3) { emit_32(CLGRT_ZOPC | uimm4(m3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_clfit(Register r1, int64_t i2, int64_t m3) { emit_48(CLFIT_ZOPC | reg(r1, 8, 48) | uimm16(i2, 16, 48) | uimm4(m3, 32, 48)); } // immediate forms: i2 via uimm16, m3 at position 32
+inline void Assembler::z_clgit(Register r1, int64_t i2, int64_t m3) { emit_48(CLGIT_ZOPC | reg(r1, 8, 48) | uimm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
+
+inline void Assembler::z_bc(branch_condition m1, int64_t d2, Register x2, Register b2) { emit_32(BC_ZOPC | uimm4(m1, 8, 32) | reg(x2, 12, 32) | regz(b2, 16, 32) | uimm12(d2, 20, 32)); } // target given as index + base + 12-bit displacement
+inline void Assembler::z_bcr(branch_condition m1, Register r2) { emit_16(BCR_ZOPC | uimm4(m1, 8, 16) | reg(r2, 12, 16)); } // target taken from register r2
+inline void Assembler::z_brc(branch_condition i1, int64_t i2) { emit_32(BRC_ZOPC | uimm4(i1, 8, 32) | simm16(i2, 16, 32)); } // caller supplies the 16-bit field value directly
+inline void Assembler::z_brc(branch_condition i1, address a) { emit_32(BRC_ZOPC | uimm4(i1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); } // field value derived from a and pc() via pcrel_off16
+inline void Assembler::z_brcl(branch_condition i1, address a) { emit_48(BRCL_ZOPC | uimm4(i1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); } // 32-bit variant: pcrel_off32 / simm32
+inline void Assembler::z_bctgr(Register r1, Register r2) { emit_32(BCTGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); } // emits the 32-bit BCTGR encoding with r1 at position 24 and r2 at 28
+
+inline void Assembler::z_basr(Register r1, Register r2) { emit_16(BASR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); } // 16-bit register-register form
+
+inline void Assembler::z_brasl(Register r1, address a) { emit_48(BRASL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); } // a is converted to a 32-bit offset from pc()
+
+inline void Assembler::z_brct(Register r1, address a) { emit_32(BRCT_ZOPC | regt(r1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); } // a is converted to a 16-bit offset from pc()
+inline void Assembler::z_brct(Register r1, Label& L) { z_brct(r1, target(L)); } // Label overload: resolves L through target() and forwards
+
+inline void Assembler::z_brxh(Register r1, Register r3, address a) { emit_32(BRXH_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); } // address forms encode a as a 16-bit offset from pc()
+inline void Assembler::z_brxh(Register r1, Register r3, Label& L) { z_brxh(r1, r3, target(L)); } // Label forms resolve L through target() and forward
+
+inline void Assembler::z_brxle(Register r1, Register r3, address a) { emit_32(BRXLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); }
+inline void Assembler::z_brxle(Register r1, Register r3, Label& L) { z_brxle(r1, r3, target(L)); }
+
+inline void Assembler::z_brxhg(Register r1, Register r3, address a) { emit_48(BRXHG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 48)); } // the *g variants use 48-bit encodings
+inline void Assembler::z_brxhg(Register r1, Register r3, Label& L) { z_brxhg(r1, r3, target(L)); }
+
+inline void Assembler::z_brxlg(Register r1, Register r3, address a) { emit_48(BRXLG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 48)); }
+inline void Assembler::z_brxlg(Register r1, Register r3, Label& L) { z_brxlg(r1, r3, target(L)); }
+
+inline void Assembler::z_flogr(Register r1, Register r2) { emit_32(FLOGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); } // two-register forms: r1 at position 24, r2 at 28
+inline void Assembler::z_popcnt(Register r1, Register r2) { emit_32(POPCNT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ahhhr(Register r1, Register r2, Register r3) { emit_32(AHHHR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); } // three-register forms: r3 at position 16
+inline void Assembler::z_ahhlr(Register r1, Register r2, Register r3) { emit_32(AHHLR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_tam() { emit_16(TAM_ZOPC); } // opcode only, no operand fields
+inline void Assembler::z_stck(int64_t d2, Register b2) { emit_32(STCK_ZOPC | regz(b2, 16, 32) | uimm12(d2, 20, 32)); } // base + 12-bit displacement operand
+inline void Assembler::z_stckf(int64_t d2, Register b2) { emit_32(STCKF_ZOPC | regz(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_stmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48(STMG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); } // r1/r3 pair plus base + 20-bit displacement; b2 uses reg(), not regz()
+inline void Assembler::z_lmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48(LMG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
+
+inline void Assembler::z_cs(Register r1, Register r3, int64_t d2, Register b2) { emit_32(CS_ZOPC | regt(r1, 8, 32) | reg(r3, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); } // 32-bit encoding: d2 via uimm12
+inline void Assembler::z_csy(Register r1, Register r3, int64_t d2, Register b2) { emit_48(CSY_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); } // 48-bit encodings: d2 via simm20
+inline void Assembler::z_csg(Register r1, Register r3, int64_t d2, Register b2) { emit_48(CSG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_cs(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_cs(r1, r3, a.disp(), a.baseOrR0()); } // Address overloads reject an index register, then forward disp/base
+inline void Assembler::z_csy(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_csy(r1, r3, a.disp(), a.baseOrR0()); }
+inline void Assembler::z_csg(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_csg(r1, r3, a.disp(), a.baseOrR0()); }
+
+inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2) { emit_32(CVD_ZOPC | regt(r1, 8, 32) | reg(x2, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); } // 32-bit encoding: d2 via uimm12
+inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48(CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); } // 48-bit encoding: d2 via simm20
+
+
+//-------------------------------
+// FLOAT INSTRUCTIONS
+//-------------------------------
+
+//----------------
+// LOAD
+//----------------
+inline void Assembler::z_ler(FloatRegister r1, FloatRegister r2) { emit_16(LER_ZOPC | fregt(r1, 8, 16) | freg(r2, 12, 16)); } // 16-bit register-register loads
+inline void Assembler::z_ldr(FloatRegister r1, FloatRegister r2) { emit_16(LDR_ZOPC | fregt(r1, 8, 16) | freg(r2, 12, 16)); }
+inline void Assembler::z_ldebr(FloatRegister r1, FloatRegister r2) { emit_32(LDEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); } // 32-bit register-register forms
+inline void Assembler::z_ledbr(FloatRegister r1, FloatRegister r2) { emit_32(LEDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_le(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32(LE_ZOPC | fregt(r1, 8, 32) | reg(x2, 12, 32) | regz(b2, 16, 32) | uimm12(d2, 20, 32)); } // memory loads: 32-bit encodings take d2 via uimm12 ...
+inline void Assembler::z_ley(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48(LEY_ZOPC | fregt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); } // ... 48-bit encodings take d2 via simm20
+inline void Assembler::z_ld(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32(LD_ZOPC | fregt(r1, 8, 32) | reg(x2, 12, 32) | regz(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_ldy(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48(LDY_ZOPC | fregt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_le(FloatRegister r1, const Address &a) { z_le(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads forward disp/index/base to the forms above
+inline void Assembler::z_ley(FloatRegister r1, const Address &a) { z_ley(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ld(FloatRegister r1, const Address &a) { z_ld(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ldy(FloatRegister r1, const Address &a) { z_ldy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_lzdr(FloatRegister r1) { emit_32(LZDR_ZOPC | fregt(r1, 24, 32)); } // single-operand forms: only the register at position 24 is encoded
+inline void Assembler::z_lzer(FloatRegister f1) { emit_32(LZER_ZOPC | fregt(f1, 24, 32)); }
+
+
+//-----------------
+// STORE
+//-----------------
+inline void Assembler::z_ste(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32(STE_ZOPC | freg(r1, 8, 32) | reg(x2, 12, 32) | regz(b2, 16, 32) | uimm12(d2, 20, 32)); } // stores encode r1 via freg; 32-bit encodings take d2 via uimm12 ...
+inline void Assembler::z_stey(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48(STEY_ZOPC | freg(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); } // ... 48-bit encodings take d2 via simm20
+inline void Assembler::z_std(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32(STD_ZOPC | freg(r1, 8, 32) | reg(x2, 12, 32) | regz(b2, 16, 32) | uimm12(d2, 20, 32)); }
+inline void Assembler::z_stdy(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48(STDY_ZOPC | freg(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_ste(FloatRegister r1, const Address &a) { z_ste(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads forward disp/index/base to the forms above
+inline void Assembler::z_stey(FloatRegister r1, const Address &a) { z_stey(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_std(FloatRegister r1, const Address &a) { z_std(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stdy(FloatRegister r1, const Address &a) { z_stdy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// ADD
+//---------------
+inline void Assembler::z_aebr(FloatRegister f1, FloatRegister f2) { emit_32(AEBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); } // register-register forms: 32-bit encodings
+inline void Assembler::z_adbr(FloatRegister f1, FloatRegister f2) { emit_32(ADBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); }
+inline void Assembler::z_aeb(FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48(AEB_ZOPC | fregt(f1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); } // memory forms: 48-bit encodings, but d2 still goes through uimm12
+inline void Assembler::z_adb(FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48(ADB_ZOPC | fregt(f1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); }
+inline void Assembler::z_aeb(FloatRegister r1, const Address& a) { z_aeb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads forward disp/index/base to the forms above
+inline void Assembler::z_adb(FloatRegister r1, const Address& a) { z_adb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// SUB
+//---------------
+inline void Assembler::z_sebr(FloatRegister f1, FloatRegister f2) { emit_32(SEBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); } // register-register forms: 32-bit encodings
+inline void Assembler::z_sdbr(FloatRegister f1, FloatRegister f2) { emit_32(SDBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); }
+inline void Assembler::z_seb(FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48(SEB_ZOPC | fregt(f1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); } // memory forms: 48-bit encodings, d2 via uimm12
+inline void Assembler::z_sdb(FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48(SDB_ZOPC | fregt(f1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); }
+inline void Assembler::z_seb(FloatRegister r1, const Address& a) { z_seb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads forward disp/index/base to the forms above
+inline void Assembler::z_sdb(FloatRegister r1, const Address& a) { z_sdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_lcebr(FloatRegister r1, FloatRegister r2) { emit_32(LCEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); } // two-register forms: r1 at position 24, r2 at 28
+inline void Assembler::z_lcdbr(FloatRegister r1, FloatRegister r2) { emit_32(LCDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+
+inline void Assembler::z_lpdbr(FloatRegister fr1, FloatRegister fr2) { emit_32(LPDBR_ZOPC | fregt(fr1, 24, 32) | freg((fr2 == fnoreg) ? fr1 : fr2, 28, 32)); } // fr2 == fnoreg: fr1 is encoded in both register fields
+
+
+//---------------
+// MUL
+//---------------
+inline void Assembler::z_meebr(FloatRegister f1, FloatRegister f2) { emit_32(MEEBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); } // register-register forms: 32-bit encodings
+inline void Assembler::z_mdbr(FloatRegister f1, FloatRegister f2) { emit_32(MDBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); }
+inline void Assembler::z_meeb(FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48(MEEB_ZOPC | fregt(f1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); } // memory forms: 48-bit encodings, d2 via uimm12
+inline void Assembler::z_mdb(FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48(MDB_ZOPC | fregt(f1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); }
+inline void Assembler::z_meeb(FloatRegister r1, const Address& a) { z_meeb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads forward disp/index/base to the forms above
+inline void Assembler::z_mdb(FloatRegister r1, const Address& a) { z_mdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// DIV
+//---------------
+inline void Assembler::z_debr(FloatRegister f1, FloatRegister f2) { emit_32(DEBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); } // register-register forms: 32-bit encodings
+inline void Assembler::z_ddbr(FloatRegister f1, FloatRegister f2) { emit_32(DDBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); }
+inline void Assembler::z_deb(FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48(DEB_ZOPC | fregt(f1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); } // memory forms: 48-bit encodings, d2 via uimm12
+inline void Assembler::z_ddb(FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48(DDB_ZOPC | fregt(f1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); }
+inline void Assembler::z_deb(FloatRegister r1, const Address& a) { z_deb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads forward disp/index/base to the forms above
+inline void Assembler::z_ddb(FloatRegister r1, const Address& a) { z_ddb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// square root
+//---------------
+inline void Assembler::z_sqdbr(FloatRegister f1, FloatRegister f2) { emit_32(SQDBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); } // register-register form
+inline void Assembler::z_sqdb(FloatRegister fr1, int64_t d2, Register x2, Register b2) { emit_48(SQDB_ZOPC | fregt(fr1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); } // memory form: 48-bit encoding, d2 via uimm12
+inline void Assembler::z_sqdb(FloatRegister fr1, int64_t d2, Register b2) { z_sqdb(fr1, d2, Z_R0, b2); } // no index register: Z_R0 is passed as x2
+
+
+//---------------
+// CMP
+//---------------
+inline void Assembler::z_cebr(FloatRegister r1, FloatRegister r2) { emit_32(CEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); } // register-register form
+inline void Assembler::z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48(CEB_ZOPC | fregt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); } // memory form: 48-bit encoding, d2 via uimm12
+inline void Assembler::z_ceb(FloatRegister r1, const Address &a) { z_ceb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overload forwards disp/index/base
+inline void Assembler::z_cdbr(FloatRegister r1, FloatRegister r2) { emit_32(CDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cdb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48(CDB_ZOPC | fregt(r1, 8, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) | uimm12(d2, 20, 48)); }
+inline void Assembler::z_cdb(FloatRegister r1, const Address &a) { z_cdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//------------------------------------
+// FLOAT <-> INT conversion
+//------------------------------------
+inline void Assembler::z_ldgr(FloatRegister r1, Register r2) { emit_32(LDGR_ZOPC | fregt(r1, 24, 32) | reg(r2, 28, 32)); } // float register r1, general register r2
+inline void Assembler::z_lgdr(Register r1, FloatRegister r2) { emit_32(LGDR_ZOPC | regt(r1, 24, 32) | freg(r2, 28, 32)); } // general register r1, float register r2
+
+inline void Assembler::z_cefbr(FloatRegister r1, Register r2) { emit_32(CEFBR_ZOPC | fregt(r1, 24, 32) | reg(r2, 28, 32)); } // all four: float register r1 at position 24, general register r2 at 28
+inline void Assembler::z_cdfbr(FloatRegister r1, Register r2) { emit_32(CDFBR_ZOPC | fregt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_cegbr(FloatRegister r1, Register r2) { emit_32(CEGBR_ZOPC | fregt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_cdgbr(FloatRegister r1, Register r2) { emit_32(CDGBR_ZOPC | fregt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_cfebr(Register r1, FloatRegister r2, RoundingMode m) { emit_32(CFEBR_ZOPC | rounding_mode(m, 16, 32) | regt(r1, 24, 32) | freg(r2, 28, 32)); } // all four: rounding mode m at position 16, general r1 at 24, float r2 at 28
+inline void Assembler::z_cfdbr(Register r1, FloatRegister r2, RoundingMode m) { emit_32(CFDBR_ZOPC | rounding_mode(m, 16, 32) | regt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cgebr(Register r1, FloatRegister r2, RoundingMode m) { emit_32(CGEBR_ZOPC | rounding_mode(m, 16, 32) | regt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cgdbr(Register r1, FloatRegister r2, RoundingMode m) { emit_32(CGDBR_ZOPC | rounding_mode(m, 16, 32) | regt(r1, 24, 32) | freg(r2, 28, 32)); }
+
+
+ inline void Assembler::z_layz(Register r1, int64_t d2, Register b2) { z_layz(r1, d2, Z_R0, b2); } // base+displacement convenience overloads: each forwards to the four-operand form with Z_R0 as x2
+ inline void Assembler::z_lay(Register r1, int64_t d2, Register b2) { z_lay(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_laz(Register r1, int64_t d2, Register b2) { z_laz(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_la(Register r1, int64_t d2, Register b2) { z_la(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_l(Register r1, int64_t d2, Register b2) { z_l(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_ly(Register r1, int64_t d2, Register b2) { z_ly(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_lg(Register r1, int64_t d2, Register b2) { z_lg(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_st(Register r1, int64_t d2, Register b2) { z_st(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_sty(Register r1, int64_t d2, Register b2) { z_sty(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_stg(Register r1, int64_t d2, Register b2) { z_stg(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_lgf(Register r1, int64_t d2, Register b2) { z_lgf(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_lgh(Register r1, int64_t d2, Register b2) { z_lgh(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_llgh(Register r1, int64_t d2, Register b2) { z_llgh(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_llgf(Register r1, int64_t d2, Register b2) { z_llgf(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_lgb(Register r1, int64_t d2, Register b2) { z_lgb(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_cl(Register r1, int64_t d2, Register b2) { z_cl(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_c(Register r1, int64_t d2, Register b2) { z_c(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_cg(Register r1, int64_t d2, Register b2) { z_cg(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_sh(Register r1, int64_t d2, Register b2) { z_sh(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_shy(Register r1, int64_t d2, Register b2) { z_shy(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_ste(FloatRegister r1, int64_t d2, Register b2) { z_ste(r1, d2, Z_R0, b2); } // float-register variants start here
+ inline void Assembler::z_std(FloatRegister r1, int64_t d2, Register b2) { z_std(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_stdy(FloatRegister r1, int64_t d2, Register b2) { z_stdy(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_stey(FloatRegister r1, int64_t d2, Register b2) { z_stey(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_ld(FloatRegister r1, int64_t d2, Register b2) { z_ld(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_ldy(FloatRegister r1, int64_t d2, Register b2) { z_ldy(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_le(FloatRegister r1, int64_t d2, Register b2) { z_le(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_ley(FloatRegister r1, int64_t d2, Register b2) { z_ley(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_agf(Register r1, int64_t d2, Register b2) { z_agf(r1, d2, Z_R0, b2); } // back to general-register variants
+ inline void Assembler::z_cvd(Register r1, int64_t d2, Register b2) { z_cvd(r1, d2, Z_R0, b2); }
+ inline void Assembler::z_cvdg(Register r1, int64_t d2, Register b2) { z_cvdg(r1, d2, Z_R0, b2); }
+
+// Signed comparison. Label variants: forward target(L) to the overload taking the branch target.
+inline void Assembler::z_crj(Register r1, Register r2, branch_condition m3, Label& L) { z_crj( r1, r2, m3, target(L)); }
+inline void Assembler::z_cgrj(Register r1, Register r2, branch_condition m3, Label& L) { z_cgrj( r1, r2, m3, target(L)); }
+inline void Assembler::z_cij(Register r1, int64_t i2, branch_condition m3, Label& L) { z_cij( r1, i2, m3, target(L)); }
+inline void Assembler::z_cgij(Register r1, int64_t i2, branch_condition m3, Label& L) { z_cgij( r1, i2, m3, target(L)); }
+// Unsigned comparison. Label variants: forward target(L) to the overload taking the branch target.
+inline void Assembler::z_clrj(Register r1, Register r2, branch_condition m3, Label& L) { z_clrj( r1, r2, m3, target(L)); }
+inline void Assembler::z_clgrj(Register r1, Register r2, branch_condition m3, Label& L) { z_clgrj(r1, r2, m3, target(L)); }
+inline void Assembler::z_clij(Register r1, int64_t i2, branch_condition m3, Label& L) { z_clij( r1, i2, m3, target(L)); }
+inline void Assembler::z_clgij(Register r1, int64_t i2, branch_condition m3, Label& L) { z_clgij(r1, i2, m3, target(L)); }
+
+// Branch never (nop) and branch always via register; both are emitted through z_bcr().
+inline void Assembler::z_nop() { z_bcr(bcondNop, Z_R0); }
+inline void Assembler::z_br(Register r2) { assert(r2 != Z_R0, "nop if target is Z_R0, use z_nop() instead"); z_bcr(bcondAlways, r2 ); }
+
+inline void Assembler::z_exrl(Register r1, Label& L) { z_exrl(r1, target(L)); } // z10
+inline void Assembler::z_larl(Register r1, Label& L) { z_larl(r1, target(L)); }
+inline void Assembler::z_bru( Label& L) { z_brc(bcondAlways,target(L)); }
+inline void Assembler::z_brul( Label& L) { z_brcl(bcondAlways,target(L)); } // Like z_bru, but emitted through z_brcl.
+inline void Assembler::z_brul( address a) { z_brcl(bcondAlways,a); } // Address variant: no Label lookup.
+inline void Assembler::z_brh( Label& L) { z_brc(bcondHigh,target(L)); }
+inline void Assembler::z_brl( Label& L) { z_brc(bcondLow,target(L)); }
+inline void Assembler::z_bre( Label& L) { z_brc(bcondEqual,target(L)); }
+inline void Assembler::z_brnh( Label& L) { z_brc(bcondNotHigh,target(L)); }
+inline void Assembler::z_brnl( Label& L) { z_brc(bcondNotLow,target(L)); }
+inline void Assembler::z_brne( Label& L) { z_brc(bcondNotEqual,target(L)); }
+inline void Assembler::z_brz( Label& L) { z_brc(bcondZero,target(L)); }
+inline void Assembler::z_brnz( Label& L) { z_brc(bcondNotZero,target(L)); }
+inline void Assembler::z_braz( Label& L) { z_brc(bcondAllZero,target(L)); }
+inline void Assembler::z_brnaz( Label& L) { z_brc(bcondNotAllZero,target(L)); }
+inline void Assembler::z_brnp( Label& L) { z_brc( bcondNotPositive, target( L)); }
+inline void Assembler::z_btrue( Label& L) { z_brc(bcondAllOne,target(L)); }
+inline void Assembler::z_bfalse(Label& L) { z_brc(bcondAllZero,target(L)); } // Same condition as z_braz.
+inline void Assembler::z_brno( Label& L) { z_brc(bcondNotOrdered,target(L)); }
+inline void Assembler::z_brc( branch_condition m, Label& L) { z_brc(m, target(L)); }
+inline void Assembler::z_brcl(branch_condition m, Label& L) { z_brcl(m, target(L)); }
+
+
+// Return the length in bytes (2, 4, or 6) of the instruction which must start at the passed address.
+// The length is selected by the two leftmost bits of the first byte; no other bits are examined.
+inline int Assembler::instr_len(unsigned char *instr) {
+ switch ((*instr) >> 6) {
+ case 0: return 2;
+ case 1: // fallthru
+ case 2: return 4;
+ case 3: return 6;
+ default:
+ // Control can't reach here.
+ // The switch expression examines just the leftmost two bits
+ // of the first opcode byte. So the range of values is just [0..3].
+ // Having a default clause makes the compiler happy.
+ ShouldNotReachHere();
+ return 0;
+ }
+}
+
+// Copy the instruction at pc, right-justified, into the long int passed via instr.
+// Return the instruction length in bytes as function result.
+// Note: 2-byte instructions don't really need to be accessed unsigned
+// because their leftmost two bits are always zero. We use
+// unsigned here for reasons of uniformity.
+inline unsigned int Assembler::get_instruction(unsigned char *pc, unsigned long *instr) {
+ unsigned int len = instr_len(pc);
+ switch (len) {
+ case 2:
+ *instr = *(unsigned short*) pc; break;
+ case 4:
+ *instr = *(unsigned int*) pc; break;
+ case 6:
+ // Must compose this case from a 4-byte and a 2-byte access. Can't read
+ // 8 bytes and then cut off the rightmost two bytes. Could potentially
+ // access unallocated storage.
+ *instr = ((unsigned long)(*(unsigned int*) pc)) << 16 |
+ ((unsigned long)*(unsigned short*) (pc + 4)); break;
+ default:
+ // Control can't reach here.
+ // The length as returned from instr_len() can only be 2, 4, or 6 bytes.
+ // Having a default clause makes the compiler happy.
+ ShouldNotReachHere();
+ break;
+ }
+ return len;
+}
+
+// Check if instruction is the expected one. The opcode mask is selected by the main opcode of idef.
+// Instruction is passed right-justified in inst.
+inline bool Assembler::is_equal(unsigned long inst, unsigned long idef) {
+ unsigned long imask;
+
+ if ((idef >> 32) != 0) { // 6-byte instructions
+ switch (idef >> 40) { // Select mask by main opcode.
+ case 0xc0:
+ case 0xc2:
+ case 0xc4:
+ case 0xc6: imask = RIL_MASK; break;
+ case 0xec:
+ if ((idef & 0x00ffL) < 0x0080L) { // Only sub opcodes (rightmost byte) below 0x80 use RIE_MASK.
+ imask = RIE_MASK;
+ break;
+ }
+ // Fallthru for other sub opcodes.
+ default:
+#ifdef ASSERT
+ tty->print_cr("inst = %16.16lx, idef = %16.16lx, imask unspecified", inst, idef); // print_cr() ends the line itself.
+ tty->flush();
+#endif
+ ShouldNotReachHere();
+ return false;
+ }
+ } else { // 4-byte instructions
+ switch (idef >> 24) { // Select mask by main opcode.
+ case 0x84:
+ case 0x85: imask = RSI_MASK; break;
+ case 0xa5:
+ case 0xa7: imask = RI_MASK; break;
+ case 0xb9: imask = RRE_MASK; break; // RRE_MASK or RRF_MASK. Opcode fields are at same bit positions.
+ default: {
+#ifdef ASSERT
+ tty->print_cr("inst = %16.16lx, idef = %16.16lx, imask unspecified", inst, idef); // print_cr() ends the line itself.
+ tty->flush();
+#endif
+ ShouldNotReachHere();
+ return false;
+ }
+ }
+ }
+ return (inst & imask) == idef;
+}
+
+inline bool Assembler::is_equal(unsigned long inst, unsigned long idef, unsigned long imask) { // Variant with caller-supplied mask.
+ assert(imask != 0, "valid instruction mask required");
+ return (inst & imask) == idef;
+}
+
+// Check if instruction is the expected one.
+// Instruction is read from address iloc via get_instruction().
+inline bool Assembler::is_equal(address iloc, unsigned long idef) {
+ unsigned long inst;
+ get_instruction(iloc, &inst);
+ return is_equal(inst, idef);
+}
+
+inline bool Assembler::is_equal(address iloc, unsigned long idef, unsigned long imask) { // As above, with caller-supplied mask.
+ unsigned long inst;
+ get_instruction(iloc, &inst);
+ return is_equal(inst, idef, imask);
+}
+
+inline bool Assembler::is_sigtrap_range_check(address pc) { // True if the instruction at pc matches CLFIT_ZOPC or CLRT_ZOPC.
+ return (is_equal(pc, CLFIT_ZOPC, RIE_MASK) || is_equal(pc, CLRT_ZOPC, RRE_MASK));
+}
+
+inline bool Assembler::is_sigtrap_zero_check(address pc) { // True if the instruction at pc matches CGIT_ZOPC or CIT_ZOPC.
+ return (is_equal(pc, CGIT_ZOPC, RIE_MASK) || is_equal(pc, CIT_ZOPC, RIE_MASK));
+}
+
+#endif // CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/bytes_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_BYTES_S390_HPP
+#define CPU_S390_VM_BYTES_S390_HPP
+
+#include "memory/allocation.hpp"
+
+class Bytes: AllStatic {
+ public:
+ // Efficient reading and writing of unaligned unsigned data in
+ // platform-specific byte ordering.
+
+ // Use regular load and store for unaligned access.
+ //
+ // On z/Architecture, unaligned loads and stores are supported when using the
+ // "traditional" load (LH, L/LY, LG) and store (STH, ST/STY, STG) instructions.
+ // The penalty for unaligned access is just very few (two or three) ticks,
+ // plus another few (two or three) ticks if the access crosses a cache line boundary.
+ //
+ // In short, it makes no sense on z/Architecture to piecemeal get or put unaligned data.
+
+ // Returns true if the byte ordering used by Java is different from
+ // the native byte ordering of the underlying machine.
+ // z/Arch is big endian, thus, a swap between native and Java ordering
+ // is always a no-op.
+ static inline bool is_Java_byte_ordering_different() { return false; }
+
+ // Only swap on little endian machines => suffix `_le'. Here, these are no-ops.
+ static inline u2 swap_u2_le(u2 x) { return x; }
+ static inline u4 swap_u4_le(u4 x) { return x; }
+ static inline u8 swap_u8_le(u8 x) { return x; }
+
+ static inline u2 get_native_u2(address p) { return *(u2*)p; }
+ static inline u4 get_native_u4(address p) { return *(u4*)p; }
+ static inline u8 get_native_u8(address p) { return *(u8*)p; }
+
+ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; }
+ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; }
+ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; }
+
+#include "bytes_linux_s390.inline.hpp" // Note: textually included within the class body.
+
+ // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering).
+ static inline u2 get_Java_u2(address p) { return get_native_u2(p); }
+ static inline u4 get_Java_u4(address p) { return get_native_u4(p); }
+ static inline u8 get_Java_u8(address p) { return get_native_u8(p); }
+
+ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, x); }
+ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, x); }
+ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, x); }
+};
+
+#endif // CPU_S390_VM_BYTES_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_CodeStubs_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_s390.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_s390.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif // INCLUDE_ALL_GCS
+
+#define __ ce->masm()->
+#undef CHECK_BAILOUT // Drop any earlier definition; the stubs use the version below.
+#define CHECK_BAILOUT() { if (ce->compilation()->bailed_out()) return; }
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+ bool throw_index_out_of_bounds_exception) :
+ _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception),
+ _index(index) {
+ assert(info != NULL, "must have info");
+ _info = new CodeEmitInfo(info); // The stub gets its own CodeEmitInfo, constructed from info.
+}
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) { // Emits the call to the runtime for a failed range check.
+ __ bind(_entry);
+ if (_info->deoptimize_on_exception()) { // Call predicate_failed_trap instead of a throw stub.
+ address a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+ ce->emit_call_c(a);
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ debug_only(__ should_not_reach_here());
+ return;
+ }
+
+ // Pass the array index in Z_R1_scratch which is not managed by linear scan.
+ if (_index->is_cpu_register()) {
+ __ lgr_if_needed(Z_R1_scratch, _index->as_register());
+ } else { // Index is not in a cpu register: load it as a jint constant.
+ __ load_const_optimized(Z_R1_scratch, _index->as_jint());
+ }
+
+ Runtime1::StubID stub_id; // Throw stub selected by _throw_index_out_of_bounds_exception.
+ if (_throw_index_out_of_bounds_exception) {
+ stub_id = Runtime1::throw_index_exception_id;
+ } else {
+ stub_id = Runtime1::throw_range_check_failed_id;
+ }
+ ce->emit_call_c(Runtime1::entry_for (stub_id));
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ debug_only(__ should_not_reach_here());
+}
+
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+ _info = new CodeEmitInfo(info); // The stub gets its own CodeEmitInfo, constructed from info.
+}
+
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) { // Emits the call to Runtime1's predicate_failed_trap stub.
+ __ bind(_entry);
+ address a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+ ce->emit_call_c(a);
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ debug_only(__ should_not_reach_here());
+}
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) { // Calls counter_overflow with method and bci, then continues.
+ __ bind(_entry);
+ Metadata *m = _method->as_constant_ptr()->as_metadata();
+ bool success = __ set_metadata_constant(m, Z_R1_scratch);
+ if (!success) {
+ ce->compilation()->bailout("const section overflow");
+ return;
+ }
+ ce->store_parameter(/*_method->as_register()*/ Z_R1_scratch, 1); // Parameter 1: method metadata (loaded above).
+ ce->store_parameter(_bci, 0); // Parameter 0: bci.
+ ce->emit_call_c(Runtime1::entry_for (Runtime1::counter_overflow_id));
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) { // Emits the call to the throw_div0_exception stub.
+ if (_offset != -1) { // -1: no implicit exception table entry is appended.
+ ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+ }
+ __ bind(_entry);
+ ce->emit_call_c(Runtime1::entry_for (Runtime1::throw_div0_exception_id));
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ debug_only(__ should_not_reach_here());
+}
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { // Emits the runtime call for an implicit null check.
+ address a;
+ if (_info->deoptimize_on_exception()) {
+ // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
+ a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+ } else {
+ a = Runtime1::entry_for (Runtime1::throw_null_pointer_exception_id);
+ }
+
+ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); // Map _offset to the current code offset.
+ __ bind(_entry);
+ ce->emit_call_c(a);
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ debug_only(__ should_not_reach_here());
+}
+
+// Note: the optional object argument is passed to the stub in Z_R1_scratch.
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+ __ bind(_entry);
+ if (_obj->is_valid()) {
+ __ z_lgr(Z_R1_scratch, _obj->as_register()); // _obj contains the optional argument to the stub
+ }
+ address a = Runtime1::entry_for (_stub); // Runtime stub is selected by _stub.
+ ce->emit_call_c(a);
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ debug_only(__ should_not_reach_here());
+}
+
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+ _result = result;
+ _klass = klass;
+ _klass_reg = klass_reg;
+ _info = new CodeEmitInfo(info);
+ assert(stub_id == Runtime1::new_instance_id || // Only the three new_instance stub variants are accepted.
+ stub_id == Runtime1::fast_new_instance_id ||
+ stub_id == Runtime1::fast_new_instance_init_check_id,
+ "need new_instance id");
+ _stub_id = stub_id;
+}
+
+void NewInstanceStub::emit_code(LIR_Assembler* ce) { // Calls the runtime stub given by _stub_id, then branches to _continuation.
+ __ bind(_entry);
+ assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+ address a = Runtime1::entry_for (_stub_id);
+ ce->emit_call_c(a);
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ assert(_result->as_register() == Z_R2, "callee returns result in Z_R2,");
+ __ z_brul(_continuation);
+}
+
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+ _klass_reg = klass_reg;
+ _length = length;
+ _result = result;
+ _info = new CodeEmitInfo(info); // The stub gets its own CodeEmitInfo, constructed from info.
+}
+
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { // Calls the new_type_array stub, then branches to _continuation.
+ __ bind(_entry);
+ assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+ __ lgr_if_needed(Z_R13, _length->as_register()); // Array length is passed in Z_R13.
+ address a = Runtime1::entry_for (Runtime1::new_type_array_id);
+ ce->emit_call_c(a);
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ assert(_result->as_register() == Z_R2, "callee returns result in Z_R2,");
+ __ z_brul(_continuation);
+}
+
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+ _klass_reg = klass_reg;
+ _length = length;
+ _result = result;
+ _info = new CodeEmitInfo(info); // The stub gets its own CodeEmitInfo, constructed from info.
+}
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { // Calls the new_object_array stub, then branches to _continuation.
+ __ bind(_entry);
+ assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+ __ lgr_if_needed(Z_R13, _length->as_register()); // Array length is passed in Z_R13.
+ address a = Runtime1::entry_for (Runtime1::new_object_array_id);
+ ce->emit_call_c(a);
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ assert(_result->as_register() == Z_R2, "callee returns result in Z_R2,");
+ __ z_brul(_continuation);
+}
+
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+ : MonitorAccessStub(obj_reg, lock_reg) {
+ _info = new CodeEmitInfo(info); // The stub gets its own CodeEmitInfo, constructed from info.
+}
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) { // Calls a monitorenter stub, then branches to _continuation.
+ __ bind(_entry);
+ Runtime1::StubID enter_id;
+ if (ce->compilation()->has_fpu_code()) { // Use the nofpu stub variant if the compilation has no FPU code.
+ enter_id = Runtime1::monitorenter_id;
+ } else {
+ enter_id = Runtime1::monitorenter_nofpu_id;
+ }
+ __ lgr_if_needed(Z_R1_scratch, _obj_reg->as_register()); // Object is passed in Z_R1_scratch.
+ __ lgr_if_needed(Z_R13, _lock_reg->as_register()); // See LIRGenerator::syncTempOpr().
+ ce->emit_call_c(Runtime1::entry_for (enter_id));
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) { // Calls a monitorexit stub, then branches to _continuation.
+ __ bind(_entry);
+ // Move address of the BasicObjectLock into Z_R1_scratch.
+ if (_compute_lock) {
+ // Lock_reg was destroyed by fast unlocking attempt => recompute it.
+ ce->monitor_address(_monitor_ix, FrameMap::as_opr(Z_R1_scratch));
+ } else {
+ __ lgr_if_needed(Z_R1_scratch, _lock_reg->as_register());
+ }
+ // Note: non-blocking leaf routine => no call info needed.
+ Runtime1::StubID exit_id;
+ if (ce->compilation()->has_fpu_code()) { // Use the nofpu stub variant if the compilation has no FPU code.
+ exit_id = Runtime1::monitorexit_id;
+ } else {
+ exit_id = Runtime1::monitorexit_nofpu_id;
+ }
+ ce->emit_call_c(Runtime1::entry_for (exit_id));
+ CHECK_BAILOUT();
+ __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes).
+// - Replace original code with a call to the stub.
+// At runtime:
+// - Call to stub, jump to runtime.
+// - In runtime: Preserve all registers (especially objects, i.e., source and destination object).
+// - In runtime: After initializing class, restore original code, reexecute instruction.
+
+int PatchingStub::_patch_info_offset = - (12 /* load const */ + 2 /*BASR*/); // Checked by an assert in PatchingStub::emit_code().
+
+void PatchingStub::align_patch_site(MacroAssembler* masm) { // Aligns the code position for the patch site.
+#ifndef PRODUCT
+ const char* bc; // Block comment text naming the patch kind (non-product builds only).
+ switch (_id) {
+ case access_field_id: bc = "patch site (access_field)"; break;
+ case load_klass_id: bc = "patch site (load_klass)"; break;
+ case load_mirror_id: bc = "patch site (load_mirror)"; break;
+ case load_appendix_id: bc = "patch site (load_appendix)"; break;
+ default: bc = "patch site (unknown patch id)"; break;
+ }
+ masm->block_comment(bc);
+#endif
+
+ masm->align(round_to(NativeGeneralJump::instruction_size, wordSize)); // Jump size, rounded up to a multiple of wordSize.
+}
+
+void PatchingStub::emit_code(LIR_Assembler* ce) {
+ // Copy original code here.
+ assert(NativeGeneralJump::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF,
+ "not enough room for call"); // _bytes_to_copy is emitted as a single byte in the patch record below.
+
+ NearLabel call_patch;
+
+ int being_initialized_entry = __ offset();
+
+ if (_id == load_klass_id) {
+ // Produce a copy of the load klass instruction for use by the case being initialized.
+#ifdef ASSERT
+ address start = __ pc();
+#endif
+ AddressLiteral addrlit((intptr_t)0, metadata_Relocation::spec(_index));
+ __ load_const(_obj, addrlit);
+
+#ifdef ASSERT
+ for (int i = 0; i < _bytes_to_copy; i++) {
+ address ptr = (address)(_pc_start + i);
+ int a_byte = (*ptr) & 0xFF;
+ assert(a_byte == *start++, "should be the same code");
+ }
+#endif
+ } else if (_id == load_mirror_id || _id == load_appendix_id) {
+ // Produce a copy of the load mirror instruction for use by the case being initialized.
+#ifdef ASSERT
+ address start = __ pc();
+#endif
+ AddressLiteral addrlit((intptr_t)0, oop_Relocation::spec(_index));
+ __ load_const(_obj, addrlit);
+
+#ifdef ASSERT
+ for (int i = 0; i < _bytes_to_copy; i++) {
+ address ptr = (address)(_pc_start + i);
+ int a_byte = (*ptr) & 0xFF;
+ assert(a_byte == *start++, "should be the same code");
+ }
+#endif
+ } else {
+ // Make a copy of the code which is going to be patched.
+ for (int i = 0; i < _bytes_to_copy; i++) {
+ address ptr = (address)(_pc_start + i);
+ int a_byte = (*ptr) & 0xFF;
+ __ emit_int8 (a_byte);
+ }
+ }
+
+ address end_of_patch = __ pc();
+ int bytes_to_skip = 0;
+ if (_id == load_mirror_id) {
+ int offset = __ offset();
+ if (CommentedAssembly) {
+ __ block_comment(" being_initialized check");
+ }
+
+ // Static field accesses have special semantics while the class
+ // initializer is being run, so we emit a test which can be used to
+ // check that this code is being executed by the initializing
+ // thread.
+ assert(_obj != noreg, "must be a valid register");
+ assert(_index >= 0, "must have oop index");
+ __ z_lg(Z_R1_scratch, java_lang_Class::klass_offset_in_bytes(), _obj);
+ __ z_cg(Z_thread, Address(Z_R1_scratch, InstanceKlass::init_thread_offset()));
+ __ branch_optimized(Assembler::bcondNotEqual, call_patch);
+
+ // Load_klass patches may execute the patched code before it's
+ // copied back into place so we need to jump back into the main
+ // code of the nmethod to continue execution.
+ __ branch_optimized(Assembler::bcondAlways, _patch_site_continuation);
+
+ // Make sure this extra code gets skipped.
+ bytes_to_skip += __ offset() - offset;
+ }
+
+ // Now emit the patch record telling the runtime how to find the
+ // pieces of the patch. We only need 3 bytes, but to help the disassembler
+ // we make the data look like the following add instruction:
+ // A R1, D2(X2, B2)
+ // which requires 4 bytes.
+ int sizeof_patch_record = 4;
+ bytes_to_skip += sizeof_patch_record;
+
+ // Emit the offsets needed to find the code to patch.
+ int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
+
+ // Emit the patch record: opcode of the add followed by 3 bytes patch record data.
+ __ emit_int8((int8_t)(A_ZOPC>>24));
+ __ emit_int8(being_initialized_entry_offset);
+ __ emit_int8(bytes_to_skip);
+ __ emit_int8(_bytes_to_copy);
+ address patch_info_pc = __ pc();
+ assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
+
+ address entry = __ pc();
+ NativeGeneralJump::insert_unconditional((address)_pc_start, entry);
+ address target = NULL;
+ relocInfo::relocType reloc_type = relocInfo::none;
+ switch (_id) {
+ case access_field_id: target = Runtime1::entry_for (Runtime1::access_field_patching_id); break;
+ case load_klass_id: target = Runtime1::entry_for (Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
+ case load_mirror_id: target = Runtime1::entry_for (Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
+ case load_appendix_id: target = Runtime1::entry_for (Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
+ default: ShouldNotReachHere();
+ }
+ __ bind(call_patch);
+
+ if (CommentedAssembly) {
+ __ block_comment("patch entry point");
+ }
+ // Cannot use call_c_opt() because its size is not constant.
+ __ load_const(Z_R1_scratch, target); // Must not optimize in order to keep _patch_info_offset constant.
+ __ z_basr(Z_R14, Z_R1_scratch);
+ assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
+ ce->add_call_info_here(_info);
+ __ z_brcl(Assembler::bcondAlways, _patch_site_entry);
+ if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) {
+ CodeSection* cs = __ code_section();
+ address pc = (address)_pc_start;
+ RelocIterator iter(cs, pc, pc + 1);
+ relocInfo::change_reloc_info_for_address(&iter, (address) pc, reloc_type, relocInfo::none);
+ }
+}
+
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) { // Emits the call to Runtime1's deoptimize stub.
+ __ bind(_entry);
+ __ load_const_optimized(Z_R1_scratch, _trap_request); // Pass trap request in Z_R1_scratch.
+ ce->emit_call_c(Runtime1::entry_for (Runtime1::deoptimize_id));
+ CHECK_BAILOUT();
+ ce->add_call_info_here(_info);
+ DEBUG_ONLY(__ should_not_reach_here());
+}
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+ // Slow case: call to native.
+ __ bind(_entry);
+ __ lgr_if_needed(Z_ARG1, src()->as_register());
+ __ lgr_if_needed(Z_ARG2, src_pos()->as_register());
+ __ lgr_if_needed(Z_ARG3, dst()->as_register());
+ __ lgr_if_needed(Z_ARG4, dst_pos()->as_register());
+ __ lgr_if_needed(Z_ARG5, length()->as_register());
+
+ // Must align call sites, otherwise they can't be updated atomically on MP hardware.
+ ce->align_call(lir_static_call);
+
+ assert((__ offset() + NativeCall::call_far_pcrelative_displacement_offset) % NativeCall::call_far_pcrelative_displacement_alignment == 0,
+ "must be aligned");
+
+ ce->emit_static_call_stub();
+
+ // Prepend each BRASL with a nop.
+ __ relocate(relocInfo::static_call_type);
+ __ z_nop();
+ __ z_brasl(Z_R14, SharedRuntime::get_resolve_static_call_stub());
+ ce->add_call_info_here(info());
+ ce->verify_oop_map(info());
+
+#ifndef PRODUCT
+ __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_slowcase_cnt); // Non-product only: bump the slow case counter.
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+#endif
+
+ __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+ // At this point we know that marking is in progress.
+ // If do_load() is true then we have to emit the
+ // load of the previous value; otherwise it has already
+ // been loaded into _pre_val.
+ __ bind(_entry);
+ ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
+ assert(pre_val()->is_register(), "Precondition.");
+
+ Register pre_val_reg = pre_val()->as_register();
+
+ if (do_load()) {
+ ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+ }
+
+ __ z_ltgr(Z_R1_scratch, pre_val_reg); // Pass oop in Z_R1_scratch to Runtime1::g1_pre_barrier_slow_id.
+ __ branch_optimized(Assembler::bcondZero, _continuation); // Previous value is zero (NULL): skip the runtime call.
+ ce->emit_call_c(Runtime1::entry_for (Runtime1::g1_pre_barrier_slow_id));
+ CHECK_BAILOUT();
+ __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { // Calls g1_post_barrier_slow unless the new value is zero.
+ __ bind(_entry);
+ ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
+ assert(addr()->is_register(), "Precondition.");
+ assert(new_val()->is_register(), "Precondition.");
+ Register new_val_reg = new_val()->as_register();
+ __ z_ltgr(new_val_reg, new_val_reg); // Test the new value against zero.
+ __ branch_optimized(Assembler::bcondZero, _continuation);
+ __ z_lgr(Z_R1_scratch, addr()->as_pointer_register()); // Pass the address in Z_R1_scratch.
+ ce->emit_call_c(Runtime1::entry_for (Runtime1::g1_post_barrier_slow_id));
+ CHECK_BAILOUT();
+ __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+#endif // INCLUDE_ALL_GCS
+
+#undef __
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_Defs_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_DEFS_S390_HPP
+#define CPU_S390_VM_C1_DEFS_S390_HPP
+
+// Native word offsets from memory address (big endian).
+enum {
+ pd_lo_word_offset_in_bytes = BytesPerInt, // Low word follows the high word.
+ pd_hi_word_offset_in_bytes = 0
+};
+
+// Explicit rounding operations are not required to implement the strictFP mode.
+enum {
+ pd_strict_fp_requires_explicit_rounding = false
+};
+
+// registers
+enum {
+ pd_nof_cpu_regs_frame_map = 16, // Number of registers used during code emission.
+ // Treat all registers as caller save (values of callee save are hard to find if caller is in runtime).
+ // unallocated: Z_thread, Z_fp, Z_SP, Z_R0_scratch, Z_R1_scratch, Z_R14
+ pd_nof_cpu_regs_unallocated = 6,
+ pd_nof_caller_save_cpu_regs_frame_map = pd_nof_cpu_regs_frame_map - pd_nof_cpu_regs_unallocated, // Number of cpu registers killed by calls.
+ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // Number of registers that are visible to register allocator.
+ pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map,// Number of registers visible to linear scan.
+ pd_first_cpu_reg = 0,
+ pd_last_cpu_reg = 9, // Others are unallocated (see FrameMap::initialize()).
+
+ pd_nof_fpu_regs_frame_map = 16, // Number of registers used during code emission.
+ pd_nof_fcpu_regs_unallocated = 1, // Leave Z_F15 unallocated and use it as scratch register.
+ pd_nof_caller_save_fpu_regs_frame_map = pd_nof_fpu_regs_frame_map - pd_nof_fcpu_regs_unallocated, // Number of fpu registers killed by calls.
+ pd_nof_fpu_regs_reg_alloc = pd_nof_caller_save_fpu_regs_frame_map, // Number of registers that are visible to register allocator.
+ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // Number of registers visible to linear scan.
+ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
+ pd_last_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - pd_nof_fcpu_regs_unallocated - 1,
+
+ pd_nof_xmm_regs_linearscan = 0,
+ pd_nof_caller_save_xmm_regs = 0,
+ pd_first_xmm_reg = -1,
+ pd_last_xmm_reg = -1
+};
+
+// For debug info: a float value in a register is saved in single precision by runtime stubs.
+enum {
+ pd_float_saved_as_double = false
+};
+
+#endif // CPU_S390_VM_C1_DEFS_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_FpuStackSim_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP
+#define CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP
+
+// No FPU stack on ZARCH_64
+class FpuStackSim;
+
+#endif // CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_s390.inline.hpp"
+
+
+const int FrameMap::pd_c_runtime_reserved_arg_size = 7;
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool outgoing) {
+ LIR_Opr opr = LIR_OprFact::illegalOpr; // Result operand for the location 'reg' of a value of 'type'; 'outgoing' is not consulted.
+ VMReg r_1 = reg->first();
+ VMReg r_2 = reg->second();
+ if (r_1->is_stack()) {
+ // Convert stack slot to an SP offset.
+ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
+ // so we must add it in here.
+ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+ opr = LIR_OprFact::address(new LIR_Address(Z_SP_opr, st_off, type));
+ } else if (r_1->is_Register()) {
+ Register cpu_reg = r_1->as_Register(); // Distinct name: must not shadow the VMRegPair* parameter 'reg'.
+ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
+ opr = as_long_opr(cpu_reg); // Two-slot value held in a single general purpose register.
+ } else if (type == T_OBJECT || type == T_ARRAY) {
+ opr = as_oop_opr(cpu_reg);
+ } else if (type == T_METADATA) {
+ opr = as_metadata_opr(cpu_reg);
+ } else {
+ opr = as_opr(cpu_reg);
+ }
+ } else if (r_1->is_FloatRegister()) {
+ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
+ FloatRegister f = r_1->as_FloatRegister();
+ if (type == T_FLOAT) {
+ opr = as_float_opr(f);
+ } else {
+ opr = as_double_opr(f);
+ }
+ } else {
+ ShouldNotReachHere(); // Location is neither a stack slot nor a cpu/float register.
+ }
+ return opr;
+}
+
+// FrameMap
+//--------------------------------------------------------
+
+FloatRegister FrameMap::_fpu_rnr2reg [FrameMap::nof_fpu_regs]; // mapping c1 regnr. -> FloatRegister
+int FrameMap::_fpu_reg2rnr [FrameMap::nof_fpu_regs]; // mapping assembler encoding -> c1 regnr.
+
+// Some useful constant RInfo's:
+LIR_Opr FrameMap::Z_R0_opr;
+LIR_Opr FrameMap::Z_R1_opr;
+LIR_Opr FrameMap::Z_R2_opr;
+LIR_Opr FrameMap::Z_R3_opr;
+LIR_Opr FrameMap::Z_R4_opr;
+LIR_Opr FrameMap::Z_R5_opr;
+LIR_Opr FrameMap::Z_R6_opr;
+LIR_Opr FrameMap::Z_R7_opr;
+LIR_Opr FrameMap::Z_R8_opr;
+LIR_Opr FrameMap::Z_R9_opr;
+LIR_Opr FrameMap::Z_R10_opr;
+LIR_Opr FrameMap::Z_R11_opr;
+LIR_Opr FrameMap::Z_R12_opr;
+LIR_Opr FrameMap::Z_R13_opr;
+LIR_Opr FrameMap::Z_R14_opr;
+LIR_Opr FrameMap::Z_R15_opr;
+
+LIR_Opr FrameMap::Z_R0_oop_opr;
+LIR_Opr FrameMap::Z_R1_oop_opr;
+LIR_Opr FrameMap::Z_R2_oop_opr;
+LIR_Opr FrameMap::Z_R3_oop_opr;
+LIR_Opr FrameMap::Z_R4_oop_opr;
+LIR_Opr FrameMap::Z_R5_oop_opr;
+LIR_Opr FrameMap::Z_R6_oop_opr;
+LIR_Opr FrameMap::Z_R7_oop_opr;
+LIR_Opr FrameMap::Z_R8_oop_opr;
+LIR_Opr FrameMap::Z_R9_oop_opr;
+LIR_Opr FrameMap::Z_R10_oop_opr;
+LIR_Opr FrameMap::Z_R11_oop_opr;
+LIR_Opr FrameMap::Z_R12_oop_opr;
+LIR_Opr FrameMap::Z_R13_oop_opr;
+LIR_Opr FrameMap::Z_R14_oop_opr;
+LIR_Opr FrameMap::Z_R15_oop_opr;
+
+LIR_Opr FrameMap::Z_R0_metadata_opr;
+LIR_Opr FrameMap::Z_R1_metadata_opr;
+LIR_Opr FrameMap::Z_R2_metadata_opr;
+LIR_Opr FrameMap::Z_R3_metadata_opr;
+LIR_Opr FrameMap::Z_R4_metadata_opr;
+LIR_Opr FrameMap::Z_R5_metadata_opr;
+LIR_Opr FrameMap::Z_R6_metadata_opr;
+LIR_Opr FrameMap::Z_R7_metadata_opr;
+LIR_Opr FrameMap::Z_R8_metadata_opr;
+LIR_Opr FrameMap::Z_R9_metadata_opr;
+LIR_Opr FrameMap::Z_R10_metadata_opr;
+LIR_Opr FrameMap::Z_R11_metadata_opr;
+LIR_Opr FrameMap::Z_R12_metadata_opr;
+LIR_Opr FrameMap::Z_R13_metadata_opr;
+LIR_Opr FrameMap::Z_R14_metadata_opr;
+LIR_Opr FrameMap::Z_R15_metadata_opr;
+
+LIR_Opr FrameMap::Z_SP_opr;
+LIR_Opr FrameMap::Z_FP_opr;
+
+LIR_Opr FrameMap::Z_R2_long_opr;
+LIR_Opr FrameMap::Z_R10_long_opr;
+LIR_Opr FrameMap::Z_R11_long_opr;
+
+LIR_Opr FrameMap::Z_F0_opr;
+LIR_Opr FrameMap::Z_F0_double_opr;
+
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
+
+
+// c1 rnr -> FloatRegister (lookup in the table filled by map_float_register()).
+FloatRegister FrameMap::nr2floatreg (int rnr) {
+ assert(_init_done, "tables not initialized");
+ debug_only(fpu_range_check(rnr);)
+ return _fpu_rnr2reg[rnr];
+}
+
+void FrameMap::map_float_register(int rnr, FloatRegister reg) {
+ debug_only(fpu_range_check(rnr);) // Debug builds: range-check the c1 register number ...
+ debug_only(fpu_range_check(reg->encoding());) // ... and the encoding, since both are used as table indices below.
+ _fpu_rnr2reg[rnr] = reg; // mapping c1 regnr. -> FloatRegister
+ _fpu_reg2rnr[reg->encoding()] = rnr; // mapping assembler encoding -> c1 regnr.
+}
+
+void FrameMap::initialize() {
+ assert(!_init_done, "once");
+
+ DEBUG_ONLY(int allocated = 0;)
+ DEBUG_ONLY(int unallocated = 0;)
+
+ // Fill the c1 register number <-> machine register tables, then create the static operands. Register usage:
+ // Z_thread (Z_R8)
+ // Z_fp (Z_R9)
+ // Z_SP (Z_R15)
+ DEBUG_ONLY(allocated++); map_register(0, Z_R2);
+ DEBUG_ONLY(allocated++); map_register(1, Z_R3);
+ DEBUG_ONLY(allocated++); map_register(2, Z_R4);
+ DEBUG_ONLY(allocated++); map_register(3, Z_R5);
+ DEBUG_ONLY(allocated++); map_register(4, Z_R6);
+ DEBUG_ONLY(allocated++); map_register(5, Z_R7);
+ DEBUG_ONLY(allocated++); map_register(6, Z_R10);
+ DEBUG_ONLY(allocated++); map_register(7, Z_R11);
+ DEBUG_ONLY(allocated++); map_register(8, Z_R12);
+ DEBUG_ONLY(allocated++); map_register(9, Z_R13); // <- last register visible in RegAlloc
+ DEBUG_ONLY(unallocated++); map_register(11, Z_R0); // Z_R0_scratch
+ DEBUG_ONLY(unallocated++); map_register(12, Z_R1); // Z_R1_scratch
+ DEBUG_ONLY(unallocated++); map_register(10, Z_R14); // return pc; TODO: Try to let c1/c2 allocate R14.
+
+ // The following registers are usually unavailable.
+ DEBUG_ONLY(unallocated++); map_register(13, Z_R8);
+ DEBUG_ONLY(unallocated++); map_register(14, Z_R9);
+ DEBUG_ONLY(unallocated++); map_register(15, Z_R15);
+ assert(allocated-1 == pd_last_cpu_reg, "wrong number/mapping of allocated CPU registers");
+ assert(unallocated == pd_nof_cpu_regs_unallocated, "wrong number of unallocated CPU registers");
+ assert(nof_cpu_regs == allocated+unallocated, "wrong number of CPU registers");
+
+ int j = 0;
+ for (int i = 0; i < nof_fpu_regs; i++) {
+ if (as_FloatRegister(i) == Z_fscratch_1) continue; // unallocated
+ map_float_register(j++, as_FloatRegister(i));
+ }
+ assert(j == nof_fpu_regs-1, "missed one fpu reg?");
+ map_float_register(j++, Z_fscratch_1); // The scratch register receives the last c1 fpu number.
+
+ _init_done = true; // Set before building operands: fpu_reg2rnr() and nr2floatreg() assert it.
+
+ Z_R0_opr = as_opr(Z_R0);
+ Z_R1_opr = as_opr(Z_R1);
+ Z_R2_opr = as_opr(Z_R2);
+ Z_R3_opr = as_opr(Z_R3);
+ Z_R4_opr = as_opr(Z_R4);
+ Z_R5_opr = as_opr(Z_R5);
+ Z_R6_opr = as_opr(Z_R6);
+ Z_R7_opr = as_opr(Z_R7);
+ Z_R8_opr = as_opr(Z_R8);
+ Z_R9_opr = as_opr(Z_R9);
+ Z_R10_opr = as_opr(Z_R10);
+ Z_R11_opr = as_opr(Z_R11);
+ Z_R12_opr = as_opr(Z_R12);
+ Z_R13_opr = as_opr(Z_R13);
+ Z_R14_opr = as_opr(Z_R14);
+ Z_R15_opr = as_opr(Z_R15);
+
+ Z_R0_oop_opr = as_oop_opr(Z_R0);
+ Z_R1_oop_opr = as_oop_opr(Z_R1);
+ Z_R2_oop_opr = as_oop_opr(Z_R2);
+ Z_R3_oop_opr = as_oop_opr(Z_R3);
+ Z_R4_oop_opr = as_oop_opr(Z_R4);
+ Z_R5_oop_opr = as_oop_opr(Z_R5);
+ Z_R6_oop_opr = as_oop_opr(Z_R6);
+ Z_R7_oop_opr = as_oop_opr(Z_R7);
+ Z_R8_oop_opr = as_oop_opr(Z_R8);
+ Z_R9_oop_opr = as_oop_opr(Z_R9);
+ Z_R10_oop_opr = as_oop_opr(Z_R10);
+ Z_R11_oop_opr = as_oop_opr(Z_R11);
+ Z_R12_oop_opr = as_oop_opr(Z_R12);
+ Z_R13_oop_opr = as_oop_opr(Z_R13);
+ Z_R14_oop_opr = as_oop_opr(Z_R14);
+ Z_R15_oop_opr = as_oop_opr(Z_R15);
+
+ Z_R0_metadata_opr = as_metadata_opr(Z_R0);
+ Z_R1_metadata_opr = as_metadata_opr(Z_R1);
+ Z_R2_metadata_opr = as_metadata_opr(Z_R2);
+ Z_R3_metadata_opr = as_metadata_opr(Z_R3);
+ Z_R4_metadata_opr = as_metadata_opr(Z_R4);
+ Z_R5_metadata_opr = as_metadata_opr(Z_R5);
+ Z_R6_metadata_opr = as_metadata_opr(Z_R6);
+ Z_R7_metadata_opr = as_metadata_opr(Z_R7);
+ Z_R8_metadata_opr = as_metadata_opr(Z_R8);
+ Z_R9_metadata_opr = as_metadata_opr(Z_R9);
+ Z_R10_metadata_opr = as_metadata_opr(Z_R10);
+ Z_R11_metadata_opr = as_metadata_opr(Z_R11);
+ Z_R12_metadata_opr = as_metadata_opr(Z_R12);
+ Z_R13_metadata_opr = as_metadata_opr(Z_R13);
+ Z_R14_metadata_opr = as_metadata_opr(Z_R14);
+ Z_R15_metadata_opr = as_metadata_opr(Z_R15);
+
+ // TODO: needed? Or can we make Z_R9 available for linear scan allocation.
+ Z_FP_opr = as_pointer_opr(Z_fp);
+ Z_SP_opr = as_pointer_opr(Z_SP);
+
+ Z_R2_long_opr = as_long_opr(Z_R2); // Helper builds double_cpu(rnr, rnr), as the pointer operands above do.
+ Z_R10_long_opr = as_long_opr(Z_R10);
+ Z_R11_long_opr = as_long_opr(Z_R11);
+
+ Z_F0_opr = as_float_opr(Z_F0);
+ Z_F0_double_opr = as_double_opr(Z_F0);
+
+ // All allocated cpu regs are caller saved.
+ for (int c1rnr = 0; c1rnr < max_nof_caller_save_cpu_regs; c1rnr++) {
+ _caller_save_cpu_regs[c1rnr] = as_opr(cpu_rnr2reg(c1rnr));
+ }
+
+ // All allocated fpu regs are caller saved.
+ for (int c1rnr = 0; c1rnr < nof_caller_save_fpu_regs; c1rnr++) {
+ _caller_save_fpu_regs[c1rnr] = as_float_opr(nr2floatreg(c1rnr));
+ }
+}
+
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+ return Address(Z_SP, sp_offset); // Address at byte offset sp_offset from Z_SP.
+}
+
+VMReg FrameMap::fpu_regname (int n) {
+ return nr2floatreg(n)->as_VMReg(); // n is a c1 fpu register number (see nr2floatreg()).
+}
+
+LIR_Opr FrameMap::stack_pointer() {
+ return Z_SP_opr; // Operand for Z_SP, created in FrameMap::initialize().
+}
+
+// JSR 292
+// On ZARCH_64, there is no need to save the SP, because neither
+// method handle intrinsics nor compiled lambda forms modify it.
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
+ return LIR_OprFact::illegalOpr; // No operand is designated for saving the SP.
+}
+
+bool FrameMap::validate_frame() {
+ return true; // Unconditionally reports the frame as valid; no checks are made here.
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_FRAMEMAP_S390_HPP
+#define CPU_S390_VM_C1_FRAMEMAP_S390_HPP
+
+ public:
+
+ enum {
+ nof_reg_args = 5, // Registers Z_ARG1 - Z_ARG5 are available for parameter passing.
+ first_available_sp_in_frame = frame::z_abi_16_size,
+ frame_pad_in_bytes = 0
+ };
+
+ static const int pd_c_runtime_reserved_arg_size;
+
+ static LIR_Opr Z_R0_opr;
+ static LIR_Opr Z_R1_opr;
+ static LIR_Opr Z_R2_opr;
+ static LIR_Opr Z_R3_opr;
+ static LIR_Opr Z_R4_opr;
+ static LIR_Opr Z_R5_opr;
+ static LIR_Opr Z_R6_opr;
+ static LIR_Opr Z_R7_opr;
+ static LIR_Opr Z_R8_opr;
+ static LIR_Opr Z_R9_opr;
+ static LIR_Opr Z_R10_opr;
+ static LIR_Opr Z_R11_opr;
+ static LIR_Opr Z_R12_opr;
+ static LIR_Opr Z_R13_opr;
+ static LIR_Opr Z_R14_opr;
+ static LIR_Opr Z_R15_opr;
+
+ static LIR_Opr Z_R0_oop_opr;
+ static LIR_Opr Z_R1_oop_opr;
+ static LIR_Opr Z_R2_oop_opr;
+ static LIR_Opr Z_R3_oop_opr;
+ static LIR_Opr Z_R4_oop_opr;
+ static LIR_Opr Z_R5_oop_opr;
+ static LIR_Opr Z_R6_oop_opr;
+ static LIR_Opr Z_R7_oop_opr;
+ static LIR_Opr Z_R8_oop_opr;
+ static LIR_Opr Z_R9_oop_opr;
+ static LIR_Opr Z_R10_oop_opr;
+ static LIR_Opr Z_R11_oop_opr;
+ static LIR_Opr Z_R12_oop_opr;
+ static LIR_Opr Z_R13_oop_opr;
+ static LIR_Opr Z_R14_oop_opr;
+ static LIR_Opr Z_R15_oop_opr;
+
+ static LIR_Opr Z_R0_metadata_opr;
+ static LIR_Opr Z_R1_metadata_opr;
+ static LIR_Opr Z_R2_metadata_opr;
+ static LIR_Opr Z_R3_metadata_opr;
+ static LIR_Opr Z_R4_metadata_opr;
+ static LIR_Opr Z_R5_metadata_opr;
+ static LIR_Opr Z_R6_metadata_opr;
+ static LIR_Opr Z_R7_metadata_opr;
+ static LIR_Opr Z_R8_metadata_opr;
+ static LIR_Opr Z_R9_metadata_opr;
+ static LIR_Opr Z_R10_metadata_opr;
+ static LIR_Opr Z_R11_metadata_opr;
+ static LIR_Opr Z_R12_metadata_opr;
+ static LIR_Opr Z_R13_metadata_opr;
+ static LIR_Opr Z_R14_metadata_opr;
+ static LIR_Opr Z_R15_metadata_opr;
+
+ static LIR_Opr Z_SP_opr;
+ static LIR_Opr Z_FP_opr;
+
+ static LIR_Opr Z_R2_long_opr;
+ static LIR_Opr Z_R10_long_opr;
+ static LIR_Opr Z_R11_long_opr;
+
+ static LIR_Opr Z_F0_opr;
+ static LIR_Opr Z_F0_double_opr;
+
+ private:
+ static FloatRegister _fpu_rnr2reg [FrameMap::nof_fpu_regs]; // mapping c1 regnr. -> FloatRegister
+ static int _fpu_reg2rnr [FrameMap::nof_fpu_regs]; // mapping assembler encoding -> c1 regnr.
+
+ static void map_float_register(int rnr, FloatRegister reg);
+
+ // FloatRegister -> c1 rnr (table is indexed by the register's assembler encoding).
+ static int fpu_reg2rnr (FloatRegister reg) {
+ assert(_init_done, "tables not initialized");
+ int c1rnr = _fpu_reg2rnr[reg->encoding()];
+ debug_only(fpu_range_check(c1rnr);)
+ return c1rnr;
+ }
+
+ public:
+
+ static LIR_Opr as_long_opr(Register r) {
+ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); // Both halves name the same c1 register number.
+ }
+ static LIR_Opr as_pointer_opr(Register r) {
+ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); // Same construction as as_long_opr().
+ }
+
+ static LIR_Opr as_float_opr(FloatRegister r) {
+ return LIR_OprFact::single_fpu(fpu_reg2rnr(r));
+ }
+ static LIR_Opr as_double_opr(FloatRegister r) {
+ return LIR_OprFact::double_fpu(fpu_reg2rnr(r));
+ }
+
+ static FloatRegister nr2floatreg (int rnr);
+
+ static VMReg fpu_regname (int n);
+
+ // No callee saved registers (saved values are not accessible if callee is in runtime).
+ static bool is_caller_save_register (LIR_Opr opr) { return true; }
+ static bool is_caller_save_register (Register r) { return true; }
+
+ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
+ static int last_cpu_reg() { return pd_last_cpu_reg; }
+
+#endif // CPU_S390_VM_C1_FRAMEMAP_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,3037 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "ci/ciInstance.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_s390.inline.hpp"
+
+#define __ _masm->
+
+#ifndef PRODUCT
+#undef __
+#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm) : _masm)->
+#endif
+
+//------------------------------------------------------------
+
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
+ // Not used on ZARCH_64
+ ShouldNotCallThis();
+ return false;
+}
+
+LIR_Opr LIR_Assembler::receiverOpr() {
+ return FrameMap::Z_R2_oop_opr; // The receiver is taken from Z_R2, as an oop operand.
+}
+
+LIR_Opr LIR_Assembler::osrBufferPointer() {
+ return FrameMap::Z_R2_opr; // The OSR buffer address is taken from Z_R2 (read in osr_entry()).
+}
+
+int LIR_Assembler::initial_frame_size_in_bytes() const {
+ return in_bytes(frame_map()->framesize_in_bytes());
+}
+
+// Inline cache check: done before the frame is built.
+// The inline cached class is in Z_inline_cache(Z_R9).
+// We fetch the class of the receiver and compare it with the cached class.
+// If they do not match we jump to the slow case.
+int LIR_Assembler::check_icache() {
+ Register receiver = receiverOpr()->as_register();
+ int offset = __ offset(); // Code offset at which the check sequence begins.
+ __ inline_cache_check(receiver, Z_inline_cache);
+ return offset; // Start offset of the inline cache check.
+}
+
+void LIR_Assembler::osr_entry() {
+ // On-stack-replacement entry sequence (NOTE(review): the original pointer to interpreter_sparc.cpp for the frame layout looks like a leftover from another port; confirm the s390 source):
+ //
+ // 1. Create a new compiled activation.
+ // 2. Initialize local variables in the compiled activation. The expression stack must be empty
+ // at the osr_bci; it is not initialized.
+ // 3. Jump to the continuation address in compiled code to resume execution.
+
+ // OSR entry point
+ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
+ BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+ ValueStack* entry_state = osr_entry->end()->state();
+ int number_of_locks = entry_state->locks_size();
+
+ // Create a frame for the compiled activation.
+ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+
+ // OSR buffer is
+ //
+ // locals[nlocals-1..0]
+ // monitors[number_of_locks-1..0]
+ //
+ // Locals is a direct copy of the interpreter frame so in the osr buffer
+ // the first slot in the local array is the last local from the interpreter
+ // and the last slot is local[0] (receiver) from the interpreter
+ //
+ // Similarly with locks. The first lock slot in the osr buffer is the nth lock
+ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
+ // in the interpreter frame (the method lock if a sync method)
+
+ // Initialize monitors in the compiled activation.
+ // Z_R2 (see osrBufferPointer()): pointer to osr buffer
+ //
+ // All other registers are dead at this point and the locals will be
+ // copied into place by code emitted in the IR.
+
+ Register OSR_buf = osrBufferPointer()->as_register();
+ { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+ int monitor_offset = BytesPerWord * method()->max_locals() +
+ (2 * BytesPerWord) * (number_of_locks - 1); // Skip the locals, then (number_of_locks - 1) two-word monitor entries.
+ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
+ // the OSR buffer using 2 word entries: first the lock and then
+ // the oop.
+ for (int i = 0; i < number_of_locks; i++) {
+ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); // Monitor i is read from a descending buffer offset.
+ // Verify the interpreter's monitor has a non-null object.
+ __ asm_assert_mem8_isnot_zero(slot_offset + 1*BytesPerWord, OSR_buf, "locked object is NULL", __LINE__);
+ // Copy the lock field into the compiled activation.
+ __ z_lg(Z_R1_scratch, slot_offset + 0, OSR_buf);
+ __ z_stg(Z_R1_scratch, frame_map()->address_for_monitor_lock(i));
+ __ z_lg(Z_R1_scratch, slot_offset + 1*BytesPerWord, OSR_buf); // Then copy the object (second word of the entry).
+ __ z_stg(Z_R1_scratch, frame_map()->address_for_monitor_object(i));
+ }
+ }
+}
+
+// --------------------------------------------------------------------------------------------
+
+address LIR_Assembler::emit_call_c(address a) {
+ __ align_call_far_patchable(__ pc());
+ address call_addr = __ call_c_opt(a); // NULL signals failure; it is reported below as a constant section overflow.
+ if (call_addr == NULL) {
+ bailout("const section overflow");
+ }
+ return call_addr; // NULL after a bailout; emit_exception_handler() checks via CHECK_BAILOUT_.
+}
+
+int LIR_Assembler::emit_exception_handler() {
+ // If the last instruction is a call (typically to do a throw which
+ // is coming at the end after block reordering) the return address
+ // must still point into the code area in order to avoid assertion
+ // failures when searching for the corresponding bci. => Add a nop.
+ // (was bug 5/14/1999 - gri)
+ __ nop();
+
+ // Generate code for exception handler.
+ address handler_base = __ start_a_stub(exception_handler_size);
+ if (handler_base == NULL) {
+ // Not enough space left for the handler.
+ bailout("exception handler overflow");
+ return -1;
+ }
+
+ int offset = code_offset(); // Code offset of the handler's first instruction.
+
+ address a = Runtime1::entry_for (Runtime1::handle_exception_from_callee_id);
+ emit_call_c(a); // Return value not needed here; a failed emission is caught by CHECK_BAILOUT_ below.
+ CHECK_BAILOUT_(-1);
+ __ should_not_reach_here();
+ guarantee(code_offset() - offset <= exception_handler_size, "overflow");
+ __ end_a_stub();
+
+ return offset; // Handler start offset; -1 is returned on the bailout paths above.
+}
+
+// Emit the code to remove the frame from the stack in the exception
+// unwind path. Returns the code offset at which the emitted sequence starts.
+int LIR_Assembler::emit_unwind_handler() {
+#ifndef PRODUCT
+ if (CommentedAssembly) {
+ _masm->block_comment("Unwind handler");
+ }
+#endif
+
+ int offset = code_offset();
+ Register exception_oop_callee_saved = Z_R10; // Z_R10 is callee-saved.
+ Register Rtmp1 = Z_R11;
+ Register Rtmp2 = Z_R12;
+
+ // Fetch the exception from TLS and clear out exception related thread state.
+ Address exc_oop_addr = Address(Z_thread, JavaThread::exception_oop_offset());
+ Address exc_pc_addr = Address(Z_thread, JavaThread::exception_pc_offset());
+ __ z_lg(Z_EXC_OOP, exc_oop_addr);
+ __ clear_mem(exc_oop_addr, sizeof(oop));
+ __ clear_mem(exc_pc_addr, sizeof(intptr_t));
+
+ __ bind(_unwind_handler_entry);
+ __ verify_not_null_oop(Z_EXC_OOP);
+ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+ __ lgr_if_needed(exception_oop_callee_saved, Z_EXC_OOP); // Preserve the exception.
+ }
+
+ // Perform needed unlocking.
+ MonitorExitStub* stub = NULL;
+ if (method()->is_synchronized()) {
+ // Runtime1::monitorexit_id expects lock address in Z_R1_scratch.
+ LIR_Opr lock = FrameMap::as_opr(Z_R1_scratch);
+ monitor_address(0, lock);
+ stub = new MonitorExitStub(lock, true, 0);
+ __ unlock_object(Rtmp1, Rtmp2, lock->as_register(), *stub->entry());
+ __ bind(*stub->continuation());
+ }
+
+ if (compilation()->env()->dtrace_method_probes()) {
+ ShouldNotReachHere(); // Not supported.
+#if 0
+ __ mov(rdi, r15_thread);
+ __ mov_metadata(rsi, method()->constant_encoding());
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit)));
+#endif
+ }
+
+ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+ __ lgr_if_needed(Z_EXC_OOP, exception_oop_callee_saved); // Restore the exception.
+ }
+
+ // Remove the activation and dispatch to the unwind handler.
+ __ pop_frame();
+ __ z_lg(Z_EXC_PC, _z_abi16(return_pc), Z_SP);
+
+ // Z_EXC_OOP: exception oop
+ // Z_EXC_PC: exception pc
+
+ // Dispatch to the unwind logic.
+ __ load_const_optimized(Z_R5, Runtime1::entry_for (Runtime1::unwind_exception_id));
+ __ z_br(Z_R5);
+
+ // Emit the slow path assembly.
+ if (stub != NULL) {
+ stub->emit_code(this);
+ }
+
+ return offset;
+}
+
+int LIR_Assembler::emit_deopt_handler() {
+ // If the last instruction is a call (typically to do a throw which
+ // is coming at the end after block reordering) the return address
+ // must still point into the code area in order to avoid assertion
+ // failures when searching for the corresponding bci. => Add a nop.
+ // (was bug 5/14/1999 - gri)
+ __ nop();
+
+ // Generate code for deopt handler.
+ address handler_base = __ start_a_stub(deopt_handler_size);
+ if (handler_base == NULL) {
+ // Not enough space left for the handler.
+ bailout("deopt handler overflow");
+ return -1;
+ } int offset = code_offset(); // Code offset of the handler's first instruction.
+ // Size must be constant (see HandlerImpl::emit_deopt_handler).
+ __ load_const(Z_R1_scratch, SharedRuntime::deopt_blob()->unpack());
+ __ call(Z_R1_scratch);
+ guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
+ __ end_a_stub();
+
+ return offset; // Handler start offset; -1 is returned on the bailout path above.
+}
+
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
+ if (o == NULL) {
+ __ clear_reg(reg, true/*64bit*/, false/*set cc*/); // Must not kill cc set by cmove.
+ } else {
+ AddressLiteral a = __ allocate_oop_address(o);
+ bool success = __ load_oop_from_toc(reg, a, reg); // reg is passed both as destination and as third argument.
+ if (!success) {
+ bailout("const section overflow"); // load_oop_from_toc() reported failure.
+ }
+ }
+}
+
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
+ // Allocate a new index in table to hold the object once it's been patched.
+ int oop_index = __ oop_recorder()->allocate_oop_index(NULL);
+ PatchingStub* patch = new PatchingStub(_masm, patching_id(info), oop_index);
+
+ AddressLiteral addrlit((intptr_t)0, oop_Relocation::spec(oop_index));
+ assert(addrlit.rspec().type() == relocInfo::oop_type, "must be an oop reloc");
+ // The NULL will be dynamically patched later so the sequence to
+ // load the address literal must not be optimized.
+ __ load_const(reg, addrlit);
+
+ patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+void LIR_Assembler::metadata2reg(Metadata* md, Register reg) {
+ bool success = __ set_metadata_constant(md, reg);
+ if (!success) {
+ bailout("const section overflow");
+ return;
+ }
+}
+
+void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo *info) {
+ // Allocate a new index in table to hold the klass once it's been patched.
+ int index = __ oop_recorder()->allocate_metadata_index(NULL);
+ PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index);
+ AddressLiteral addrlit((intptr_t)0, metadata_Relocation::spec(index));
+ assert(addrlit.rspec().type() == relocInfo::metadata_type, "must be an metadata reloc");
+ // The NULL will be dynamically patched later so the sequence to
+ // load the address literal must not be optimized.
+ __ load_const(reg, addrlit);
+
+ patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+ switch (op->code()) {
+ case lir_idiv:
+ case lir_irem:
+ arithmetic_idiv(op->code(),
+ op->in_opr1(),
+ op->in_opr2(),
+ op->in_opr3(),
+ op->result_opr(),
+ op->info());
+ break;
+ default: ShouldNotReachHere(); break;
+ }
+}
+
+
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+#ifdef ASSERT
+ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
+ if (op->block() != NULL) { _branch_target_blocks.append(op->block()); }
+ if (op->ublock() != NULL) { _branch_target_blocks.append(op->ublock()); }
+#endif
+
+ if (op->cond() == lir_cond_always) {
+ if (op->info() != NULL) { add_debug_info_for_branch(op->info()); }
+ __ branch_optimized(Assembler::bcondAlways, *(op->label()));
+ } else {
+ Assembler::branch_condition acond = Assembler::bcondZero; // Initial value only; each accepted condition overwrites it below.
+ if (op->code() == lir_cond_float_branch) {
+ assert(op->ublock() != NULL, "must have unordered successor");
+ __ branch_optimized(Assembler::bcondNotOrdered, *(op->ublock()->label())); // Unordered results go to the unordered successor first.
+ }
+ switch (op->cond()) {
+ case lir_cond_equal: acond = Assembler::bcondEqual; break;
+ case lir_cond_notEqual: acond = Assembler::bcondNotEqual; break;
+ case lir_cond_less: acond = Assembler::bcondLow; break;
+ case lir_cond_lessEqual: acond = Assembler::bcondNotHigh; break;
+ case lir_cond_greaterEqual: acond = Assembler::bcondNotLow; break;
+ case lir_cond_greater: acond = Assembler::bcondHigh; break;
+ case lir_cond_belowEqual: acond = Assembler::bcondNotHigh; break; // Same branch condition as lessEqual.
+ case lir_cond_aboveEqual: acond = Assembler::bcondNotLow; break; // Same branch condition as greaterEqual.
+ default: ShouldNotReachHere();
+ }
+ __ branch_optimized(acond,*(op->label()));
+ }
+}
+
+
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+ LIR_Opr src = op->in_opr();
+ LIR_Opr dest = op->result_opr();
+
+ switch (op->bytecode()) {
+ case Bytecodes::_i2l:
+ __ move_reg_if_needed(dest->as_register_lo(), T_LONG, src->as_register(), T_INT);
+ break;
+
+ case Bytecodes::_l2i:
+ __ move_reg_if_needed(dest->as_register(), T_INT, src->as_register_lo(), T_LONG);
+ break;
+
+ case Bytecodes::_i2b:
+ __ move_reg_if_needed(dest->as_register(), T_BYTE, src->as_register(), T_INT);
+ break;
+
+ case Bytecodes::_i2c:
+ __ move_reg_if_needed(dest->as_register(), T_CHAR, src->as_register(), T_INT);
+ break;
+
+ case Bytecodes::_i2s:
+ __ move_reg_if_needed(dest->as_register(), T_SHORT, src->as_register(), T_INT);
+ break;
+
+ case Bytecodes::_f2d:
+ assert(dest->is_double_fpu(), "check");
+ __ move_freg_if_needed(dest->as_double_reg(), T_DOUBLE, src->as_float_reg(), T_FLOAT);
+ break;
+
+ case Bytecodes::_d2f:
+ assert(dest->is_single_fpu(), "check");
+ __ move_freg_if_needed(dest->as_float_reg(), T_FLOAT, src->as_double_reg(), T_DOUBLE);
+ break;
+
+ case Bytecodes::_i2f:
+ __ z_cefbr(dest->as_float_reg(), src->as_register());
+ break;
+
+ case Bytecodes::_i2d:
+ __ z_cdfbr(dest->as_double_reg(), src->as_register());
+ break;
+
+ case Bytecodes::_l2f:
+ __ z_cegbr(dest->as_float_reg(), src->as_register_lo());
+ break;
+ case Bytecodes::_l2d:
+ __ z_cdgbr(dest->as_double_reg(), src->as_register_lo());
+ break;
+
+ case Bytecodes::_f2i:
+ case Bytecodes::_f2l: {
+ Label done;
+ FloatRegister Rsrc = src->as_float_reg();
+ Register Rdst = (op->bytecode() == Bytecodes::_f2i ? dest->as_register() : dest->as_register_lo());
+ __ clear_reg(Rdst, true, false); // Preset result to 0 (64 bit); don't set CC.
+ __ z_cebr(Rsrc, Rsrc); // Compare the source with itself to detect NaN.
+ __ z_brno(done); // NaN -> 0
+ if (op->bytecode() == Bytecodes::_f2i) {
+ __ z_cfebr(Rdst, Rsrc, Assembler::to_zero);
+ } else { // op->bytecode() == Bytecodes::_f2l
+ __ z_cgebr(Rdst, Rsrc, Assembler::to_zero);
+ }
+ __ bind(done);
+ }
+ break;
+
+ case Bytecodes::_d2i:
+ case Bytecodes::_d2l: {
+ Label done;
+ FloatRegister Rsrc = src->as_double_reg();
+ Register Rdst = (op->bytecode() == Bytecodes::_d2i ? dest->as_register() : dest->as_register_lo());
+ __ clear_reg(Rdst, true, false); // Don't set CC.
+ __ z_cdbr(Rsrc, Rsrc); // Compare the source with itself to detect NaN.
+ __ z_brno(done); // NaN -> 0
+ if (op->bytecode() == Bytecodes::_d2i) {
+ __ z_cfdbr(Rdst, Rsrc, Assembler::to_zero);
+ } else { // Bytecodes::_d2l
+ __ z_cgdbr(Rdst, Rsrc, Assembler::to_zero);
+ }
+ __ bind(done);
+ }
+ break;
+
+ default: ShouldNotReachHere();
+ }
+}
+
+// Pads the instruction stream with a nop if the call that is about to be
+// emitted for 'code' would otherwise end up misaligned.
+void LIR_Assembler::align_call(LIR_Code code) {
+  // End of call instruction must be 4 byte aligned.
+  const bool is_ic_call     = (code == lir_icvirtual_call);
+  const bool is_direct_call = (code == lir_static_call     ||
+                               code == lir_optvirtual_call ||
+                               code == lir_dynamic_call);
+
+  int aligned_pos = __ offset();
+  if (is_ic_call || is_direct_call) {
+    if (is_ic_call) {
+      // An inline cache call is preceded by a load from the TOC.
+      aligned_pos += MacroAssembler::load_const_from_toc_size();
+    }
+    aligned_pos += NativeCall::call_far_pcrelative_displacement_offset;
+  } else {
+    // Includes lir_virtual_call (currently, sparc-specific for niagara).
+    ShouldNotReachHere();
+  }
+
+  const int alignment_mask = NativeCall::call_far_pcrelative_displacement_alignment - 1;
+  if ((aligned_pos & alignment_mask) != 0) {
+    __ nop();
+  }
+}
+
+// Emits a relocated nop + BRASL pair that calls op->addr() with the return
+// address in Z_R14, then records the call info at the resulting code offset.
+// The caller is expected to have aligned the code position beforehand
+// (checked by the first assert).
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
+  assert((__ offset() + NativeCall::call_far_pcrelative_displacement_offset) % NativeCall::call_far_pcrelative_displacement_alignment == 0,
+         "must be aligned (offset=%d)", __ offset());
+  assert(rtype == relocInfo::none ||
+         rtype == relocInfo::opt_virtual_call_type ||
+         rtype == relocInfo::static_call_type, "unexpected rtype");
+
+  address call_target = op->addr();
+
+  // Every BRASL gets a nop in front of it; the relocation is attached
+  // at the position of that nop.
+  __ relocate(rtype);
+  __ z_nop();
+  __ z_brasl(Z_R14, call_target);
+
+  CodeEmitInfo* call_info = op->info();
+  add_call_info(code_offset(), call_info);
+}
+
+// Emits an inline cache call: loads the non-oop placeholder word into
+// Z_inline_cache via the TOC, then emits the call with a virtual call
+// relocation that points back at that load.
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
+  AddressLiteral empty_ic((address) Universe::non_oop_word());
+
+  // Remember where the inline cache load starts; the relocation refers to it.
+  address virtual_call_oop_addr = __ pc();
+  if (!__ load_const_from_toc(Z_inline_cache, empty_ic)) {
+    bailout("const section overflow");
+    return;
+  }
+
+  // CALL to fixup routine. Fixup routine uses ScopeDesc info
+  // to determine who we intended to call.
+  __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr));
+  call(op, relocInfo::none);
+}
+
+// not supported
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
+ ShouldNotReachHere();
+}
+
+// Copies from_reg to to_reg with a 64-bit register load (LGR).
+// Emits nothing when both are the same register.
+void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
+  if (from_reg == to_reg) {
+    return;
+  }
+  __ z_lgr(to_reg, from_reg);
+}
+
+// Stores the constant 'src' into the stack slot 'dest'.
+// Object constants are handled separately (NULL is stored as an immediate,
+// anything else goes through Z_R1_scratch); all other supported types are
+// stored with store_const using a per-type memory/constant length.
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_stack(), "should not call otherwise");
+  LIR_Const* constant = src->as_constant_ptr();
+  const BasicType const_type = constant->type();
+
+  if (const_type == T_OBJECT) {
+    Address slot = frame_map()->address_for_slot(dest->single_stack_ix());
+    if (constant->as_jobject() == NULL) {
+      __ store_const(slot, (int64_t)NULL_WORD, 8, 8);
+    } else {
+      jobject2reg(constant->as_jobject(), Z_R1_scratch);
+      __ reg2mem_opt(Z_R1_scratch, slot, true);
+    }
+    return;
+  }
+
+  unsigned int mem_len = 0;   // Length argument for the memory operand.
+  unsigned int con_len = 0;   // Length argument for the constant.
+  int64_t      con_bits = 0;  // Raw bits of the constant.
+  Address      slot;
+
+  switch (const_type) {
+    case T_INT:   // fall through
+    case T_FLOAT:
+      slot = frame_map()->address_for_slot(dest->single_stack_ix());
+      con_bits = constant->as_jint_bits();
+      mem_len = 4;
+      con_len = 4;
+      break;
+
+    case T_ADDRESS:
+      slot = frame_map()->address_for_slot(dest->single_stack_ix());
+      con_bits = constant->as_jint_bits();
+      mem_len = 8;
+      con_len = 4;
+      break;
+
+    case T_LONG:  // fall through
+    case T_DOUBLE:
+      slot = frame_map()->address_for_slot(dest->double_stack_ix());
+      con_bits = (int64_t)(constant->as_jlong_bits());
+      mem_len = 8;
+      con_len = 8;
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+
+  __ store_const(slot, con_bits, mem_len, con_len);
+}
+
+// Stores the constant 'src' to the memory location described by the
+// LIR address 'dest', interpreting the constant according to 'type'.
+// If 'info' is non-NULL, the code offset of the store is registered as an
+// implicit null check site. 'wide' suppresses the compressed-oop form for
+// object stores.
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
+ assert(src->is_constant(), "should not call otherwise");
+ assert(dest->is_address(), "should not call otherwise");
+ // See special case in LIRGenerator::do_StoreIndexed.
+ // T_BYTE: Special case for card mark store.
+ assert(type == T_BYTE || !dest->as_address_ptr()->index()->is_valid(), "not supported");
+ LIR_Const* c = src->as_constant_ptr();
+ Address addr = as_Address(dest->as_address_ptr());
+
+ // store_offset stays -1 until some path below has emitted the store;
+ // lmem/lcon/cbits are the arguments for the generic store_const call
+ // (presumably memory length and constant length in bytes - confirm
+ // against MacroAssembler::store_const).
+ int store_offset = -1;
+ unsigned int lmem = 0;
+ unsigned int lcon = 0;
+ int64_t cbits = 0;
+ switch (type) {
+ case T_INT: // fall through
+ case T_FLOAT:
+ lmem = 4; lcon = 4; cbits = c->as_jint_bits();
+ break;
+
+ case T_ADDRESS:
+ lmem = 8; lcon = 4; cbits = c->as_jint_bits();
+ break;
+
+ case T_OBJECT: // fall through
+ case T_ARRAY:
+ // Object stores are emitted right here (store_offset gets set), so the
+ // generic store_const at the end of the function is skipped.
+ if (c->as_jobject() == NULL) {
+ if (UseCompressedOops && !wide) {
+ store_offset = __ store_const(addr, (int32_t)NULL_WORD, 4, 4);
+ } else {
+ store_offset = __ store_const(addr, (int64_t)NULL_WORD, 8, 8);
+ }
+ } else {
+ // Non-NULL object: materialize in Z_R1_scratch, optionally compress,
+ // then store from the register.
+ jobject2reg(c->as_jobject(), Z_R1_scratch);
+ if (UseCompressedOops && !wide) {
+ __ encode_heap_oop(Z_R1_scratch);
+ store_offset = __ reg2mem_opt(Z_R1_scratch, addr, false);
+ } else {
+ store_offset = __ reg2mem_opt(Z_R1_scratch, addr, true);
+ }
+ }
+ assert(store_offset >= 0, "check");
+ break;
+
+ case T_LONG: // fall through
+ case T_DOUBLE:
+ lmem = 8; lcon = 8; cbits = (int64_t)(c->as_jlong_bits());
+ break;
+
+ case T_BOOLEAN: // fall through
+ case T_BYTE:
+ lmem = 1; lcon = 1; cbits = (int8_t)(c->as_jint());
+ break;
+
+ case T_CHAR: // fall through
+ case T_SHORT:
+ lmem = 2; lcon = 2; cbits = (int16_t)(c->as_jint());
+ break;
+
+ default:
+ ShouldNotReachHere();
+ };
+
+ // Index register is normally not supported, but for
+ // LIRGenerator::CardTableModRef_post_barrier we make an exception.
+ // The byte is loaded into Z_R0_scratch and stored with STC or STCY,
+ // chosen by whether the displacement fits an unsigned 12-bit field.
+ if (type == T_BYTE && dest->as_address_ptr()->index()->is_valid()) {
+ __ load_const_optimized(Z_R0_scratch, (int8_t)(c->as_jint()));
+ store_offset = __ offset();
+ if (Immediate::is_uimm12(addr.disp())) {
+ __ z_stc(Z_R0_scratch, addr);
+ } else {
+ __ z_stcy(Z_R0_scratch, addr);
+ }
+ }
+
+ // Generic path: nothing has been stored yet, emit the constant store now.
+ if (store_offset == -1) {
+ store_offset = __ store_const(addr, cbits, lmem, lcon);
+ assert(store_offset >= 0, "check");
+ }
+
+ if (info != NULL) {
+ add_debug_info_for_null_check(store_offset, info);
+ }
+}
+
+// Loads the constant 'src' into the register 'dest'.
+// Integral constants are materialized directly; object and metadata
+// constants optionally go through the patching variants; float and double
+// constants are placed in the constant section and loaded relative to the
+// TOC register (Z_R1_scratch). Bails out if the constant section is full.
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+  LIR_Const* constant = src->as_constant_ptr();
+  const bool with_patching = (patch_code != lir_patch_none);
+
+  switch (constant->type()) {
+    case T_INT:      // fall through
+    case T_ADDRESS:
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register(), constant->as_jint());
+      break;
+
+    case T_LONG:
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register_lo(), (intptr_t)constant->as_jlong());
+      break;
+
+    case T_OBJECT:
+      if (with_patching) {
+        jobject2reg_with_patching(dest->as_register(), info);
+      } else {
+        jobject2reg(constant->as_jobject(), dest->as_register());
+      }
+      break;
+
+    case T_METADATA:
+      if (with_patching) {
+        klass2reg_with_patching(dest->as_register(), info);
+      } else {
+        metadata2reg(constant->as_metadata(), dest->as_register());
+      }
+      break;
+
+    case T_FLOAT: {
+      Register toc_reg = Z_R1_scratch;
+      __ load_toc(toc_reg);
+      address const_addr = __ float_constant(constant->as_jfloat());
+      if (const_addr == NULL) {
+        bailout("const section overflow");
+        break;
+      }
+      // Displacement of the constant relative to the start of the constant section.
+      int displ = const_addr - _masm->code()->consts()->start();
+      if (dest->is_single_fpu()) {
+        __ z_ley(dest->as_float_reg(), displ, toc_reg);
+      } else {
+        assert(dest->is_single_cpu(), "Must be a cpu register.");
+        __ z_ly(dest->as_register(), displ, toc_reg);
+      }
+      break;
+    }
+
+    case T_DOUBLE: {
+      Register toc_reg = Z_R1_scratch;
+      __ load_toc(toc_reg);
+      address const_addr = __ double_constant(constant->as_jdouble());
+      if (const_addr == NULL) {
+        bailout("const section overflow");
+        break;
+      }
+      // Displacement of the constant relative to the start of the constant section.
+      int displ = const_addr - _masm->code()->consts()->start();
+      if (dest->is_double_fpu()) {
+        __ z_ldy(dest->as_double_reg(), displ, toc_reg);
+      } else {
+        assert(dest->is_double_cpu(), "Must be a long register.");
+        __ z_lg(dest->as_register_lo(), displ, toc_reg);
+      }
+      break;
+    }
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Converts a LIR address into an assembler Address.
+// Supported forms: base + disp, base + index register + disp, and
+// base + (constant index + disp). An illegal base is unimplemented.
+Address LIR_Assembler::as_Address(LIR_Address* addr) {
+  if (addr->base()->is_illegal()) {
+    Unimplemented();
+  }
+
+  Register base_reg  = addr->base()->as_pointer_register();
+  LIR_Opr  index_opr = addr->index();
+
+  if (index_opr->is_illegal()) {
+    // No index at all.
+    return Address(base_reg, addr->disp());
+  }
+  if (index_opr->is_cpu_register()) {
+    Register index_reg = index_opr->as_pointer_register();
+    return Address(base_reg, index_reg, addr->disp());
+  }
+  if (index_opr->is_constant()) {
+    // A constant index is folded into the displacement.
+    intptr_t addr_offset = index_opr->as_constant_ptr()->as_jint() + addr->disp();
+    return Address(base_reg, addr_offset);
+  }
+
+  ShouldNotReachHere();
+  return Address();
+}
+
+// Copies a value of the given type from stack slot 'src' to stack slot
+// 'dest', going through Z_R1_scratch. T_INT/T_FLOAT use the short form of
+// mem2reg_opt/reg2mem_opt (last argument false); all other supported types
+// use the long form (last argument true).
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  Address from;
+  Address to;
+  bool    long_form = false;
+
+  switch (type) {
+    case T_INT:
+    case T_FLOAT:
+      from = frame_map()->address_for_slot(src->single_stack_ix());
+      to   = frame_map()->address_for_slot(dest->single_stack_ix());
+      long_form = false;
+      break;
+
+    case T_ADDRESS:
+    case T_OBJECT:
+      from = frame_map()->address_for_slot(src->single_stack_ix());
+      to   = frame_map()->address_for_slot(dest->single_stack_ix());
+      long_form = true;
+      break;
+
+    case T_LONG:
+    case T_DOUBLE:
+      from = frame_map()->address_for_double_slot(src->double_stack_ix());
+      to   = frame_map()->address_for_double_slot(dest->double_stack_ix());
+      long_form = true;
+      break;
+
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+
+  Register tmp = Z_R1_scratch;
+  __ mem2reg_opt(tmp, from, long_form);
+  __ reg2mem_opt(tmp, to, long_form);
+}
+
+// Must never be called in this port (ShouldNotCallThis).
+// Original note: 4-byte accesses only! Don't use it to access 8 bytes!
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
+  ShouldNotCallThis();
+  return 0; // Never reached; only here to satisfy the return type.
+}
+
+// Must never be called in this port (ShouldNotCallThis).
+// Original note: 4-byte accesses only! Don't use it to access 8 bytes!
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
+  ShouldNotCallThis();
+  return 0; // Never reached; only here to satisfy the return type.
+}
+
+// Loads a value of the given type from the LIR address 'src_opr' into 'dest'.
+//
+//   patch_code - if not lir_patch_none, a PatchingStub for a field access is
+//                created and the displacement is loaded as a full-length
+//                constant 0 instead of the real value.
+//   info       - if non-NULL, the offset recorded before the load is
+//                registered as an implicit null check site.
+//   wide       - for T_OBJECT/T_ARRAY: load an uncompressed oop even if
+//                UseCompressedOops is set.
+//   unaligned  - not used by this implementation.
+//
+// The effective address is base + disp_reg + disp_value. disp_reg starts out
+// as Z_R0 and is replaced by the index register or, when the displacement
+// does not fit a signed 20-bit field, by Z_R1_scratch.
+void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code,
+                            CodeEmitInfo* info, bool wide, bool unaligned) {
+
+  assert(type != T_METADATA, "load of metadata ptr not supported");
+  LIR_Address* addr = src_opr->as_address_ptr();
+  LIR_Opr to_reg = dest;
+
+  Register src = addr->base()->as_pointer_register();
+  Register disp_reg = Z_R0;
+  int disp_value = addr->disp();
+  bool needs_patching = (patch_code != lir_patch_none);
+
+  if (addr->base()->type() == T_OBJECT) {
+    __ verify_oop(src);
+  }
+
+  PatchingStub* patch = NULL;
+  if (needs_patching) {
+    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+    assert(!to_reg->is_double_cpu() ||
+           patch_code == lir_patch_none ||
+           patch_code == lir_patch_normal, "patching doesn't match register");
+  }
+
+  if (addr->index()->is_illegal()) {
+    if (!Immediate::is_simm20(disp_value)) {
+      // Displacement too large for the instruction: keep it in a register.
+      if (needs_patching) {
+        __ load_const(Z_R1_scratch, (intptr_t)0);
+      } else {
+        __ load_const_optimized(Z_R1_scratch, disp_value);
+      }
+      disp_reg = Z_R1_scratch;
+      disp_value = 0;
+    }
+  } else if (!Immediate::is_simm20(disp_value)) {
+    // Index register plus oversized displacement: fold both into
+    // Z_R1_scratch (scratch = disp + index) and address via the scratch
+    // register alone. The index register must NOT be installed as disp_reg
+    // afterwards, otherwise the displacement would be lost.
+    __ load_const_optimized(Z_R1_scratch, disp_value);
+    __ z_la(Z_R1_scratch, 0, Z_R1_scratch, addr->index()->as_pointer_register());
+    disp_reg = Z_R1_scratch;
+    disp_value = 0;
+  } else {
+    // Index register plus displacement that fits the instruction.
+    disp_reg = addr->index()->as_pointer_register();
+  }
+
+  // Remember the offset of the load. The patching_epilog must be done
+  // before the call to add_debug_info, otherwise the PcDescs don't get
+  // entered in increasing order.
+  int offset = code_offset();
+
+  assert(disp_reg != Z_R0 || Immediate::is_simm20(disp_value), "should have set this up");
+
+  // Instructions with a 12-bit unsigned displacement are used where available.
+  bool short_disp = Immediate::is_uimm12(disp_value);
+
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE  :  __ z_lb(dest->as_register(),   disp_value, disp_reg, src); break;
+    case T_CHAR  :  __ z_llgh(dest->as_register(), disp_value, disp_reg, src); break;
+    case T_SHORT :
+      if (short_disp) {
+        __ z_lh(dest->as_register(),  disp_value, disp_reg, src);
+      } else {
+        __ z_lhy(dest->as_register(), disp_value, disp_reg, src);
+      }
+      break;
+    case T_INT   :
+      if (short_disp) {
+        __ z_l(dest->as_register(),  disp_value, disp_reg, src);
+      } else {
+        __ z_ly(dest->as_register(), disp_value, disp_reg, src);
+      }
+      break;
+    case T_ADDRESS:
+      // A load at the klass offset yields a compressed class pointer
+      // when UseCompressedClassPointers is set; decode it.
+      if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
+        __ z_llgf(dest->as_register(), disp_value, disp_reg, src);
+        __ decode_klass_not_null(dest->as_register());
+      } else {
+        __ z_lg(dest->as_register(), disp_value, disp_reg, src);
+      }
+      break;
+    case T_ARRAY : // fall through
+    case T_OBJECT:
+    {
+      if (UseCompressedOops && !wide) {
+        __ z_llgf(dest->as_register(), disp_value, disp_reg, src);
+        __ oop_decoder(dest->as_register(), dest->as_register(), true);
+      } else {
+        __ z_lg(dest->as_register(), disp_value, disp_reg, src);
+      }
+      break;
+    }
+    case T_FLOAT:
+      if (short_disp) {
+        __ z_le(dest->as_float_reg(),  disp_value, disp_reg, src);
+      } else {
+        __ z_ley(dest->as_float_reg(), disp_value, disp_reg, src);
+      }
+      break;
+    case T_DOUBLE:
+      if (short_disp) {
+        __ z_ld(dest->as_double_reg(),  disp_value, disp_reg, src);
+      } else {
+        __ z_ldy(dest->as_double_reg(), disp_value, disp_reg, src);
+      }
+      break;
+    case T_LONG  :  __ z_lg(dest->as_register_lo(), disp_value, disp_reg, src); break;
+    default      : ShouldNotReachHere();
+  }
+  if (type == T_ARRAY || type == T_OBJECT) {
+    __ verify_oop(dest->as_register());
+  }
+
+  if (patch != NULL) {
+    patching_epilog(patch, patch_code, src, info);
+  }
+  if (info != NULL) add_debug_info_for_null_check(offset, info);
+}
+
+// Loads the stack slot 'src' into the register 'dest'.
+// For a single cpu register, T_ARRAY/T_OBJECT/T_METADATA use the long form
+// of mem2reg_opt (last argument true) and oops are verified afterwards;
+// every other type uses the short form.
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  assert(src->is_stack(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+
+  if (dest->is_single_cpu()) {
+    const bool is_oop    = (type == T_ARRAY || type == T_OBJECT);
+    const bool long_form = is_oop || (type == T_METADATA);
+    Address slot = frame_map()->address_for_slot(src->single_stack_ix());
+    __ mem2reg_opt(dest->as_register(), slot, long_form);
+    if (is_oop) {
+      __ verify_oop(dest->as_register());
+    }
+    return;
+  }
+
+  if (dest->is_double_cpu()) {
+    Address slot = frame_map()->address_for_slot(src->double_stack_ix());
+    __ mem2reg_opt(dest->as_register_lo(), slot, true);
+    return;
+  }
+
+  if (dest->is_single_fpu()) {
+    Address slot = frame_map()->address_for_slot(src->single_stack_ix());
+    __ mem2freg_opt(dest->as_float_reg(), slot, false);
+    return;
+  }
+
+  if (dest->is_double_fpu()) {
+    Address slot = frame_map()->address_for_slot(src->double_stack_ix());
+    __ mem2freg_opt(dest->as_double_reg(), slot, true);
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+// Stores the register 'src' into the stack slot 'dest'.
+// For a single cpu register, T_OBJECT/T_ARRAY/T_METADATA use the long form
+// of reg2mem_opt (last argument true) and oops are verified before the
+// store; every other type uses the short form. 'pop_fpu_stack' is not used.
+void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
+  assert(src->is_register(), "should not call otherwise");
+  assert(dest->is_stack(), "should not call otherwise");
+
+  if (src->is_single_cpu()) {
+    const Address dst = frame_map()->address_for_slot(dest->single_stack_ix());
+    const bool is_oop    = (type == T_OBJECT || type == T_ARRAY);
+    const bool long_form = is_oop || (type == T_METADATA);
+    if (is_oop) {
+      __ verify_oop(src->as_register());
+    }
+    __ reg2mem_opt(src->as_register(), dst, long_form);
+    return;
+  }
+
+  if (src->is_double_cpu()) {
+    Address slot = frame_map()->address_for_slot(dest->double_stack_ix());
+    __ reg2mem_opt(src->as_register_lo(), slot, true);
+    return;
+  }
+
+  if (src->is_single_fpu()) {
+    Address slot = frame_map()->address_for_slot(dest->single_stack_ix());
+    __ freg2mem_opt(src->as_float_reg(), slot, false);
+    return;
+  }
+
+  if (src->is_double_fpu()) {
+    Address slot = frame_map()->address_for_slot(dest->double_stack_ix());
+    __ freg2mem_opt(src->as_double_reg(), slot, true);
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+// Register-to-register move. Both operands must be of the same kind
+// (both floating point or both general purpose); mixing them is an error.
+// General purpose moves always copy the full 64-bit register (LGR).
+void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) {
+  const bool from_is_fp = from_reg->is_float_kind();
+  const bool to_is_fp   = to_reg->is_float_kind();
+
+  if (from_is_fp != to_is_fp) {
+    ShouldNotReachHere();
+  } else if (from_is_fp) {
+    if (from_reg->is_double_fpu()) {
+      // double to double moves
+      assert(to_reg->is_double_fpu(), "should match");
+      __ z_ldr(to_reg->as_double_reg(), from_reg->as_double_reg());
+    } else {
+      // float to float moves
+      assert(to_reg->is_single_fpu(), "should match");
+      __ z_ler(to_reg->as_float_reg(), from_reg->as_float_reg());
+    }
+  } else {
+    if (from_reg->is_double_cpu()) {
+      // long (or pointer) source
+      __ z_lgr(to_reg->as_pointer_register(), from_reg->as_pointer_register());
+    } else if (to_reg->is_double_cpu()) {
+      // single register source into a long destination
+      __ z_lgr(to_reg->as_register_lo(), from_reg->as_register());
+    } else {
+      // int to int moves
+      __ z_lgr(to_reg->as_register(), from_reg->as_register());
+    }
+  }
+
+  if (to_reg->type() == T_OBJECT || to_reg->type() == T_ARRAY) {
+    __ verify_oop(to_reg->as_register());
+  }
+}
+
+// Stores the register 'from' to the LIR address 'dest_opr' as a value of the
+// given type.
+//
+//   patch_code    - if not lir_patch_none, a PatchingStub for a field access
+//                   is created and the displacement is loaded as a
+//                   full-length constant 0 instead of the real value.
+//   info          - if non-NULL, the offset of the store instruction is
+//                   registered as an implicit null check site.
+//   wide          - for T_OBJECT/T_ARRAY: store an uncompressed oop even if
+//                   UseCompressedOops is set.
+//   pop_fpu_stack, unaligned - not used by this implementation.
+//
+// The effective address is base + disp_reg + disp_value. disp_reg starts out
+// as Z_R0 and is replaced by the index register or, when the displacement
+// does not fit a signed 20-bit field, by Z_R1_scratch.
+void LIR_Assembler::reg2mem(LIR_Opr from, LIR_Opr dest_opr, BasicType type,
+                            LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack,
+                            bool wide, bool unaligned) {
+  assert(type != T_METADATA, "store of metadata ptr not supported");
+  LIR_Address* addr = dest_opr->as_address_ptr();
+
+  Register dest = addr->base()->as_pointer_register();
+  Register disp_reg = Z_R0;
+  int disp_value = addr->disp();
+  bool needs_patching = (patch_code != lir_patch_none);
+
+  if (addr->base()->is_oop_register()) {
+    __ verify_oop(dest);
+  }
+
+  PatchingStub* patch = NULL;
+  if (needs_patching) {
+    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+    assert(!from->is_double_cpu() ||
+           patch_code == lir_patch_none ||
+           patch_code == lir_patch_normal, "patching doesn't match register");
+  }
+
+  assert(!needs_patching || (!Immediate::is_simm20(disp_value) && addr->index()->is_illegal()), "assumption");
+  if (addr->index()->is_illegal()) {
+    if (!Immediate::is_simm20(disp_value)) {
+      // Displacement too large for the instruction: keep it in a register.
+      if (needs_patching) {
+        __ load_const(Z_R1_scratch, (intptr_t)0);
+      } else {
+        __ load_const_optimized(Z_R1_scratch, disp_value);
+      }
+      disp_reg = Z_R1_scratch;
+      disp_value = 0;
+    }
+  } else if (!Immediate::is_simm20(disp_value)) {
+    // Index register plus oversized displacement: fold both into
+    // Z_R1_scratch (scratch = disp + index) and address via the scratch
+    // register alone. The index register must NOT be installed as disp_reg
+    // afterwards, otherwise the displacement would be lost.
+    __ load_const_optimized(Z_R1_scratch, disp_value);
+    __ z_la(Z_R1_scratch, 0, Z_R1_scratch, addr->index()->as_pointer_register());
+    disp_reg = Z_R1_scratch;
+    disp_value = 0;
+  } else {
+    // Index register plus displacement that fits the instruction.
+    disp_reg = addr->index()->as_pointer_register();
+  }
+
+  assert(disp_reg != Z_R0 || Immediate::is_simm20(disp_value), "should have set this up");
+
+  if (type == T_ARRAY || type == T_OBJECT) {
+    __ verify_oop(from->as_register());
+  }
+
+  // Instructions with a 12-bit unsigned displacement are used where available.
+  bool short_disp = Immediate::is_uimm12(disp_value);
+
+  // Remember the offset of the store. The patching_epilog must be done
+  // before the call to add_debug_info_for_null_check, otherwise the PcDescs don't get
+  // entered in increasing order.
+  int offset = code_offset();
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE  :
+      if (short_disp) {
+        __ z_stc(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+        __ z_stcy(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_CHAR  : // fall through
+    case T_SHORT :
+      if (short_disp) {
+        __ z_sth(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+        __ z_sthy(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_INT   :
+      if (short_disp) {
+        __ z_st(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+        __ z_sty(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_LONG  :  __ z_stg(from->as_register_lo(), disp_value, disp_reg, dest); break;
+    case T_ADDRESS: __ z_stg(from->as_register(),    disp_value, disp_reg, dest); break;
+    case T_ARRAY : // fall through
+    case T_OBJECT:
+    {
+      if (UseCompressedOops && !wide) {
+        // Compress a copy in Z_R14 so the source register stays intact.
+        Register compressed_src = Z_R14;
+        __ z_lgr(compressed_src, from->as_register());
+        __ encode_heap_oop(compressed_src);
+        // The store now starts after the encoding sequence.
+        offset = code_offset();
+        if (short_disp) {
+          __ z_st(compressed_src,  disp_value, disp_reg, dest);
+        } else {
+          __ z_sty(compressed_src, disp_value, disp_reg, dest);
+        }
+      } else {
+        __ z_stg(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    }
+    case T_FLOAT :
+      if (short_disp) {
+        __ z_ste(from->as_float_reg(),  disp_value, disp_reg, dest);
+      } else {
+        __ z_stey(from->as_float_reg(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_DOUBLE:
+      if (short_disp) {
+        __ z_std(from->as_double_reg(),  disp_value, disp_reg, dest);
+      } else {
+        __ z_stdy(from->as_double_reg(), disp_value, disp_reg, dest);
+      }
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  if (patch != NULL) {
+    patching_epilog(patch, patch_code, dest, info);
+  }
+
+  if (info != NULL) add_debug_info_for_null_check(offset, info);
+}
+
+
+// Emits the method return sequence: load the polling page address, restore
+// the return pc into Z_R14 and pop the frame, poll for a safepoint, then
+// branch to the caller. 'result' is only checked against the return
+// register convention.
+void LIR_Assembler::return_op(LIR_Opr result) {
+  assert(result->is_illegal() ||
+         (result->is_single_cpu() && result->as_register() == Z_R2) ||
+         (result->is_double_cpu() && result->as_register_lo() == Z_R2) ||
+         (result->is_single_fpu() && result->as_float_reg() == Z_F0) ||
+         (result->is_double_fpu() && result->as_double_reg() == Z_F0), "convention");
+
+  AddressLiteral pp(os::get_polling_page());
+  __ load_const_optimized(Z_R1_scratch, pp);
+
+  // Pop the frame before the safepoint code.
+  const int  frame_size       = initial_frame_size_in_bytes();
+  const int  retPC_offset     = frame_size + _z_abi16(return_pc);
+  const bool ret_pc_reachable = Displacement::is_validDisp(retPC_offset);
+
+  if (ret_pc_reachable) {
+    // Return pc can be addressed from the current SP: fetch it first.
+    __ z_lg(Z_R14, retPC_offset, Z_SP);
+  }
+  __ add2reg(Z_SP, frame_size);
+  if (!ret_pc_reachable) {
+    // Otherwise restore it after the frame has been popped.
+    __ restore_return_pc();
+  }
+
+  // We need to mark the code position where the load from the safepoint
+  // polling page was emitted as relocInfo::poll_return_type here.
+  __ relocate(relocInfo::poll_return_type);
+  __ load_from_polling_page(Z_R1_scratch);
+
+  __ z_br(Z_R14); // Return to caller.
+}
+
+// Emits a safepoint poll using 'tmp' to hold the polling page address.
+// Debug info is recorded from 'info' (which must not be NULL).
+// Returns the code offset of the polling load.
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+  Register poll_reg = tmp->as_register_lo();
+
+  AddressLiteral pp(os::get_polling_page());
+  __ load_const_optimized(poll_reg, pp);
+
+  guarantee(info != NULL, "Shouldn't be NULL");
+  add_debug_info_for_branch(info);
+
+  const int poll_offset = __ offset();
+  __ relocate(relocInfo::poll_type);
+  __ load_from_polling_page(poll_reg);
+  return poll_offset;
+}
+
+// Emits the stub that belongs to the static call at the current pc.
+// The stub loads a (not yet known) method into Z_method and a placeholder
+// destination into Z_R1 from the TOC and branches to it. Bails out if the
+// stub or the constant section cannot be allocated.
+void LIR_Assembler::emit_static_call_stub() {
+
+  // Stub is fixed up when the corresponding call is converted from calling
+  // compiled code to calling interpreted code.
+
+  address call_pc = __ pc();
+  address stub = __ start_a_stub(call_stub_size);
+  if (stub == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  int start = __ offset();
+
+  __ relocate(static_stub_Relocation::spec(call_pc));
+
+  // See also Matcher::interpreter_method_oop_reg().
+  AddressLiteral meta = __ allocate_metadata_address(NULL);
+  const bool method_loaded = __ load_const_from_toc(Z_method, meta);
+
+  __ set_inst_mark();
+  AddressLiteral a((address)-1);
+  // The second load is only attempted if the first one succeeded.
+  if (!method_loaded || !__ load_const_from_toc(Z_R1, a)) {
+    bailout("const section overflow");
+    return;
+  }
+
+  __ z_br(Z_R1);
+  assert(__ offset() - start <= call_stub_size, "stub too big");
+  __ end_a_stub(); // Update current stubs pointer and restore insts_end.
+}
+
+// Emits a compare of opr1 against opr2 that sets the condition code.
+// The instruction is selected by the kind of opr1 (single cpu, double cpu,
+// single fpu, double fpu) and the kind of opr2 (register, stack slot,
+// constant, address). For int operands, lir_cond_belowEqual and
+// lir_cond_aboveEqual select the logical (unsigned) compare instructions.
+// 'op' is only consulted for null check debug info in the address case.
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
+ bool unsigned_comp = condition == lir_cond_belowEqual || condition == lir_cond_aboveEqual;
+ if (opr1->is_single_cpu()) {
+ Register reg1 = opr1->as_register();
+ if (opr2->is_single_cpu()) {
+ // cpu register - cpu register
+ // Oops are compared as full 64-bit values.
+ if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+ __ z_clgr(reg1, opr2->as_register());
+ } else {
+ assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
+ if (unsigned_comp) {
+ __ z_clr(reg1, opr2->as_register());
+ } else {
+ __ z_cr(reg1, opr2->as_register());
+ }
+ }
+ } else if (opr2->is_stack()) {
+ // cpu register - stack
+ if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+ __ z_cg(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+ } else {
+ if (unsigned_comp) {
+ __ z_cly(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+ } else {
+ __ z_cy(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+ }
+ }
+ } else if (opr2->is_constant()) {
+ // cpu register - constant
+ LIR_Const* c = opr2->as_constant_ptr();
+ if (c->type() == T_INT) {
+ if (unsigned_comp) {
+ __ z_clfi(reg1, c->as_jint());
+ } else {
+ __ z_cfi(reg1, c->as_jint());
+ }
+ } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
+ // In 64bit oops are single register.
+ jobject o = c->as_jobject();
+ if (o == NULL) {
+ // Compare against NULL by testing the register itself.
+ __ z_ltgr(reg1, reg1);
+ } else {
+ jobject2reg(o, Z_R1_scratch);
+ __ z_cgr(reg1, Z_R1_scratch);
+ }
+ } else {
+ fatal("unexpected type: %s", basictype_to_str(c->type()));
+ }
+ } else if (opr2->is_address()) {
+ // cpu register - address
+ // The memory compare may act as an implicit null check.
+ if (op->info() != NULL) {
+ add_debug_info_for_null_check_here(op->info());
+ }
+ if (unsigned_comp) {
+ __ z_cly(reg1, as_Address(opr2->as_address_ptr()));
+ } else {
+ __ z_cy(reg1, as_Address(opr2->as_address_ptr()));
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+
+ } else if (opr1->is_double_cpu()) {
+ assert(!unsigned_comp, "unexpected");
+ Register xlo = opr1->as_register_lo();
+ // NOTE(review): xhi is not used below.
+ Register xhi = opr1->as_register_hi();
+ if (opr2->is_double_cpu()) {
+ __ z_cgr(xlo, opr2->as_register_lo());
+ } else if (opr2->is_constant()) {
+ // cpu register - constant 0
+ assert(opr2->as_jlong() == (jlong)0, "only handles zero");
+ __ z_ltgr(xlo, xlo);
+ } else {
+ ShouldNotReachHere();
+ }
+
+ } else if (opr1->is_single_fpu()) {
+ if (opr2->is_single_fpu()) {
+ __ z_cebr(opr1->as_float_reg(), opr2->as_float_reg());
+ } else {
+ // stack slot
+ // CEB is used when the displacement fits an unsigned 12-bit field;
+ // otherwise the operand is first loaded into Z_fscratch_1.
+ Address addr = frame_map()->address_for_slot(opr2->single_stack_ix());
+ if (Immediate::is_uimm12(addr.disp())) {
+ __ z_ceb(opr1->as_float_reg(), addr);
+ } else {
+ __ z_ley(Z_fscratch_1, addr);
+ __ z_cebr(opr1->as_float_reg(), Z_fscratch_1);
+ }
+ }
+ } else if (opr1->is_double_fpu()) {
+ if (opr2->is_double_fpu()) {
+ __ z_cdbr(opr1->as_double_reg(), opr2->as_double_reg());
+ } else {
+ // stack slot
+ // CDB is used when the displacement fits an unsigned 12-bit field;
+ // otherwise the operand is first loaded into Z_fscratch_1.
+ Address addr = frame_map()->address_for_slot(opr2->double_stack_ix());
+ if (Immediate::is_uimm12(addr.disp())) {
+ __ z_cdb(opr1->as_double_reg(), addr);
+ } else {
+ __ z_ldy(Z_fscratch_1, addr);
+ __ z_cdbr(opr1->as_double_reg(), Z_fscratch_1);
+ }
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+// Three-way compare producing -1, 0 or 1 in 'dst'.
+//   lir_cmp_fd2i  : float/double compare, unordered operands yield  1.
+//   lir_ucmp_fd2i : float/double compare, unordered operands yield -1.
+//   lir_cmp_l2i   : long compare.
+// With load-on-condition support the result is selected without branches,
+// otherwise a short branch sequence is emitted.
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
+  Label done;
+  Register dreg = dst->as_register();
+  const bool branch_free = VM_Version::has_LoadStoreConditional();
+
+  if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
+    assert((left->is_single_fpu() && right->is_single_fpu()) ||
+           (left->is_double_fpu() && right->is_double_fpu()), "unexpected operand types");
+    const bool is_single         = left->is_single_fpu();
+    const bool is_unordered_less = (code == lir_ucmp_fd2i);
+    FloatRegister lreg = is_single ? left->as_float_reg()  : left->as_double_reg();
+    FloatRegister rreg = is_single ? right->as_float_reg() : right->as_double_reg();
+
+    if (is_single) {
+      __ z_cebr(lreg, rreg);
+    } else {
+      __ z_cdbr(lreg, rreg);
+    }
+
+    // Condition under which the result is 1; "not ordered" is folded in
+    // unless unordered operands are to be treated as "less".
+    const Assembler::branch_condition one_cond =
+      is_unordered_less ? Assembler::bcondHigh : Assembler::bcondHighOrNotOrdered;
+
+    if (branch_free) {
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one, 1);
+      __ z_lghi(dreg, 0);
+      __ z_locgr(dreg, one, one_cond);
+      __ z_locgr(dreg, minus_one, is_unordered_less ? Assembler::bcondLowOrNotOrdered : Assembler::bcondLow);
+    } else {
+      __ clear_reg(dreg, true, false);
+      __ z_bre(done); // if (left == right) dst = 0
+
+      // if (left > right || ((code ~= cmpg) && (left <> right)) dst := 1
+      __ z_lhi(dreg, 1);
+      __ z_brc(one_cond, done);
+
+      // if (left < right || ((code ~= cmpl) && (left <> right)) dst := -1
+      __ z_lhi(dreg, -1);
+    }
+  } else {
+    assert(code == lir_cmp_l2i, "check");
+    // The compare comes first in both variants.
+    __ z_cgr(left->as_register_lo(), right->as_register_lo());
+    if (branch_free) {
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one, 1);
+      __ z_lghi(dreg, 0);
+      __ z_locgr(dreg, one, Assembler::bcondHigh);
+      __ z_locgr(dreg, minus_one, Assembler::bcondLow);
+    } else {
+      __ z_lghi(dreg, 0);  // eq value
+      __ z_bre(done);
+      __ z_lghi(dreg, 1);  // gt value
+      __ z_brh(done);
+      __ z_lghi(dreg, -1); // lt value
+    }
+  }
+  __ bind(done);
+}
+
+// result = condition ? opr1 : opr2
+//
+// Relies on the condition code set by a preceding compare. opr1 is moved
+// into 'result' unconditionally; opr2 then replaces it if 'condition' does
+// not hold. With load-on-condition support and a non-constant opr2 this is
+// done without a branch, otherwise opr2 is moved under a branch on the
+// condition. 'type' is not consulted; the width of stack loads is derived
+// from result->type().
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+  // acond: branch condition matching 'condition'; ncond: its negation.
+  Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual;
+  switch (condition) {
+    case lir_cond_equal:        acond = Assembler::bcondEqual;    ncond = Assembler::bcondNotEqual; break;
+    case lir_cond_notEqual:     acond = Assembler::bcondNotEqual; ncond = Assembler::bcondEqual;    break;
+    case lir_cond_less:         acond = Assembler::bcondLow;      ncond = Assembler::bcondNotLow;   break;
+    case lir_cond_lessEqual:    acond = Assembler::bcondNotHigh;  ncond = Assembler::bcondHigh;     break;
+    case lir_cond_greaterEqual: acond = Assembler::bcondNotLow;   ncond = Assembler::bcondLow;      break;
+    case lir_cond_greater:      acond = Assembler::bcondHigh;     ncond = Assembler::bcondNotHigh;  break;
+    case lir_cond_belowEqual:   acond = Assembler::bcondNotHigh;  ncond = Assembler::bcondHigh;     break;
+    case lir_cond_aboveEqual:   acond = Assembler::bcondNotLow;   ncond = Assembler::bcondLow;      break;
+    default:                    ShouldNotReachHere();
+  }
+
+  // Unconditionally move opr1 into the result.
+  if (opr1->is_cpu_register()) {
+    reg2reg(opr1, result);
+  } else if (opr1->is_stack()) {
+    stack2reg(opr1, result, result->type());
+  } else if (opr1->is_constant()) {
+    const2reg(opr1, result, lir_patch_none, NULL);
+  } else {
+    ShouldNotReachHere();
+  }
+
+  if (VM_Version::has_LoadStoreConditional() && !opr2->is_constant()) {
+    // Optimized version that does not require a branch.
+    if (opr2->is_single_cpu()) {
+      assert(opr2->cpu_regnr() != result->cpu_regnr(), "opr2 already overwritten by previous move");
+      __ z_locgr(result->as_register(), opr2->as_register(), ncond);
+    } else if (opr2->is_double_cpu()) {
+      assert(opr2->cpu_regnrLo() != result->cpu_regnrLo() && opr2->cpu_regnrLo() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
+      assert(opr2->cpu_regnrHi() != result->cpu_regnrLo() && opr2->cpu_regnrHi() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
+      __ z_locgr(result->as_register_lo(), opr2->as_register_lo(), ncond);
+    } else if (opr2->is_single_stack()) {
+      // Use the same load width as stack2reg (the branch fallback below):
+      // objects, arrays and metadata are 64-bit values even though they
+      // live in a "single" stack slot; everything else is a 32-bit load.
+      const BasicType result_type = result->type();
+      const bool is_64bit_value = (result_type == T_OBJECT ||
+                                   result_type == T_ARRAY  ||
+                                   result_type == T_METADATA);
+      Address slot = frame_map()->address_for_slot(opr2->single_stack_ix());
+      if (is_64bit_value) {
+        __ z_locg(result->as_register(), slot, ncond);
+      } else {
+        __ z_loc(result->as_register(), slot, ncond);
+      }
+    } else if (opr2->is_double_stack()) {
+      __ z_locg(result->as_register_lo(), frame_map()->address_for_slot(opr2->double_stack_ix()), ncond);
+    } else {
+      ShouldNotReachHere();
+    }
+  } else {
+    // Branch over the move of opr2 if the condition holds.
+    Label skip;
+    __ z_brc(acond, skip);
+    if (opr2->is_cpu_register()) {
+      reg2reg(opr2, result);
+    } else if (opr2->is_stack()) {
+      stack2reg(opr2, result, result->type());
+    } else if (opr2->is_constant()) {
+      const2reg(opr2, result, lir_patch_none, NULL);
+    } else {
+      ShouldNotReachHere();
+    }
+    __ bind(skip);
+  }
+}
+
+// Emits a two-operand arithmetic instruction: left = left <op> right.
+// 'left' and 'dest' are asserted to be the same operand in every branch.
+// Dispatch is done on the kind of 'left':
+//  - single cpu register: right is a register (add/sub/mul), a stack slot (add/sub)
+//    or a jint constant (add/sub/mul).
+//  - double cpu register: right is a register or a constant (add/sub/mul).
+//  - single/double fpu register: right is an fpu register or a stack slot
+//    (add/sub/mul/div, including the strictfp variants of mul and div).
+//  - address: only lir_add of a jint constant to a T_INT or T_LONG memory location.
+// 'info' must be NULL (integer division/remainder is not handled here).
+// 'pop_fpu_stack' is not used by this implementation.
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
+ CodeEmitInfo* info, bool pop_fpu_stack) {
+ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
+
+ if (left->is_single_cpu()) {
+ assert(left == dest, "left and dest must be equal");
+ Register lreg = left->as_register();
+
+ if (right->is_single_cpu()) {
+ // cpu register - cpu register
+ Register rreg = right->as_register();
+ switch (code) {
+ case lir_add: __ z_ar (lreg, rreg); break;
+ case lir_sub: __ z_sr (lreg, rreg); break;
+ case lir_mul: __ z_msr(lreg, rreg); break;
+ default: ShouldNotReachHere();
+ }
+
+ } else if (right->is_stack()) {
+ // cpu register - stack
+ // Only add and sub are supported with a stack operand.
+ Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
+ switch (code) {
+ case lir_add: __ z_ay(lreg, raddr); break;
+ case lir_sub: __ z_sy(lreg, raddr); break;
+ default: ShouldNotReachHere();
+ }
+
+ } else if (right->is_constant()) {
+ // cpu register - constant
+ // Subtraction is emitted as addition of the negated constant.
+ jint c = right->as_constant_ptr()->as_jint();
+ switch (code) {
+ case lir_add: __ z_agfi(lreg, c); break;
+ case lir_sub: __ z_agfi(lreg, -c); break; // note: -min_jint == min_jint
+ case lir_mul: __ z_msfi(lreg, c); break;
+ default: ShouldNotReachHere();
+ }
+
+ } else {
+ ShouldNotReachHere();
+ }
+
+ } else if (left->is_double_cpu()) {
+ assert(left == dest, "left and dest must be equal");
+ // Only the 'lo' registers are used by the instructions emitted below.
+ Register lreg_lo = left->as_register_lo();
+ Register lreg_hi = left->as_register_hi();
+
+ if (right->is_double_cpu()) {
+ // cpu register - cpu register
+ Register rreg_lo = right->as_register_lo();
+ Register rreg_hi = right->as_register_hi();
+ assert_different_registers(lreg_lo, rreg_lo);
+ switch (code) {
+ case lir_add:
+ __ z_agr(lreg_lo, rreg_lo);
+ break;
+ case lir_sub:
+ __ z_sgr(lreg_lo, rreg_lo);
+ break;
+ case lir_mul:
+ __ z_msgr(lreg_lo, rreg_lo);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ } else if (right->is_constant()) {
+ // cpu register - constant
+ jlong c = right->as_constant_ptr()->as_jlong_bits();
+ switch (code) {
+ case lir_add: __ z_agfi(lreg_lo, c); break;
+ case lir_sub:
+ // Subtraction is emitted as addition of the negated constant, except for
+ // min_jint whose negation needs the unsigned immediate form.
+ if (c != min_jint) {
+ __ z_agfi(lreg_lo, -c);
+ } else {
+ // -min_jint cannot be represented as simm32 in z_agfi
+ // min_jint sign extended: 0xffffffff80000000
+ // -min_jint as 64 bit integer: 0x0000000080000000
+ // 0x80000000 can be represented as uimm32 in z_algfi
+ // lreg_lo := lreg_lo + -min_jint == lreg_lo + 0x80000000
+ __ z_algfi(lreg_lo, UCONST64(0x80000000));
+ }
+ break;
+ case lir_mul: __ z_msgfi(lreg_lo, c); break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ } else {
+ ShouldNotReachHere();
+ }
+
+ } else if (left->is_single_fpu()) {
+ assert(left == dest, "left and dest must be equal");
+ FloatRegister lreg = left->as_float_reg();
+ // rreg stays fnoreg if the right operand is not already in an fpu register.
+ FloatRegister rreg = right->is_single_fpu() ? right->as_float_reg() : fnoreg;
+ Address raddr;
+
+ if (rreg == fnoreg) {
+ assert(right->is_single_stack(), "constants should be loaded into register");
+ raddr = frame_map()->address_for_slot(right->single_stack_ix());
+ // The memory forms used below are only chosen if the displacement fits into an
+ // unsigned 12 bit field. Otherwise the operand goes through Z_fscratch_1 and the
+ // register-register forms are used.
+ if (!Immediate::is_uimm12(raddr.disp())) {
+ __ mem2freg_opt(rreg = Z_fscratch_1, raddr, false);
+ }
+ }
+
+ if (rreg != fnoreg) {
+ // fpu register - fpu register
+ switch (code) {
+ case lir_add: __ z_aebr(lreg, rreg); break;
+ case lir_sub: __ z_sebr(lreg, rreg); break;
+ case lir_mul_strictfp: // fall through
+ case lir_mul: __ z_meebr(lreg, rreg); break;
+ case lir_div_strictfp: // fall through
+ case lir_div: __ z_debr(lreg, rreg); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ // fpu register - stack
+ switch (code) {
+ case lir_add: __ z_aeb(lreg, raddr); break;
+ case lir_sub: __ z_seb(lreg, raddr); break;
+ case lir_mul_strictfp: // fall through
+ case lir_mul: __ z_meeb(lreg, raddr); break;
+ case lir_div_strictfp: // fall through
+ case lir_div: __ z_deb(lreg, raddr); break;
+ default: ShouldNotReachHere();
+ }
+ }
+ } else if (left->is_double_fpu()) {
+ assert(left == dest, "left and dest must be equal");
+ FloatRegister lreg = left->as_double_reg();
+ // rreg stays fnoreg if the right operand is not already in an fpu register.
+ FloatRegister rreg = right->is_double_fpu() ? right->as_double_reg() : fnoreg;
+ Address raddr;
+
+ if (rreg == fnoreg) {
+ assert(right->is_double_stack(), "constants should be loaded into register");
+ raddr = frame_map()->address_for_slot(right->double_stack_ix());
+ // Same displacement rule as in the single precision case above.
+ if (!Immediate::is_uimm12(raddr.disp())) {
+ __ mem2freg_opt(rreg = Z_fscratch_1, raddr, true);
+ }
+ }
+
+ if (rreg != fnoreg) {
+ // fpu register - fpu register
+ switch (code) {
+ case lir_add: __ z_adbr(lreg, rreg); break;
+ case lir_sub: __ z_sdbr(lreg, rreg); break;
+ case lir_mul_strictfp: // fall through
+ case lir_mul: __ z_mdbr(lreg, rreg); break;
+ case lir_div_strictfp: // fall through
+ case lir_div: __ z_ddbr(lreg, rreg); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ // fpu register - stack
+ switch (code) {
+ case lir_add: __ z_adb(lreg, raddr); break;
+ case lir_sub: __ z_sdb(lreg, raddr); break;
+ case lir_mul_strictfp: // fall through
+ case lir_mul: __ z_mdb(lreg, raddr); break;
+ case lir_div_strictfp: // fall through
+ case lir_div: __ z_ddb(lreg, raddr); break;
+ default: ShouldNotReachHere();
+ }
+ }
+ } else if (left->is_address()) {
+ // memory - constant: add a jint constant directly to a memory location.
+ assert(left == dest, "left and dest must be equal");
+ assert(code == lir_add, "unsupported operation");
+ assert(right->is_constant(), "unsupported operand");
+ jint c = right->as_constant_ptr()->as_jint();
+ LIR_Address* lir_addr = left->as_address_ptr();
+ Address addr = as_Address(lir_addr);
+ switch (lir_addr->type()) {
+ case T_INT:
+ __ add2mem_32(addr, c, Z_R1_scratch);
+ break;
+ case T_LONG:
+ __ add2mem_64(addr, c, Z_R1_scratch);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+// Emits no code in this (s390) implementation.
+void LIR_Assembler::fpop() {
+ // do nothing
+}
+
+// Emits the math intrinsics that are implemented with a single instruction on
+// double registers: lir_sqrt (z_sqdbr) and lir_abs (z_lpdbr).
+// 'value' is the input, 'dest' receives the result. No thread register is
+// needed, so 'thread' must be an invalid operand. 'op' is not used.
+// Any other code is a programming error.
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
+  if (code == lir_sqrt) {
+    assert(!thread->is_valid(), "there is no need for a thread_reg for dsqrt");
+    FloatRegister from = value->as_double_reg();
+    FloatRegister to   = dest->as_double_reg();
+    __ z_sqdbr(to, from);
+  } else if (code == lir_abs) {
+    assert(!thread->is_valid(), "there is no need for a thread_reg for fabs");
+    FloatRegister from = value->as_double_reg();
+    FloatRegister to   = dest->as_double_reg();
+    __ z_lpdbr(to, from);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Emits a bitwise and/or/xor of 'left' and 'right'. The operation is performed
+// in place on the register of 'left'; the result is then moved to 'dst'.
+//  - left is a single cpu register: 32 bit operation, right is a jint constant,
+//    a stack slot or a register.
+//  - otherwise: 64 bit operation on the 'lo' register of left, right is a
+//    constant (materialized in Z_R1_scratch) or a register.
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
+  if (!left->is_single_cpu()) {
+    // 64 bit case: bring the right operand into a register, then emit one
+    // register-register instruction.
+    Register lhs = left->as_register_lo();
+    Register rhs;
+    if (right->is_constant()) {
+      __ load_const_optimized(Z_R1_scratch, right->as_constant_ptr()->as_jlong());
+      rhs = Z_R1_scratch;
+    } else if (right->type() == T_OBJECT || right->type() == T_ARRAY) {
+      // Object operands are held in a single register.
+      rhs = right->as_register();
+    } else {
+      rhs = right->as_register_lo();
+    }
+
+    switch (code) {
+      case lir_logic_and: __ z_ngr(lhs, rhs); break;
+      case lir_logic_or:  __ z_ogr(lhs, rhs); break;
+      case lir_logic_xor: __ z_xgr(lhs, rhs); break;
+      default: ShouldNotReachHere();
+    }
+
+    move_regs(lhs, dst->as_register_lo());
+    return;
+  }
+
+  // 32 bit case.
+  Register lhs = left->as_register();
+
+  if (right->is_constant()) {
+    // register - immediate
+    int imm = right->as_constant_ptr()->as_jint();
+    switch (code) {
+      case lir_logic_and: __ z_nilf(lhs, imm); break;
+      case lir_logic_or:  __ z_oilf(lhs, imm); break;
+      case lir_logic_xor: __ z_xilf(lhs, imm); break;
+      default: ShouldNotReachHere();
+    }
+  } else if (right->is_stack()) {
+    // register - stack slot
+    Address slot = frame_map()->address_for_slot(right->single_stack_ix());
+    switch (code) {
+      case lir_logic_and: __ z_ny(lhs, slot); break;
+      case lir_logic_or:  __ z_oy(lhs, slot); break;
+      case lir_logic_xor: __ z_xy(lhs, slot); break;
+      default: ShouldNotReachHere();
+    }
+  } else {
+    // register - register
+    Register rhs = right->as_register();
+    switch (code) {
+      case lir_logic_and: __ z_nr(lhs, rhs); break;
+      case lir_logic_or : __ z_or(lhs, rhs); break;
+      case lir_logic_xor: __ z_xr(lhs, rhs); break;
+      default: ShouldNotReachHere();
+    }
+  }
+
+  move_regs(lhs, dst->as_register());
+}
+
+// Emits code for integer division (lir_idiv) and remainder (lir_irem), for
+// 64 bit operands (left is a double cpu register) and 32 bit operands
+// (left is a single cpu register).
+// See operand selection in LIRGenerator::do_ArithmeticOp_Int().
+//
+// Divisor is a constant: it must be a power of 2. The operation is done with
+// shifts and logical operations, using Z_R0_scratch and Z_R1_scratch as
+// temporaries. The 64 bit constant min_jlong is handled separately.
+//
+// Divisor is a register: a divide instruction (z_dsgr / z_dsgfr) is emitted on
+// the register pair starting at the predecessor of 'left'. As asserted below,
+// the dividend must be in Z_R11; lir_idiv takes its result from Z_R11 and
+// lir_irem takes its result from Z_R10, with 'temp' being the other register
+// of the pair.
+//
+// 'info' is only used to record debug info for the divide instruction when
+// ImplicitDiv0Checks is set.
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
+  if (left->is_double_cpu()) {
+    // 64 bit integer case
+    assert(left->is_double_cpu(), "left must be register");
+    assert(right->is_double_cpu() || is_power_of_2_long(right->as_jlong()),
+           "right must be register or power of 2 constant");
+    assert(result->is_double_cpu(), "result must be register");
+
+    Register lreg = left->as_register_lo();
+    Register dreg = result->as_register_lo();
+
+    if (right->is_constant()) {
+      // Convert division by a power of two into some shifts and logical operations.
+      Register treg1 = Z_R0_scratch;
+      Register treg2 = Z_R1_scratch;
+      jlong divisor = right->as_jlong();
+      jlong log_divisor = log2_long(right->as_jlong());
+
+      if (divisor == min_jlong) {
+        if (code == lir_irem) {
+          // Remainder by min_jlong: x % min_jlong == x for every x except
+          // min_jlong itself, where the remainder is 0. The division sequence
+          // below would produce 0 or 1 instead, so handle it separately.
+          NearLabel done;
+          __ move_reg_if_needed(dreg, T_LONG, lreg, T_LONG);
+          __ compare64_and_branch(dreg, min_jlong, Assembler::bcondNotEqual, done);
+          __ z_lghi(dreg, 0);          // min_jlong % min_jlong = 0
+          __ bind(done);
+          return;
+        }
+        assert(code == lir_idiv, "check");
+        // Min_jlong is special. Result is '0' except for min_jlong/min_jlong = 1.
+        if (dreg == lreg) {
+          // The dividend is overwritten by the result, so compare first and
+          // branch on the saved condition code afterwards.
+          NearLabel done;
+          __ load_const_optimized(treg2, min_jlong);
+          __ z_cgr(lreg, treg2);
+          __ z_lghi(dreg, 0);          // Preserves condition code.
+          __ z_brne(done);
+          __ z_lghi(dreg, 1);          // min_jlong / min_jlong = 1
+          __ bind(done);
+        } else {
+          assert_different_registers(dreg, lreg);
+          NearLabel done;
+          __ z_lghi(dreg, 0);
+          __ compare64_and_branch(lreg, min_jlong, Assembler::bcondNotEqual, done);
+          __ z_lghi(dreg, 1);
+          __ bind(done);
+        }
+        return;
+      }
+      __ move_reg_if_needed(dreg, T_LONG, lreg, T_LONG);
+      // treg2 = (dividend < 0) ? divisor - 1 : 0
+      if (divisor == 2) {
+        __ z_srlg(treg2, dreg, 63);    // dividend < 0 ?  1 : 0
+      } else {
+        __ z_srag(treg2, dreg, 63);    // dividend < 0 ? -1 : 0
+        __ and_imm(treg2, divisor - 1, treg1, true);
+      }
+      if (code == lir_idiv) {
+        // quotient = (dividend + treg2) >> log2(divisor)
+        __ z_agr(dreg, treg2);
+        __ z_srag(dreg, dreg, log_divisor);
+      } else {
+        assert(code == lir_irem, "check");
+        // remainder = dividend - ((dividend + treg2) & ~(divisor - 1))
+        __ z_agr(treg2, dreg);
+        __ and_imm(treg2, ~(divisor - 1), treg1, true);
+        __ z_sgr(dreg, treg2);
+      }
+      return;
+    }
+
+    // Divisor is not a power of 2 constant.
+    Register rreg = right->as_register_lo();
+    Register treg = temp->as_register_lo();
+    assert(right->is_double_cpu(), "right must be register");
+    assert(lreg == Z_R11, "see ldivInOpr()");
+    assert(rreg != lreg, "right register must not be same as left register");
+    assert((code == lir_idiv && dreg == Z_R11 && treg == Z_R10) ||
+           (code == lir_irem && dreg == Z_R10 && treg == Z_R11), "see ldivInOpr(), ldivOutOpr(), lremOutOpr()");
+
+    Register R1 = lreg->predecessor();
+    Register R2 = rreg;
+    assert(code != lir_idiv || lreg==dreg, "see code below");
+    // A divisor of -1 is handled without the divide instruction: the quotient
+    // is the negated dividend and the remainder is 0. Prepare that result
+    // before the compare so that the branch can go straight to 'done'.
+    if (code == lir_idiv) {
+      __ z_lcgr(lreg, lreg);
+    } else {
+      __ clear_reg(dreg, true, false);
+    }
+    NearLabel done;
+    __ compare64_and_branch(R2, -1, Assembler::bcondEqual, done);
+    if (code == lir_idiv) {
+      __ z_lcgr(lreg, lreg); // Revert lcgr above.
+    }
+    if (ImplicitDiv0Checks) {
+      // Record debug info at the offset of the divide instruction emitted next.
+      // NOTE(review): the previous comment here stated that no debug info is
+      // needed because the divide won't trap, which does not match this call.
+      // Confirm the intent.
+      add_debug_info_for_div0(__ offset(), info);
+    }
+    __ z_dsgr(R1, R2);
+    __ bind(done);
+    return;
+  }
+
+  // 32 bit integer case
+
+  assert(left->is_single_cpu(), "left must be register");
+  assert(right->is_single_cpu() || is_power_of_2(right->as_jint()), "right must be register or power of 2 constant");
+  assert(result->is_single_cpu(), "result must be register");
+
+  Register lreg = left->as_register();
+  Register dreg = result->as_register();
+
+  if (right->is_constant()) {
+    // Convert division by a power of two into some shifts and logical operations.
+    // The dividend is sign extended to 64 bit first, so the same sequence as
+    // in the 64 bit case is used.
+    Register treg1 = Z_R0_scratch;
+    Register treg2 = Z_R1_scratch;
+    jlong divisor = right->as_jint();
+    jlong log_divisor = log2_long(right->as_jint());
+    __ move_reg_if_needed(dreg, T_LONG, lreg, T_INT); // sign extend
+    if (divisor == 2) {
+      __ z_srlg(treg2, dreg, 63);    // dividend < 0 ?  1 : 0
+    } else {
+      __ z_srag(treg2, dreg, 63);    // dividend < 0 ? -1 : 0
+      __ and_imm(treg2, divisor - 1, treg1, true);
+    }
+    if (code == lir_idiv) {
+      __ z_agr(dreg, treg2);
+      __ z_srag(dreg, dreg, log_divisor);
+    } else {
+      assert(code == lir_irem, "check");
+      __ z_agr(treg2, dreg);
+      __ and_imm(treg2, ~(divisor - 1), treg1, true);
+      __ z_sgr(dreg, treg2);
+    }
+    return;
+  }
+
+  // Divisor is not a power of 2 constant.
+  Register rreg = right->as_register();
+  Register treg = temp->as_register();
+  assert(right->is_single_cpu(), "right must be register");
+  assert(lreg == Z_R11, "see divInOpr()");
+  assert(rreg != lreg, "right register must not be same as left register");
+  assert((code == lir_idiv && dreg == Z_R11 && treg == Z_R10)
+      || (code == lir_irem && dreg == Z_R10 && treg == Z_R11), "see divInOpr(), divOutOpr(), remOutOpr()");
+
+  Register R1 = lreg->predecessor();
+  Register R2 = rreg;
+  __ move_reg_if_needed(lreg, T_LONG, lreg, T_INT); // sign extend
+  if (ImplicitDiv0Checks) {
+    // Record debug info at the offset of the divide instruction emitted next.
+    // NOTE(review): the previous comment here stated that no debug info is
+    // needed because the divide won't trap, which does not match this call.
+    // Confirm the intent.
+    add_debug_info_for_div0(__ offset(), info);
+  }
+  __ z_dsgfr(R1, R2);
+}
+
+// Emits the code for throwing an exception: loads the current PC into
+// Z_EXC_PC and calls the Runtime1 exception handling stub.
+// The exception oop must be in Z_EXC_OOP and 'exceptionPC' must be Z_EXC_PC.
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  assert(exceptionOop->as_register() == Z_EXC_OOP, "should match");
+  assert(exceptionPC->as_register() == Z_EXC_PC, "should match");
+
+  // LinearScan assumes that fixed registers never hold oops and therefore does
+  // not put the exception object into the oop map. Add it explicitly.
+  info->add_register_oop(exceptionOop);
+
+  // The debug info of the safepoint poll is reused for the throw op itself.
+  __ get_PC(Z_EXC_PC);
+  add_call_info(__ offset(), info); // for exception handler
+
+  // Pick the handler variant depending on whether the compilation has fpu code.
+  address handler;
+  if (compilation()->has_fpu_code()) {
+    handler = Runtime1::entry_for (Runtime1::handle_exception_id);
+  } else {
+    handler = Runtime1::entry_for (Runtime1::handle_exception_nofpu_id);
+  }
+  emit_call_c(handler);
+}
+
+// Emits an unconditional branch to the unwind handler entry of this method.
+// The exception oop is expected in Z_EXC_OOP (asserted).
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+ assert(exceptionOop->as_register() == Z_EXC_OOP, "should match");
+
+ __ branch_optimized(Assembler::bcondAlways, _unwind_handler_entry);
+}
+
+// Emits the inline code for an array copy described by a LIR_OpArrayCopy.
+//
+// Expected type unknown (op->expected_type() == NULL):
+//   The generic arraycopy stub is called (or Runtime1::arraycopy if that stub
+//   was not generated). A return value of 0 continues after the operation,
+//   anything else branches to the slow path stub (op->stub()).
+//
+// Expected type known:
+//   The checks selected by op->flags() (null, negative position, array-ness,
+//   range, negative length, type compatibility) are emitted inline. Each
+//   failing check branches to the slow path stub. For object arrays whose
+//   types are not provably compatible, the checkcast arraycopy stub is used
+//   if available. Finally the copy routine chosen by
+//   StubRoutines::select_arraycopy_function() is called with
+//   Z_ARG1 = source address, Z_ARG2 = destination address, Z_ARG3 = length.
+//
+// The code relies on the operands being in fixed registers (see the asserts
+// on Z_ARG1..Z_ARG5 in the generic case).
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+ ciArrayKlass* default_type = op->expected_type();
+ Register src = op->src()->as_register();
+ Register dst = op->dst()->as_register();
+ Register src_pos = op->src_pos()->as_register();
+ Register dst_pos = op->dst_pos()->as_register();
+ Register length = op->length()->as_register();
+ Register tmp = op->tmp()->as_register();
+
+ CodeStub* stub = op->stub();
+ int flags = op->flags();
+ BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+ // Arrays of arrays are treated like arrays of objects.
+ if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+
+ // If we don't know anything, just go through the generic arraycopy.
+ if (default_type == NULL) {
+ Label done;
+ // Save outgoing arguments in callee saved registers (C convention) in case
+ // a call to System.arraycopy is needed.
+ Register callee_saved_src = Z_R10;
+ Register callee_saved_src_pos = Z_R11;
+ Register callee_saved_dst = Z_R12;
+ Register callee_saved_dst_pos = Z_R13;
+ Register callee_saved_length = Z_ARG5; // Z_ARG5 == Z_R6 is callee saved.
+
+ __ lgr_if_needed(callee_saved_src, src);
+ __ lgr_if_needed(callee_saved_src_pos, src_pos);
+ __ lgr_if_needed(callee_saved_dst, dst);
+ __ lgr_if_needed(callee_saved_dst_pos, dst_pos);
+ __ lgr_if_needed(callee_saved_length, length);
+
+ // C function requires 64 bit values.
+ __ z_lgfr(src_pos, src_pos);
+ __ z_lgfr(dst_pos, dst_pos);
+ __ z_lgfr(length, length);
+
+ address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
+
+ address copyfunc_addr = StubRoutines::generic_arraycopy();
+
+ // Pass arguments: may push as this is not a safepoint; SP must be fix at each safepoint.
+
+ // The arguments are in the corresponding registers.
+ assert(Z_ARG1 == src, "assumption");
+ assert(Z_ARG2 == src_pos, "assumption");
+ assert(Z_ARG3 == dst, "assumption");
+ assert(Z_ARG4 == dst_pos, "assumption");
+ assert(Z_ARG5 == length, "assumption");
+ if (copyfunc_addr == NULL) { // Use C version if stub was not generated.
+ emit_call_c(C_entry);
+ } else {
+#ifndef PRODUCT
+ if (PrintC1Statistics) {
+ __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_generic_arraycopystub_cnt);
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+ }
+#endif
+ emit_call_c(copyfunc_addr);
+ }
+ CHECK_BAILOUT();
+
+ // A return value of 0 means the copy is complete.
+ __ compare32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondEqual, *stub->continuation());
+
+ if (copyfunc_addr != NULL) {
+ // tmp = Z_RET with its low 32 bits inverted (xor with -1).
+ // NOTE(review): presumably the stub returns the complement of the number of
+ // elements already copied - confirm against the stub's contract.
+ __ z_lgr(tmp, Z_RET);
+ __ z_xilf(tmp, -1);
+ }
+
+ // Restore values from callee saved registers so they are where the stub
+ // expects them.
+ __ lgr_if_needed(src, callee_saved_src);
+ __ lgr_if_needed(src_pos, callee_saved_src_pos);
+ __ lgr_if_needed(dst, callee_saved_dst);
+ __ lgr_if_needed(dst_pos, callee_saved_dst_pos);
+ __ lgr_if_needed(length, callee_saved_length);
+
+ if (copyfunc_addr != NULL) {
+ // Adjust the remaining work by tmp before entering the slow path.
+ __ z_sr(length, tmp);
+ __ z_ar(src_pos, tmp);
+ __ z_ar(dst_pos, tmp);
+ }
+ __ branch_optimized(Assembler::bcondAlways, *stub->entry());
+
+ __ bind(*stub->continuation());
+ return;
+ }
+
+ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point");
+
+ // shift_amount is log2 of the element size, used to turn indices into byte offsets.
+ int elem_size = type2aelembytes(basic_type);
+ int shift_amount;
+
+ switch (elem_size) {
+ case 1 :
+ shift_amount = 0;
+ break;
+ case 2 :
+ shift_amount = 1;
+ break;
+ case 4 :
+ shift_amount = 2;
+ break;
+ case 8 :
+ shift_amount = 3;
+ break;
+ default:
+ shift_amount = -1;
+ ShouldNotReachHere();
+ }
+
+ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
+ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
+ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
+ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());
+
+ // Length and pos's are all sign extended at this point on 64bit.
+
+ // test for NULL
+ if (flags & LIR_OpArrayCopy::src_null_check) {
+ __ compareU64_and_branch(src, (intptr_t)0, Assembler::bcondZero, *stub->entry());
+ }
+ if (flags & LIR_OpArrayCopy::dst_null_check) {
+ __ compareU64_and_branch(dst, (intptr_t)0, Assembler::bcondZero, *stub->entry());
+ }
+
+ // Check if negative.
+ if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
+ __ compare32_and_branch(src_pos, (intptr_t)0, Assembler::bcondLow, *stub->entry());
+ }
+ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
+ __ compare32_and_branch(dst_pos, (intptr_t)0, Assembler::bcondLow, *stub->entry());
+ }
+
+ // If the compiler was not able to prove that exact type of the source or the destination
+ // of the arraycopy is an array type, check at runtime if the source or the destination is
+ // an instance type.
+ if (flags & LIR_OpArrayCopy::type_check) {
+ assert(Klass::_lh_neutral_value == 0, "or replace z_lt instructions");
+
+ // A layout helper that is not below the neutral value (0) takes the slow path.
+ if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+ __ load_klass(tmp, dst);
+ __ z_lt(tmp, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+ __ branch_optimized(Assembler::bcondNotLow, *stub->entry());
+ }
+
+ if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+ __ load_klass(tmp, src);
+ __ z_lt(tmp, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+ __ branch_optimized(Assembler::bcondNotLow, *stub->entry());
+ }
+ }
+
+ // Range checks: pos + length must not be higher than the array length.
+ if (flags & LIR_OpArrayCopy::src_range_check) {
+ __ z_la(tmp, Address(src_pos, length));
+ __ z_cl(tmp, src_length_addr);
+ __ branch_optimized(Assembler::bcondHigh, *stub->entry());
+ }
+ if (flags & LIR_OpArrayCopy::dst_range_check) {
+ __ z_la(tmp, Address(dst_pos, length));
+ __ z_cl(tmp, dst_length_addr);
+ __ branch_optimized(Assembler::bcondHigh, *stub->entry());
+ }
+
+ if (flags & LIR_OpArrayCopy::length_positive_check) {
+ __ z_ltr(length, length);
+ __ branch_optimized(Assembler::bcondNegative, *stub->entry());
+ }
+
+ // Stubs require 64 bit values.
+ __ z_lgfr(src_pos, src_pos); // int -> long
+ __ z_lgfr(dst_pos, dst_pos); // int -> long
+ __ z_lgfr(length, length); // int -> long
+
+ if (flags & LIR_OpArrayCopy::type_check) {
+ // We don't know the array types are compatible.
+ if (basic_type != T_OBJECT) {
+ // Simple test for basic type arrays.
+ // The klass fields of src and dst must be equal (32 bit compare for
+ // compressed class pointers, 64 bit otherwise).
+ if (UseCompressedClassPointers) {
+ __ z_l(tmp, src_klass_addr);
+ __ z_c(tmp, dst_klass_addr);
+ } else {
+ __ z_lg(tmp, src_klass_addr);
+ __ z_cg(tmp, dst_klass_addr);
+ }
+ __ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
+ } else {
+ // For object arrays, if src is a sub class of dst then we can
+ // safely do the copy.
+ NearLabel cont, slow;
+ Register src_klass = Z_R1_scratch;
+ Register dst_klass = Z_R10;
+
+ __ load_klass(src_klass, src);
+ __ load_klass(dst_klass, dst);
+
+ __ check_klass_subtype_fast_path(src_klass, dst_klass, tmp, &cont, &slow, NULL);
+
+ // Fast path was inconclusive: call the slow subtype check stub with the
+ // two klasses passed in the reserved argument area.
+ store_parameter(src_klass, 0); // sub
+ store_parameter(dst_klass, 1); // super
+ emit_call_c(Runtime1::entry_for (Runtime1::slow_subtype_check_id));
+ CHECK_BAILOUT();
+ // Sets condition code 0 for match (2 otherwise).
+ __ branch_optimized(Assembler::bcondEqual, cont);
+
+ __ bind(slow);
+
+ address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+ if (copyfunc_addr != NULL) { // use stub if available
+ // Src is not a sub class of dst so we have to do a
+ // per-element check.
+
+ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
+ if ((flags & mask) != mask) {
+ // Check that both of them are object arrays.
+ assert(flags & mask, "one of the two should be known to be an object array");
+
+ // Load the klass of the array that is not known to be an object array
+ // and compare its layout helper with the one of an object array.
+ if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+ __ load_klass(tmp, src);
+ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+ __ load_klass(tmp, dst);
+ }
+ Address klass_lh_addr(tmp, Klass::layout_helper_offset());
+ jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+ __ load_const_optimized(Z_R1_scratch, objArray_lh);
+ __ z_c(Z_R1_scratch, klass_lh_addr);
+ __ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
+ }
+
+ // Save outgoing arguments in callee saved registers (C convention) in case
+ // a call to System.arraycopy is needed.
+ Register callee_saved_src = Z_R10;
+ Register callee_saved_src_pos = Z_R11;
+ Register callee_saved_dst = Z_R12;
+ Register callee_saved_dst_pos = Z_R13;
+ Register callee_saved_length = Z_ARG5; // Z_ARG5 == Z_R6 is callee saved.
+
+ __ lgr_if_needed(callee_saved_src, src);
+ __ lgr_if_needed(callee_saved_src_pos, src_pos);
+ __ lgr_if_needed(callee_saved_dst, dst);
+ __ lgr_if_needed(callee_saved_dst_pos, dst_pos);
+ __ lgr_if_needed(callee_saved_length, length);
+
+ __ z_llgfr(length, length); // Higher 32bits must be null.
+
+ // Set up the stub arguments: Z_ARG1 = address of first source element,
+ // Z_ARG2 = address of first destination element, Z_ARG3 = length,
+ // Z_ARG4 = super check offset and Z_ARG5 = element klass of dst.
+ __ z_sllg(Z_ARG1, src_pos, shift_amount); // index -> byte offset
+ __ z_sllg(Z_ARG2, dst_pos, shift_amount); // index -> byte offset
+
+ __ z_la(Z_ARG1, Address(src, Z_ARG1, arrayOopDesc::base_offset_in_bytes(basic_type)));
+ assert_different_registers(Z_ARG1, dst, dst_pos, length);
+ __ z_la(Z_ARG2, Address(dst, Z_ARG2, arrayOopDesc::base_offset_in_bytes(basic_type)));
+ assert_different_registers(Z_ARG2, dst, length);
+
+ __ z_lgr(Z_ARG3, length);
+ assert_different_registers(Z_ARG3, dst);
+
+ __ load_klass(Z_ARG5, dst);
+ __ z_lg(Z_ARG5, Address(Z_ARG5, ObjArrayKlass::element_klass_offset()));
+ __ z_lg(Z_ARG4, Address(Z_ARG5, Klass::super_check_offset_offset()));
+ emit_call_c(copyfunc_addr);
+ CHECK_BAILOUT();
+
+#ifndef PRODUCT
+ if (PrintC1Statistics) {
+ NearLabel failed;
+ __ compareU32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondNotEqual, failed);
+ __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_checkcast_cnt);
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+ __ bind(failed);
+ }
+#endif
+
+ // A return value of 0 means the copy is complete.
+ __ compareU32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondEqual, *stub->continuation());
+
+#ifndef PRODUCT
+ if (PrintC1Statistics) {
+ __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt);
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+ }
+#endif
+
+ // tmp = Z_RET with its low 32 bits inverted (xor with -1).
+ // NOTE(review): presumably the complement of the number of elements
+ // already copied - confirm against the stub's contract.
+ __ z_lgr(tmp, Z_RET);
+ __ z_xilf(tmp, -1);
+
+ // Restore previously spilled arguments
+ __ lgr_if_needed(src, callee_saved_src);
+ __ lgr_if_needed(src_pos, callee_saved_src_pos);
+ __ lgr_if_needed(dst, callee_saved_dst);
+ __ lgr_if_needed(dst_pos, callee_saved_dst_pos);
+ __ lgr_if_needed(length, callee_saved_length);
+
+ // Adjust the remaining work by tmp before entering the slow path.
+ __ z_sr(length, tmp);
+ __ z_ar(src_pos, tmp);
+ __ z_ar(dst_pos, tmp);
+ }
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->entry());
+
+ __ bind(cont);
+ }
+ }
+
+#ifdef ASSERT
+ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+ // Sanity check the known type with the incoming class. For the
+ // primitive case the types must match exactly with src.klass and
+ // dst.klass each exactly matching the default type. For the
+ // object array case, if no type check is needed then either the
+ // dst type is exactly the expected type and the src type is a
+ // subtype which we can't check or src is the same array as dst
+ // but not necessarily exactly of type default_type.
+ NearLabel known_ok, halt;
+ metadata2reg(default_type->constant_encoding(), tmp);
+ if (UseCompressedClassPointers) {
+ __ encode_klass_not_null(tmp);
+ }
+
+ if (basic_type != T_OBJECT) {
+ if (UseCompressedClassPointers) { __ z_c (tmp, dst_klass_addr); }
+ else { __ z_cg(tmp, dst_klass_addr); }
+ __ branch_optimized(Assembler::bcondNotEqual, halt);
+ if (UseCompressedClassPointers) { __ z_c (tmp, src_klass_addr); }
+ else { __ z_cg(tmp, src_klass_addr); }
+ __ branch_optimized(Assembler::bcondEqual, known_ok);
+ } else {
+ if (UseCompressedClassPointers) { __ z_c (tmp, dst_klass_addr); }
+ else { __ z_cg(tmp, dst_klass_addr); }
+ __ branch_optimized(Assembler::bcondEqual, known_ok);
+ __ compareU64_and_branch(src, dst, Assembler::bcondEqual, known_ok);
+ }
+ __ bind(halt);
+ __ stop("incorrect type information in arraycopy");
+ __ bind(known_ok);
+ }
+#endif
+
+#ifndef PRODUCT
+ if (PrintC1Statistics) {
+ __ load_const_optimized(Z_R1_scratch, Runtime1::arraycopy_count_address(basic_type));
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+ }
+#endif
+
+ // All checks passed: compute the element addresses and call the copy routine.
+ __ z_sllg(tmp, src_pos, shift_amount); // index -> byte offset
+ __ z_sllg(Z_R1_scratch, dst_pos, shift_amount); // index -> byte offset
+
+ assert_different_registers(Z_ARG1, dst, dst_pos, length);
+ __ z_la(Z_ARG1, Address(src, tmp, arrayOopDesc::base_offset_in_bytes(basic_type)));
+ assert_different_registers(Z_ARG2, length);
+ __ z_la(Z_ARG2, Address(dst, Z_R1_scratch, arrayOopDesc::base_offset_in_bytes(basic_type)));
+ __ lgr_if_needed(Z_ARG3, length);
+
+ // Select the copy routine by element type, alignment and overlap.
+ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
+ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
+ const char *name;
+ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
+ __ call_VM_leaf(entry);
+
+ __ bind(*stub->continuation());
+}
+
+// Emits a shift (lir_shl, lir_shr, lir_ushr) of 'left' by a count held in a
+// register, writing the result to 'dest'.
+//  - dest is not a single cpu register: 64 bit shift on the 'lo' registers.
+//  - left is of type T_OBJECT: 64 bit shift on the single registers.
+//  - otherwise: 32 bit shift. The count is copied to Z_R1_scratch and masked
+//    with 31 first. Right shifts operate in place, so left must equal dest.
+// 'tmp' is not used.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  if (!dest->is_single_cpu()) {
+    // 64 bit value in a double cpu register.
+    Register to     = dest->as_register_lo();
+    Register from   = left->as_register_lo();
+    Register amount = count->as_register();
+    switch (code) {
+      case lir_shl:  __ z_sllg (to, from, 0, amount); break;
+      case lir_shr:  __ z_srag (to, from, 0, amount); break;
+      case lir_ushr: __ z_srlg (to, from, 0, amount); break;
+      default: ShouldNotReachHere();
+    }
+    return;
+  }
+
+  if (left->type() == T_OBJECT) {
+    // Object operand: shift the full 64 bit register.
+    Register to     = dest->as_register();
+    Register from   = left->as_register();
+    Register amount = count->as_register();
+    switch (code) {
+      case lir_shl:  __ z_sllg (to, from, 0, amount); break;
+      case lir_shr:  __ z_srag (to, from, 0, amount); break;
+      case lir_ushr: __ z_srlg (to, from, 0, amount); break;
+      default: ShouldNotReachHere();
+    }
+    return;
+  }
+
+  // 32 bit value.
+  assert(code == lir_shl || left == dest, "left and dest must be equal for 2 operand form right shifts");
+  Register masked_count = Z_R1_scratch;
+  __ z_lr(masked_count, count->as_register());
+  __ z_nill(masked_count, 31);
+
+  Register to = dest->as_register();
+  switch (code) {
+    case lir_shl:  __ z_sllg (to, left->as_register(), 0, masked_count); break;
+    case lir_shr:  __ z_sra  (to, 0, masked_count); break;
+    case lir_ushr: __ z_srl  (to, 0, masked_count); break;
+    default: ShouldNotReachHere();
+  }
+}
+
+// Emits a shift (lir_shl, lir_shr, lir_ushr) of 'left' by the constant
+// 'count', writing the result to 'dest'.
+//  - left is of type T_OBJECT: 64 bit shift, count masked with 63.
+//  - dest is a single cpu register: 32 bit shift, count masked with 0x1F.
+//    Right shifts operate in place, so left must equal dest.
+//  - dest is a double cpu register: 64 bit shift, count masked with 63.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+  if (left->type() == T_OBJECT) {
+    const jint amount = count & 63; // Shouldn't shift by more than sizeof(intptr_t).
+    Register from = left->as_register();
+    Register to   = dest->as_register_lo();
+    switch (code) {
+      case lir_shl:  __ z_sllg (to, from, amount); break;
+      case lir_shr:  __ z_srag (to, from, amount); break;
+      case lir_ushr: __ z_srlg (to, from, amount); break;
+      default: ShouldNotReachHere();
+    }
+    return;
+  }
+
+  if (dest->is_single_cpu()) {
+    assert(code == lir_shl || left == dest, "left and dest must be equal for 2 operand form right shifts");
+    const jint amount = count & 0x1F; // Java spec
+    switch (code) {
+      case lir_shl:  __ z_sllg (dest->as_register(), left->as_register(), amount); break;
+      case lir_shr:  __ z_sra  (dest->as_register(), amount); break;
+      case lir_ushr: __ z_srl  (dest->as_register(), amount); break;
+      default: ShouldNotReachHere();
+    }
+    return;
+  }
+
+  if (dest->is_double_cpu()) {
+    const jint amount = count & 63; // Java spec
+    Register from = left->as_pointer_register();
+    Register to   = dest->as_pointer_register();
+    switch (code) {
+      case lir_shl:  __ z_sllg (to, from, amount); break;
+      case lir_shr:  __ z_srag (to, from, amount); break;
+      case lir_ushr: __ z_srlg (to, from, amount); break;
+      default: ShouldNotReachHere();
+    }
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+// Emits the inline allocation of an instance.
+// If op->init_check() is set, the init state of the klass is first compared
+// with InstanceKlass::fully_initialized and the slow path stub is entered if
+// it differs. The allocation itself also falls back to the stub entry; the
+// stub's continuation is bound right after the inline code.
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+  CodeStub* const slow_path = op->stub();
+
+  if (op->init_check()) {
+    // Make sure klass is initialized & doesn't have finalizer.
+    const int init_state_off = in_bytes(InstanceKlass::init_state_offset());
+    Register klass_reg = op->klass()->as_register();
+    add_debug_info_for_null_check_here(slow_path->info());
+    // Pick the compare-immediate form that can encode the displacement.
+    if (!Immediate::is_uimm12(init_state_off)) {
+      __ z_cliy(init_state_off, klass_reg, InstanceKlass::fully_initialized);
+    } else {
+      __ z_cli(init_state_off, klass_reg, InstanceKlass::fully_initialized);
+    }
+    // Use long branch, because slow_case might be far.
+    __ branch_optimized(Assembler::bcondNotEqual, *slow_path->entry());
+  }
+
+  __ allocate_object(op->obj()->as_register(),
+                     op->tmp1()->as_register(),
+                     op->tmp2()->as_register(),
+                     op->header_size(),
+                     op->object_size(),
+                     op->klass()->as_register(),
+                     *slow_path->entry());
+  __ bind(*slow_path->continuation());
+  __ verify_oop(op->obj()->as_register());
+}
+
+// Emits the inline allocation of an array.
+// The length register is sign extended to 64 bit first. The slow path stub
+// is entered unconditionally if UseSlowPath is set or if the fast path flag
+// for this kind of array (UseFastNewObjectArray for T_OBJECT/T_ARRAY
+// elements, UseFastNewTypeArray otherwise) is off. Otherwise the array is
+// allocated inline with the stub entry as fallback.
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+  Register len = op->len()->as_register();
+  __ move_reg_if_needed(len, T_LONG, len, T_INT); // sign extend
+
+  const bool is_ref_array =
+    (op->type() == T_OBJECT || op->type() == T_ARRAY);
+  const bool fast_path_enabled =
+    is_ref_array ? UseFastNewObjectArray : UseFastNewTypeArray;
+
+  if (!UseSlowPath && fast_path_enabled) {
+    __ allocate_array(op->obj()->as_register(),
+                      op->len()->as_register(),
+                      op->tmp1()->as_register(),
+                      op->tmp2()->as_register(),
+                      arrayOopDesc::header_size(op->type()),
+                      type2aelembytes(op->type()),
+                      op->klass()->as_register(),
+                      *op->stub()->entry());
+  } else {
+    __ z_brul(*op->stub()->entry());
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+// Emits code that records the receiver klass 'recv' in the receiver type rows
+// of the profile entry 'data' inside the method data object addressed by 'mdo'.
+// First pass: if a row already holds 'recv', its counter is incremented.
+// Second pass: otherwise the first row whose receiver slot is zero is claimed
+// for 'recv' and its counter is set to DataLayout::counter_increment.
+// In both cases control continues at 'update_done'. If no row matches and
+// none is free, the emitted code falls through.
+// 'tmp1' and Z_R0_scratch are used as temporaries.
+void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data,
+                                        Register recv, Register tmp1, Label* update_done) {
+  const uint num_rows = VirtualCallData::row_limit();
+
+  // Pass 1: look for a row that already records this receiver.
+  for (uint row = 0; row < num_rows; row++) {
+    Label try_next_row;
+    Address row_receiver(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(row)));
+    __ z_cg(recv, row_receiver);
+    __ z_brne(try_next_row);
+    // Match: bump the counter of this row.
+    Address row_counter(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(row)));
+    __ add2mem_64(row_counter, DataLayout::counter_increment, tmp1);
+    __ branch_optimized(Assembler::bcondAlways, *update_done);
+    __ bind(try_next_row);
+  }
+
+  // Pass 2: receiver not found, install it in the first empty row.
+  for (uint row = 0; row < num_rows; row++) {
+    Label try_next_row;
+    Address row_receiver(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(row)));
+    __ z_ltg(Z_R0_scratch, row_receiver);
+    __ z_brne(try_next_row);
+    // Empty row: store the receiver and initialize its counter.
+    __ z_stg(recv, row_receiver);
+    __ load_const_optimized(tmp1, DataLayout::counter_increment);
+    __ z_stg(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(row)), mdo);
+    __ branch_optimized(Assembler::bcondAlways, *update_done);
+    __ bind(try_next_row);
+  }
+}
+
+void LIR_Assembler::setup_md_access(ciMethod* method, int bci,
+ ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) {
+ Unimplemented(); // No implementation in this port; md, data and mdo_offset_bias are never set.
+}
+
+void LIR_Assembler::store_parameter(Register r, int param_num) { // Store r into parameter slot param_num.
+ assert(param_num >= 0, "invalid num");
+ int offset_in_bytes = param_num * BytesPerWord + FrameMap::first_available_sp_in_frame; // Word-sized slots.
+ assert(offset_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+ __ z_stg(r, offset_in_bytes, Z_SP); // 64 bit store at Z_SP + offset_in_bytes.
+}
+
+void LIR_Assembler::store_parameter(jint c, int param_num) { // Store constant c into parameter slot param_num.
+ assert(param_num >= 0, "invalid num");
+ int offset_in_bytes = param_num * BytesPerWord + FrameMap::first_available_sp_in_frame; // Word-sized slots.
+ assert(offset_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+ __ store_const(Address(Z_SP, offset_in_bytes), c, Z_R1_scratch, true); // Z_R1_scratch is handed over as temp.
+}
+
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { // Used by the checkcast and instanceof cases of emit_opTypeCheck.
+ // We always need a stub for the failure case.
+ CodeStub* stub = op->stub(); // NOTE(review): not referenced again in this function.
+ Register obj = op->object()->as_register();
+ Register k_RInfo = op->tmp1()->as_register();
+ Register klass_RInfo = op->tmp2()->as_register();
+ Register dst = op->result_opr()->as_register();
+ Register Rtmp1 = Z_R1_scratch;
+ ciKlass* k = op->klass();
+
+ assert(!op->tmp3()->is_valid(), "tmp3's not needed");
+
+ // Check if it needs to be profiled.
+ ciMethodData* md = NULL;
+ ciProfileData* data = NULL;
+
+ if (op->should_profile()) {
+ ciMethod* method = op->profiled_method();
+ assert(method != NULL, "Should have method");
+ int bci = op->profiled_bci();
+ md = method->method_data_or_null();
+ assert(md != NULL, "Sanity");
+ data = md->bci_to_data(bci);
+ assert(data != NULL, "need data for type check");
+ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+ }
+
+ // Temp operands do not overlap with inputs, if this is their last
+ // use (end of range is exclusive), so a register conflict is possible.
+ if (obj == k_RInfo) {
+ k_RInfo = dst;
+ } else if (obj == klass_RInfo) {
+ klass_RInfo = dst;
+ }
+ assert_different_registers(obj, k_RInfo, klass_RInfo);
+
+ if (op->should_profile()) {
+ NearLabel not_null;
+ __ compareU64_and_branch(obj, (intptr_t) 0, Assembler::bcondNotEqual, not_null);
+ // Object is null; update MDO and exit.
+ Register mdo = klass_RInfo;
+ metadata2reg(md->constant_encoding(), mdo);
+ Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
+ int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+ __ or2mem_8(data_addr, header_bits); // Or the null_seen bits into the profile header.
+ __ branch_optimized(Assembler::bcondAlways, *obj_is_null);
+ __ bind(not_null);
+ } else {
+ __ compareU64_and_branch(obj, (intptr_t) 0, Assembler::bcondEqual, *obj_is_null);
+ }
+
+ NearLabel profile_cast_failure, profile_cast_success;
+ Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
+ Label *success_target = op->should_profile() ? &profile_cast_success : success;
+
+ // Patching may screw with our temporaries on sparc,
+ // so let's do it before loading the class.
+ if (k->is_loaded()) {
+ metadata2reg(k->constant_encoding(), k_RInfo);
+ } else {
+ klass2reg_with_patching(k_RInfo, op->info_for_patch());
+ }
+ assert(obj != k_RInfo, "must be different");
+
+ __ verify_oop(obj);
+
+ // Get object class.
+ // Not a safepoint as obj null check happens earlier.
+ if (op->fast_check()) {
+ if (UseCompressedClassPointers) {
+ __ load_klass(klass_RInfo, obj);
+ __ compareU64_and_branch(k_RInfo, klass_RInfo, Assembler::bcondNotEqual, *failure_target);
+ } else {
+ __ z_cg(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
+ __ branch_optimized(Assembler::bcondNotEqual, *failure_target);
+ }
+ // Successful cast, fall through to profile or jump.
+ } else {
+ bool need_slow_path = !k->is_loaded() ||
+ ((int) k->super_check_offset() == in_bytes(Klass::secondary_super_cache_offset()));
+ intptr_t super_check_offset = k->is_loaded() ? k->super_check_offset() : -1L; // -1 while k is not loaded.
+ __ load_klass(klass_RInfo, obj);
+ // Perform the fast part of the checking logic.
+ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1,
+ (need_slow_path ? success_target : NULL),
+ failure_target, NULL,
+ RegisterOrConstant(super_check_offset));
+ if (need_slow_path) {
+ // Call out-of-line instance of __ check_klass_subtype_slow_path(...):
+ address a = Runtime1::entry_for (Runtime1::slow_subtype_check_id);
+ store_parameter(klass_RInfo, 0); // sub
+ store_parameter(k_RInfo, 1); // super
+ emit_call_c(a); // Sets condition code 0 for match (2 otherwise).
+ CHECK_BAILOUT();
+ __ branch_optimized(Assembler::bcondNotEqual, *failure_target);
+ // Fall through to success case.
+ }
+ }
+
+ if (op->should_profile()) {
+ Register mdo = klass_RInfo, recv = k_RInfo;
+ assert_different_registers(obj, mdo, recv);
+ __ bind(profile_cast_success); // Success path: record the klass of obj in the MDO, then go to success.
+ metadata2reg(md->constant_encoding(), mdo);
+ __ load_klass(recv, obj);
+ type_profile_helper(mdo, md, data, recv, Rtmp1, success);
+ __ branch_optimized(Assembler::bcondAlways, *success);
+
+ __ bind(profile_cast_failure); // Failure path: decrement the MDO counter, then go to failure.
+ metadata2reg(md->constant_encoding(), mdo);
+ __ add2mem_64(Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())), -(int)DataLayout::counter_increment, Rtmp1);
+ __ branch_optimized(Assembler::bcondAlways, *failure);
+ } else {
+ __ branch_optimized(Assembler::bcondAlways, *success);
+ }
+}
+
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { // Handles lir_store_check, lir_checkcast and lir_instanceof.
+ LIR_Code code = op->code();
+ if (code == lir_store_check) {
+ Register value = op->object()->as_register();
+ Register array = op->array()->as_register();
+ Register k_RInfo = op->tmp1()->as_register();
+ Register klass_RInfo = op->tmp2()->as_register();
+ Register Rtmp1 = Z_R1_scratch;
+
+ CodeStub* stub = op->stub();
+
+ // Check if it needs to be profiled.
+ ciMethodData* md = NULL;
+ ciProfileData* data = NULL;
+
+ assert_different_registers(value, k_RInfo, klass_RInfo);
+
+ if (op->should_profile()) {
+ ciMethod* method = op->profiled_method();
+ assert(method != NULL, "Should have method");
+ int bci = op->profiled_bci();
+ md = method->method_data_or_null();
+ assert(md != NULL, "Sanity");
+ data = md->bci_to_data(bci);
+ assert(data != NULL, "need data for type check");
+ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+ }
+ NearLabel profile_cast_success, profile_cast_failure, done;
+ Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+ Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
+ if (op->should_profile()) {
+ NearLabel not_null;
+ __ compareU64_and_branch(value, (intptr_t) 0, Assembler::bcondNotEqual, not_null);
+ // Object is null; update MDO and exit.
+ Register mdo = klass_RInfo;
+ metadata2reg(md->constant_encoding(), mdo);
+ Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
+ int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+ __ or2mem_8(data_addr, header_bits); // Or the null_seen bits into the profile header.
+ __ branch_optimized(Assembler::bcondAlways, done);
+ __ bind(not_null);
+ } else {
+ __ compareU64_and_branch(value, (intptr_t) 0, Assembler::bcondEqual, done);
+ }
+
+ add_debug_info_for_null_check_here(op->info_for_exception());
+ __ load_klass(k_RInfo, array);
+ __ load_klass(klass_RInfo, value);
+
+ // Get the element klass of the array's klass (it's already uncompressed).
+ __ z_lg(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
+ // Perform the fast part of the checking logic.
+ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+ // Call out-of-line instance of __ check_klass_subtype_slow_path(...):
+ address a = Runtime1::entry_for (Runtime1::slow_subtype_check_id);
+ store_parameter(klass_RInfo, 0); // sub
+ store_parameter(k_RInfo, 1); // super
+ emit_call_c(a); // Sets condition code 0 for match (2 otherwise).
+ CHECK_BAILOUT();
+ __ branch_optimized(Assembler::bcondNotEqual, *failure_target);
+ // Fall through to success case.
+
+ if (op->should_profile()) {
+ Register mdo = klass_RInfo, recv = k_RInfo;
+ assert_different_registers(value, mdo, recv);
+ __ bind(profile_cast_success); // Success path: record the klass of value in the MDO, then go to done.
+ metadata2reg(md->constant_encoding(), mdo);
+ __ load_klass(recv, value);
+ type_profile_helper(mdo, md, data, recv, Rtmp1, &done);
+ __ branch_optimized(Assembler::bcondAlways, done);
+
+ __ bind(profile_cast_failure); // Failure path: decrement the MDO counter, then enter the stub.
+ metadata2reg(md->constant_encoding(), mdo);
+ __ add2mem_64(Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())), -(int)DataLayout::counter_increment, Rtmp1);
+ __ branch_optimized(Assembler::bcondAlways, *stub->entry());
+ }
+
+ __ bind(done);
+ } else {
+ if (code == lir_checkcast) {
+ Register obj = op->object()->as_register();
+ Register dst = op->result_opr()->as_register();
+ NearLabel success;
+ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); // A null object counts as success.
+ __ bind(success);
+ __ lgr_if_needed(dst, obj); // The result of the cast is the object itself.
+ } else {
+ if (code == lir_instanceof) {
+ Register obj = op->object()->as_register(); // NOTE(review): not referenced in this branch.
+ Register dst = op->result_opr()->as_register();
+ NearLabel success, failure, done;
+ emit_typecheck_helper(op, &success, &failure, &failure); // A null object counts as failure.
+ __ bind(failure);
+ __ clear_reg(dst); // Failure: dst is cleared.
+ __ branch_optimized(Assembler::bcondAlways, done);
+ __ bind(success);
+ __ load_const_optimized(dst, 1); // Success: dst = 1.
+ __ bind(done);
+ } else {
+ ShouldNotReachHere();
+ }
+ }
+ }
+}
+
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { // CS/CSG on the memory at addr, displacement 0.
+ Register addr = op->addr()->as_pointer_register();
+ Register t1_cmp = Z_R1_scratch;
+ if (op->code() == lir_cas_long) {
+ assert(VM_Version::supports_cx8(), "wrong machine");
+ Register cmp_value_lo = op->cmp_value()->as_register_lo();
+ Register new_value_lo = op->new_value()->as_register_lo();
+ __ z_lgr(t1_cmp, cmp_value_lo); // The instruction gets a copy of the compare value in the scratch register.
+ // Perform the compare and swap operation.
+ __ z_csg(t1_cmp, new_value_lo, 0, addr);
+ } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj) {
+ Register cmp_value = op->cmp_value()->as_register();
+ Register new_value = op->new_value()->as_register();
+ if (op->code() == lir_cas_obj) {
+ if (UseCompressedOops) {
+ t1_cmp = op->tmp1()->as_register(); // Both encoded values live in the op's temps.
+ Register t2_new = op->tmp2()->as_register();
+ assert_different_registers(cmp_value, new_value, addr, t1_cmp, t2_new);
+ __ oop_encoder(t1_cmp, cmp_value, true /*maybe null*/);
+ __ oop_encoder(t2_new, new_value, true /*maybe null*/);
+ __ z_cs(t1_cmp, t2_new, 0, addr); // Encoded oops are swapped with the 32 bit form.
+ } else {
+ __ z_lgr(t1_cmp, cmp_value);
+ __ z_csg(t1_cmp, new_value, 0, addr);
+ }
+ } else {
+ __ z_lr(t1_cmp, cmp_value); // lir_cas_int: 32 bit copy and 32 bit swap.
+ __ z_cs(t1_cmp, new_value, 0, addr);
+ }
+ } else {
+ ShouldNotReachHere(); // new lir_cas_??
+ }
+}
+
+void LIR_Assembler::set_24bit_FPU() {
+ ShouldNotCallThis(); // x86 only; must never be called in this port.
+}
+
+void LIR_Assembler::reset_FPU() {
+ ShouldNotCallThis(); // x86 only; must never be called in this port.
+}
+
+void LIR_Assembler::breakpoint() {
+ Unimplemented(); // Not implemented in this port; the call below is disabled.
+ // __ breakpoint_trap();
+}
+
+void LIR_Assembler::push(LIR_Opr opr) {
+ ShouldNotCallThis(); // Unused in this port; opr is ignored.
+}
+
+void LIR_Assembler::pop(LIR_Opr opr) {
+ ShouldNotCallThis(); // Unused in this port; opr is ignored.
+}
+
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst_opr) {
+ Address lock_slot = frame_map()->address_for_monitor_lock(monitor_no); // Frame slot of monitor monitor_no.
+ __ add2reg(dst_opr->as_register(), lock_slot.disp(), lock_slot.base()); // Form its address from base and disp.
+}
+
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+ const Register object_reg = op->obj_opr()->as_register(); // May not be an oop.
+ const Register header_reg = op->hdr_opr()->as_register();
+ const Register lock_reg = op->lock_opr()->as_register();
+ Label& slow_case = *op->stub()->entry();
+ if (!UseFastLocking) {
+ __ branch_optimized(Assembler::bcondAlways, slow_case); // Inline locking is off: always enter the stub.
+ } else if (op->code() == lir_lock) {
+ assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+ CodeEmitInfo* const npe_info = op->info(); // Only set if a NullPointerException is possible.
+ if (npe_info != NULL) {
+ add_debug_info_for_null_check_here(npe_info);
+ }
+ __ lock_object(header_reg, object_reg, lock_reg, slow_case);
+ } else if (op->code() == lir_unlock) {
+ assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+ __ unlock_object(header_reg, object_reg, lock_reg, slow_case);
+ } else {
+ ShouldNotReachHere();
+ }
+ __ bind(*op->stub()->continuation());
+}
+
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { // Updates the call counters in the profiled method's MDO.
+ ciMethod* method = op->profiled_method();
+ int bci = op->profiled_bci();
+ ciMethod* callee = op->profiled_callee();
+
+ // Update counter for all call types.
+ ciMethodData* md = method->method_data_or_null();
+ assert(md != NULL, "Sanity");
+ ciProfileData* data = md->bci_to_data(bci);
+ assert(data->is_CounterData(), "need CounterData for calls");
+ assert(op->mdo()->is_single_cpu(), "mdo must be allocated");
+ Register mdo = op->mdo()->as_register();
+ assert(op->tmp1()->is_double_cpu(), "tmp1 must be allocated");
+ Register tmp1 = op->tmp1()->as_register_lo();
+ metadata2reg(md->constant_encoding(), mdo);
+
+ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+ Bytecodes::Code bc = method->java_code_at_bci(bci);
+ const bool callee_is_static = callee->is_loaded() && callee->is_static();
+ // Perform additional virtual call profiling for invokevirtual and
+ // invokeinterface bytecodes.
+ if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+ !callee_is_static && // Required for optimized MH invokes.
+ C1ProfileVirtualCalls) {
+ assert(op->recv()->is_single_cpu(), "recv must be allocated");
+ Register recv = op->recv()->as_register();
+ assert_different_registers(mdo, tmp1, recv);
+ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+ ciKlass* known_klass = op->known_holder();
+ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+ // We know the type that will be seen at this call site; we can
+ // statically update the MethodData* rather than needing to do
+ // dynamic tests on the receiver type.
+
+ // NOTE: we should probably put a lock around this search to
+ // avoid collisions by concurrent compilations.
+ ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+ uint i;
+ for (i = 0; i < VirtualCallData::row_limit(); i++) {
+ ciKlass* receiver = vc_data->receiver(i);
+ if (known_klass->equals(receiver)) {
+ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+ __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1);
+ return; // Row found at compile time: only its counter is incremented.
+ }
+ }
+
+ // Receiver type not found in profile data. Select an empty slot.
+
+ // Note that this is less efficient than it should be because it
+ // always does a write to the receiver part of the
+ // VirtualCallData rather than just the first time.
+ for (i = 0; i < VirtualCallData::row_limit(); i++) {
+ ciKlass* receiver = vc_data->receiver(i);
+ if (receiver == NULL) {
+ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+ metadata2reg(known_klass->constant_encoding(), tmp1);
+ __ z_stg(tmp1, recv_addr);
+ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+ __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1);
+ return;
+ }
+ }
+ } else {
+ __ load_klass(recv, recv); // recv is overwritten with its klass.
+ NearLabel update_done;
+ type_profile_helper(mdo, md, data, recv, tmp1, &update_done);
+ // Receiver did not match any saved receiver and there is no empty row for it.
+ // Increment total counter to indicate polymorphic case.
+ __ add2mem_64(counter_addr, DataLayout::counter_increment, tmp1);
+ __ bind(update_done);
+ }
+ } else {
+ // static call
+ __ add2mem_64(counter_addr, DataLayout::counter_increment, tmp1);
+ }
+}
+
+void LIR_Assembler::align_backward_branch_target() {
+ __ align(OptoLoopAlignment); // Backward branch targets use the OptoLoopAlignment setting.
+}
+
+void LIR_Assembler::emit_delay(LIR_OpDelay* op) {
+ ShouldNotCallThis(); // There are no delay slots on ZARCH_64, so no LIR_OpDelay may arrive here.
+}
+
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { // dest = -left; the instruction depends on the operand kind.
+ assert(left->is_register(), "can only handle registers");
+
+ if (left->is_single_cpu()) {
+ __ z_lcr(dest->as_register(), left->as_register()); // Single cpu register.
+ } else if (left->is_single_fpu()) {
+ __ z_lcebr(dest->as_float_reg(), left->as_float_reg()); // Float register.
+ } else if (left->is_double_fpu()) {
+ __ z_lcdbr(dest->as_double_reg(), left->as_double_reg()); // Double register.
+ } else {
+ assert(left->is_double_cpu(), "Must be a long");
+ __ z_lcgr(dest->as_register_lo(), left->as_register_lo()); // Long: only the lo register is used.
+ }
+}
+
+void LIR_Assembler::fxch(int i) {
+ ShouldNotCallThis(); // x86 only; must never be called in this port.
+}
+
+void LIR_Assembler::fld(int i) {
+ ShouldNotCallThis(); // x86 only; must never be called in this port.
+}
+
+void LIR_Assembler::ffree(int i) {
+ ShouldNotCallThis(); // x86 only; must never be called in this port.
+}
+
+void LIR_Assembler::rt_call(LIR_Opr result, address dest,
+ const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+ assert(!tmp->is_valid(), "don't need temporary");
+ emit_call_c(dest);
+ CHECK_BAILOUT();
+ // Call info is attached only when the caller supplied some.
+ if (info == NULL) return;
+ add_call_info_here(info);
+}
+
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+ ShouldNotCallThis(); // Not needed on ZARCH_64; must never be called.
+}
+
+void LIR_Assembler::membar() {
+ __ z_fence(); // Same emission as membar_storeload().
+}
+
+void LIR_Assembler::membar_acquire() {
+ __ z_acquire(); // Same emission as membar_loadload() and membar_loadstore().
+}
+
+void LIR_Assembler::membar_release() {
+ __ z_release(); // Same emission as membar_storestore().
+}
+
+void LIR_Assembler::membar_loadload() {
+ __ z_acquire(); // LoadLoad is mapped to the acquire barrier, like membar_acquire().
+}
+
+void LIR_Assembler::membar_storestore() {
+ __ z_release(); // StoreStore is mapped to the release barrier, like membar_release().
+}
+
+void LIR_Assembler::membar_loadstore() {
+ __ z_acquire(); // LoadStore is mapped to the acquire barrier, like membar_acquire().
+}
+
+void LIR_Assembler::membar_storeload() {
+ __ z_fence(); // StoreLoad is mapped to the fence, like membar().
+}
+
+void LIR_Assembler::on_spin_wait() {
+ Unimplemented(); // Not implemented in this port.
+}
+
+void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) { // Load the address described by addr_opr into dest.
+ LIR_Address* addr = addr_opr->as_address_ptr();
+ assert(addr->scale() == LIR_Address::times_1, "scaling unsupported"); // Only unscaled addresses are handled.
+ __ load_address(dest->as_pointer_register(), as_Address(addr));
+}
+
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+ ShouldNotCallThis(); // Unused in this port; result_reg is never written.
+}
+
+#ifdef ASSERT
+// Emit run-time assertion. Compiled under ASSERT only; not implemented in this port.
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+ Unimplemented();
+}
+#endif
+
+void LIR_Assembler::peephole(LIR_List*) {
+ // Do nothing for now: the LIR list is left untouched.
+}
+
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
+ assert(code == lir_xadd, "lir_xchg not supported");
+ Address mem = as_Address(src->as_address_ptr());
+ const bool has_index = mem.index()->is_valid();
+ if (has_index) {
+ // LAA and LAAG do not support index register.
+ __ load_address(Z_R1_scratch, mem);
+ }
+ const Register base = has_index ? Z_R1_scratch : mem.base();
+ const intptr_t disp = has_index ? 0 : mem.disp();
+ const BasicType type = data->type();
+ if (type == T_INT) {
+ __ z_laa(dest->as_register(), data->as_register(), disp, base);
+ } else if (type == T_LONG) {
+ assert(data->as_register_lo() == data->as_register_hi(), "should be a single register");
+ __ z_laag(dest->as_register_lo(), data->as_register_lo(), disp, base);
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { // Updates the type entry at op->mdp() for obj.
+ Register obj = op->obj()->as_register();
+ Register tmp1 = op->tmp()->as_pointer_register();
+ Register tmp2 = Z_R1_scratch;
+ Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
+ ciKlass* exact_klass = op->exact_klass();
+ intptr_t current_klass = op->current_klass();
+ bool not_null = op->not_null();
+ bool no_conflict = op->no_conflict();
+
+ Label update, next, none, null_seen, init_klass; // NOTE(review): none and null_seen are never bound or used below.
+
+ bool do_null = !not_null;
+ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+ assert(do_null || do_update, "why are we here?");
+ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+
+ __ verify_oop(obj);
+
+ if (do_null || tmp1 != obj DEBUG_ONLY(|| true)) {
+ __ z_ltgr(tmp1, obj); // Copies obj to tmp1; the branch/assert below consumes the resulting condition code.
+ }
+ if (do_null) {
+ __ z_brnz(update);
+ if (!TypeEntries::was_null_seen(current_klass)) {
+ __ z_lg(tmp1, mdo_addr);
+ __ z_oill(tmp1, TypeEntries::null_seen); // Read-modify-write: set null_seen in the entry.
+ __ z_stg(tmp1, mdo_addr);
+ }
+ if (do_update) {
+ __ z_bru(next);
+ }
+ } else {
+ __ asm_assert_ne("unexpect null obj", __LINE__);
+ }
+
+ __ bind(update);
+
+ if (do_update) {
+#ifdef ASSERT
+ if (exact_klass != NULL) {
+ __ load_klass(tmp1, tmp1);
+ metadata2reg(exact_klass->constant_encoding(), tmp2);
+ __ z_cgr(tmp1, tmp2);
+ __ asm_assert_eq("exact klass and actual klass differ", __LINE__);
+ }
+#endif
+
+ Label do_update; // NOTE(review): shadows the bool do_update for the rest of this scope.
+ __ z_lg(tmp2, mdo_addr); // tmp2 = current contents of the type entry.
+
+ if (!no_conflict) {
+ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+ if (exact_klass != NULL) {
+ metadata2reg(exact_klass->constant_encoding(), tmp1);
+ } else {
+ __ load_klass(tmp1, tmp1);
+ }
+
+ // Klass seen before: nothing to do (regardless of unknown bit).
+ __ z_lgr(Z_R0_scratch, tmp2);
+ assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction");
+ __ z_nill(Z_R0_scratch, TypeEntries::type_klass_mask & 0xFFFF);
+ __ compareU64_and_branch(Z_R0_scratch, tmp1, Assembler::bcondEqual, next);
+
+ // Already unknown: Nothing to do anymore.
+ __ z_tmll(tmp2, TypeEntries::type_unknown);
+ __ z_brc(Assembler::bcondAllOne, next);
+
+ if (TypeEntries::is_type_none(current_klass)) {
+ __ z_lgr(Z_R0_scratch, tmp2);
+ assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction");
+ __ z_nill(Z_R0_scratch, TypeEntries::type_mask & 0xFFFF);
+ __ compareU64_and_branch(Z_R0_scratch, (intptr_t)0, Assembler::bcondEqual, init_klass);
+ }
+ } else {
+ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+ // Already unknown: Nothing to do anymore.
+ __ z_tmll(tmp2, TypeEntries::type_unknown);
+ __ z_brc(Assembler::bcondAllOne, next);
+ }
+
+ // Different than before. Cannot keep accurate profile.
+ __ z_oill(tmp2, TypeEntries::type_unknown);
+ __ z_bru(do_update);
+ } else {
+ // There's a single possible klass at this profile point.
+ assert(exact_klass != NULL, "should be");
+ if (TypeEntries::is_type_none(current_klass)) {
+ metadata2reg(exact_klass->constant_encoding(), tmp1);
+ __ z_lgr(Z_R0_scratch, tmp2);
+ assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction");
+ __ z_nill(Z_R0_scratch, TypeEntries::type_klass_mask & 0xFFFF);
+ __ compareU64_and_branch(Z_R0_scratch, tmp1, Assembler::bcondEqual, next);
+#ifdef ASSERT
+ {
+ Label ok;
+ __ z_lgr(Z_R0_scratch, tmp2);
+ assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction");
+ __ z_nill(Z_R0_scratch, TypeEntries::type_mask & 0xFFFF);
+ __ compareU64_and_branch(Z_R0_scratch, (intptr_t)0, Assembler::bcondEqual, ok);
+ __ stop("unexpected profiling mismatch");
+ __ bind(ok);
+ }
+#endif
+
+ } else {
+ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+ // Already unknown: Nothing to do anymore.
+ __ z_tmll(tmp2, TypeEntries::type_unknown);
+ __ z_brc(Assembler::bcondAllOne, next);
+ __ z_oill(tmp2, TypeEntries::type_unknown);
+ __ z_bru(do_update);
+ }
+ }
+
+ __ bind(init_klass);
+ // Combine klass and null_seen bit (only used if (tmp & type_mask)==0).
+ __ z_ogr(tmp2, tmp1);
+
+ __ bind(do_update);
+ __ z_stg(tmp2, mdo_addr); // Write the updated entry back.
+
+ __ bind(next);
+ }
+}
+
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+ assert(op->crc()->is_single_cpu(), "crc must be register");
+ assert(op->val()->is_single_cpu(), "byte value must be register");
+ assert(op->result_opr()->is_single_cpu(), "result must be register");
+ const Register crc_reg = op->crc()->as_register();
+ const Register byte_reg = op->val()->as_register();
+ const Register out_reg = op->result_opr()->as_register(); // First holds the table address, finally the result.
+
+ assert_different_registers(byte_reg, crc_reg, out_reg);
+
+ __ load_const_optimized(out_reg, StubRoutines::crc_table_addr());
+ __ not_(crc_reg, noreg, false); // ~crc before the byte update
+ __ update_byte_crc32(crc_reg, byte_reg, out_reg);
+ __ not_(out_reg, crc_reg, false); // ~crc again, this time into the result register
+}
+
+#undef __
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP
+#define CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP
+
+ private:
+
+ // Record the type of the receiver in ReceiverTypeData.
+ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data,
+ Register recv, Register tmp1, Label* update_done);
+ // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
+ void setup_md_access(ciMethod* method, int bci,
+ ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias);
+ public:
+ address emit_call_c(address a);
+
+ void store_parameter(Register r, int param_num); // Store r into word-sized parameter slot param_num (relative to Z_SP).
+ void store_parameter(jint c, int param_num); // Same, for a constant.
+
+ void check_reserved_argument_area(int bytes) { // Assert only: bytes must fit into the reserved argument area.
+ assert(bytes + FrameMap::first_available_sp_in_frame <= frame_map()->reserved_argument_area_size(),
+ "reserved_argument_area too small");
+ }
+
+ enum {
+ call_stub_size = 512, // See Compile::MAX_stubs_size and CompiledStaticCall::emit_to_interp_stub.
+ exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(128), // 1*K in debug builds, 128 otherwise.
+ deopt_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(64) // 1*K in debug builds, 64 otherwise.
+ };
+
+#endif // CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LIRGenerator_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1246 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArray.hpp"
+#include "ci/ciObjArrayKlass.hpp"
+#include "ci/ciTypeArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_s390.inline.hpp"
+
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->
+#else
+#define __ gen()->lir()->
+#endif
+
+void LIRItem::load_byte_item() { // No byte-specific handling: simply delegates to load_item().
+ // Byte loads use same registers as other loads.
+ load_item();
+}
+
+void LIRItem::load_nonconstant(int bits) {
+  // Values that cannot be inlined as a constant of 'bits' bits are loaded the regular way.
+  if (!_gen->can_inline_as_constant(value(), bits)) {
+    load_item();
+    return;
+  }
+  // Inlinable: keep an existing constant operand, otherwise create one from the value's type.
+  LIR_Opr opr = value()->operand();
+  const bool already_constant = opr->is_constant();
+  _result = already_constant ? opr : LIR_OprFact::value_type(value()->type());
+}
+
+inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) { // Emit LIR into ll that brings the int item li into dst as a long.
+ LIR_Opr r = li.value()->operand();
+ if (r->is_constant()) {
+ // Constants get loaded with sign extend on this platform.
+ ll->move(li.result(), dst);
+ } else {
+ if (!r->is_register()) {
+ li.load_item_force(dst); // Not in a register yet: load it directly into dst.
+ }
+ LIR_Opr dst_l = FrameMap::as_long_opr(dst->as_register()); // Long operand for dst's register.
+ ll->convert(Bytecodes::_i2l, li.result(), dst_l); // Int-to-long conversion (_i2l) into dst_l.
+ }
+}
+
+//--------------------------------------------------------------
+// LIRGenerator
+//--------------------------------------------------------------
+
+LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::as_oop_opr(Z_EXC_OOP); } // Oop operand for Z_EXC_OOP.
+LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::as_opr(Z_EXC_PC); } // Operand for Z_EXC_PC.
+LIR_Opr LIRGenerator::divInOpr() { return FrameMap::Z_R11_opr; } // int dividend in (see do_ArithmeticOp_Int).
+LIR_Opr LIRGenerator::divOutOpr() { return FrameMap::Z_R11_opr; } // int quotient out, same register as the dividend.
+LIR_Opr LIRGenerator::remOutOpr() { return FrameMap::Z_R10_opr; } // int remainder out.
+LIR_Opr LIRGenerator::ldivInOpr() { return FrameMap::Z_R11_long_opr; } // long dividend in (see do_ArithmeticOp_Long).
+LIR_Opr LIRGenerator::ldivOutOpr() { return FrameMap::Z_R11_long_opr; } // long quotient out, same register as the dividend.
+LIR_Opr LIRGenerator::lremOutOpr() { return FrameMap::Z_R10_long_opr; } // long remainder out.
+LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } // Fresh virtual register on every call.
+LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::Z_R13_opr; } // Fixed temp; handed to monitor_enter/monitor_exit in this file.
+LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } // No thread temp operand is provided.
+
+LIR_Opr LIRGenerator::result_register_for (ValueType* type, bool callee) {
+  // Int, object and long results use Z_R2; float and double results use Z_F0.
+  // The 'callee' flag does not influence the choice.
+  const ValueTag tag = type->tag();
+  LIR_Opr result_opr;
+  if      (tag == intTag)    { result_opr = FrameMap::Z_R2_opr; }
+  else if (tag == objectTag) { result_opr = FrameMap::Z_R2_oop_opr; }
+  else if (tag == longTag)   { result_opr = FrameMap::Z_R2_long_opr; }
+  else if (tag == floatTag)  { result_opr = FrameMap::Z_F0_opr; }
+  else if (tag == doubleTag) { result_opr = FrameMap::Z_F0_double_opr; }
+  else { ShouldNotReachHere(); return LIR_OprFact::illegalOpr; } // addressTag and any other tag.
+
+  // The selected operand must carry the operand type that corresponds to 'type'.
+  assert(result_opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return result_opr;
+}
+
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) { // 'type' is ignored: always hands out a new T_INT register.
+ return new_register(T_INT);
+}
+
+//--------- Loading items into registers. --------------------------------
+
+// z/Architecture cannot inline all constants.
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  ValueType* const vt = v->type();
+  if (vt->as_IntConstant() != NULL) {  // Ints: must fit a signed 16-bit immediate.
+    return Immediate::is_simm16(vt->as_IntConstant()->value());
+  }
+  if (vt->as_LongConstant() != NULL) { // Longs: same 16-bit limit.
+    return Immediate::is_simm16(vt->as_LongConstant()->value());
+  }
+  // Of the object constants only null can be stored; every other kind of value is rejected.
+  return vt->as_ObjectConstant() != NULL && vt->as_ObjectConstant()->value()->is_null_object();
+}
+
+bool LIRGenerator::can_inline_as_constant(Value i, int bits) const {
+  ValueType* const vt = i->type();
+  IntConstant* const int_c = vt->as_IntConstant();
+  LongConstant* const long_c = vt->as_LongConstant();
+  // Int and long constants qualify if they fit a signed immediate of 'bits' bits.
+  if (int_c != NULL) { return Assembler::is_simm(int_c->value(), bits); }
+  if (long_c != NULL) { return Assembler::is_simm(long_c->value(), bits); }
+  return can_store_as_constant(i, as_BasicType(vt)); // Other values: same rule as for stores.
+}
+
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
+  // Only int and long constants that fit a signed 20-bit immediate are inlinable.
+  switch (c->type()) {
+    case T_INT:  return Immediate::is_simm20(c->as_jint());
+    case T_LONG: return Immediate::is_simm20(c->as_jlong());
+    default:     return false;
+  }
+}
+
+LIR_Opr LIRGenerator::safepoint_poll_register() { // Hands out a new virtual register of longType.
+ return new_register(longType);
+}
+
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+ int shift, int disp, BasicType type) { // Build an address for base + (index << shift) + disp.
+ assert(base->is_register(), "must be");
+ if (index->is_constant()) {
+ intptr_t large_disp = ((intx)(index->as_constant_ptr()->as_jint()) << shift) + disp; // Fold the constant index into the displacement.
+ if (Displacement::is_validDisp(large_disp)) {
+ return new LIR_Address(base, large_disp, type);
+ }
+ // The folded displacement is not valid, so load it into a register and use that as the index.
+ index = new_pointer_register();
+ __ move(LIR_OprFact::intptrConst(large_disp), index);
+ return new LIR_Address(base, index, type);
+ } else {
+ if (shift > 0) {
+ LIR_Opr tmp = new_pointer_register();
+ __ shift_left(index, shift, tmp); // Scale into a temp; the incoming index operand is not modified.
+ index = tmp;
+ }
+ return new LIR_Address(base, index, disp, type);
+ }
+}
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+ BasicType type, bool needs_card_mark) { // Address of element index_opr in array_opr for element type 'type'.
+ int elem_size = type2aelembytes(type);
+ int shift = exact_log2(elem_size);
+ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
+
+ LIR_Address* addr;
+ if (index_opr->is_constant()) {
+ addr = new LIR_Address(array_opr, // Constant index: fold the scaled index into the displacement.
+ offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type);
+ } else {
+ if (index_opr->type() == T_INT) {
+ LIR_Opr tmp = new_register(T_LONG); // Widen an int index into a new long register.
+ __ convert(Bytecodes::_i2l, index_opr, tmp);
+ index_opr = tmp;
+ }
+ if (shift > 0) {
+ __ shift_left(index_opr, shift, index_opr); // NOTE(review): scales in place; if index_opr was not widened above, the caller's operand is modified - confirm.
+ }
+ addr = new LIR_Address(array_opr,
+ index_opr,
+ offset_in_bytes, type);
+ }
+ if (needs_card_mark) {
+ // This store will need a precise card mark, so go ahead and
+ // compute the full address instead of computing once for the
+ // store and again for the card mark.
+ LIR_Opr tmp = new_pointer_register();
+ __ leal(LIR_OprFact::address(addr), tmp);
+ return new LIR_Address(tmp, type);
+ } else {
+ return addr;
+ }
+}
+
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  // Wrap x in a constant operand of the requested type.
+  // Only T_LONG and T_INT are supported.
+  switch (type) {
+    case T_LONG: return LIR_OprFact::longConst(x);
+    case T_INT:  return LIR_OprFact::intConst(x);
+    default:
+      ShouldNotReachHere(); // Any other type is not expected here.
+      return LIR_OprFact::illegalOpr;
+  }
+}
+
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) { // Add 'step' to the counter located at the absolute address 'counter'.
+ LIR_Opr pointer = new_pointer_register();
+ __ move(LIR_OprFact::intptrConst(counter), pointer); // Materialize the counter's address in a register.
+ LIR_Address* addr = new LIR_Address(pointer, type);
+ increment_counter(addr, step); // Delegate to the LIR_Address overload.
+}
+
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) { // Emit an add with the memory operand 'addr' as both source and destination.
+ __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr);
+}
+
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { // Compare the int at base + disp with the constant c.
+ LIR_Opr scratch = FrameMap::Z_R1_opr; // The memory operand is first loaded into Z_R1.
+ __ load(new LIR_Address(base, disp, T_INT), scratch, info);
+ __ cmp(condition, scratch, c);
+}
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { // Compare reg with memory at base + constant disp.
+ __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info);
+}
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info) { // Same comparison, with 'disp' given as an operand.
+ __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info);
+}
+
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { // Try to emit left * c as shift plus add/sub; false means nothing was emitted.
+  if (tmp->is_valid() && c > 0 && c < max_jint) { // The range check keeps c + 1 and c - 1 free of signed overflow.
+    if (is_power_of_2(c + 1)) {                   // c == 2^n - 1: left * c == (left << n) - left
+      __ move(left, tmp);                         // Save the original value; 'left' is overwritten by the shift.
+      __ shift_left(left, log2_intptr(c + 1), left);
+      __ sub(left, tmp, result);
+      return true;
+    } else if (is_power_of_2(c - 1)) {            // c == 2^n + 1: left * c == (left << n) + left
+      __ move(left, tmp);
+      __ shift_left(left, log2_intptr(c - 1), left);
+      __ add(left, tmp, result);
+      return true;
+    }
+  }
+  return false; // No valid tmp, c out of range, or c not of the form 2^n +/- 1.
+}
+
+void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { // Store 'item' at Z_SP + offset_from_sp.
+ BasicType type = item->type(); // The store uses the item's own type.
+ __ store(item, new LIR_Address(FrameMap::Z_SP_opr, in_bytes(offset_from_sp), type));
+}
+
+//----------------------------------------------------------------------
+// visitor functions
+//----------------------------------------------------------------------
+
+void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { // LIR for an array element store, with range check, store check and GC barriers as needed.
+ assert(x->is_pinned(),"");
+ bool needs_range_check = x->compute_needs_range_check();
+ bool use_length = x->length() != NULL;
+ bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT;
+ bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL ||
+ !get_jobject_constant(x->value())->is_null_object() ||
+ x->should_profile());
+ // (A store check is skipped only for an unprofiled store of a constant null.)
+ LIRItem array(x->array(), this);
+ LIRItem index(x->index(), this);
+ LIRItem value(x->value(), this);
+ LIRItem length(this);
+
+ array.load_item();
+ index.load_nonconstant(20); // The index may stay a constant if it can be inlined with 20 bits.
+
+ if (use_length && needs_range_check) {
+ length.set_instruction(x->length());
+ length.load_item();
+ }
+ if (needs_store_check) {
+ value.load_item();
+ } else {
+ value.load_for_store(x->elt_type());
+ }
+
+ set_no_result(x);
+
+ // The CodeEmitInfo must be duplicated for each different
+ // LIR-instruction because spilling can occur anywhere between two
+ // instructions and so the debug information must be different.
+ CodeEmitInfo* range_check_info = state_for (x);
+ CodeEmitInfo* null_check_info = NULL;
+ if (x->needs_null_check()) {
+ null_check_info = new CodeEmitInfo(range_check_info);
+ }
+
+ // Emit array address setup early so it schedules better.
+ LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store);
+ if (value.result()->is_constant() && array_addr->index()->is_valid()) {
+ // Constants cannot be stored with index register on ZARCH_64 (see LIR_Assembler::const2mem()).
+ LIR_Opr tmp = new_pointer_register();
+ __ leal(LIR_OprFact::address(array_addr), tmp);
+ array_addr = new LIR_Address(tmp, x->elt_type());
+ }
+
+ if (GenerateRangeChecks && needs_range_check) {
+ if (use_length) {
+ __ cmp(lir_cond_belowEqual, length.result(), index.result()); // Branch to the stub if length <= index (belowEqual comparison).
+ __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result()));
+ } else {
+ array_range_check(array.result(), index.result(), null_check_info, range_check_info);
+ // Range_check also does the null check.
+ null_check_info = NULL;
+ }
+ }
+
+ if (GenerateArrayStoreCheck && needs_store_check) {
+ LIR_Opr tmp1 = new_register(objectType);
+ LIR_Opr tmp2 = new_register(objectType);
+ LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+ // The store check gets its own copy of the debug info.
+ CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info);
+ __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci());
+ }
+
+ if (obj_store) {
+ // Needs GC write barriers.
+ pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+ true /* do_load */, false /* patch */, NULL);
+ __ move(value.result(), array_addr, null_check_info);
+ // Presumably a precise card mark: the barrier gets the exact element address (TODO confirm).
+ post_barrier(LIR_OprFact::address(array_addr), value.result());
+ } else {
+ __ move(value.result(), array_addr, null_check_info);
+ }
+}
+
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // LIR for monitorenter on x->obj().
+ assert(x->is_pinned(),"");
+ LIRItem obj(x->obj(), this);
+ obj.load_item();
+
+ set_no_result(x);
+
+ // "lock" stores the address of the monitor stack slot, so this is not an oop.
+ LIR_Opr lock = new_register(T_INT);
+
+ CodeEmitInfo* info_for_exception = NULL; // Only created when a null check is needed.
+ if (x->needs_null_check()) {
+ info_for_exception = state_for (x);
+ }
+ // This CodeEmitInfo must not have the xhandlers because here the
+ // object is already locked (xhandlers expect object to be unlocked).
+ CodeEmitInfo* info = state_for (x, x->state(), true);
+ monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr,
+ x->monitor_no(), info_for_exception, info);
+}
+
+void LIRGenerator::do_MonitorExit(MonitorExit* x) { // LIR for monitorexit on monitor number x->monitor_no().
+ assert(x->is_pinned(),"");
+
+ LIRItem obj(x->obj(), this);
+ obj.dont_load_item(); // The object is not loaded here; monitor_exit receives a temp register instead.
+
+ LIR_Opr lock = new_register(T_INT);
+ LIR_Opr obj_temp = new_register(T_INT);
+ set_no_result(x);
+ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no());
+}
+
+// _ineg, _lneg, _fneg, _dneg: negate the loaded operand into the result register of x.
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+ LIRItem value(x->x(), this);
+ value.load_item();
+ LIR_Opr reg = rlock_result(x);
+ __ negate(value.result(), reg);
+}
+
+// for _fadd, _fmul, _fsub, _fdiv, _frem
+// _dadd, _dmul, _dsub, _ddiv, _drem
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  // The left operand is always loaded. The right operand is loaded only if it is a
+  // register or a constant; otherwise it is left where it is.
+  assert(!left.is_stack(), "can't both be memory operands");
+  left.load_item();
+
+  if (right.is_register() || right.is_constant()) {
+    right.load_item();
+  } else {
+    right.dont_load_item();
+  }
+
+  if ((x->op() == Bytecodes::_frem) || (x->op() == Bytecodes::_drem)) {
+    address entry = NULL; // Remainder is computed by a SharedRuntime call.
+    switch (x->op()) {
+      case Bytecodes::_frem:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
+        break;
+      case Bytecodes::_drem:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+    LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL);
+    set_result(x, result);
+  } else {
+    LIR_Opr reg = rlock(x);
+    LIR_Opr tmp = LIR_OprFact::illegalOpr; // No temp operand is passed on.
+    arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp(), tmp);
+    set_result(x, reg);
+  }
+}
+
+// for _ladd, _lmul, _lsub, _ldiv, _lrem
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
+ // Use shifts if divisor is a power of 2 otherwise use DSGR instruction.
+ // Instruction: DSGR R1, R2
+ // input : R1+1: dividend (R1, R1+1 designate a register pair, R1 must be even)
+ // R2: divisor
+ //
+ // output: R1+1: quotient
+ // R1: remainder
+ //
+ // Register selection: R1: Z_R10
+ // R1+1: Z_R11
+ // R2: to be chosen by register allocator (linear scan)
+
+ // R1, and R1+1 will be destroyed.
+
+ LIRItem right(x->y(), this);
+ LIRItem left(x->x() , this); // Visit left second, so that the is_register test is valid.
+
+ // Call state_for before load_item_force because state_for may
+ // force the evaluation of other instructions that are needed for
+ // correct debug info. Otherwise the live range of the fix
+ // register might be too long.
+ CodeEmitInfo* info = state_for (x);
+
+ LIR_Opr result = rlock_result(x);
+ LIR_Opr result_reg = result;
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
+ LIR_Opr divisor_opr = right.result();
+ if (divisor_opr->is_constant() && is_power_of_2(divisor_opr->as_jlong())) {
+ left.load_item(); // Power-of-2 constant divisor: no fixed registers required.
+ right.dont_load_item();
+ } else {
+ left.load_item_force(ldivInOpr()); // Dividend must be in the fixed input register.
+ right.load_item();
+
+ // DSGR instruction needs register pair.
+ if (x->op() == Bytecodes::_ldiv) {
+ result_reg = ldivOutOpr();
+ tmp = lremOutOpr();
+ } else {
+ result_reg = lremOutOpr();
+ tmp = ldivOutOpr();
+ }
+ }
+
+ if (!ImplicitDiv0Checks) {
+ __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); // Explicit divide-by-zero check.
+ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+ // Idiv/irem cannot trap (passing info would generate an assertion).
+ info = NULL;
+ }
+
+ if (x->op() == Bytecodes::_lrem) {
+ __ irem(left.result(), right.result(), result_reg, tmp, info); // The irem/idiv LIR ops are used for long operands here as well.
+ } else if (x->op() == Bytecodes::_ldiv) {
+ __ idiv(left.result(), right.result(), result_reg, tmp, info);
+ } else {
+ ShouldNotReachHere();
+ }
+
+ if (result_reg != result) {
+ __ move(result_reg, result); // Copy from the fixed output register into the result operand.
+ }
+ } else {
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+
+ left.load_item();
+ right.load_nonconstant(32); // Right operand may stay a constant if it can be inlined with 32 bits.
+ rlock_result(x);
+ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+ }
+}
+
+// for: _iadd, _imul, _isub, _idiv, _irem
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) {
+ // Use shifts if divisor is a power of 2 otherwise use DSGFR instruction.
+ // Instruction: DSGFR R1, R2
+ // input : R1+1: dividend (R1, R1+1 designate a register pair, R1 must be even)
+ // R2: divisor
+ //
+ // output: R1+1: quotient
+ // R1: remainder
+ //
+ // Register selection: R1: Z_R10
+ // R1+1: Z_R11
+ // R2: To be chosen by register allocator (linear scan).
+
+ // R1, and R1+1 will be destroyed.
+
+ LIRItem right(x->y(), this);
+ LIRItem left(x->x() , this); // Visit left second, so that the is_register test is valid.
+
+ // Call state_for before load_item_force because state_for may
+ // force the evaluation of other instructions that are needed for
+ // correct debug info. Otherwise the live range of the fix
+ // register might be too long.
+ CodeEmitInfo* info = state_for (x);
+
+ LIR_Opr result = rlock_result(x);
+ LIR_Opr result_reg = result;
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
+ LIR_Opr divisor_opr = right.result();
+ if (divisor_opr->is_constant() && is_power_of_2(divisor_opr->as_jint())) {
+ left.load_item(); // Power-of-2 constant divisor: no fixed registers required.
+ right.dont_load_item();
+ } else {
+ left.load_item_force(divInOpr()); // Dividend must be in the fixed input register.
+ right.load_item();
+
+ // DSGFR instruction needs register pair.
+ if (x->op() == Bytecodes::_idiv) {
+ result_reg = divOutOpr();
+ tmp = remOutOpr();
+ } else {
+ result_reg = remOutOpr();
+ tmp = divOutOpr();
+ }
+ }
+
+ if (!ImplicitDiv0Checks) {
+ __ cmp(lir_cond_equal, right.result(), LIR_OprFact::intConst(0)); // Explicit divide-by-zero check.
+ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
+ // Idiv/irem cannot trap (passing info would generate an assertion).
+ info = NULL;
+ }
+
+ if (x->op() == Bytecodes::_irem) {
+ __ irem(left.result(), right.result(), result_reg, tmp, info);
+ } else if (x->op() == Bytecodes::_idiv) {
+ __ idiv(left.result(), right.result(), result_reg, tmp, info);
+ } else {
+ ShouldNotReachHere();
+ }
+
+ if (result_reg != result) {
+ __ move(result_reg, result); // Copy from the fixed output register into the result operand.
+ }
+ } else {
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+ LIRItem* left_arg = &left;
+ LIRItem* right_arg = &right;
+ if (x->is_commutative() && left.is_stack() && right.is_register()) {
+ // swap them if left is real stack (or cached) and right is real register(not cached)
+ left_arg = &right;
+ right_arg = &left;
+ }
+
+ left_arg->load_item();
+
+ // Do not need to load right, as we can handle stack and constants.
+ if (x->op() == Bytecodes::_imul) {
+ bool use_tmp = false; // A temp is only needed for constants of the form 2^n +/- 1.
+ if (right_arg->is_constant()) {
+ int iconst = right_arg->get_jint_constant();
+ if (is_power_of_2(iconst - 1) || is_power_of_2(iconst + 1)) { // NOTE(review): iconst +/- 1 can overflow for min_jint/max_jint - confirm this is harmless.
+ use_tmp = true;
+ }
+ }
+ right_arg->dont_load_item();
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
+ if (use_tmp) {
+ tmp = new_register(T_INT);
+ }
+ rlock_result(x);
+
+ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), tmp);
+ } else {
+ right_arg->dont_load_item();
+ rlock_result(x);
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
+ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), tmp);
+ }
+ }
+}
+
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { // Dispatch on the result type tag to the FPU, long or int variant.
+ // If an operand with use count 1 is the left operand, then it is
+ // likely that no move for 2-operand-LIR-form is necessary.
+ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+ x->swap_operands();
+ }
+
+ ValueTag tag = x->type()->tag();
+ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+ switch (tag) {
+ case floatTag:
+ case doubleTag: do_ArithmeticOp_FPU(x); return;
+ case longTag: do_ArithmeticOp_Long(x); return;
+ case intTag: do_ArithmeticOp_Int(x); return;
+ }
+ ShouldNotReachHere(); // Only reached for a tag that is not handled above.
+}
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+  // The shift count is not forced into a fixed register here: a constant count stays
+  // a constant operand, any other count is loaded like a regular item.
+  LIRItem value(x->x(), this);
+  LIRItem count(x->y(), this);
+
+  if (count.is_constant()) {
+    count.dont_load_item();
+  } else {
+    count.load_item();
+  }
+  value.load_item();
+  LIR_Opr reg = rlock_result(x);
+
+  // No temporary operand is passed to shift_op.
+  shift_op(x->op(), reg, value.result(), count.result(), LIR_OprFact::illegalOpr);
+}
+
+// _iand, _land, _ior, _lor, _ixor, _lxor
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+ // If an operand with use count 1 is the left operand, then it is
+ // likely that no move for 2-operand-LIR-form is necessary.
+ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+ x->swap_operands();
+ }
+
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+
+ left.load_item();
+ right.load_nonconstant(32); // Right operand may stay a constant if it can be inlined with 32 bits.
+ LIR_Opr reg = rlock_result(x);
+
+ logic_op(x->op(), reg, left.result(), right.result());
+}
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg: compare two loaded operands into an int result register.
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+ left.load_item();
+ right.load_item();
+ LIR_Opr reg = rlock_result(x);
+ if (x->x()->type()->is_float_kind()) {
+ Bytecodes::Code code = x->op();
+ __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); // Last argument is true for the _fcmpl/_dcmpl flavors.
+ } else if (x->x()->type()->tag() == longTag) {
+ __ lcmp2int(left.result(), right.result(), reg);
+ } else {
+ ShouldNotReachHere(); // Only float-kind and long operands are handled here.
+ }
+}
+
+void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { // LIR for the compare-and-swap intrinsics on object, int and long fields.
+ assert(x->number_of_arguments() == 4, "wrong type");
+ LIRItem obj (x->argument_at(0), this); // object
+ LIRItem offset(x->argument_at(1), this); // offset of field
+ LIRItem cmp (x->argument_at(2), this); // Value to compare with field.
+ LIRItem val (x->argument_at(3), this); // Replace field with val if matches cmp.
+
+ // Get address of field.
+ obj.load_item();
+ offset.load_nonconstant(20); // The offset may stay a constant if it can be inlined with 20 bits.
+ cmp.load_item();
+ val.load_item();
+
+ LIR_Opr addr = new_pointer_register();
+ LIR_Address* a;
+ if (offset.result()->is_constant()) {
+ assert(Immediate::is_simm20(offset.result()->as_jlong()), "should have been loaded into register");
+ a = new LIR_Address(obj.result(),
+ offset.result()->as_jlong(),
+ as_BasicType(type));
+ } else {
+ a = new LIR_Address(obj.result(),
+ offset.result(),
+ 0,
+ as_BasicType(type));
+ }
+ __ leal(LIR_OprFact::address(a), addr); // Materialize the field address; cas and barriers below all use 'addr'.
+
+ if (type == objectType) { // Write-barrier needed for Object fields.
+ pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+ true /* do_load */, false /* patch */, NULL);
+ }
+
+ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience
+ if (type == objectType) {
+ __ cas_obj(addr, cmp.result(), val.result(), new_register(T_OBJECT), new_register(T_OBJECT));
+ } else if (type == intType) {
+ __ cas_int(addr, cmp.result(), val.result(), ill, ill);
+ } else if (type == longType) {
+ __ cas_long(addr, cmp.result(), val.result(), ill, ill);
+ } else {
+ ShouldNotReachHere();
+ }
+ // Generate conditional move of boolean result: 1 if the condition is 'equal', else 0.
+ LIR_Opr result = rlock_result(x);
+ __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0),
+ result, as_BasicType(type));
+ if (type == objectType) { // Write-barrier needed for Object fields.
+ // Precise card mark since could either be object or array
+ post_barrier(addr, val.result());
+ }
+}
+
+
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { // dabs/dsqrt become LIR ops, the other handled ids become SharedRuntime calls.
+ switch (x->id()) { // Ids without a case below emit nothing.
+ case vmIntrinsics::_dabs:
+ case vmIntrinsics::_dsqrt: {
+ assert(x->number_of_arguments() == 1, "wrong type");
+ LIRItem value(x->argument_at(0), this);
+ value.load_item();
+ LIR_Opr dst = rlock_result(x);
+
+ switch (x->id()) {
+ case vmIntrinsics::_dsqrt: {
+ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+ break;
+ }
+ case vmIntrinsics::_dabs: {
+ __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
+ break;
+ }
+ }
+ break;
+ }
+ case vmIntrinsics::_dlog10: // fall through
+ case vmIntrinsics::_dlog: // fall through
+ case vmIntrinsics::_dsin: // fall through
+ case vmIntrinsics::_dtan: // fall through
+ case vmIntrinsics::_dcos: // fall through
+ case vmIntrinsics::_dexp: {
+ assert(x->number_of_arguments() == 1, "wrong type");
+ // Pick the SharedRuntime entry that matches the intrinsic id.
+ address runtime_entry = NULL;
+ switch (x->id()) {
+ case vmIntrinsics::_dsin:
+ runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+ break;
+ case vmIntrinsics::_dcos:
+ runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+ break;
+ case vmIntrinsics::_dtan:
+ runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+ break;
+ case vmIntrinsics::_dlog:
+ runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+ break;
+ case vmIntrinsics::_dlog10:
+ runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+ break;
+ case vmIntrinsics::_dexp:
+ runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ // One-argument runtime call; its result becomes the result of x.
+ LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
+ set_result(x, result);
+ break;
+ }
+ case vmIntrinsics::_dpow: {
+ assert(x->number_of_arguments() == 2, "wrong type");
+ address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+ LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL); // Two-argument runtime call.
+ set_result(x, result);
+ break;
+ }
+ }
+}
+
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) { // LIR for the arraycopy intrinsic (src, src_pos, dst, dst_pos, length).
+ assert(x->number_of_arguments() == 5, "wrong type");
+
+ // Copy stubs possibly call C code, e.g. G1 barriers, so we need to reserve room
+ // for the C ABI (see frame::z_abi_160).
+ BasicTypeArray sig; // Empty signature is precise enough.
+ frame_map()->c_calling_convention(&sig);
+
+ // Make all state_for calls early since they can emit code.
+ CodeEmitInfo* info = state_for (x, x->state());
+
+ LIRItem src(x->argument_at(0), this);
+ LIRItem src_pos(x->argument_at(1), this);
+ LIRItem dst(x->argument_at(2), this);
+ LIRItem dst_pos(x->argument_at(3), this);
+ LIRItem length(x->argument_at(4), this);
+
+ // Operands for arraycopy must use fixed registers, otherwise
+ // LinearScan will fail allocation (because arraycopy always needs a
+ // call).
+ // The five operands go into Z_ARG1 .. Z_ARG5, in argument order.
+ src.load_item_force (FrameMap::as_oop_opr(Z_ARG1));
+ src_pos.load_item_force (FrameMap::as_opr(Z_ARG2));
+ dst.load_item_force (FrameMap::as_oop_opr(Z_ARG3));
+ dst_pos.load_item_force (FrameMap::as_opr(Z_ARG4));
+ length.load_item_force (FrameMap::as_opr(Z_ARG5));
+
+ LIR_Opr tmp = FrameMap::as_opr(Z_R7); // Fixed temp passed to the arraycopy op.
+
+ set_no_result(x);
+
+ int flags;
+ ciArrayKlass* expected_type;
+ arraycopy_helper(x, &flags, &expected_type); // Fills in 'flags' and 'expected_type' for the arraycopy op.
+
+ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(),
+ length.result(), tmp, expected_type, flags, info); // does add_safepoint
+}
+
+// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
+// _i2b, _i2c, _i2s
+void LIRGenerator::do_Convert(Convert* x) { // Load the operand and emit one convert op into the result register of x.
+ LIRItem value(x->value(), this);
+
+ value.load_item();
+ LIR_Opr reg = rlock_result(x);
+ __ convert(x->op(), value.result(), reg);
+}
+
+void LIRGenerator::do_NewInstance(NewInstance* x) { // LIR for allocating an instance of x->klass().
+ print_if_not_loaded(x);
+
+ // This instruction can be deoptimized in the slow path : use
+ // Z_R2 as result register.
+ const LIR_Opr reg = result_register_for (x->type());
+
+ CodeEmitInfo* info = state_for (x, x->state());
+ LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr;
+ LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr;
+ LIR_Opr tmp3 = reg; // The result register doubles as the third temp.
+ LIR_Opr tmp4 = LIR_OprFact::illegalOpr;
+ LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr;
+ new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, tmp4, klass_reg, info);
+ LIR_Opr result = rlock_result(x);
+ __ move(reg, result); // Copy from the fixed result register into the result operand of x.
+}
+
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { // LIR for allocating an array with element type x->elt_type().
+ CodeEmitInfo* info = state_for (x, x->state());
+
+ LIRItem length(x->length(), this);
+ length.load_item();
+
+ LIR_Opr reg = result_register_for (x->type());
+ LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr;
+ LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr;
+ LIR_Opr tmp3 = reg; // The result register doubles as the third temp.
+ LIR_Opr tmp4 = LIR_OprFact::illegalOpr;
+ LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr;
+ LIR_Opr len = length.result();
+ BasicType elem_type = x->elt_type();
+ // Load the type array klass for elem_type into klass_reg.
+ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
+
+ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
+ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
+
+ LIR_Opr result = rlock_result(x);
+ __ move(reg, result); // Copy from the fixed result register into the result operand of x.
+}
+
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { // LIR for allocating an object array with element klass x->klass().
+ // Evaluate state_for early since it may emit code.
+ CodeEmitInfo* info = state_for (x, x->state());
+ // In case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction
+ // and therefore provide the state before the parameters have been consumed.
+ CodeEmitInfo* patching_info = NULL;
+ if (!x->klass()->is_loaded() || PatchALot) {
+ patching_info = state_for (x, x->state_before());
+ }
+
+ LIRItem length(x->length(), this);
+ length.load_item();
+
+ const LIR_Opr reg = result_register_for (x->type());
+ LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr;
+ LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr;
+ LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+ LIR_Opr tmp4 = LIR_OprFact::illegalOpr;
+ LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr;
+ LIR_Opr len = length.result();
+
+ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
+ ciKlass* obj = ciObjArrayKlass::make(x->klass());
+ if (obj == ciEnv::unloaded_ciobjarrayklass()) {
+ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+ }
+ klass2reg_with_patching(klass_reg, obj, patching_info); // Load the array klass into klass_reg; patching_info is NULL unless set above.
+ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+
+ LIR_Opr result = rlock_result(x);
+ __ move(reg, result); // Copy from the fixed result register into the result operand of x.
+}
+
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { // LIR for a multi-dimensional array allocation via the new_multi_array runtime stub.
+ Values* dims = x->dims();
+ int i = dims->length();
+ LIRItemList* items = new LIRItemList(i, i, NULL);
+ while (i-- > 0) {
+ LIRItem* size = new LIRItem(dims->at(i), this); // One LIRItem per dimension, created from the last to the first.
+ items->at_put(i, size);
+ }
+
+ // Evaluate state_for early since it may emit code.
+ CodeEmitInfo* patching_info = NULL;
+ if (!x->klass()->is_loaded() || PatchALot) {
+ patching_info = state_for (x, x->state_before());
+
+ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
+ // clone all handlers (NOTE: Usually this is handled transparently
+ // by the CodeEmitInfo cloning logic in CodeStub constructors but
+ // is done explicitly here because a stub isn't being used).
+ x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+ }
+ CodeEmitInfo* info = state_for (x, x->state());
+
+ i = dims->length();
+ while (--i >= 0) {
+ LIRItem* size = items->at(i);
+ size->load_nonconstant(32);
+ // FrameMap::_reserved_argument_area_size includes the dimensions varargs, because
+ // it's initialized to hir()->max_stack() when the FrameMap is created.
+ store_stack_parameter(size->result(), in_ByteSize(i*sizeof(jint) + FrameMap::first_available_sp_in_frame));
+ }
+ // Runtime call arguments: klass in Z_R3, rank in Z_R4, address of the stored dimensions in Z_R5.
+ LIR_Opr klass_reg = FrameMap::Z_R3_metadata_opr;
+ klass2reg_with_patching(klass_reg, x->klass(), patching_info);
+
+ LIR_Opr rank = FrameMap::Z_R4_opr;
+ __ move(LIR_OprFact::intConst(x->rank()), rank);
+ LIR_Opr varargs = FrameMap::Z_R5_opr;
+ __ leal(LIR_OprFact::address(new LIR_Address(FrameMap::Z_SP_opr, FrameMap::first_available_sp_in_frame, T_INT)),
+ varargs);
+ LIR_OprList* args = new LIR_OprList(3);
+ args->append(klass_reg);
+ args->append(rank);
+ args->append(varargs);
+ LIR_Opr reg = result_register_for (x->type());
+ __ call_runtime(Runtime1::entry_for (Runtime1::new_multi_array_id),
+ LIR_OprFact::illegalOpr,
+ reg, args, info);
+
+ LIR_Opr result = rlock_result(x);
+ __ move(reg, result); // Copy from the fixed result register into the result operand of x.
+}
+
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
+  // Intentionally empty: no LIR is generated for a BlockBegin here.
+}
+
+void LIRGenerator::do_CheckCast(CheckCast* x) {
+  // Emits a checkcast LIR op together with the stub that throws when the check fails.
+  LIRItem object(x->obj(), this);
+  const bool icc_check = x->is_incompatible_class_change_check();
+
+  // The patching state must be captured before the destination register is
+  // locked as an oop register, and before the object is loaded (the latter
+  // is for deoptimization).
+  const bool needs_patching = !x->klass()->is_loaded() || (PatchALot && !icc_check);
+  CodeEmitInfo* patching_info = needs_patching ? state_for (x, x->state_before()) : NULL;
+  object.load_item();
+
+  // Debug info for the exception thrown on a failed check.
+  CodeEmitInfo* info_for_exception = state_for (x);
+
+  CodeStub* failure_stub = NULL;
+  if (icc_check) {
+    assert(patching_info == NULL, "can't patch this");
+    failure_stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
+  } else {
+    failure_stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, object.result(), info_for_exception);
+  }
+
+  LIR_Opr result = rlock_result(x);
+  LIR_Opr tmp_a  = new_register(objectType);
+  LIR_Opr tmp_b  = new_register(objectType);
+  __ checkcast(result, object.result(), x->klass(), tmp_a, tmp_b, LIR_OprFact::illegalOpr,
+               x->direct_compare(), info_for_exception, patching_info, failure_stub,
+               x->profiled_method(), x->profiled_bci());
+}
+
+
+void LIRGenerator::do_InstanceOf(InstanceOf* x) {
+  // Emits an instanceof LIR op; patching info is attached when the klass is not loaded (or PatchALot is set).
+  LIRItem object(x->obj(), this);
+  const bool needs_patching = !x->klass()->is_loaded() || PatchALot;
+  CodeEmitInfo* patching_info = needs_patching ? state_for (x, x->state_before()) : NULL;
+
+  // Ensure the result register is not the input register because the
+  // result is initialized before the patching safepoint.
+  object.load_item();
+  LIR_Opr result = rlock_result(x);
+  LIR_Opr tmp_a  = new_register(objectType);
+  LIR_Opr tmp_b  = new_register(objectType);
+
+  __ instanceof(result, object.result(), x->klass(), tmp_a, tmp_b, LIR_OprFact::illegalOpr,
+                x->direct_compare(), patching_info,
+                x->profiled_method(), x->profiled_bci());
+}
+
+
+void LIRGenerator::do_If (If* x) {
+  // Emits compare + conditional branch for a two-successor If, preceded by a safepoint poll when x is a safepoint.
+  assert(x->number_of_sux() == 2, "inconsistency");
+  ValueTag tag = x->x()->type()->tag();
+  const bool is_safepoint = x->is_safepoint();
+  If::Condition cond = x->cond();
+
+  LIRItem xitem(x->x(), this);
+  LIRItem yitem(x->y(), this);
+  LIRItem* xin = &xitem;
+  LIRItem* yin = &yitem;
+
+  if (tag == longTag) {
+    // For longs, only conditions "eql", "neq", "lss", "geq" are valid;
+    // mirror for other conditions.
+    if (cond == If::gtr || cond == If::leq) {
+      cond = Instruction::mirror(cond);
+      xin = &yitem;
+      yin = &xitem;
+    }
+    xin->set_destroys_register();
+  }
+  xin->load_item();
+  // TODO: don't load long constants != 0L
+  if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && (cond == If::eql || cond == If::neq)) {
+    // inline long zero
+    yin->dont_load_item();
+  } else if (tag == longTag || tag == floatTag || tag == doubleTag) {
+    // Longs cannot handle constants at right side.
+    yin->load_item();
+  } else {
+    yin->dont_load_item();
+  }
+
+  // Add safepoint before generating condition code so it can be recomputed.
+  if (is_safepoint) {
+    // Increment backedge counter if needed.
+    increment_backedge_counter(state_for (x, x->state_before()), x->profiled_bci());
+    // Use safepoint_poll_register() instead of LIR_OprFact::illegalOpr.
+    __ safepoint(safepoint_poll_register(), state_for (x, x->state_before()));
+  }
+  set_no_result(x);
+
+  LIR_Opr left = xin->result();
+  LIR_Opr right = yin->result();
+  __ cmp(lir_cond(cond), left, right);
+  // Generate branch profiling. Profiling code doesn't kill flags.
+  profile_branch(x, cond);
+  move_to_phi(x->state());
+  if (x->x()->type()->is_float_kind()) {
+    __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux());
+  } else {
+    __ branch(lir_cond(cond), right->type(), x->tsux());
+  }
+  assert(x->default_sux() == x->fsux(), "wrong destination above");
+  __ jump(x->default_sux());
+}
+
+LIR_Opr LIRGenerator::getThreadPointer() {
+  return FrameMap::as_pointer_opr(Z_thread);  // Z_thread converted to a LIR operand by FrameMap::as_pointer_opr.
+}
+
+void LIRGenerator::trace_block_entry(BlockBegin* block) {
+  const LIR_Opr id_arg = FrameMap::Z_R2_opr;  // The block id is handed to the runtime routine in Z_R2.
+  __ move(LIR_OprFact::intConst(block->block_id()), id_arg);
+  LIR_OprList* call_args = new LIR_OprList(1);
+  call_args->append(id_arg);
+  __ call_runtime_leaf(CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry), LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, call_args);
+}
+
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
+                                        CodeEmitInfo* info) {
+  __ store(value, address, info);  // Only a plain store op is emitted here.
+}
+
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
+                                       CodeEmitInfo* info) {
+  __ load(address, result, info);  // Only a plain load op is emitted here.
+}
+
+
+void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
+                                     BasicType type, bool is_volatile) {
+  // Stores data at src + offset; T_OBJECT/T_ARRAY stores are wrapped in pre/post barriers.
+  LIR_Address* target = new LIR_Address(src, offset, type);
+  if (type != T_ARRAY && type != T_OBJECT) {
+    __ move(data, target);  // Not a reference: a plain store suffices.
+    return;
+  }
+
+  // Do the pre-write barrier, if any, then the store, then the post-write barrier.
+  pre_barrier(LIR_OprFact::address(target), LIR_OprFact::illegalOpr /* pre_val */,
+              true /* do_load */, false /* patch */, NULL);
+  __ move(data, target);
+  assert(src->is_register(), "must be register");
+  post_barrier(LIR_OprFact::address(target), data);  // Seems to be a precise address.
+}
+
+
+void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset,
+                                     BasicType type, bool is_volatile) {
+  // Loads the value at src + offset into dst; is_volatile is not consulted.
+  __ load(new LIR_Address(src, offset, type), dst);
+}
+
+void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {
+  // Emits an xadd on the memory at object + offset; only the add form on non-reference types is supported.
+  const BasicType type = x->basic_type();
+  assert (x->is_add() && type != T_ARRAY && type != T_OBJECT, "not supported");
+
+  LIRItem src(x->object(), this);
+  LIRItem off(x->offset(), this);
+  LIRItem value(x->value(), this);
+  src.load_item();
+  value.load_item();
+  off.load_nonconstant(20);
+
+  LIR_Opr result       = rlock_result(x, type);
+  LIR_Opr addend       = value.result();
+  LIR_Opr displacement = off.result();
+
+  // An offset that is still constant at this point is used as an immediate
+  // displacement and must fit into a signed 20-bit field.
+  assert(!displacement->is_constant() || Immediate::is_simm20(displacement->as_jlong()), "should have been loaded into register");
+  LIR_Address* target = displacement->is_constant()
+      ? new LIR_Address(src.result(), displacement->as_jlong(), type)
+      : new LIR_Address(src.result(), displacement, type);
+
+  __ xadd(LIR_OprFact::address(target), addend, result, LIR_OprFact::illegalOpr);
+}
+
+void LIRGenerator::do_update_CRC32(Intrinsic* x) { // Emits LIR for the _updateCRC32, _updateBytesCRC32 and _updateByteBufferCRC32 intrinsics.
+ assert(UseCRC32Intrinsics, "or should not be here");
+ LIR_Opr result = rlock_result(x);
+
+ switch (x->id()) {
+ case vmIntrinsics::_updateCRC32: {
+ LIRItem crc(x->argument_at(0), this);
+ LIRItem val(x->argument_at(1), this);
+ // Registers destroyed by update_crc32.
+ crc.set_destroys_register();
+ val.set_destroys_register();
+ crc.load_item();
+ val.load_item();
+ __ update_crc32(crc.result(), val.result(), result);
+ break;
+ }
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32: {
+ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+ LIRItem crc(x->argument_at(0), this);
+ LIRItem buf(x->argument_at(1), this);
+ LIRItem off(x->argument_at(2), this);
+ LIRItem len(x->argument_at(3), this);
+ buf.load_item();
+ off.load_nonconstant();
+ // A constant off is folded into the displacement; otherwise it is used as the index operand.
+ LIR_Opr index = off.result();
+ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; // Only the byte[] variant adds the array base offset.
+ if (off.result()->is_constant()) {
+ index = LIR_OprFact::illegalOpr;
+ offset += off.result()->as_jint();
+ }
+ LIR_Opr base_op = buf.result();
+ // A register index is converted from int to long (i2l) before it is used in the address.
+ if (index->is_valid()) {
+ LIR_Opr tmp = new_register(T_LONG);
+ __ convert(Bytecodes::_i2l, index, tmp);
+ index = tmp;
+ }
+
+ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE);
+ // The stub is called through the C calling convention with the signature (int, address, int).
+ BasicTypeList signature(3);
+ signature.append(T_INT);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+ CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+ const LIR_Opr result_reg = result_register_for (x->type());
+
+ LIR_Opr arg1 = cc->at(0);
+ LIR_Opr arg2 = cc->at(1);
+ LIR_Opr arg3 = cc->at(2);
+
+ // CCallingConventionRequiresIntsAsLongs
+ crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
+ __ leal(LIR_OprFact::address(a), arg2);
+ load_int_as_long(gen()->lir(), len, arg3);
+
+ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args());
+ __ move(result_reg, result);
+ break;
+ }
+ default: {
+ ShouldNotReachHere();
+ }
+ }
+}
+
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  Unimplemented();  // No LIR is generated for the CRC32C intrinsic here.
+}
+
+void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
+  fatal("FMA intrinsic is not implemented on this platform");  // Reaching this is a fatal error.
+}
+
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
+  fatal("vectorizedMismatch intrinsic is not implemented on this platform");  // Reaching this is a fatal error.
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LIR_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/register.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+
+
+FloatRegister LIR_OprDesc::as_float_reg() const {
+  return FrameMap::nr2floatreg(fpu_regnr());  // Map this operand's fpu register number to a FloatRegister.
+}
+
+FloatRegister LIR_OprDesc::as_double_reg() const {
+  return FrameMap::nr2floatreg(fpu_regnrHi());  // Uses the "Hi" register number field of this operand.
+}
+
+// reg2 is unused on this platform; reg1 is encoded into both register fields.
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
+  assert(!as_FloatRegister(reg2)->is_valid(), "Not used on this platform");
+  const intptr_t encoding = (intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
+                                       (reg1 << LIR_OprDesc::reg2_shift) |
+                                       LIR_OprDesc::double_type | LIR_OprDesc::fpu_register |
+                                       LIR_OprDesc::double_size);
+  return (LIR_Opr)encoding;
+}
+
+#ifndef PRODUCT
+void LIR_Address::verify() const { // Sanity checks on this address's operands; compiled only in non-PRODUCT builds.
+ assert(base()->is_cpu_register(), "wrong base operand");
+ assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); // An index is optional.
+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA,
+ "wrong type for addresses");
+}
+#endif // !PRODUCT
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_LinearScan.hpp"
+#include "utilities/debug.hpp"
+
+void LinearScan::allocate_fpu_stack() {
+  // ZARCH_64 has no FPU stack, so this must never be invoked.
+  ShouldNotCallThis();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_LINEARSCAN_S390_HPP
+#define CPU_S390_VM_C1_LINEARSCAN_S390_HPP
+
+inline bool LinearScan::is_processed_reg_num(int reg_num) {
+  // Not allocated: Z_thread, Z_fp, Z_SP, Z_R0_scratch, Z_R1_scratch, Z_R14 (they follow last_cpu_reg() in the numbering).
+  assert(FrameMap::Z_R14_opr->cpu_regnr() == 10, "wrong assumption below");
+  assert(FrameMap::Z_R0_opr->cpu_regnr()  == 11, "wrong assumption below");
+  assert(FrameMap::Z_R1_opr->cpu_regnr()  == 12, "wrong assumption below");
+  assert(FrameMap::Z_R8_opr->cpu_regnr()  == 13, "wrong assumption below");
+  assert(FrameMap::Z_R9_opr->cpu_regnr()  == 14, "wrong assumption below");
+  assert(FrameMap::Z_R15_opr->cpu_regnr() == 15, "wrong assumption below");
+  assert(reg_num >= 0, "invalid reg_num");
+  return !(reg_num > FrameMap::last_cpu_reg() && reg_num < pd_nof_cpu_regs_frame_map);
+}
+
+inline int LinearScan::num_physical_regs(BasicType type) {
+  // On IBM Z a long needs one cpu register and a double needs one fpu
+  // register, so the answer is 1 regardless of type.
+  return 1;
+}
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {
+  return false;  // Always false: no type requires adjacent registers here.
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) {
+  assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
+  return true;  // IBM Z has no callee-saved registers as far as this allocator is concerned.
+}
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+  // Intentionally empty: no op gets platform-specific temps.
+}
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
+  return false;  // No special-case handling for any interval.
+}
+
+#endif // CPU_S390_VM_C1_LINEARSCAN_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
+  // Falls through (at an aligned address) when the receiver's klass matches iCache; otherwise jumps to the IC miss stub.
+  Label ic_miss, ic_hit;
+  verify_oop(receiver);
+  int klass_offset = oopDesc::klass_offset_in_bytes();
+  if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
+    if (VM_Version::has_CompareBranch()) {
+      z_cgij(receiver, 0, Assembler::bcondEqual, ic_miss);
+    } else {
+      z_ltgr(receiver, receiver);
+      z_bre(ic_miss);
+    }
+  }
+
+  compare_klass_ptr(iCache, klass_offset, receiver, false);
+  z_bre(ic_hit);
+
+  // Miss path: reached by fall-through on a klass mismatch and by the explicit NULL check above.
+  // Note: RECEIVER must still contain the receiver!
+  bind(ic_miss);  // The NULL-check branches target this label, so it has to be bound.
+  load_const_optimized(Z_R1_scratch, AddressLiteral(SharedRuntime::get_ic_miss_stub()));
+  z_br(Z_R1_scratch);
+  align(CodeEntryAlignment);
+  bind(ic_hit);
+}
+
+void C1_MacroAssembler::explicit_null_check(Register base) {
+  ShouldNotCallThis();  // Unused here; calling it is an error.
+}
+
+void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) {
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+  generate_stack_overflow_check(bang_size_in_bytes);  // Stack overflow check over the bang size first.
+  save_return_pc();                                   // Then save the return pc ...
+  push_frame(frame_size_in_bytes);                    // ... and push the frame. TODO: Must we add z_abi_160?
+}
+
+void C1_MacroAssembler::unverified_entry(Register receiver, Register ic_klass) {
+  ShouldNotCallThis();  // Unused here; calling it is an error.
+}
+
+void C1_MacroAssembler::verified_entry() {
+  if (C1Breakpoint) { z_illtrap(0xC1); }  // Emit an illegal trap (id 0xC1) only when C1Breakpoint is set.
+}
+
+void C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { // Inline lock attempt on obj; branches to slow_case when a runtime call is needed.
+ const int hdr_offset = oopDesc::mark_offset_in_bytes();
+ assert_different_registers(hdr, obj, disp_hdr);
+ NearLabel done;
+
+ verify_oop(obj);
+
+ // Load object header.
+ z_lg(hdr, Address(obj, hdr_offset));
+
+ // Save object being locked into the BasicObjectLock...
+ z_stg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+
+ if (UseBiasedLocking) {
+ biased_locking_enter(obj, hdr, Z_R1_scratch, Z_R0_scratch, done, &slow_case);
+ }
+
+ // and mark it as unlocked.
+ z_oill(hdr, markOopDesc::unlocked_value);
+ // Save unlocked object header into the displaced header location on the stack.
+ z_stg(hdr, Address(disp_hdr, (intptr_t)0));
+ // Test if object header is still the same (i.e. unlocked), and if so, store the
+ // displaced header address in the object header. If it is not the same, get the
+ // object header instead.
+ z_csg(hdr, disp_hdr, hdr_offset, obj);
+ // If the object header was the same, we're done.
+ if (PrintBiasedLockingStatistics) {
+ Unimplemented();
+#if 0
+ cond_inc32(Assembler::equal,
+ ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr()));
+#endif
+ }
+ branch_optimized(Assembler::bcondEqual, done);
+ // If the object header was not the same, it is now in the hdr register.
+ // => Test if it is a stack pointer into the same stack (recursive locking), i.e.:
+ //
+ // 1) (hdr & markOopDesc::lock_mask_in_place) == 0
+ // 2) Z_SP <= hdr
+ // 3) hdr <= Z_SP + page_size
+ //
+ // These 3 tests can be done by evaluating the following expression:
+ //
+ // (hdr - Z_SP) & (~(page_size-1) | markOopDesc::lock_mask_in_place)
+ //
+ // assuming both the stack pointer and page_size have their least
+ // significant 2 bits cleared and page_size is a power of 2
+ z_sgr(hdr, Z_SP);
+
+ load_const_optimized(Z_R0_scratch, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+ z_ngr(hdr, Z_R0_scratch); // AND sets CC (result eq/ne 0).
+ // For recursive locking, the result is zero. => Save it in the displaced header
+ // location (NULL in the displaced hdr location indicates recursive locking).
+ z_stg(hdr, Address(disp_hdr, (intptr_t)0));
+ // Otherwise we don't care about the result and handle locking via runtime call.
+ branch_optimized(Assembler::bcondNotZero, slow_case);
+ // done
+ bind(done);
+}
+
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
+  // Inline unlock: restores the displaced header into obj, or branches to slow_case for a runtime call.
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert_different_registers(hdr, obj, disp_hdr);
+  NearLabel done;
+
+  if (UseBiasedLocking) {
+    // Load object.
+    z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+    biased_locking_exit(obj, hdr, done);
+  }
+
+  // Load displaced header.
+  z_ltg(hdr, Address(disp_hdr, (intptr_t)0));
+  // If the loaded hdr is NULL we had recursive locking, and we are done.
+  z_bre(done);
+  if (!UseBiasedLocking) {
+    // Load object.
+    z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+  }
+  verify_oop(obj);
+  // Test if object header is pointing to the displaced header, and if so, restore
+  // the displaced header in the object. If the object header is not pointing to
+  // the displaced header, get the object header instead.
+  z_csg(disp_hdr, hdr, hdr_offset, obj);
+  // If the object header was not pointing to the displaced header,
+  // we do unlocking via runtime call.
+  branch_optimized(Assembler::bcondNotEqual, slow_case);
+  // done
+  bind(done);
+}
+
+void C1_MacroAssembler::try_allocate(
+  Register obj,                        // result: Pointer to object after successful allocation.
+  Register var_size_in_bytes,          // Object size in bytes if unknown at compile time; invalid otherwise.
+  int      con_size_in_bytes,          // Object size in bytes if known at compile time.
+  Register t1,                         // Temp register: Must be global register for incr_allocated_bytes.
+  Label&   slow_case                   // Continuation point if fast allocation fails.
+) {
+  if (!UseTLAB) {
+    // Allocation in shared Eden not implemented, because sapjvm allocation trace does not allow it.
+    z_brul(slow_case);
+    return;
+  }
+  tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
+}
+
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register Rzero, Register t1) {
+  // Stores the mark word, then the array length (or a zeroed klass gap), then the klass into obj.
+  assert_different_registers(obj, klass, len, t1, Rzero);
+  const bool has_length = len->is_valid();
+  if (has_length || !UseBiasedLocking) {
+    // This assumes that all prototype bits fit in an int32_t.
+    load_const_optimized(t1, (intx)markOopDesc::prototype());
+  } else {
+    assert_different_registers(obj, klass, len, t1);
+    z_lg(t1, Address(klass, Klass::prototype_header_offset()));
+  }
+  z_stg(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
+  if (has_length) {
+    z_st(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));  // Length will be in the klass gap, if one exists.
+  } else if (UseCompressedClassPointers) {
+    store_klass_gap(Rzero, obj);  // Zero klass gap for compressed oops.
+  }
+  store_klass(klass, obj, t1);
+}
+
+void C1_MacroAssembler::initialize_body(Register objectFields, Register len_in_bytes, Register Rzero) {
+  // Zero-fills len_in_bytes bytes starting at objectFields with a single move_long_ext (MVCLE).
+  assert_different_registers(objectFields, len_in_bytes, Rzero);
+
+  // Initialize object fields.
+  // See documentation for MVCLE instruction!!! The asserts below check its even/odd register pair requirements.
+  assert(objectFields->encoding()%2==0, "objectFields must be an even register");
+  assert(len_in_bytes->encoding() == (objectFields->encoding()+1), "objectFields and len_in_bytes must be a register pair");
+  assert(Rzero->encoding()%2==1, "Rzero must be an odd register");
+
+  // Use Rzero as src length, then mvcle will copy nothing
+  // and fill the object with the padding value 0.
+  // The even register preceding Rzero is passed as the second operand.
+  move_long_ext(objectFields, as_Register(Rzero->encoding()-1), 0);
+}
+
+void C1_MacroAssembler::allocate_object(
+  Register obj,                        // Result: pointer to object after successful allocation.
+  Register t1,                         // temp register
+  Register t2,                         // temp register: Must be a global register for try_allocate.
+  int      hdr_size,                   // object header size in words (not used in the body)
+  int      obj_size,                   // object size in words
+  Register klass,                      // object klass
+  Label&   slow_case                   // Continuation point if fast allocation fails.
+) {
+  assert_different_registers(obj, t1, t2, klass);
+
+  // Step 1: reserve obj_size words; control leaves to slow_case if that fails.
+  try_allocate(obj, noreg, obj_size * wordSize, t1, slow_case);
+  // Step 2: write the header and clear the remaining fields.
+  initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);
+}
+
+void C1_MacroAssembler::initialize_object( // Writes the header of a freshly allocated instance and zeroes its fields.
+ Register obj, // result: Pointer to object after successful allocation.
+ Register klass, // object klass
+ Register var_size_in_bytes, // Object size in bytes if unknown at compile time; invalid otherwise.
+ int con_size_in_bytes, // Object size in bytes if known at compile time.
+ Register t1, // temp register
+ Register t2 // temp register
+ ) {
+ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
+ "con_size_in_bytes is not multiple of alignment");
+ assert(var_size_in_bytes == noreg, "not implemented");
+ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
+ // t2 is zeroed and serves as the source of all zero stores below.
+ const Register Rzero = t2;
+
+ z_xgr(Rzero, Rzero);
+ initialize_header(obj, klass, noreg, Rzero, t1);
+
+ // Clear rest of allocated space.
+ const int threshold = 4 * BytesPerWord;
+ if (con_size_in_bytes <= threshold) {
+ // Use explicit null stores.
+ // code size = 6*n bytes (n = number of fields to clear)
+ for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
+ z_stg(Rzero, Address(obj, i));
+ } else {
+ // Code size generated by initialize_body() is 16.
+ Register object_fields = Z_R0_scratch; // Z_R0/Z_R1 are passed as the address/length pair that initialize_body asserts on.
+ Register len_in_bytes = Z_R1_scratch;
+ z_la(object_fields, hdr_size_in_bytes, obj);
+ load_const_optimized(len_in_bytes, con_size_in_bytes - hdr_size_in_bytes);
+ initialize_body(object_fields, len_in_bytes, Rzero);
+ }
+
+ // Dtrace support is unimplemented.
+ // if (CURRENT_ENV->dtrace_alloc_probes()) {
+ // assert(obj == rax, "must be");
+ // call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id)));
+ // }
+
+ verify_oop(obj);
+}
+
+void C1_MacroAssembler::allocate_array( // Allocates an array, writes its header and zeroes its elements.
+ Register obj, // result: Pointer to array after successful allocation.
+ Register len, // array length
+ Register t1, // temp register
+ Register t2, // temp register
+ int hdr_size, // object header size in words
+ int elt_size, // element size in bytes
+ Register klass, // object klass
+ Label& slow_case // Continuation point if fast allocation fails.
+) {
+ assert_different_registers(obj, len, t1, t2, klass);
+
+ // Determine alignment mask.
+ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
+
+ // Check for negative or excessive length.
+ compareU64_and_branch(len, (int32_t)max_array_allocation_length, bcondHigh, slow_case);
+
+ // Compute array size.
+ // Note: If 0 <= len <= max_length, len*elt_size + header + alignment is
+ // smaller or equal to the largest integer. Also, since top is always
+ // aligned, we can do the alignment here instead of at the end address
+ // computation.
+ const Register arr_size = t2;
+ switch (elt_size) { // Scale len by the element size (shift by log2 of elt_size).
+ case 1: lgr_if_needed(arr_size, len); break;
+ case 2: z_sllg(arr_size, len, 1); break;
+ case 4: z_sllg(arr_size, len, 2); break;
+ case 8: z_sllg(arr_size, len, 3); break;
+ default: ShouldNotReachHere();
+ }
+ add2reg(arr_size, hdr_size * wordSize + MinObjAlignmentInBytesMask); // Add space for header & alignment.
+ z_nill(arr_size, (~MinObjAlignmentInBytesMask) & 0xffff); // Align array size.
+
+ try_allocate(obj, arr_size, 0, t1, slow_case);
+ // Rzero is passed as noreg here.
+ initialize_header(obj, klass, len, noreg, t1);
+
+ // Clear rest of allocated space.
+ Label done;
+ Register object_fields = t1; // t1/t2 are passed as the address/length pair that initialize_body asserts on.
+ Register Rzero = Z_R1_scratch;
+ z_aghi(arr_size, -(hdr_size * BytesPerWord)); // arr_size now holds the size of the fields only.
+ z_bre(done); // Jump if size of fields is zero.
+ z_la(object_fields, hdr_size * BytesPerWord, obj);
+ z_xgr(Rzero, Rzero);
+ initialize_body(object_fields, arr_size, Rzero);
+ bind(done);
+
+ // Dtrace support is unimplemented.
+ // if (CURRENT_ENV->dtrace_alloc_probes()) {
+ // assert(obj == rax, "must be");
+ // call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id)));
+ // }
+
+ verify_oop(obj);
+}
+
+
+#ifndef PRODUCT
+
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
+  Unimplemented();  // Not implemented; the disabled sketch below is kept for reference.
+  // if (!VerifyOops) return;
+  // verify_oop_addr(Address(SP, stack_offset + STACK_BIAS));
+}
+
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
+  if (!VerifyOops) { return; }  // Nothing is emitted unless VerifyOops is set.
+  NearLabel is_non_null;
+  compareU64_and_branch(r, (intptr_t)0, bcondNotEqual, is_non_null);
+  stop("non-null oop required");  // Only reached when r compares equal to 0.
+  bind(is_non_null);
+  verify_oop(r);
+}
+
+void C1_MacroAssembler::invalidate_registers(Register preserve1,
+                                             Register preserve2,
+                                             Register preserve3) {
+  Register poison_src = noreg;  // First register that received 0xc1dead; later ones copy from it.
+  for (int idx = 0; idx < FrameMap::nof_cpu_regs; idx++) {
+    const Register reg = as_Register(idx);
+    const bool keep = (reg == preserve1 || reg == preserve2 || reg == preserve3 || reg == Z_SP || reg == Z_thread);
+    if (keep) continue;
+    if (poison_src != noreg) {
+      z_lgr(reg, poison_src);
+      continue;
+    }
+    load_const_optimized(reg, 0xc1dead);
+    poison_src = reg;
+  }
+}
+
+#endif // !PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
+#define CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
+
+ void pd_init() { /* nothing to do */ }
+
+ public:
+ void try_allocate(
+ Register obj, // result: Pointer to object after successful allocation.
+ Register var_size_in_bytes, // Object size in bytes if unknown at compile time; invalid otherwise.
+ int con_size_in_bytes, // Object size in bytes if known at compile time.
+ Register t1, // temp register
+ Label& slow_case // Continuation point if fast allocation fails.
+ );
+
+ void initialize_header(Register obj, Register klass, Register len, Register Rzero, Register t1); // Rzero may be noreg (allocate_array passes noreg).
+ void initialize_body(Register objectFields, Register len_in_bytes, Register Rzero); // allocate_array passes a register it has cleared as Rzero.
+
+ // locking
+ // hdr : Used to hold locked markOop to be CASed into obj, contents destroyed.
+ // obj : Must point to the object to lock, contents preserved.
+ // disp_hdr: Must point to the displaced header location, contents preserved.
+ // slow_case: Presumably taken if inline locking fails (TODO confirm). Declared void: no code offset is returned.
+ void lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case);
+
+ // unlocking
+ // hdr : Used to hold original markOop to be CASed back into obj, contents destroyed.
+ // obj : Must point to the object to unlock, contents preserved.
+ // lock : Must point to the displaced header location, contents destroyed.
+ void unlock_object(Register hdr, Register obj, Register lock, Label& slow_case);
+
+ void initialize_object(
+ Register obj, // result: Pointer to object after successful allocation.
+ Register klass, // object klass
+ Register var_size_in_bytes, // Object size in bytes if unknown at compile time; invalid otherwise.
+ int con_size_in_bytes, // Object size in bytes if known at compile time.
+ Register t1, // temp register
+ Register t2 // temp register
+ );
+
+ // Allocation of fixed-size objects.
+ // This can also be used to allocate fixed-size arrays, by setting
+ // hdr_size correctly and storing the array length afterwards.
+ void allocate_object(
+ Register obj, // result: Pointer to object after successful allocation.
+ Register t1, // temp register
+ Register t2, // temp register
+ int hdr_size, // object header size in words
+ int obj_size, // object size in words
+ Register klass, // object klass
+ Label& slow_case // Continuation point if fast allocation fails.
+ );
+
+ enum {
+ max_array_allocation_length = 0x01000000 // Sparc friendly value (requires sethi only there); kept unchanged in this port.
+ };
+
+ // Allocation of arrays.
+ void allocate_array(
+ Register obj, // result: Pointer to array after successful allocation.
+ Register len, // array length
+ Register t1, // temp register
+ Register t2, // temp register
+ int hdr_size, // object header size in words
+ int elt_size, // element size in bytes
+ Register klass, // object klass
+ Label& slow_case // Continuation point if fast allocation fails.
+ );
+
+ // Fills all CPU registers except preserve1-3, Z_SP and Z_thread with a marker value (non-PRODUCT builds only).
+ void invalidate_registers(Register preserve1 = noreg, Register preserve2 = noreg,
+ Register preserve3 = noreg) PRODUCT_RETURN;
+
+ void nop() { z_nop(); }
+
+ // This platform only uses signal-based null checks. The Label is not needed.
+ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
+
+#endif // CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_Runtime1_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Defs.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "register_s390.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_s390.inline.hpp"
+#include "registerSaver_s390.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+// Implementation of StubAssembler
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry_point, int number_of_arguments) { // number_of_arguments is not referenced in this body.
+ set_num_rt_args(0); // Nothing on stack.
+ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different");
+
+ // We cannot trust that code generated by the C++ compiler saves R14
+ // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
+ // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
+ // Therefore we load the PC into Z_R1_scratch and let set_last_Java_frame() save
+ // it into the frame anchor.
+ address pc = get_PC(Z_R1_scratch);
+ int call_offset = (int)(pc - addr_at(0)); // Offset of that PC from addr_at(0); this is the value returned to the caller.
+ set_last_Java_frame(Z_SP, Z_R1_scratch);
+
+ // ARG1 must hold thread address.
+ z_lgr(Z_ARG1, Z_thread);
+
+ address return_pc = NULL;
+ align_call_far_patchable(this->pc());
+ return_pc = call_c_opt(entry_point);
+ assert(return_pc != NULL, "const section overflow");
+
+ reset_last_Java_frame();
+
+ // Check for pending exceptions.
+ {
+ load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));
+
+ // This used to conditionally jump to forward_exception however it is
+ // possible if we relocate that the branch will not reach. So we must jump
+ // around so we can always reach.
+
+ Label ok;
+ z_bre(ok); // No pending exception (value is zero): skip the forwarding code. bcondEqual is the same as bcondZero.
+
+ // exception pending => forward to exception handler
+
+ // Make sure that the vm_results are cleared.
+ if (oop_result1->is_valid()) {
+ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
+ }
+ if (metadata_result->is_valid()) {
+ clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(jlong));
+ }
+ if (frame_size() == no_frame_size) {
+ // Pop the stub frame.
+ pop_frame();
+ restore_return_pc();
+ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
+ z_br(Z_R1);
+ } else if (_stub_id == Runtime1::forward_exception_id) {
+ should_not_reach_here(); // The forward_exception stub itself must not forward to itself.
+ } else {
+ load_const_optimized(Z_R1, Runtime1::entry_for (Runtime1::forward_exception_id));
+ z_br(Z_R1);
+ }
+
+ bind(ok);
+ }
+
+ // Get oop results if there are any and reset the values in the thread.
+ if (oop_result1->is_valid()) {
+ get_vm_result(oop_result1);
+ }
+ if (metadata_result->is_valid()) {
+ get_vm_result_2(metadata_result);
+ }
+
+ return call_offset;
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) {
+ // Z_ARG1 is reserved for the thread, so the single argument is passed in Z_ARG2.
+ lgr_if_needed(Z_ARG2, arg1);
+ return call_RT(oop_result1, metadata_result, entry, 1); // Delegates to the int overload and returns its call offset.
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) {
+ // Z_ARG1 is reserved for the thread; arg1 and arg2 are passed in Z_ARG2 and Z_ARG3.
+ lgr_if_needed(Z_ARG2, arg1);
+ assert(arg2 != Z_ARG2, "smashed argument"); // arg2 is read after Z_ARG2 has been written.
+ lgr_if_needed(Z_ARG3, arg2);
+ return call_RT(oop_result1, metadata_result, entry, 2); // Delegates to the int overload and returns its call offset.
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
+  // Z_ARG1 is reserved for the thread; arg1..arg3 are passed in Z_ARG2..Z_ARG4.
+  lgr_if_needed(Z_ARG2, arg1);
+  assert(arg2 != Z_ARG2, "smashed argument");  // arg2 is read after Z_ARG2 has been written.
+  lgr_if_needed(Z_ARG3, arg2);
+  assert(arg3 != Z_ARG2 && arg3 != Z_ARG3, "smashed argument");  // arg3 is read last: neither register written above may hold it.
+  lgr_if_needed(Z_ARG4, arg3);
+  return call_RT(oop_result1, metadata_result, entry, 3);  // Delegates to the int overload and returns its call offset.
+}
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+// In non-PRODUCT builds with Verbose set, each use of '__' first emits a FILE_AND_LINE block comment.
+#ifndef PRODUCT
+#undef __
+#define __ (Verbose ? (sasm->block_comment(FILE_AND_LINE),sasm):sasm)->
+#endif // !PRODUCT
+// BLOCK_COMMENT expands to a bare 'if' and BIND to two statements: do not use either as an unbraced if/else body.
+#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+static OopMap* generate_oop_map(StubAssembler* sasm) {
+  const RegisterSaver::RegisterSet saved_regs = RegisterSaver::all_registers;
+  const int slots = RegisterSaver::live_reg_frame_size(saved_regs) / VMRegImpl::stack_slot_size;  // Frame size in stack slots.
+  const int words = slots / VMRegImpl::slots_per_word;  // The stub assembler is given the size in words.
+  sasm->set_frame_size(words);
+  return RegisterSaver::generate_oop_map(sasm, saved_regs);  // Map only; no save code is requested here.
+}
+
+static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true, Register return_pc = Z_R14) {
+  __ block_comment("save_live_registers");
+  // Without FPU saving only the integer register set goes into the frame.
+  RegisterSaver::RegisterSet saved_regs = RegisterSaver::all_integer_registers;
+  if (save_fpu_registers) { saved_regs = RegisterSaver::all_registers; }
+  const int slots = RegisterSaver::live_reg_frame_size(saved_regs) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(slots / VMRegImpl::slots_per_word);  // Recorded in words.
+  return RegisterSaver::save_live_registers(sasm, saved_regs, return_pc);
+}
+
+static OopMap* save_live_registers_except_r2(StubAssembler* sasm, bool save_fpu_registers = true) {
+  if (!save_fpu_registers) {
+    __ unimplemented(FILE_AND_LINE);  // Flags the missing no-FPU variant; generation continues with the set below.
+  }
+  __ block_comment("save_live_registers_except_r2");  // Labelled like restore_live_registers_except_r2.
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_registers_except_r2;
+  int frame_size_in_slots =
+    RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);  // Recorded in words.
+  return RegisterSaver::save_live_registers(sasm, reg_set);  // Returns the oop map for the saved frame.
+}
+
+static OopMap* save_volatile_registers(StubAssembler* sasm, Register return_pc = Z_R14) {
+  __ block_comment("save_volatile_registers");
+  const RegisterSaver::RegisterSet saved_regs = RegisterSaver::all_volatile_registers;
+  // The stub's frame size is recorded in words, derived from the slot count.
+  const int slots = RegisterSaver::live_reg_frame_size(saved_regs) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::save_live_registers(sasm, saved_regs, return_pc);
+}
+
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  __ block_comment("restore_live_registers");
+  RegisterSaver::RegisterSet restored_regs = RegisterSaver::all_integer_registers;
+  if (restore_fpu_registers) { restored_regs = RegisterSaver::all_registers; }  // FPU registers are included on request.
+  RegisterSaver::restore_live_registers(sasm, restored_regs);
+}
+
+static void restore_live_registers_except_r2(StubAssembler* sasm, bool restore_fpu_registers = true) {
+ if (!restore_fpu_registers) {
+ __ unimplemented(FILE_AND_LINE); // Flags the missing no-FPU variant; generation continues with the set below.
+ }
+ __ block_comment("restore_live_registers_except_r2");
+ RegisterSaver::restore_live_registers(sasm, RegisterSaver::all_registers_except_r2); // Same set as save_live_registers_except_r2().
+}
+
+static void restore_volatile_registers(StubAssembler* sasm) {
+  __ block_comment("restore_volatile_registers");
+  // Counterpart of save_volatile_registers(): the same register set is named.
+  RegisterSaver::restore_live_registers(sasm, RegisterSaver::all_volatile_registers);
+}
+
+void Runtime1::initialize_pd() {
+ // Nothing to do: this port performs no platform-dependent Runtime1 initialization here.
+}
+
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+  // Build the stub frame first so the caller's live registers are saved.
+  OopMap* const saved_regs_map = save_live_registers(sasm);
+  int rt_call_offset = 0;
+  if (!has_argument) {
+    rt_call_offset = __ call_RT(noreg, noreg, target);
+  } else {
+    rt_call_offset = __ call_RT(noreg, noreg, target, Z_R1_scratch, Z_R0_scratch);
+  }
+  OopMapSet* const gc_maps = new OopMapSet();
+  gc_maps->add_gc_map(rt_call_offset, saved_regs_map);
+  // Trap if the runtime call ever comes back to this stub.
+  __ should_not_reach_here();
+  return gc_maps;
+}
+
+void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // Emits: look up the caller's handler for Z_EXC_PC, then jump to it.
+ // Incoming parameters: Z_EXC_OOP and Z_EXC_PC.
+ // Keep copies in callee-saved registers during runtime call.
+ const Register exception_oop_callee_saved = Z_R11;
+ const Register exception_pc_callee_saved = Z_R12;
+ // Other registers used in this stub.
+ const Register handler_addr = Z_R4;
+
+ // Verify that only the exception oop and the exception pc are valid at this time.
+ __ invalidate_registers(Z_EXC_OOP, Z_EXC_PC);
+
+ // Check that fields in JavaThread for exception oop and issuing pc are not yet set (both must be zero).
+ __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_oop_offset()), Z_thread, "exception oop already set : " FILE_AND_LINE, 0);
+ __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_pc_offset()), Z_thread, "exception pc already set : " FILE_AND_LINE, 0);
+
+ // Save exception_oop and pc in callee-saved register to preserve it
+ // during runtime calls.
+ __ verify_not_null_oop(Z_EXC_OOP);
+ __ lgr_if_needed(exception_oop_callee_saved, Z_EXC_OOP);
+ __ lgr_if_needed(exception_pc_callee_saved, Z_EXC_PC);
+
+ __ push_frame_abi160(0); // Runtime code needs the z_abi_160.
+
+ // Search the exception handler address of the caller (using the return address).
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), Z_thread, Z_EXC_PC);
+ // Z_RET(Z_R2): exception handler address of the caller.
+
+ __ pop_frame();
+
+ __ invalidate_registers(exception_oop_callee_saved, exception_pc_callee_saved, Z_RET); // Only the two saved copies and the result stay valid.
+
+ // Move result of call into correct register.
+ __ lgr_if_needed(handler_addr, Z_RET);
+
+ // Restore exception oop and pc to Z_EXC_OOP and Z_EXC_PC (required convention of exception handler).
+ __ lgr_if_needed(Z_EXC_OOP, exception_oop_callee_saved);
+ __ lgr_if_needed(Z_EXC_PC, exception_pc_callee_saved);
+
+ // Verify that there is really a valid exception in Z_EXC_OOP.
+ __ verify_not_null_oop(Z_EXC_OOP);
+
+ __ z_br(handler_addr); // Jump to exception handler.
+}
+
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+ // Make a frame and preserve the caller's caller-save registers.
+ OopMap* oop_map = save_live_registers(sasm);
+
+ // Call the runtime patching routine, returns non-zero if nmethod got deopted.
+ int call_offset = __ call_RT(noreg, noreg, target);
+ OopMapSet* oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+
+ // Re-execute the patched instruction or, if the nmethod was
+ // deoptimized, return to the deoptimization handler entry that will
+ // cause re-execution of the current bytecode.
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+ __ z_ltr(Z_RET, Z_RET); // Test the return value; the CC is consumed by z_bcr below, so the restore in between is expected to leave it intact.
+
+ restore_live_registers(sasm);
+
+ __ z_bcr(Assembler::bcondZero, Z_R14); // Zero: return via Z_R14 so the patched instruction is re-executed.
+
+ // Return to the deoptimization handler entry for unpacking and
+ // re-execute; if we simply returned then we'd deopt as if any call we
+ // patched had just returned.
+ AddressLiteral dest(deopt_blob->unpack_with_reexecution());
+ __ load_const_optimized(Z_R1_scratch, dest);
+ __ z_br(Z_R1_scratch);
+
+ return oop_maps;
+}
+
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+
+ // for better readability
+ const bool must_gc_arguments = true;
+ const bool dont_gc_arguments = false;
+
+ // Default value; overwritten for some optimized stubs that are
+ // called from methods that do not use the fpu.
+ bool save_fpu_registers = true;
+
+ // Stub code and info for the different stubs.
+ OopMapSet* oop_maps = NULL;
+ switch (id) {
+ case forward_exception_id:
+ {
+ oop_maps = generate_handle_exception(id, sasm);
+ // will not return
+ }
+ break;
+
+ case new_instance_id:
+ case fast_new_instance_id:
+ case fast_new_instance_init_check_id:
+ {
+ Register klass = Z_R11; // Incoming
+ Register obj = Z_R2; // Result
+
+ if (id == new_instance_id) {
+ __ set_info("new_instance", dont_gc_arguments);
+ } else if (id == fast_new_instance_id) {
+ __ set_info("fast new_instance", dont_gc_arguments);
+ } else {
+ assert(id == fast_new_instance_init_check_id, "bad StubID");
+ __ set_info("fast new_instance init check", dont_gc_arguments);
+ }
+
+ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
+ UseTLAB && FastTLABRefill) {
+ // Sapjvm: must call RT to generate allocation events.
+ }
+
+ OopMap* map = save_live_registers_except_r2(sasm);
+ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+ restore_live_registers_except_r2(sasm);
+
+ __ verify_oop(obj);
+ __ z_br(Z_R14);
+ }
+ break;
+
+ case counter_overflow_id:
+ {
+ // Arguments :
+ // bci : stack param 0
+ // method : stack param 1
+ //
+ Register bci = Z_ARG2, method = Z_ARG3;
+ // frame size in bytes
+ OopMap* map = save_live_registers(sasm);
+ const int frame_size = sasm->frame_size() * VMRegImpl::slots_per_word * VMRegImpl::stack_slot_size;
+ __ z_lg(bci, 0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
+ __ z_lg(method, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+ restore_live_registers(sasm);
+ __ z_br(Z_R14);
+ }
+ break;
+ case new_type_array_id:
+ case new_object_array_id:
+ {
+ Register length = Z_R13; // Incoming
+ Register klass = Z_R11; // Incoming
+ Register obj = Z_R2; // Result
+
+ if (id == new_type_array_id) {
+ __ set_info("new_type_array", dont_gc_arguments);
+ } else {
+ __ set_info("new_object_array", dont_gc_arguments);
+ }
+
+#ifdef ASSERT
+ // Assert object type is really an array of the proper kind.
+ {
+ NearLabel ok;
+ Register t0 = obj;
+ __ mem2reg_opt(t0, Address(klass, Klass::layout_helper_offset()), false);
+ __ z_sra(t0, Klass::_lh_array_tag_shift);
+ int tag = ((id == new_type_array_id)
+ ? Klass::_lh_array_tag_type_value
+ : Klass::_lh_array_tag_obj_value);
+ __ compare32_and_branch(t0, tag, Assembler::bcondEqual, ok);
+ __ stop("assert(is an array klass)");
+ __ should_not_reach_here();
+ __ bind(ok);
+ }
+#endif // ASSERT
+
+ if (UseTLAB && FastTLABRefill) {
+ // sapjvm: must call RT to generate allocation events.
+ }
+
+ OopMap* map = save_live_registers_except_r2(sasm);
+ int call_offset;
+ if (id == new_type_array_id) {
+ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
+ } else {
+ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
+ }
+
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+ restore_live_registers_except_r2(sasm);
+
+ __ verify_oop(obj);
+ __ z_br(Z_R14);
+ }
+ break;
+
+ case new_multi_array_id:
+ { __ set_info("new_multi_array", dont_gc_arguments);
+ // Z_R3,: klass
+ // Z_R4,: rank
+ // Z_R5: address of 1st dimension
+ OopMap* map = save_live_registers(sasm);
+ int call_offset = __ call_RT(Z_R2, noreg, CAST_FROM_FN_PTR(address, new_multi_array), Z_R3, Z_R4, Z_R5);
+
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+ restore_live_registers_except_r2(sasm);
+
+ // Z_R2,: new multi array
+ __ verify_oop(Z_R2);
+ __ z_br(Z_R14);
+ }
+ break;
+
+ case register_finalizer_id:
+ {
+ __ set_info("register_finalizer", dont_gc_arguments);
+
+ // Load the klass and check the has finalizer flag.
+ Register klass = Z_ARG2;
+ __ load_klass(klass, Z_ARG1);
+ __ testbit(Address(klass, Klass::access_flags_offset()), exact_log2(JVM_ACC_HAS_FINALIZER));
+ __ z_bcr(Assembler::bcondAllZero, Z_R14); // Return if bit is not set.
+
+ OopMap* oop_map = save_live_registers(sasm);
+ int call_offset = __ call_RT(noreg, noreg,
+ CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), Z_ARG1);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+
+ // Now restore all the live registers.
+ restore_live_registers(sasm);
+
+ __ z_br(Z_R14);
+ }
+ break;
+
+ case throw_range_check_failed_id:
+ { __ set_info("range_check_failed", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
+ }
+ break;
+
+ case throw_index_exception_id:
+ { __ set_info("index_range_check_failed", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
+ }
+ break;
+ case throw_div0_exception_id:
+ { __ set_info("throw_div0_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
+ }
+ break;
+ case throw_null_pointer_exception_id:
+ { __ set_info("throw_null_pointer_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
+ }
+ break;
+ case handle_exception_nofpu_id:
+ case handle_exception_id:
+ { __ set_info("handle_exception", dont_gc_arguments);
+ oop_maps = generate_handle_exception(id, sasm);
+ }
+ break;
+ case handle_exception_from_callee_id:
+ { __ set_info("handle_exception_from_callee", dont_gc_arguments);
+ oop_maps = generate_handle_exception(id, sasm);
+ }
+ break;
+ case unwind_exception_id:
+ { __ set_info("unwind_exception", dont_gc_arguments);
+ // Note: no stubframe since we are about to leave the current
+ // activation and we are calling a leaf VM function only.
+ generate_unwind_exception(sasm);
+ }
+ break;
+ case throw_array_store_exception_id:
+ { __ set_info("throw_array_store_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
+ }
+ break;
+ case throw_class_cast_exception_id:
+ { // Z_R1_scratch: object
+ __ set_info("throw_class_cast_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
+ }
+ break;
+ case throw_incompatible_class_change_error_id:
+ { __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
+ }
+ break;
+ case slow_subtype_check_id:
+ {
+ // Arguments :
+ // sub : stack param 0
+ // super: stack param 1
+ // raddr: Z_R14, blown by call
+ //
+ // Result : condition code 0 for match (bcondEqual will be true),
+ // condition code 2 for miss (bcondNotEqual will be true)
+ NearLabel miss;
+ const Register Rsubklass = Z_ARG2; // sub
+ const Register Rsuperklass = Z_ARG3; // super
+
+ // No args, but tmp registers that are killed.
+ const Register Rlength = Z_ARG4; // cache array length
+ const Register Rarray_ptr = Z_ARG5; // Current value from cache array.
+
+ if (UseCompressedOops) {
+ assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
+ }
+
+ const int frame_size = 4*BytesPerWord + frame::z_abi_160_size;
+ // Save return pc. This is not necessary, but could be helpful
+ // in the case of crashes.
+ __ save_return_pc();
+ __ push_frame(frame_size);
+ // Save registers before changing them.
+ int i = 0;
+ __ z_stg(Rsubklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_stg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_stg(Rlength, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_stg(Rarray_ptr, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check");
+
+ // Get sub and super from stack.
+ __ z_lg(Rsubklass, 0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
+ __ z_lg(Rsuperklass, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
+
+ __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass, Rarray_ptr, Rlength, NULL, &miss);
+
+ // Match falls through here.
+ i = 0;
+ __ z_lg(Rsubklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_lg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_lg(Rlength, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_lg(Rarray_ptr, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check");
+ __ pop_frame();
+ // Return pc is still in R_14.
+ __ clear_reg(Z_R0_scratch); // Zero indicates a match. Set CC 0 (bcondEqual will be true)
+ __ z_br(Z_R14);
+
+ __ BIND(miss);
+ i = 0;
+ __ z_lg(Rsubklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_lg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_lg(Rlength, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ __ z_lg(Rarray_ptr, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP);
+ assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check");
+ __ pop_frame();
+ // return pc is still in R_14
+ __ load_const_optimized(Z_R0_scratch, 1); // One indicates a miss.
+ __ z_ltgr(Z_R0_scratch, Z_R0_scratch); // Set CC 2 (bcondNotEqual will be true).
+ __ z_br(Z_R14);
+ }
+ break;
+ case monitorenter_nofpu_id:
+ case monitorenter_id:
+ { // Z_R1_scratch : object
+ // Z_R13 : lock address (see LIRGenerator::syncTempOpr())
+ __ set_info("monitorenter", dont_gc_arguments);
+
+ int save_fpu_registers = (id == monitorenter_id);
+ // Make a frame and preserve the caller's caller-save registers.
+ OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
+
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), Z_R1_scratch, Z_R13);
+
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+ restore_live_registers(sasm, save_fpu_registers);
+
+ __ z_br(Z_R14);
+ }
+ break;
+
+ case monitorexit_nofpu_id:
+ case monitorexit_id:
+ { // Z_R1_scratch : lock address
+ // Note: really a leaf routine but must setup last java sp
+ // => Use call_RT for now (speed can be improved by
+ // doing last java sp setup manually).
+ __ set_info("monitorexit", dont_gc_arguments);
+
+ int save_fpu_registers = (id == monitorexit_id);
+ // Make a frame and preserve the caller's caller-save registers.
+ OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
+
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), Z_R1_scratch);
+
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+ restore_live_registers(sasm, save_fpu_registers);
+
+ __ z_br(Z_R14);
+ }
+ break;
+
+ case deoptimize_id:
+ { // Args: Z_R1_scratch: trap request
+ __ set_info("deoptimize", dont_gc_arguments);
+ Register trap_request = Z_R1_scratch;
+ OopMap* oop_map = save_live_registers(sasm);
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), trap_request);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+ restore_live_registers(sasm);
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
+ AddressLiteral dest(deopt_blob->unpack_with_reexecution());
+ __ load_const_optimized(Z_R1_scratch, dest);
+ __ z_br(Z_R1_scratch);
+ }
+ break;
+
+ case access_field_patching_id:
+ { __ set_info("access_field_patching", dont_gc_arguments);
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
+ }
+ break;
+
+ case load_klass_patching_id:
+ { __ set_info("load_klass_patching", dont_gc_arguments);
+ // We should set up register map.
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
+ }
+ break;
+
+ case load_mirror_patching_id:
+ { __ set_info("load_mirror_patching", dont_gc_arguments);
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
+ }
+ break;
+
+ case load_appendix_patching_id:
+ { __ set_info("load_appendix_patching", dont_gc_arguments);
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
+ }
+ break;
+#if 0
+ case dtrace_object_alloc_id:
+ { // rax,: object
+ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
+ // We can't gc here so skip the oopmap but make sure that all
+ // the live registers get saved.
+ save_live_registers(sasm, 1);
+
+ __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc)));
+ NOT_LP64(__ pop(rax));
+
+ restore_live_registers(sasm);
+ }
+ break;
+
+ case fpu2long_stub_id:
+ {
+ // rax, and rdx are destroyed, but should be free since the result is returned there
+ // preserve rsi,ecx
+ __ push(rsi);
+ __ push(rcx);
+ LP64_ONLY(__ push(rdx);)
+
+ // check for NaN
+ Label return0, do_return, return_min_jlong, do_convert;
+
+ Address value_high_word(rsp, wordSize + 4);
+ Address value_low_word(rsp, wordSize);
+ Address result_high_word(rsp, 3*wordSize + 4);
+ Address result_low_word(rsp, 3*wordSize);
+
+ __ subptr(rsp, 32); // more than enough on 32bit
+ __ fst_d(value_low_word);
+ __ movl(rax, value_high_word);
+ __ andl(rax, 0x7ff00000);
+ __ cmpl(rax, 0x7ff00000);
+ __ jcc(Assembler::notEqual, do_convert);
+ __ movl(rax, value_high_word);
+ __ andl(rax, 0xfffff);
+ __ orl(rax, value_low_word);
+ __ jcc(Assembler::notZero, return0);
+
+ __ bind(do_convert);
+ __ fnstcw(Address(rsp, 0));
+ __ movzwl(rax, Address(rsp, 0));
+ __ orl(rax, 0xc00);
+ __ movw(Address(rsp, 2), rax);
+ __ fldcw(Address(rsp, 2));
+ __ fwait();
+ __ fistp_d(result_low_word);
+ __ fldcw(Address(rsp, 0));
+ __ fwait();
+ // This gets the entire long in rax on 64bit
+ __ movptr(rax, result_low_word);
+ // testing of high bits
+ __ movl(rdx, result_high_word);
+ __ mov(rcx, rax);
+ // What the heck is the point of the next instruction???
+ __ xorl(rcx, 0x0);
+ __ movl(rsi, 0x80000000);
+ __ xorl(rsi, rdx);
+ __ orl(rcx, rsi);
+ __ jcc(Assembler::notEqual, do_return);
+ __ fldz();
+ __ fcomp_d(value_low_word);
+ __ fnstsw_ax();
+ __ testl(rax, 0x4100); // ZF & CF == 0
+ __ jcc(Assembler::equal, return_min_jlong);
+ // return max_jlong
+ __ mov64(rax, CONST64(0x7fffffffffffffff));
+ __ jmp(do_return);
+
+ __ bind(return_min_jlong);
+ __ mov64(rax, UCONST64(0x8000000000000000));
+ __ jmp(do_return);
+
+ __ bind(return0);
+ __ fpop();
+ __ xorptr(rax, rax);
+
+ __ bind(do_return);
+ __ addptr(rsp, 32);
+ LP64_ONLY(__ pop(rdx);)
+ __ pop(rcx);
+ __ pop(rsi);
+ __ ret(0);
+ }
+ break;
+#endif // TODO
+
+#if INCLUDE_ALL_GCS
+ case g1_pre_barrier_slow_id:
+ { // Z_R1_scratch: previous value of memory
+
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+ __ should_not_reach_here(FILE_AND_LINE);
+ break;
+ }
+
+ __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
+
+ Register pre_val = Z_R1_scratch;
+ Register tmp = Z_R6; // Must be non-volatile because it is used to save pre_val.
+ Register tmp2 = Z_R7;
+
+ Label refill, restart;
+ int satb_q_index_byte_offset =
+ in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_index());
+ int satb_q_buf_byte_offset =
+ in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_buf());
+
+ // Save tmp registers (see assertion in G1PreBarrierStub::emit_code()).
+ __ z_stg(tmp, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+ __ z_stg(tmp2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+
+ __ bind(restart);
+ // Load the index into the SATB buffer. SATBMarkQueue::_index is a
+ // size_t so ld_ptr is appropriate.
+ __ z_ltg(tmp, satb_q_index_byte_offset, Z_R0, Z_thread);
+
+ // index == 0?
+ __ z_brz(refill);
+
+ __ z_lg(tmp2, satb_q_buf_byte_offset, Z_thread);
+ __ add2reg(tmp, -oopSize);
+
+ __ z_stg(pre_val, 0, tmp, tmp2); // [_buf + index] := <address_of_card>
+ __ z_stg(tmp, satb_q_index_byte_offset, Z_thread);
+
+ // Restore tmp registers (see assertion in G1PreBarrierStub::emit_code()).
+ __ z_lg(tmp, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+ __ z_lg(tmp2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+ __ z_br(Z_R14);
+
+ __ bind(refill);
+ save_volatile_registers(sasm);
+ __ z_lgr(tmp, pre_val); // save pre_val
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SATBMarkQueueSet::handle_zero_index_for_thread),
+ Z_thread);
+ __ z_lgr(pre_val, tmp); // restore pre_val
+ restore_volatile_registers(sasm);
+ __ z_bru(restart);
+ }
+ break;
+
+ case g1_post_barrier_slow_id:
+ { // Z_R1_scratch: oop address, address of updated memory slot
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+ __ should_not_reach_here(FILE_AND_LINE);
+ break;
+ }
+
+ __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
+
+ Register addr_oop = Z_R1_scratch;
+ Register addr_card = Z_R1_scratch;
+ Register r1 = Z_R6; // Must be saved/restored.
+ Register r2 = Z_R7; // Must be saved/restored.
+ Register cardtable = r1; // Must be non-volatile, because it is used to save addr_card.
+ jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
+
+ // Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
+ __ z_stg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+
+ Label not_already_dirty, restart, refill, young_card;
+
+ // Calculate address of card corresponding to the updated oop slot.
+ AddressLiteral rs(byte_map_base);
+ __ z_srlg(addr_card, addr_oop, CardTableModRefBS::card_shift);
+ addr_oop = noreg; // dead now
+ __ load_const_optimized(cardtable, rs); // cardtable := <card table base>
+ __ z_agr(addr_card, cardtable); // addr_card := addr_oop>>card_shift + cardtable
+
+ __ z_cli(0, addr_card, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+ __ z_bre(young_card);
+
+ __ z_sync(); // Required to support concurrent cleaning.
+
+ __ z_cli(0, addr_card, (int)CardTableModRefBS::dirty_card_val());
+ __ z_brne(not_already_dirty);
+
+ __ bind(young_card);
+ // We didn't take the branch, so we're already dirty: restore
+ // used registers and return.
+ __ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+ __ z_br(Z_R14);
+
+ // Not dirty.
+ __ bind(not_already_dirty);
+
+ // First, dirty it: [addr_card] := 0
+ __ z_mvi(0, addr_card, CardTableModRefBS::dirty_card_val());
+
+ Register idx = cardtable; // Must be non-volatile, because it is used to save addr_card.
+ Register buf = r2;
+ cardtable = noreg; // now dead
+
+ // Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
+ __ z_stg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+
+ ByteSize dirty_card_q_index_byte_offset =
+ JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_index();
+ ByteSize dirty_card_q_buf_byte_offset =
+ JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_buf();
+
+ __ bind(restart);
+
+ // Get the index into the update buffer. DirtyCardQueue::_index is
+ // a size_t so z_ltg is appropriate here.
+ __ z_ltg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
+
+ // index == 0?
+ __ z_brz(refill);
+
+ __ z_lg(buf, Address(Z_thread, dirty_card_q_buf_byte_offset));
+ __ add2reg(idx, -oopSize);
+
+ __ z_stg(addr_card, 0, idx, buf); // [_buf + index] := <address_of_card>
+ __ z_stg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
+ // Restore killed registers and return.
+ __ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+ __ z_lg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
+ __ z_br(Z_R14);
+
+ __ bind(refill);
+ save_volatile_registers(sasm);
+ __ z_lgr(idx, addr_card); // Save addr_card, tmp3 must be non-volatile.
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, DirtyCardQueueSet::handle_zero_index_for_thread),
+ Z_thread);
+ __ z_lgr(addr_card, idx);
+ restore_volatile_registers(sasm); // Restore addr_card.
+ __ z_bru(restart);
+ }
+ break;
+#endif // INCLUDE_ALL_GCS
+ case predicate_failed_trap_id:
+ {
+ __ set_info("predicate_failed_trap", dont_gc_arguments);
+
+ OopMap* map = save_live_registers(sasm);
+
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+ restore_live_registers(sasm);
+
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+ __ load_const_optimized(Z_R1_scratch, deopt_blob->unpack_with_reexecution());
+ __ z_br(Z_R1_scratch);
+ }
+ break;
+
+ default:
+ {
+ __ should_not_reach_here(FILE_AND_LINE, id);
+ }
+ break;
+ }
+ return oop_maps;
+}
+
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+ __ block_comment("generate_handle_exception");
+ // Emits the code shared by the exception stubs selected by 'id': store exception oop and issuing pc
+ // in the JavaThread, call exception_handler_for_pc and branch to the handler address it returns.
+ // Incoming parameters: Z_EXC_OOP, Z_EXC_PC (forward_exception_id loads both itself, see below).
+ // Save registers if required. The returned OopMapSet holds the GC map of the runtime call.
+ OopMapSet* oop_maps = new OopMapSet();
+ OopMap* oop_map = NULL;
+ Register reg_fp = Z_R1_scratch; // Only loaded under DEBUG_ONLY; read by the ASSERT check below.
+
+ switch (id) {
+ case forward_exception_id: {
+ // We're handling an exception in the context of a compiled frame.
+ // The registers have been saved in the standard places. Perform
+ // an exception lookup in the caller and dispatch to the handler
+ // if found. Otherwise unwind and dispatch to the caller's
+ // exception handler.
+ oop_map = generate_oop_map(sasm);
+
+ // Load the pending exception oop into Z_EXC_OOP and clear the thread's field.
+ __ z_lg(Z_EXC_OOP, Address(Z_thread, Thread::pending_exception_offset()));
+ __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), 8);
+
+ // Different stubs forward their exceptions; they should all have similar frame layouts
+ // (a) to find their return address (b) for a correct oop_map generated above.
+ assert(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers) ==
+ RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers_except_r2), "requirement");
+
+ // Load issuing PC (the return address for this stub).
+ const int frame_size_in_bytes = sasm->frame_size() * VMRegImpl::slots_per_word * VMRegImpl::stack_slot_size;
+ __ z_lg(Z_EXC_PC, Address(Z_SP, frame_size_in_bytes + _z_abi16(return_pc)));
+ DEBUG_ONLY(__ z_lay(reg_fp, Address(Z_SP, frame_size_in_bytes));)
+
+ // Make sure that the vm_results are cleared (may be unnecessary).
+ __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(oop));
+ __ clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(Metadata*));
+ break;
+ }
+ case handle_exception_nofpu_id:
+ case handle_exception_id:
+ // At this point all registers MAY be live.
+ DEBUG_ONLY(__ z_lgr(reg_fp, Z_SP);)
+ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id, Z_EXC_PC);
+ break;
+ case handle_exception_from_callee_id: {
+ // At this point all registers except Z_EXC_OOP and Z_EXC_PC are dead.
+ DEBUG_ONLY(__ z_lgr(reg_fp, Z_SP);)
+ __ save_return_pc(Z_EXC_PC);
+ const int frame_size_in_bytes = __ push_frame_abi160(0);
+ oop_map = new OopMap(frame_size_in_bytes / VMRegImpl::stack_slot_size, 0);
+ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+ break;
+ }
+ default: ShouldNotReachHere();
+ }
+
+ // Verify that only Z_EXC_OOP and Z_EXC_PC (plus reg_fp for the ASSERT check below) are valid at this time.
+ __ invalidate_registers(Z_EXC_OOP, Z_EXC_PC, reg_fp);
+ // Verify that Z_EXC_OOP contains a valid exception.
+ __ verify_not_null_oop(Z_EXC_OOP);
+
+ // Check that fields in JavaThread for exception oop and issuing pc
+ // are empty before writing to them.
+ __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_oop_offset()), Z_thread, "exception oop already set : " FILE_AND_LINE, 0);
+ __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_pc_offset()), Z_thread, "exception pc already set : " FILE_AND_LINE, 0);
+
+ // Save exception oop and issuing pc into JavaThread.
+ // (Exception handler will load it from here.)
+ __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
+ __ z_stg(Z_EXC_PC, Address(Z_thread, JavaThread::exception_pc_offset()));
+
+#ifdef ASSERT
+ { NearLabel ok; // Z_EXC_PC must equal the return pc stored in the frame that reg_fp addresses.
+ __ z_cg(Z_EXC_PC, Address(reg_fp, _z_abi16(return_pc)));
+ __ branch_optimized(Assembler::bcondEqual, ok);
+ __ stop("use throwing pc as return address (has bci & oop map)");
+ __ bind(ok);
+ }
+#endif
+
+ // Compute the exception handler.
+ // The exception oop and the throwing pc are read from the fields in JavaThread.
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+ oop_maps->add_gc_map(call_offset, oop_map);
+
+ // Z_RET(Z_R2): handler address
+ // will be the deopt blob if nmethod was deoptimized while we looked up
+ // handler regardless of whether handler existed in the nmethod.
+
+ // Only Z_R2 is valid at this time, all other registers have been destroyed by the runtime call.
+ __ invalidate_registers(Z_R2);
+
+ switch(id) {
+ case forward_exception_id:
+ case handle_exception_nofpu_id:
+ case handle_exception_id:
+ // Restore the registers that were saved at the beginning.
+ __ z_lgr(Z_R1_scratch, Z_R2); // Restoring live registers kills Z_R2.
+ restore_live_registers(sasm, id != handle_exception_nofpu_id); // Also pops the frame.
+ __ z_br(Z_R1_scratch);
+ break;
+ case handle_exception_from_callee_id: {
+ __ pop_frame();
+ __ z_br(Z_R2); // Jump to exception handler.
+ }
+ break;
+ default: ShouldNotReachHere();
+ }
+
+ return oop_maps;
+}
+
+
+#undef __
+
+const char *Runtime1::pd_name_for_address(address entry) {
+ return "<unknown function>"; // 'entry' is not inspected: the same placeholder name is returned for every address.
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c1_globals_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_GLOBALS_S390_HPP
+#define CPU_S390_VM_C1_GLOBALS_S390_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the client compiler.
+// (see c1_globals.hpp)
+// Flags sorted according to sparc.
+
+#ifndef TIERED // The defaults up to the matching #endif are only defined when TIERED is not set.
+define_pd_global(bool, BackgroundCompilation, true);
+define_pd_global(bool, CICompileOSR, true);
+define_pd_global(bool, InlineIntrinsics, true);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps, false);
+define_pd_global(bool, UseOnStackReplacement, true);
+define_pd_global(bool, TieredCompilation, false);
+define_pd_global(intx, CompileThreshold, 1000);
+
+define_pd_global(intx, OnStackReplacePercentage, 1400);
+define_pd_global(bool, UseTLAB, true);
+define_pd_global(bool, ProfileInterpreter, false);
+define_pd_global(intx, FreqInlineSize, 325);
+define_pd_global(bool, ResizeTLAB, true);
+define_pd_global(uintx, ReservedCodeCacheSize, 32*M); // Equals the sum of the three code heap sizes below (13*M + 14*M + 5*M).
+define_pd_global(uintx, NonProfiledCodeHeapSize, 13*M);
+define_pd_global(uintx, ProfiledCodeHeapSize, 14*M);
+define_pd_global(uintx, NonNMethodCodeHeapSize, 5*M);
+define_pd_global(uintx, CodeCacheExpansionSize, 32*K);
+define_pd_global(uintx, CodeCacheMinBlockLength, 1);
+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
+define_pd_global(size_t, MetaspaceSize, 12*M);
+define_pd_global(bool, NeverActAsServerClassMachine, true);
+define_pd_global(size_t, NewSizeThreadIncrease, 16*K);
+define_pd_global(uint64_t, MaxRAM, 1ULL*G);
+define_pd_global(uintx, InitialCodeCacheSize, 160*K);
+#endif // !TIERED
+
+define_pd_global(bool, UseTypeProfile, false);
+define_pd_global(bool, RoundFPResults, false);
+
+define_pd_global(bool, LIRFillDelaySlots, false);
+define_pd_global(bool, OptimizeSinglePrecision, false);
+define_pd_global(bool, CSEArrayLength, true);
+define_pd_global(bool, TwoOperandLIRForm, true);
+
+#endif // CPU_S390_VM_C1_GLOBALS_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c2_globals_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C2_GLOBALS_S390_HPP
+#define CPU_S390_VM_C2_GLOBALS_S390_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the server compiler.
+// (see c2_globals.hpp).
+// Sorted according to sparc.
+
+define_pd_global(bool, BackgroundCompilation, true);
+define_pd_global(bool, CICompileOSR, true);
+define_pd_global(bool, InlineIntrinsics, true);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps, true);
+define_pd_global(bool, UseOnStackReplacement, true);
+define_pd_global(bool, ProfileInterpreter, true);
+define_pd_global(bool, TieredCompilation, trueInTiered);
+define_pd_global(intx, CompileThreshold, 10000);
+
+define_pd_global(intx, OnStackReplacePercentage, 140);
+define_pd_global(intx, ConditionalMoveLimit, 4);
+define_pd_global(intx, FLOATPRESSURE, 15);
+define_pd_global(intx, FreqInlineSize, 175);
+// 10 prevents spill-split-recycle sanity check in JVM2008.xml.transform.
+define_pd_global(intx, INTPRESSURE, 10); // Medium size register set, 6 special purpose regs, 3 SOE regs.
+define_pd_global(intx, InteriorEntryAlignment, 2);
+define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
+define_pd_global(intx, RegisterCostAreaRatio, 12000);
+define_pd_global(bool, UseTLAB, true);
+define_pd_global(bool, ResizeTLAB, true);
+define_pd_global(intx, LoopUnrollLimit, 60);
+define_pd_global(intx, LoopPercentProfileLimit, 10);
+define_pd_global(bool, PostLoopMultiversioning, false);
+define_pd_global(intx, MinJumpTableSize, 18);
+
+// Peephole and CISC spilling both break the graph, and so make the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole, false);
+define_pd_global(bool, UseCISCSpill, true);
+define_pd_global(bool, OptoBundling, false);
+define_pd_global(bool, OptoScheduling, false);
+define_pd_global(bool, OptoRegScheduling, false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
+// On s390x, we can clear the array with a single instruction,
+// so don't idealize it.
+define_pd_global(bool, IdealizeClearArrayNode, false);
+
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(uintx, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize
+define_pd_global(uintx, ReservedCodeCacheSize, 48*M); // Equals the sum of the three code heap sizes below (21*M + 22*M + 5*M).
+define_pd_global(uintx, NonProfiledCodeHeapSize, 21*M);
+define_pd_global(uintx, ProfiledCodeHeapSize, 22*M);
+define_pd_global(uintx, NonNMethodCodeHeapSize, 5*M);
+define_pd_global(uintx, CodeCacheExpansionSize, 64*K);
+
+// Ergonomics related flags
+define_pd_global(uint64_t, MaxRAM, 128ULL*G);
+define_pd_global(uintx, CodeCacheMinBlockLength, 4);
+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
+
+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on z/Architecture.
+
+// Heap related flags
+define_pd_global(size_t, MetaspaceSize, ScaleForWordSize(16*M));
+
+// Ergonomics related flags
+define_pd_global(bool, NeverActAsServerClassMachine, false);
+
+#endif // CPU_S390_VM_C2_GLOBALS_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/c2_init_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+
+// Processor dependent initialization for z/Architecture.
+
+void Compile::pd_compiler2_init() {
+ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, ""); // The only check made here: the two alignment flags must be ordered this way.
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/codeBuffer_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_CODEBUFFER_S390_HPP
+#define CPU_S390_VM_CODEBUFFER_S390_HPP
+
+ private:
+ void pd_initialize() {} // Empty body: no platform-dependent initialization is performed.
+
+ public:
+ void flush_bundle(bool start_new_bundle) {} // Empty body: start_new_bundle is ignored.
+
+ void getCpuData(const CodeBuffer * const cb) {} // Empty body: cb is ignored.
+
+#endif // CPU_S390_VM_CODEBUFFER_S390_HPP
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/compiledIC_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+#ifdef COMPILER2
+#include "opto/matcher.hpp"
+#endif
+
+// ----------------------------------------------------------------------------
+
+#undef __
+#define __ _masm.
+
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/* = NULL*/) {
+ // Emits the to-interpreter stub for the static call at 'mark' (default: cbuf.insts_mark()) into cbuf.
+ // Returns the stub's start address, or NULL if the stub could not be emitted.
+#ifdef COMPILER2
+ // Stub is fixed up when the corresponding call is converted from calling
+ // compiled code to calling interpreted code.
+ if (mark == NULL) {
+ // Get the mark within main instrs section which is set to the address of the call.
+ mark = cbuf.insts_mark();
+ }
+ assert(mark != NULL, "mark must not be NULL");
+ // Note that the code buffer's insts_mark is always relative to insts.
+ // That's why we must use the macroassembler to generate a stub.
+ MacroAssembler _masm(&cbuf);
+
+ address stub = __ start_a_stub(Compile::MAX_stubs_size);
+ if (stub == NULL) {
+ return NULL; // CodeBuffer::expand failed.
+ }
+ __ relocate(static_stub_Relocation::spec(mark));
+ AddressLiteral meta = __ allocate_metadata_address(NULL); // Metadata is still unset (NULL) at emission time.
+ bool success = __ load_const_from_toc(as_Register(Matcher::inline_cache_reg_encode()), meta);
+
+ __ set_inst_mark();
+ AddressLiteral a((address)-1); // Placeholder branch target, emitted as -1.
+ success = success && __ load_const_from_toc(Z_R1, a);
+ if (!success) {
+ return NULL; // CodeCache is full.
+ }
+ __ z_br(Z_R1);
+ __ end_a_stub(); // Update current stubs pointer and restore insts_end.
+ return stub;
+#else
+ ShouldNotReachHere();
+ return NULL; // Not reached; keeps control from flowing off the end of a non-void function.
+#endif
+}
+
+#undef __
+
+int CompiledStaticCall::to_interp_stub_size() {
+ const int toc_loads_size = 2 * MacroAssembler::load_const_from_toc_size(); // Two TOC loads, as emitted by emit_to_interp_stub().
+ return toc_loads_size + 2; // Plus the trailing branch.
+}
+
+// Relocation entries for call stub, compiled java to interpreter.
+int CompiledStaticCall::reloc_to_interp_stub() {
+ return 5; // 4 in emit_java_to_interp + 1 in Java_Static_Call. NOTE(review): presumably emit_to_interp_stub() is meant - confirm.
+}
+
+void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
+ // Redirects this static call to the interpreter: fill the call's stub with 'callee' and 'entry', then point the call at the stub.
+ address stub = find_stub();
+ guarantee(stub != NULL, "stub not found");
+ if (TraceICs) {
+ ResourceMark rm;
+ tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+ p2i(instruction_address()),
+ callee->name_and_sig_as_C_string());
+ }
+
+ // Locate the two patchable instructions in the stub. Creation also verifies the object.
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub());
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+
+ // A generated lambda form might be deleted from the Lambdaform
+ // cache in MethodTypeForm. If a jit compiled lambdaform method
+ // becomes not entrant and the cache access returns null, the new
+ // resolve will lead to a new generated LambdaForm.
+ // Hence the stub must be unset (data 0, destination -1) or already hold these values, except for compiled lambda forms.
+ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee() || callee->is_compiled_lambda_form(),
+ "a) MT-unsafe modification of inline cache");
+ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry,
+ "b) MT-unsafe modification of inline cache");
+
+ // Update stub: store the callee as the constant being loaded and 'entry' as the jump destination.
+ method_holder->set_data((intptr_t)callee());
+ jump->set_jump_destination(entry);
+
+ // Update jump to call: only now redirect the call itself to the filled-in stub.
+ set_destination_mt_safe(stub);
+}
+
+void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
+ // Reset the stub addressed by the relocation to its unset state.
+ address stub = static_stub->addr();
+ assert(stub != NULL, "stub not found");
+ // Locate the two patchable instructions in the stub. Creation also verifies the object.
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub());
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+ method_holder->set_data(0); // Loaded constant back to 0.
+ jump->set_jump_destination((address)-1); // Jump destination back to the -1 placeholder.
+}
+
+//-----------------------------------------------------------------------------
+
+#ifndef PRODUCT
+
+void CompiledStaticCall::verify() {
+ // Verify call (non-PRODUCT builds only): the call instruction first, then its stub, then the call state.
+ NativeCall::verify();
+ if (os::is_MP()) {
+ verify_alignment(); // Alignment is only checked when os::is_MP() is true.
+ }
+
+ // Verify stub.
+ address stub = find_stub();
+ assert(stub != NULL, "no stub found for static call");
+ // The two locals below are not used further. Creation also verifies the object.
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub());
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+
+ // Verify state: the call must be in exactly one of the three known states.
+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
+}
+
+#endif // !PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/copy_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1134 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by LS
+
+#ifndef CPU_S390_VM_COPY_S390_HPP
+#define CPU_S390_VM_COPY_S390_HPP
+
+// Inline functions for memory copy and fill.
+
+// HeapWordSize (the size of class HeapWord) is 8 Bytes (the size of a
+// pointer variable), since we always run the _LP64 model. As a consequence,
+// HeapWord* memory ranges are always assumed to be doubleword-aligned,
+// having a size which is an integer multiple of HeapWordSize.
+//
+// Dealing only with doubleword-aligned doubleword units has important
+// positive performance and data access consequences. Many of the move
+// instructions perform particularly well under these circumstances.
+// Data access is "doubleword-concurrent", except for MVC and XC.
+// Furthermore, data access can be forced to be sequential (MVCL and MVCLE)
+// by use of the special padding byte 0xb1, where required. For copying,
+// we use padding byte 0xb0 to prevent the D-cache from being polluted.
+//
+// On z/Architecture, gcc optimizes memcpy into a series of MVC instructions.
+// This is optimal, even if just one HeapWord is copied. However, MVC
+// copying is not atomic, i.e. not "doubleword concurrent" by definition.
+//
+// If the -mmvcle compiler option is specified, memcpy translates into
+// code such that the entire memory range is copied or preset with just
+// one MVCLE instruction.
+//
+// *to = *from is transformed into a MVC instruction already with -O1.
+// Thus, for atomic copy operations, (inline) assembler code is required
+// to guarantee atomic data accesses.
+//
+// For large (len >= MVCLEThreshold) chunks of memory, we exploit
+// special H/W support of z/Architecture:
+// 1) copy short piece of memory to page-align address(es)
+// 2) copy largest part (all contained full pages) of memory using mvcle instruction.
+// z/Architecture processors have special H/W support for page-aligned storage
+// where len is an integer multiple of page size. In that case, up to 4 cache lines are
+// processed in parallel and L1 cache is not polluted.
+// 3) copy the remaining piece of memory.
+//
+// Measurement classifications:
+// very rare - <= 10.000 calls AND <= 1.000 usec elapsed
+// rare - <= 100.000 calls AND <= 10.000 usec elapsed
+// some - <= 1.000.000 calls AND <= 100.000 usec elapsed
+// freq - <= 10.000.000 calls AND <= 1.000.000 usec elapsed
+// very freq - > 10.000.000 calls OR > 1.000.000 usec elapsed
+
+#undef USE_INLINE_ASM
+
+static void copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { // Copy count jshorts between possibly overlapping ranges, one element per assignment.
+ if (from > to) {
+ while (count-- > 0) {
+ // Source lies above destination: ascending order never overwrites an unread source element.
+ *to++ = *from++;
+ }
+ } else {
+ from += count; // One past the last element; unlike "count - 1" this stays in range for count == 0.
+ to += count;
+ while (count-- > 0) {
+ // Source lies at or below destination: copy in descending order (last element first).
+ *--to = *--from;
+ }
+ }
+}
+
+static void copy_conjoint_jints_atomic(jint* from, jint* to, size_t count) { // Copy count jints between possibly overlapping ranges, one element per assignment.
+ if (from > to) {
+ while (count-- > 0) {
+ // Source lies above destination: ascending order never overwrites an unread source element.
+ *to++ = *from++;
+ }
+ } else {
+ from += count; // One past the last element; unlike "count - 1" this stays in range for count == 0.
+ to += count;
+ while (count-- > 0) {
+ // Source lies at or below destination: copy in descending order (last element first).
+ *--to = *--from;
+ }
+ }
+}
+
+static bool has_destructive_overlap(char* from, char* to, size_t byte_count) { // True iff 'to' lies above 'from' by less than byte_count bytes, i.e. an ascending copy would overwrite unread source bytes.
+ return (to > from) && ((ptrdiff_t)byte_count > (to - from));
+}
+
+#ifdef USE_INLINE_ASM
+
+ //--------------------------------------------------------------
+ // MOVE8_ATOMIC_<n>: copy n (1..4) adjacent 8-byte units with a single MVC. Atomicity is given by the minimum of source
+ // and target alignment. Refer to mail comm with Tim Slegel/IBM. _to/_from are pointer variables whose values are loaded first.
+ // Only usable for disjoint source and target.
+ //--------------------------------------------------------------
+ #define MOVE8_ATOMIC_4(_to,_from) { \
+ unsigned long toaddr; \
+ unsigned long fromaddr; \
+ asm( \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LG %[fromaddr],%[from] \n\t" /* address of from area */ \
+ "MVC 0(32,%[toaddr]),0(%[fromaddr]) \n\t" /* move 32 bytes */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) \
+ , [toaddr] "=a" (toaddr) \
+ , [fromaddr] "=a" (fromaddr) \
+ : \
+ : "cc" /* clobbered */ \
+ ); \
+ }
+ #define MOVE8_ATOMIC_3(_to,_from) { \
+ unsigned long toaddr; \
+ unsigned long fromaddr; \
+ asm( \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LG %[fromaddr],%[from] \n\t" /* address of from area */ \
+ "MVC 0(24,%[toaddr]),0(%[fromaddr]) \n\t" /* move 24 bytes */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) \
+ , [toaddr] "=a" (toaddr) \
+ , [fromaddr] "=a" (fromaddr) \
+ : \
+ : "cc" /* clobbered */ \
+ ); \
+ }
+ #define MOVE8_ATOMIC_2(_to,_from) { \
+ unsigned long toaddr; \
+ unsigned long fromaddr; \
+ asm( \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LG %[fromaddr],%[from] \n\t" /* address of from area */ \
+ "MVC 0(16,%[toaddr]),0(%[fromaddr]) \n\t" /* move 16 bytes */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) \
+ , [toaddr] "=a" (toaddr) \
+ , [fromaddr] "=a" (fromaddr) \
+ : \
+ : "cc" /* clobbered */ \
+ ); \
+ }
+ #define MOVE8_ATOMIC_1(_to,_from) { \
+ unsigned long toaddr; \
+ unsigned long fromaddr; \
+ asm( \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LG %[fromaddr],%[from] \n\t" /* address of from area */ \
+ "MVC 0(8,%[toaddr]),0(%[fromaddr]) \n\t" /* move 8 bytes */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) \
+ , [toaddr] "=a" (toaddr) \
+ , [fromaddr] "=a" (fromaddr) \
+ : \
+ : "cc" /* clobbered */ \
+ ); \
+ }
+
+ //--------------------------------------------------------------
+ // COPY8_ATOMIC_<n>: atomic copying of n (1..4) 8-byte entities.
+ // Conjoint/disjoint property does not matter. Entities are first
+ // loaded (all of them, into r0 and up) and then stored.
+ // _to and _from must be 8-byte aligned.
+ //--------------------------------------------------------------
+ #define COPY8_ATOMIC_4(_to,_from) { \
+ unsigned long toaddr; \
+ asm( \
+ "LG 3,%[from] \n\t" /* address of from area (r3 is overwritten by the LMG) */ \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LMG 0,3,0(3) \n\t" /* load 4 doublewords into r0..r3 */ \
+ "STMG 0,3,0(%[toaddr]) \n\t" /* store them */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ , [toaddr] "=a" (toaddr) /* scratch (output operand) */ \
+ : \
+ : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \
+ ); \
+ }
+ #define COPY8_ATOMIC_3(_to,_from) { \
+ unsigned long toaddr; \
+ asm( \
+ "LG 2,%[from] \n\t" /* address of from area (r2 is overwritten by the LMG) */ \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LMG 0,2,0(2) \n\t" /* load 3 doublewords into r0..r2 */ \
+ "STMG 0,2,0(%[toaddr]) \n\t" /* store them */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ , [toaddr] "=a" (toaddr) /* scratch (output operand) */ \
+ : \
+ : "cc", "r0", "r1", "r2" /* clobbered */ \
+ ); \
+ }
+ #define COPY8_ATOMIC_2(_to,_from) { \
+ unsigned long toaddr; \
+ asm( \
+ "LG 1,%[from] \n\t" /* address of from area (r1 is overwritten by the LMG) */ \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LMG 0,1,0(1) \n\t" /* load 2 doublewords into r0..r1 */ \
+ "STMG 0,1,0(%[toaddr]) \n\t" /* store them */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ , [toaddr] "=a" (toaddr) /* scratch (output operand) */ \
+ : \
+ : "cc", "r0", "r1" /* clobbered */ \
+ ); \
+ }
+ #define COPY8_ATOMIC_1(_to,_from) { \
+ unsigned long addr; \
+ asm( \
+ "LG %[addr],%[from] \n\t" /* address of from area */ \
+ "LG 0,0(0,%[addr]) \n\t" /* load 1 doubleword into r0 */ \
+ "LG %[addr],%[to] \n\t" /* address of to area (reuses the same scratch register) */ \
+ "STG 0,0(0,%[addr]) \n\t" /* store it */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ , [addr] "=a" (addr) /* scratch (output operand) */ \
+ : \
+ : "cc", "r0" /* clobbered */ \
+ ); \
+ }
+
+ //--------------------------------------------------------------
+ // COPY4_ATOMIC_<n>: atomic copying of 4-byte entities.
+ // Exactly n (1..4) entities are copied, as given by the macro name suffix.
+ // Conjoint/disjoint property does not matter. Entities are first
+ // loaded (all of them, into r0 and up) and then stored.
+ // _to and _from must be 4-byte aligned.
+ //--------------------------------------------------------------
+ #define COPY4_ATOMIC_4(_to,_from) { \
+ unsigned long toaddr; \
+ asm( \
+ "LG 3,%[from] \n\t" /* address of from area (r3 is overwritten by the LM) */ \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LM 0,3,0(3) \n\t" /* load 4 words into r0..r3 */ \
+ "STM 0,3,0(%[toaddr]) \n\t" /* store them */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ , [toaddr] "=a" (toaddr) /* scratch (output operand) */ \
+ : \
+ : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \
+ ); \
+ }
+ #define COPY4_ATOMIC_3(_to,_from) { \
+ unsigned long toaddr; \
+ asm( \
+ "LG 2,%[from] \n\t" /* address of from area (r2 is overwritten by the LM) */ \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LM 0,2,0(2) \n\t" /* load 3 words into r0..r2 */ \
+ "STM 0,2,0(%[toaddr]) \n\t" /* store them */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ , [toaddr] "=a" (toaddr) /* scratch (output operand) */ \
+ : \
+ : "cc", "r0", "r1", "r2" /* clobbered */ \
+ ); \
+ }
+ #define COPY4_ATOMIC_2(_to,_from) { \
+ unsigned long toaddr; \
+ asm( \
+ "LG 1,%[from] \n\t" /* address of from area (r1 is overwritten by the LM) */ \
+ "LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ "LM 0,1,0(1) \n\t" /* load 2 words into r0..r1 */ \
+ "STM 0,1,0(%[toaddr]) \n\t" /* store them */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ , [toaddr] "=a" (toaddr) /* scratch (output operand) */ \
+ : \
+ : "cc", "r0", "r1" /* clobbered */ \
+ ); \
+ }
+ #define COPY4_ATOMIC_1(_to,_from) { \
+ unsigned long addr; \
+ asm( \
+ "LG %[addr],%[from] \n\t" /* address of from area */ \
+ "L 0,0(0,%[addr]) \n\t" /* load 1 word into r0 */ \
+ "LG %[addr],%[to] \n\t" /* address of to area (reuses the same scratch register) */ \
+ "ST 0,0(0,%[addr]) \n\t" /* store it */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ , [addr] "=a" (addr) /* scratch (output operand) */ \
+ : \
+ : "cc", "r0" /* clobbered */ \
+ ); \
+ }
+
+#if 0 // Waiting for gcc to support EXRL. Disabled variant, kept for reference; the #else branch is the one that gets defined.
+ #define MVC_MEMCOPY(_to,_from,_len) \
+ if (VM_Version::has_ExecuteExtensions()) { \
+ asm("\t" \
+ " LAY 1,-1(0,%[len]) \n\t" /* decr for MVC */ \
+ " EXRL 1,1f \n\t" /* execute MVC instr */ \
+ " BRC 15,2f \n\t" /* skip template */ \
+ "1: MVC 0(%[len],%[to]),0(%[from]) \n\t" \
+ "2: BCR 0,0 \n\t" \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ : [len] "r" (_len) /* inputs */ \
+ : "cc", "r1" /* clobbered */ \
+ ); \
+ } else { \
+ asm("\t" \
+ " LARL 2,3f \n\t" \
+ " LAY 1,-1(0,%[len]) \n\t" /* decr for MVC */ \
+ " EX 1,0(2) \n\t" /* execute MVC instr */ \
+ " BRC 15,4f \n\t" /* skip template */ \
+ "3: MVC 0(%[len],%[to]),0(%[from]) \n\t" \
+ "4: BCR 0,0 \n\t" \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ : [len] "r" (_len) /* inputs */ \
+ : "cc", "r1", "r2" /* clobbered */ \
+ ); \
+ }
+#else
+ #define MVC_MEMCOPY(_to,_from,_len) \
+ { unsigned long toaddr; unsigned long tolen; \
+ unsigned long fromaddr; unsigned long target; \
+ asm("\t" \
+ " LTGR %[tolen],%[len] \n\t" /* copy len and test it */ \
+ " BRC 8,2f \n\t" /* do nothing for l=0*/ \
+ " AGHI %[tolen],-1 \n\t" /* decr for MVC */ \
+ " LG %[toaddr],%[to] \n\t" /* address of to area */ \
+ " LG %[fromaddr],%[from] \n\t" /* address of from area */ \
+ " LARL %[target],1f \n\t" /* addr of MVC instr */ \
+ " EX %[tolen],0(%[target]) \n\t" /* execute MVC instr */ \
+ " BRC 15,2f \n\t" /* skip template */ \
+ "1: MVC 0(1,%[toaddr]),0(%[fromaddr]) \n\t" /* MVC template, run via EX only */ \
+ "2: BCR 0,0 \n\t" /* nop as branch target */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) \
+ , [tolen] "=a" (tolen) \
+ , [toaddr] "=a" (toaddr) \
+ , [fromaddr] "=a" (fromaddr) \
+ , [target] "=a" (target) \
+ : [len] "r" (_len) /* inputs */ \
+ : "cc" /* clobbered */ \
+ ); \
+ }
+#endif
+
+ #if 0 // code snippet to be used for debugging
+ /* ASSERT code BEGIN */ \
+ " LARL %[len],5f \n\t" \
+ " LARL %[mta],4f \n\t" \
+ " SLGR %[len],%[mta] \n\t" \
+ " CGHI %[len],16 \n\t" \
+ " BRC 7,9f \n\t" /* block size != 16 */ \
+ \
+ " LARL %[len],1f \n\t" \
+ " SLGR %[len],%[mta] \n\t" \
+ " CGHI %[len],256 \n\t" \
+ " BRC 7,9f \n\t" /* list len != 256 */ \
+ \
+ " LGR 0,0 \n\t" /* artificial SIGILL */ \
+ "9: BRC 7,-2 \n\t" \
+ " LARL %[mta],1f \n\t" /* restore MVC table begin */ \
+ /* ASSERT code END */
+ #endif
+
+ // Optimized copying for data less than 4k
+ // - no destructive overlap
+ // - 0 <= _n_bytes <= 4096
+ // The (l mod 256) leading bytes are copied by an EX-ecuted MVC, the rest by jumping into a table of 16 256-byte MVC blocks.
+ // This macro needs to be gcc-compiled with -march=z990. Otherwise, the LAY instruction is not available. _n_bytes is modified.
+ #define MVC_MULTI(_to,_from,_n_bytes) \
+ { unsigned long toaddr; \
+ unsigned long fromaddr; \
+ unsigned long movetable; \
+ unsigned long len; \
+ asm("\t" \
+ " LTGFR %[len],%[nby] \n\t" \
+ " LG %[ta],%[to] \n\t" /* address of to area */ \
+ " BRC 8,1f \n\t" /* nothing to copy */ \
+ \
+ " NILL %[nby],255 \n\t" /* # bytes mod 256 */ \
+ " LG %[fa],%[from] \n\t" /* address of from area */ \
+ " BRC 8,3f \n\t" /* no rest, skip copying */ \
+ \
+ " LARL %[mta],2f \n\t" /* MVC template addr */ \
+ " AHI %[nby],-1 \n\t" /* adjust for EX MVC */ \
+ \
+ " EX %[nby],0(%[mta]) \n\t" /* only rightmost */ \
+ /* 8 bits of nby used */ \
+ /* Since nby is <= 4096 on entry to this code, we do need */ \
+ /* no zero extension before using it in addr calc. */ \
+ " LA %[fa],1(%[nby],%[fa]) \n\t"/* adjust from addr */ \
+ " LA %[ta],1(%[nby],%[ta]) \n\t"/* adjust to addr */ \
+ \
+ "3: SRAG %[nby],%[len],8 \n\t" /* # cache lines */ \
+ " LARL %[mta],1f \n\t" /* MVC table end (label 1) */ \
+ " BRC 8,1f \n\t" /* nothing to copy */ \
+ \
+ /* Insert ASSERT code here if required. */ \
+ \
+ \
+ " LNGFR %[nby],%[nby] \n\t" /* negative offset into */ \
+ " SLLG %[nby],%[nby],4 \n\t" /* MVC table 16-byte blocks */ \
+ " BC 15,0(%[nby],%[mta]) \n\t" /* branch to block #ncl */ \
+ \
+ "2: MVC 0(1,%[ta]),0(%[fa]) \n\t" /* MVC template */ \
+ \
+ "4: MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 4096 == l */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ "5: MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3840 <= l < 4096 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3584 <= l < 3840 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3328 <= l < 3584 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3072 <= l < 3328 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2816 <= l < 3072 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2560 <= l < 2816 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2304 <= l < 2560 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2048 <= l < 2304 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1792 <= l < 2048 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1536 <= l < 1792 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1280 <= l < 1536 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1024 <= l < 1280 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 768 <= l < 1024 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 512 <= l < 768 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 256 <= l < 512 */ \
+ " LAY %[ta],256(0,%[ta]) \n\t" \
+ " LA %[fa],256(0,%[fa]) \n\t" \
+ "1: BCR 0,0 \n\t" /* nop as branch target */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) \
+ , [ta] "=a" (toaddr) \
+ , [fa] "=a" (fromaddr) \
+ , [mta] "=a" (movetable) \
+ , [nby] "+a" (_n_bytes) \
+ , [len] "=a" (len) \
+ : \
+ : "cc" /* clobbered */ \
+ ); \
+ }
+
+ #define MVCLE_MEMCOPY(_to,_from,_len) \
+ asm( \
+ " LG 0,%[to] \n\t" /* address of to area (r0/r1 = to address/length pair) */ \
+ " LG 2,%[from] \n\t" /* address of from area (r2/r3 = from address/length pair) */ \
+ " LGR 1,%[len] \n\t" /* len of to area */ \
+ " LGR 3,%[len] \n\t" /* len of from area (same as to area) */ \
+ "1: MVCLE 0,2,176 \n\t" /* copy storage, bypass cache (0xb0) */ \
+ " BRC 1,1b \n\t" /* retry if interrupted */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [from] "+Q" (_from) /* outputs */ \
+ : [len] "r" (_len) /* inputs */ \
+ : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \
+ );
+
+ #define MVCLE_MEMINIT(_to,_val,_len) \
+ asm( \
+ " LG 0,%[to] \n\t" /* address of to area */ \
+ " LGR 1,%[len] \n\t" /* len of to area */ \
+ " XGR 3,3 \n\t" /* from area len = 0, so only padding is stored */ \
+ "1: MVCLE 0,2,0(%[val]) \n\t" /* init storage, padding byte taken from val */ \
+ " BRC 1,1b \n\t" /* retry if interrupted */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ : [len] "r" (_len) /* inputs */ \
+ , [val] "r" (_val) /* inputs */ \
+ : "cc", "r0", "r1", "r3" /* clobbered */ \
+ );
+ #define MVCLE_MEMZERO(_to,_len) \
+ asm( \
+ " LG 0,%[to] \n\t" /* address of to area */ \
+ " LGR 1,%[len] \n\t" /* len of to area */ \
+ " XGR 3,3 \n\t" /* from area len = 0, so only padding is stored */ \
+ "1: MVCLE 0,2,0 \n\t" /* clear storage (padding byte 0) */ \
+ " BRC 1,1b \n\t" /* retry if interrupted */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ : [len] "r" (_len) /* inputs */ \
+ : "cc", "r0", "r1", "r3" /* clobbered */ \
+ );
+
+ // Clear a stretch of memory, 0 <= _len <= 256, with one EX-ecuted XC of the area with itself.
+ // There is no alignment prereq.
+ // There is no test for len out of range specified above.
+ #define XC_MEMZERO_256(_to,_len) \
+{ unsigned long toaddr; unsigned long tolen; \
+ unsigned long target; \
+ asm("\t" \
+ " LTGR %[tolen],%[len] \n\t" /* copy len and test it */ \
+ " BRC 8,2f \n\t" /* do nothing for l=0*/ \
+ " AGHI %[tolen],-1 \n\t" /* adjust for EX XC */ \
+ " LARL %[target],1f \n\t" /* addr of XC instr */ \
+ " LG %[toaddr],%[to] \n\t" /* addr of data area */ \
+ " EX %[tolen],0(%[target]) \n\t" /* execute XC instr */ \
+ " BRC 15,2f \n\t" /* skip template */ \
+ "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" /* XC template, run via EX only */ \
+ "2: BCR 0,0 \n\t" /* nop as branch target */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [tolen] "=a" (tolen) \
+ , [toaddr] "=a" (toaddr) \
+ , [target] "=a" (target) \
+ : [len] "r" (_len) /* inputs */ \
+ : "cc" /* clobbered */ \
+ ); \
+}
+
+ // Clear a stretch of memory, 256 < _len.
+ // XC_MEMZERO_256 may be used to clear shorter areas.
+ //
+ // The code
+ // - first zeroes a few bytes to align on a HeapWord.
+ // This step is currently inactive (it exists only in the #else variant) because all calls seem
+ // to have their data aligned on HeapWord boundaries.
+ // - then zeroes up to 255 bytes to align on a cache line (256-byte boundary).
+ // - then zeroes entire cache lines in a loop.
+ // - then zeroes the remaining (partial) cache line.
+#if 1
+ #define XC_MEMZERO_ANY(_to,_len) \
+{ unsigned long toaddr; unsigned long tolen; \
+ unsigned long len8; unsigned long len256; \
+ unsigned long target; unsigned long lenx; \
+ asm("\t" \
+ " LTGR %[tolen],%[len] \n\t" /* copy len and test it */ \
+ " BRC 8,2f \n\t" /* do nothing for l=0*/ \
+ " LG %[toaddr],%[to] \n\t" /* addr of data area */ \
+ " LARL %[target],1f \n\t" /* addr of XC instr */ \
+ " " \
+ " LCGR %[len256],%[toaddr] \n\t" /* cache line alignment */\
+ " NILL %[len256],0xff \n\t" /* bytes up to next 256-byte boundary */ \
+ " BRC 8,4f \n\t" /* already aligned */ \
+ " NILH %[len256],0x00 \n\t" /* zero extend */ \
+ " LLGFR %[len256],%[len256] \n\t" \
+ " LAY %[lenx],-1(,%[len256]) \n\t" /* adjust for EX XC */ \
+ " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \
+ " LA %[toaddr],0(%[len256],%[toaddr]) \n\t" /* advance to boundary */ \
+ " SGR %[tolen],%[len256] \n\t" /* adjust len */ \
+ " " \
+ "4: SRAG %[lenx],%[tolen],8 \n\t" /* # cache lines */ \
+ " BRC 8,6f \n\t" /* no full cache lines */ \
+ "5: XC 0(256,%[toaddr]),0(%[toaddr]) \n\t" /* clear one cache line */ \
+ " LA %[toaddr],256(,%[toaddr]) \n\t" \
+ " BRCTG %[lenx],5b \n\t" /* iterate */ \
+ " " \
+ "6: NILL %[tolen],0xff \n\t" /* leftover bytes */ \
+ " BRC 8,2f \n\t" /* done if none */ \
+ " LAY %[lenx],-1(,%[tolen]) \n\t" /* adjust for EX XC */ \
+ " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \
+ " BRC 15,2f \n\t" /* skip template */ \
+ " " \
+ "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" /* XC template, run via EX only */ \
+ "2: BCR 0,0 \n\t" /* nop as branch target */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [lenx] "=a" (lenx) \
+ , [len256] "=a" (len256) \
+ , [tolen] "=a" (tolen) \
+ , [toaddr] "=a" (toaddr) \
+ , [target] "=a" (target) \
+ : [len] "r" (_len) /* inputs */ \
+ : "cc" /* clobbered */ \
+ ); \
+}
+#else // Inactive variant: additionally aligns on a HeapWord (8 bytes) before aligning on a cache line.
+ #define XC_MEMZERO_ANY(_to,_len) \
+{ unsigned long toaddr; unsigned long tolen; \
+ unsigned long len8; unsigned long len256; \
+ unsigned long target; unsigned long lenx; \
+ asm("\t" \
+ " LTGR %[tolen],%[len] \n\t" /* copy len and test it */ \
+ " BRC 8,2f \n\t" /* do nothing for l=0*/ \
+ " LG %[toaddr],%[to] \n\t" /* addr of data area */ \
+ " LARL %[target],1f \n\t" /* addr of XC instr */ \
+ " " \
+ " LCGR %[len8],%[toaddr] \n\t" /* HeapWord alignment */ \
+ " NILL %[len8],0x07 \n\t" /* bytes up to next 8-byte boundary */ \
+ " BRC 8,3f \n\t" /* already aligned */ \
+ " NILH %[len8],0x00 \n\t" /* zero extend */ \
+ " LLGFR %[len8],%[len8] \n\t" \
+ " LAY %[lenx],-1(,%[len8]) \n\t" /* adjust for EX XC */ \
+ " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \
+ " LA %[toaddr],0(%[len8],%[toaddr]) \n\t" /* advance to boundary */ \
+ " SGR %[tolen],%[len8] \n\t" /* adjust len */ \
+ " " \
+ "3: LCGR %[len256],%[toaddr] \n\t" /* cache line alignment */\
+ " NILL %[len256],0xff \n\t" /* bytes up to next 256-byte boundary */ \
+ " BRC 8,4f \n\t" /* already aligned */ \
+ " NILH %[len256],0x00 \n\t" /* zero extend */ \
+ " LLGFR %[len256],%[len256] \n\t" \
+ " LAY %[lenx],-1(,%[len256]) \n\t" /* adjust for EX XC */ \
+ " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \
+ " LA %[toaddr],0(%[len256],%[toaddr]) \n\t" /* advance to boundary */ \
+ " SGR %[tolen],%[len256] \n\t" /* adjust len */ \
+ " " \
+ "4: SRAG %[lenx],%[tolen],8 \n\t" /* # cache lines */ \
+ " BRC 8,6f \n\t" /* no full cache lines */ \
+ "5: XC 0(256,%[toaddr]),0(%[toaddr]) \n\t" /* clear one cache line */ \
+ " LA %[toaddr],256(,%[toaddr]) \n\t" \
+ " BRCTG %[lenx],5b \n\t" /* iterate */ \
+ " " \
+ "6: NILL %[tolen],0xff \n\t" /* leftover bytes */ \
+ " BRC 8,2f \n\t" /* done if none */ \
+ " LAY %[lenx],-1(,%[tolen]) \n\t" /* adjust for EX XC */ \
+ " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \
+ " BRC 15,2f \n\t" /* skip template */ \
+ " " \
+ "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" /* XC template, run via EX only */ \
+ "2: BCR 0,0 \n\t" /* nop as branch target */ \
+ : [to] "+Q" (_to) /* outputs */ \
+ , [lenx] "=a" (lenx) \
+ , [len8] "=a" (len8) \
+ , [len256] "=a" (len256) \
+ , [tolen] "=a" (tolen) \
+ , [toaddr] "=a" (toaddr) \
+ , [target] "=a" (target) \
+ : [len] "r" (_len) /* inputs */ \
+ : "cc" /* clobbered */ \
+ ); \
+}
+#endif
+#endif // USE_INLINE_ASM
+
+//*************************************//
+// D I S J O I N T C O P Y I N G //
+//*************************************//
+
+static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { // Copy count HeapWords from 'from' to 'to' in ascending order.
+ // JVM2008: very frequent, some tests frequent.
+
+ // Copy HeapWord (=DW) aligned storage. Use MVCLE in inline-asm code.
+ // MVCLE guarantees DW concurrent (i.e. atomic) accesses if both the addresses of the operands
+ // are DW aligned and the length is an integer multiple of a DW. Should always be true here.
+ //
+ // No special exploit needed. H/W discovers suitable situations itself.
+ //
+ // For large chunks of memory, exploit special H/W support of z/Architecture:
+ // 1) copy short piece of memory to page-align address(es)
+ // 2) copy largest part (all contained full pages) of memory using mvcle instruction.
+ // z/Architecture processors have special H/W support for page-aligned storage
+ // where len is an integer multiple of page size. In that case, up to 4 cache lines are
+ // processed in parallel and L1 cache is not polluted.
+ // 3) copy the remaining piece of memory.
+ //
+#ifdef USE_INLINE_ASM // Not active as long as this header #undef's USE_INLINE_ASM.
+ jbyte* to_bytes = (jbyte*)to;
+ jbyte* from_bytes = (jbyte*)from;
+ size_t len_bytes = count*HeapWordSize;
+
+ // Optimized copying for data less than 4k
+ switch (count) {
+ case 0: return;
+ case 1: MOVE8_ATOMIC_1(to,from) // one MVC of 8 bytes
+ return;
+ case 2: MOVE8_ATOMIC_2(to,from) // one MVC of 16 bytes
+ return;
+// case 3: MOVE8_ATOMIC_3(to,from)
+// return;
+// case 4: MOVE8_ATOMIC_4(to,from)
+// return;
+ default:
+ if (len_bytes <= 4096) {
+ MVC_MULTI(to,from,len_bytes) // modifies len_bytes
+ return;
+ }
+ // else: more than 4k, copy with MVCLE.
+ MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+ return;
+ }
+#else
+ // Fallback code: plain HeapWord assignments, unrolled for up to four words.
+ switch (count) {
+ case 0:
+ return;
+
+ case 1:
+ *to = *from;
+ return;
+
+ case 2:
+ *to++ = *from++;
+ *to = *from;
+ return;
+
+ case 3:
+ *to++ = *from++;
+ *to++ = *from++;
+ *to = *from;
+ return;
+
+ case 4:
+ *to++ = *from++;
+ *to++ = *from++;
+ *to++ = *from++;
+ *to = *from;
+ return;
+
+ default:
+ while (count-- > 0) // five or more words: simple ascending loop
+ *(to++) = *(from++);
+ return;
+ }
+#endif
+}
+
+static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { // Copy count HeapWords; both pointers must be 8-byte aligned.
+ // JVM2008: < 4k calls.
+ assert(((((size_t)from) & 0x07L) | (((size_t)to) & 0x07L)) == 0, "No atomic copy w/o aligned data"); // low three address bits of both pointers must be zero
+ pd_aligned_disjoint_words(from, to, count); // Rare calls -> just delegate.
+}
+
+static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { // Copy count HeapWords; same implementation as the aligned variant.
+ // JVM2008: very rare.
+ pd_aligned_disjoint_words(from, to, count); // Rare calls -> just delegate.
+}
+
+
+//*************************************//
+// C O N J O I N T C O P Y I N G //
+//*************************************//
+
+static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { // Copy count HeapWords; the two ranges may overlap.
+ // JVM2008: between some and lower end of frequent.
+
+#ifdef USE_INLINE_ASM // Not active as long as this header #undef's USE_INLINE_ASM.
+ size_t count_in = count;
+ if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
+ switch (count_in) {
+ case 4: COPY8_ATOMIC_4(to,from) // load all words first, then store them
+ return;
+ case 3: COPY8_ATOMIC_3(to,from)
+ return;
+ case 2: COPY8_ATOMIC_2(to,from)
+ return;
+ case 1: COPY8_ATOMIC_1(to,from)
+ return;
+ case 0: return;
+ default:
+ from += count_in; // start one past the last word
+ to += count_in;
+ while (count_in-- > 0)
+ *(--to) = *(--from); // Copy backwards, areas overlap destructively.
+ return;
+ }
+ }
+ // else: no destructive overlap, an ascending MVCLE copy is fine.
+ jbyte* to_bytes = (jbyte*)to;
+ jbyte* from_bytes = (jbyte*)from;
+ size_t len_bytes = count_in*BytesPerLong;
+ MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+ return;
+#else
+ // Fallback code.
+ if (has_destructive_overlap((char*)from, (char*)to, count*BytesPerLong)) {
+ HeapWord t1, t2, t3; // Hold the source words from+1.. so they are read before the first store can overwrite them.
+ switch (count) {
+ case 0:
+ return;
+
+ case 1:
+ *to = *from;
+ return;
+
+ case 2:
+ t1 = *(from+1);
+ *to = *from;
+ *(to+1) = t1;
+ return;
+
+ case 3:
+ t1 = *(from+1);
+ t2 = *(from+2);
+ *to = *from;
+ *(to+1) = t1;
+ *(to+2) = t2;
+ return;
+
+ case 4:
+ t1 = *(from+1);
+ t2 = *(from+2);
+ t3 = *(from+3);
+ *to = *from;
+ *(to+1) = t1;
+ *(to+2) = t2;
+ *(to+3) = t3;
+ return;
+
+ default:
+ from += count; // start one past the last word
+ to += count;
+ while (count-- > 0)
+ *(--to) = *(--from); // Copy backwards, areas overlap destructively.
+ return;
+ }
+ }
+ // else: no destructive overlap, so the ascending disjoint copy can be used.
+ // Just delegate. HeapWords are optimally aligned anyway.
+ pd_aligned_disjoint_words(from, to, count);
+#endif
+}
+
+static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { // Copy count HeapWords; the two ranges may overlap.
+
+ // Just delegate. HeapWords are optimally aligned anyway, so no separate unaligned implementation is provided.
+ pd_aligned_conjoint_words(from, to, count);
+}
+
+static void pd_conjoint_bytes(void* from, void* to, size_t count) { // Copy count bytes; the two ranges may overlap.
+
+#ifdef USE_INLINE_ASM // Not active as long as this header #undef's USE_INLINE_ASM.
+ size_t count_in = count;
+ if (has_destructive_overlap((char*)from, (char*)to, count_in))
+ (void)memmove(to, from, count_in); // memmove handles the overlapping case
+ else {
+ jbyte* to_bytes = (jbyte*)to;
+ jbyte* from_bytes = (jbyte*)from;
+ size_t len_bytes = count_in;
+ MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+ }
+#else
+ if (has_destructive_overlap((char*)from, (char*)to, count)) // 'to' starts inside the source range, above 'from'
+ (void)memmove(to, from, count);
+ else
+ (void)memcpy(to, from, count); // no destructive overlap
+#endif
+}
+
+//**************************************************//
+// C O N J O I N T A T O M I C C O P Y I N G //
+//**************************************************//
+
+static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { // Copy count bytes; the two ranges may overlap.
+ // Delegates to pd_conjoint_bytes(); no separate atomic implementation is needed.
+ pd_conjoint_bytes(from, to, count); // bytes are always accessed atomically.
+}
+
+static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { // Copy count jshorts; the two ranges may overlap.
+
+#ifdef USE_INLINE_ASM // Not active as long as this header #undef's USE_INLINE_ASM.
+ size_t count_in = count;
+ if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerShort)) {
+ // Use optimizations from shared code where no z-specific optimization exists.
+ copy_conjoint_jshorts_atomic(from, to, count);
+ } else {
+ jbyte* to_bytes = (jbyte*)to;
+ jbyte* from_bytes = (jbyte*)from;
+ size_t len_bytes = count_in*BytesPerShort;
+ MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+ }
+#else
+ // Use optimizations from shared code where no z-specific optimization exists. The helper picks the copy direction itself.
+ copy_conjoint_jshorts_atomic(from, to, count);
+#endif
+}
+
+static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { // Copy count jints; the two ranges may overlap.
+
+#ifdef USE_INLINE_ASM // Not active as long as this header #undef's USE_INLINE_ASM.
+ size_t count_in = count;
+ if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerInt)) {
+ switch (count_in) {
+ case 4: COPY4_ATOMIC_4(to,from) // load all words first, then store them
+ return;
+ case 3: COPY4_ATOMIC_3(to,from)
+ return;
+ case 2: COPY4_ATOMIC_2(to,from)
+ return;
+ case 1: COPY4_ATOMIC_1(to,from)
+ return;
+ case 0: return;
+ default:
+ // Use optimizations from shared code where no z-specific optimization exists.
+ copy_conjoint_jints_atomic(from, to, count_in);
+ return;
+ }
+ }
+ // else: no destructive overlap, an ascending MVCLE copy is fine.
+ jbyte* to_bytes = (jbyte*)to;
+ jbyte* from_bytes = (jbyte*)from;
+ size_t len_bytes = count_in*BytesPerInt;
+ MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+#else
+ // Use optimizations from shared code where no z-specific optimization exists. The helper picks the copy direction itself.
+ copy_conjoint_jints_atomic(from, to, count);
+#endif
+}
+
+static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { // Copy count jlongs; the two ranges may overlap.
+
+#ifdef USE_INLINE_ASM // Not active as long as this header #undef's USE_INLINE_ASM.
+ size_t count_in = count;
+ if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
+ switch (count_in) {
+ case 4: COPY8_ATOMIC_4(to,from) return;
+ case 3: COPY8_ATOMIC_3(to,from) return;
+ case 2: COPY8_ATOMIC_2(to,from) return;
+ case 1: COPY8_ATOMIC_1(to,from) return;
+ case 0: return;
+ default:
+ from += count_in; // start one past the last element
+ to += count_in;
+ while (count_in-- > 0) { *(--to) = *(--from); } // Copy backwards, areas overlap destructively.
+ return;
+ }
+ }
+ // else: no destructive overlap, an ascending MVCLE copy is fine.
+ jbyte* to_bytes = (jbyte*)to;
+ jbyte* from_bytes = (jbyte*)from;
+ size_t len_bytes = count_in*BytesPerLong;
+ MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+#else
+ size_t count_in = count;
+ if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
+ if (count_in < 8) { // short copy: simple descending loop
+ from += count_in;
+ to += count_in;
+ while (count_in-- > 0)
+ *(--to) = *(--from); // Copy backwards, areas overlap destructively.
+ return;
+ }
+ // else: eight or more elements, descending copy with two elements per iteration.
+ from += count_in-1; // point at the last element
+ to += count_in-1;
+ if (count_in&0x01) { // odd count: copy one element so that an even number remains
+ *(to--) = *(from--);
+ count_in--;
+ }
+ for (; count_in>0; count_in-=2) {
+ *to = *from;
+ *(to-1) = *(from-1);
+ to -= 2;
+ from -= 2;
+ }
+ }
+ else
+ pd_aligned_disjoint_words((HeapWord*)from, (HeapWord*)to, count_in); // rare calls -> just delegate.
+#endif
+}
+
+static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { // Copy count oops; the two ranges may overlap.
+
+#ifdef USE_INLINE_ASM // Not active as long as this header #undef's USE_INLINE_ASM.
+ size_t count_in = count;
+ if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerOop)) {
+ switch (count_in) {
+ case 4: COPY8_ATOMIC_4(to,from) return;
+ case 3: COPY8_ATOMIC_3(to,from) return;
+ case 2: COPY8_ATOMIC_2(to,from) return;
+ case 1: COPY8_ATOMIC_1(to,from) return;
+ case 0: return;
+ default:
+ from += count_in; // start one past the last element
+ to += count_in;
+ while (count_in-- > 0) { *(--to) = *(--from); } // Copy backwards, areas overlap destructively.
+ return;
+ }
+ }
+ // else: no destructive overlap, an ascending MVCLE copy is fine.
+ jbyte* to_bytes = (jbyte*)to;
+ jbyte* from_bytes = (jbyte*)from;
+ size_t len_bytes = count_in*BytesPerOop;
+ MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
+#else
+ size_t count_in = count;
+ if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerOop)) {
+ from += count_in; // start one past the last element
+ to += count_in;
+ while (count_in-- > 0) *(--to) = *(--from); // Copy backwards, areas overlap destructively.
+ return;
+ }
+ // else: no destructive overlap, so the ascending word copy can be used.
+ pd_aligned_disjoint_words((HeapWord*)from, (HeapWord*)to, count_in); // rare calls -> just delegate.
+ return;
+#endif
+}
+
+static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
+ pd_conjoint_bytes_atomic(from, to, count); // HeapWord*-typed entry point; forwards all arguments unchanged.
+}
+
+static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
+ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); // Reinterpret both HeapWord* as jshort* and forward.
+}
+
+static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
+ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); // Reinterpret both HeapWord* as jint* and forward.
+}
+
+static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
+ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); // Reinterpret both HeapWord* as jlong* and forward.
+}
+
+static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
+ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); // Reinterpret both HeapWord* as oop* and forward.
+}
+
+//**********************************************//
+// M E M O R Y I N I T I A L I S A T I O N //
+//**********************************************//
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+ // Set 'count' bytes starting at 'to' to 'value'. JVM2008: very rare, only in some tests.
+#ifdef USE_INLINE_ASM
+ // Initialize storage to a given value. Use memset instead of copy loop.
+ // For large chunks of memory, exploit special H/W support of z/Architecture:
+ // 1) init short piece of memory to page-align address
+ // 2) init largest part (all contained full pages) of memory using mvcle instruction.
+ // z/Architecture processors have special H/W support for page-aligned storage
+ // where len is an int multiple of page size. In that case, up to 4 cache lines are
+ // processed in parallel and L1 cache is not polluted.
+ // 3) init the remaining piece of memory.
+ // Atomicity cannot really be an issue since gcc implements the loop body with XC anyway.
+ // If atomicity is a problem, we have to prevent gcc optimization. Best workaround: inline asm.
+ // NOTE(review): the text above mentions memset, yet this branch expands MVCLE_MEMINIT; memset is only used in the #else branch.
+ jbyte* to_bytes = (jbyte*)to;
+ size_t len_bytes = count;
+
+ MVCLE_MEMINIT(to_bytes, value, len_bytes)
+
+#else
+ // Memset does the best job possible: loop over 256-byte MVCs, with
+ // the last MVC EXecuted. With the -mmvcle option, initialization
+ // is done using MVCLE -> slight advantage for large areas.
+ (void)memset(to, value, count);
+#endif
+}
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  // Fill 'count' heap words at 'tohw'; every julong store carries 'value' in both 32-bit halves.
+  // The byte-uniform patterns (all zero bits, all one bits) are forwarded to pd_zero_to_words() and
+  // pd_fill_to_bytes(). Debug builds only (memory poisoning: BAADBABE, DEADBEEF, ...); JVM2008: < 4k calls.
+  const juint all_ones = ~(juint)(0);
+  if (value == 0) {
+    pd_zero_to_words(tohw, count);
+  } else if (value == all_ones) {
+    pd_fill_to_bytes(tohw, count*HeapWordSize, (jubyte)all_ones);
+  } else {
+    const julong pattern = ((julong) value << 32) | value;
+    julong* const end = ((julong*) tohw) + count;
+    for (julong* cur = (julong*) tohw; cur != end; ++cur) {
+      *cur = pattern;
+    }
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+ // JVM2008: very frequent, but virtually all calls are with value == 0.
+ pd_fill_to_words(tohw, count, value); // No separate aligned implementation: same work as pd_fill_to_words().
+}
+
+//**********************************//
+// M E M O R Y C L E A R I N G //
+//**********************************//
+
+// Zero 'count' heap words starting at 'tohw' by delegating to pd_zero_to_bytes(), which also works
+// HeapWord-atomic. Shared code distinguishes between simple and large zero_to_words (see pd_zero_to_words_large()).
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+ pd_zero_to_bytes(tohw, count*HeapWordSize);
+}
+
+// Zero 'count' heap words at 'tohw'. Same body as pd_zero_to_words(): delegates to pd_zero_to_bytes(), which also works HeapWord-atomic.
+static void pd_zero_to_words_large(HeapWord* tohw, size_t count) {
+ // JVM2008: generally frequent, some tests show very frequent calls.
+ pd_zero_to_bytes(tohw, count*HeapWordSize);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+ // Zero 'count' bytes starting at 'to'. JVM2008: some calls (generally), some tests frequent
+#ifdef USE_INLINE_ASM
+ // Even zero_to_bytes() requires HeapWord-atomic, or, at least, sequential
+ // zeroing of the memory. MVCLE is not fit for that job:
+ // "As observed by other CPUs and by the channel subsystem,
+ // that portion of the first operand which is filled
+ // with the padding byte is not necessarily stored into in
+ // a left-to-right direction and may appear to be stored
+ // into more than once."
+ // Therefore, implementation was changed to use (multiple) XC instructions.
+ // Lengths up to line_size (256) bytes expand XC_MEMZERO_256, longer ones XC_MEMZERO_ANY.
+ const long line_size = 256;
+ jbyte* to_bytes = (jbyte*)to;
+ size_t len_bytes = count;
+
+ if (len_bytes <= line_size) {
+ XC_MEMZERO_256(to_bytes, len_bytes);
+ } else {
+ XC_MEMZERO_ANY(to_bytes, len_bytes);
+ }
+
+#else
+ // Memset does the best job possible: loop over 256-byte MVCs, with
+ // the last MVC EXecuted. With the -mmvcle option, initialization
+ // is done using MVCLE -> slight advantage for large areas.
+ (void)memset(to, 0, count);
+#endif
+}
+
+#endif // CPU_S390_VM_COPY_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/debug_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/codeCache.hpp"
+#include "code/nmethod.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/init.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+
+void pd_ps(frame f) {} // Intentionally empty: accepts a frame and does nothing on this platform.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/depChecker_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_DEPCHECKER_S390_HPP
+#define CPU_S390_VM_DEPCHECKER_S390_HPP
+
+// Nothing to do on z/Architecture
+
+#endif // CPU_S390_VM_DEPCHECKER_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/disassembler_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_DISASSEMBLER_S390_HPP
+#define CPU_S390_VM_DISASSEMBLER_S390_HPP
+
+ static int pd_instruction_alignment() {
+ return 1; // NOTE(review): unit is presumably bytes — confirm against the shared Disassembler code.
+ }
+
+ static const char* pd_cpu_opts() {
+ return "zarch"; // Presumably the CPU option string passed to the external disassembler — TODO confirm.
+ }
+
+#endif // CPU_S390_VM_DISASSEMBLER_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/frame_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/markOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_s390.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+
+// Major contributions by Aha, AS.
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() { // Compiled only under ASSERT; performs no checks on this platform.
+}
+#endif // ASSERT
+
+
+// Profiling/safepoint support
+
+bool frame::safe_for_sender(JavaThread *thread) {
+  // Safe only if sp and fp are both non-NULL and both lie inside the thread's
+  // stack, i.e. within [stack_base - stack_size, stack_base].
+  const address frame_sp = (address)sp();
+  const address frame_fp = (address)fp();
+  if (frame_sp == NULL || frame_fp == NULL) return false;
+  const address stack_hi = thread->stack_base();
+  const address stack_lo = stack_hi - thread->stack_size();
+  return (frame_sp <= stack_hi && frame_sp >= stack_lo) &&
+         (frame_fp <= stack_hi && frame_fp >= stack_lo);
+}
+
+bool frame::is_interpreted_frame() const {
+ return Interpreter::contains(pc()); // Decided solely by whether Interpreter::contains() accepts this frame's pc.
+}
+
+// sender_sp
+
+intptr_t* frame::interpreter_frame_sender_sp() const {
+ return sender_sp(); // Same value as sender_sp(); no interpreter-specific adjustment is made here.
+}
+
+frame frame::sender_for_entry_frame(RegisterMap *map) const {
+  // Java frame called from C: skip all C frames and return the top C frame
+  // of that chunk as the sender. It is rebuilt from the Java frame anchor
+  // kept by the entry frame's call wrapper.
+  assert(map != NULL, "map must be set");
+  JavaFrameAnchor* const anchor = entry_frame_call_wrapper()->anchor();
+
+  assert(!entry_frame_is_first(), "next Java sp must be non zero");
+  assert(anchor->last_Java_sp() > _sp, "must be above this frame on stack");
+
+  // Reset the register map before handing out the sender.
+  map->clear();
+  assert(map->include_argument_oops(), "should be set by clear");
+
+  if (anchor->last_Java_pc() == NULL) {
+    // Last_java_pc is not set if we come here from compiled code.
+    return frame(anchor->last_Java_sp());
+  }
+
+  return frame(anchor->last_Java_sp(), anchor->last_Java_pc());
+}
+
+frame frame::sender_for_interpreter_frame(RegisterMap *map) const {
+ // Build the caller from sender_sp()/sender_pc(); the sender_sp saved in this frame's ijava state is passed as unextended_sp. 'map' is unused.
+ return frame(sender_sp(), sender_pc(), (intptr_t*)(ijava_state()->sender_sp));
+}
+
+frame frame::sender_for_compiled_frame(RegisterMap *map) const {
+  // Sender of a frame owned by the compiler: the caller is built from this
+  // frame's sender sp and sender pc; the register map is adjusted afterwards
+  // when the map asks for updates.
+  assert(map != NULL, "map must be set");
+
+  frame caller(compiled_sender_sp(_cb), *compiled_sender_pc_addr(_cb));
+
+  if (!map->update_map()) {
+    return caller;
+  }
+
+  // Tell GC to use argument oopmaps for some runtime stubs that need it.
+  map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+  if (_cb->oop_maps() != NULL) {
+    OopMapSet::update_register_map(this, map);
+  }
+
+  return caller;
+}
+
+intptr_t* frame::compiled_sender_sp(CodeBlob* cb) const {
+ return sender_sp(); // 'cb' is not consulted; the result is simply sender_sp().
+}
+
+address* frame::compiled_sender_pc_addr(CodeBlob* cb) const {
+ return sender_pc_addr(); // 'cb' is not consulted; the result is simply sender_pc_addr().
+}
+
+frame frame::sender(RegisterMap* map) const {
+  // Find the caller of this frame by dispatching on the frame kind.
+  // By default argument oops need not be followed; the sender_for_xxx
+  // helpers update the map accordingly.
+  map->set_include_argument_oops(false);
+
+  if (is_entry_frame())       return sender_for_entry_frame(map);
+  if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
+
+  // Neither an entry nor an interpreted frame: decide by the code blob.
+  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+  if (_cb == NULL) {
+    // Must be native-compiled frame, i.e. the marshaling code for native
+    // methods that exists in the core system.
+    return frame(sender_sp(), sender_pc());
+  }
+
+  return sender_for_compiled_frame(map);
+}
+
+void frame::patch_pc(Thread* thread, address pc) { // Replace this frame's saved return pc by 'pc'; 'thread' is unused.
+  if (TracePcPatching) { // Report the slot that is actually written below: own_abi()->return_pc.
+    tty->print_cr("patch_pc at address " PTR_FORMAT " [" PTR_FORMAT " -> " PTR_FORMAT "] ",
+                  p2i(&(own_abi()->return_pc)), p2i((address)own_abi()->return_pc), p2i(pc));
+  }
+  own_abi()->return_pc = (uint64_t)pc;
+  _cb = CodeCache::find_blob(pc); // Re-resolve the cached code blob for the new pc.
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) { // A deopt original pc is recorded: the frame counts as deoptimized.
+    assert(original_pc == _pc, "expected original to be stored before patching");
+    _deopt_state = is_deoptimized;
+    // Leave _pc as is.
+  } else {
+    _deopt_state = not_deoptimized;
+    _pc = pc;
+  }
+}
+
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+ // No validation is implemented: any interpreted frame is reported valid; 'thread' is unused. Is there anything to do?
+ assert(is_interpreted_frame(), "Not an interpreted frame");
+ return true;
+}
+
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+  // Fetch the method result of this interpreted frame: T_OBJECT/T_ARRAY go to *oop_result, primitives to *value_result.
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  Method* method = interpreter_frame_method();
+  BasicType type = method->result_type();
+  if (method->is_native()) {
+    // Native method: the result is taken from the lresult/fresult/oop_tmp slots of the ijava state.
+    address lresult = (address)&(ijava_state()->lresult);
+    address fresult = (address)&(ijava_state()->fresult);
+    switch (type) {
+      case T_OBJECT:
+      case T_ARRAY: {
+        *oop_result = (oop) (void*) ijava_state()->oop_tmp;
+        break;
+      }
+      // lresult is read as a whole (unsigned) long and then narrowed to the Java type.
+      case T_BOOLEAN : value_result->z = (jboolean) *(unsigned long*)lresult; break;
+      case T_INT     : value_result->i = (jint)     *(long*)lresult;          break;
+      case T_CHAR    : value_result->c = (jchar)    *(unsigned long*)lresult; break;
+      case T_SHORT   : value_result->s = (jshort)   *(long*)lresult;          break;
+      case T_BYTE    : value_result->b = (jbyte)    *(long*)lresult;          break; // Byte member, as in the non-native branch.
+      case T_LONG    : value_result->j = (jlong)    *(long*)lresult;          break;
+      case T_FLOAT   : value_result->f = (jfloat)   *(float*)fresult;         break;
+      case T_DOUBLE  : value_result->d = (jdouble)  *(double*)fresult;        break;
+      case T_VOID    : break; // Nothing to do.
+      default        : ShouldNotReachHere();
+    }
+  } else { // Non-native method: the result is read at interpreter_frame_tos_address().
+    intptr_t* tos_addr = interpreter_frame_tos_address();
+    switch (type) {
+      case T_OBJECT:
+      case T_ARRAY: {
+        oop obj = *(oop*)tos_addr;
+        assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+        *oop_result = obj;
+        break;
+      }
+      case T_BOOLEAN : value_result->z = (jboolean) *(jint*)tos_addr; break;
+      case T_BYTE    : value_result->b = (jbyte)    *(jint*)tos_addr; break;
+      case T_CHAR    : value_result->c = (jchar)    *(jint*)tos_addr; break;
+      case T_SHORT   : value_result->s = (jshort)   *(jint*)tos_addr; break;
+      case T_INT     : value_result->i = *(jint*)tos_addr;            break;
+      case T_LONG    : value_result->j = *(jlong*)tos_addr;           break;
+      case T_FLOAT   : value_result->f = *(jfloat*)tos_addr;          break;
+      case T_DOUBLE  : value_result->d = *(jdouble*)tos_addr;         break;
+      case T_VOID    : break; // Nothing to do.
+      default        : ShouldNotReachHere();
+    }
+  }
+
+  return type; // The method's result type, whichever branch was taken.
+}
+
+
+// Dump all frames starting a given C stack-pointer.
+// Use max_frames to limit the number of traced frames.
+void frame::back_trace(outputStream* st, intptr_t* start_sp, intptr_t* top_pc, unsigned long flags, int max_frames) {
+  // Follow the back chain from start_sp and print one line per frame to st (flags bit 0: arguments, bit 1: istate pointers).
+  static char buf[ 150 ];
+
+  bool print_outgoing_arguments = flags & 0x1;
+  bool print_istate_pointers = flags & 0x2;
+  int num = 0;
+
+  intptr_t* current_sp = (intptr_t*) start_sp;
+  int last_num_jargs = 0;
+  int frame_type = 0; // 0: C, 1: interpreter, 2: entry, 3: compiled, 4: named stub/blob.
+  int last_frame_type = 0;
+
+  while (current_sp) {
+    intptr_t* current_fp = (intptr_t*) *current_sp;
+    address current_pc = (num == 0)
+                         ? (address) top_pc
+                         : (address) *((intptr_t*)(((address) current_sp) + _z_abi(return_pc)));
+    // A non-null back link must point strictly above the current frame; otherwise give up.
+    if ((intptr_t*) current_fp != 0 && (intptr_t*) current_fp <= current_sp) {
+      st->print_cr("ERROR: corrupt stack");
+      return;
+    }
+
+    st->print("#%-3d ", num);
+    const char* type_name = " ";
+    const char* function_name = NULL;
+
+    // Detect current frame's frame_type, default to 'C frame'.
+    frame_type = 0;
+
+    CodeBlob* blob = NULL;
+
+    if (Interpreter::contains(current_pc)) {
+      frame_type = 1;
+    } else if (StubRoutines::contains(current_pc)) {
+      if (StubRoutines::returns_to_call_stub(current_pc)) {
+        frame_type = 2;
+      } else {
+        frame_type = 4;
+        type_name = "stu";
+        StubCodeDesc* desc = StubCodeDesc::desc_for (current_pc);
+        if (desc) {
+          function_name = desc->name();
+        } else {
+          function_name = "unknown stub";
+        }
+      }
+    } else if (CodeCache::contains(current_pc)) {
+      blob = CodeCache::find_blob_unsafe(current_pc);
+      if (blob) {
+        if (blob->is_nmethod()) {
+          frame_type = 3;
+        } else if (blob->is_deoptimization_stub()) {
+          frame_type = 4;
+          type_name = "deo";
+          function_name = "deoptimization blob";
+        } else if (blob->is_uncommon_trap_stub()) {
+          frame_type = 4;
+          type_name = "uct";
+          function_name = "uncommon trap blob";
+        } else if (blob->is_exception_stub()) {
+          frame_type = 4;
+          type_name = "exc";
+          function_name = "exception blob";
+        } else if (blob->is_safepoint_stub()) {
+          frame_type = 4;
+          type_name = "saf";
+          function_name = "safepoint blob";
+        } else if (blob->is_runtime_stub()) {
+          frame_type = 4;
+          type_name = "run";
+          function_name = ((RuntimeStub *)blob)->name();
+        } else if (blob->is_method_handles_adapter_blob()) {
+          frame_type = 4;
+          type_name = "mha";
+          function_name = "method handles adapter blob";
+        } else {
+          frame_type = 4;
+          type_name = "blo";
+          function_name = "unknown code blob";
+        }
+      } else {
+        frame_type = 4;
+        type_name = "blo";
+        function_name = "unknown code blob";
+      }
+    }
+
+    st->print("sp=" PTR_FORMAT " ", p2i(current_sp));
+
+    if (frame_type == 0) {
+      current_pc = (address) *((intptr_t*)(((address) current_sp) + _z_abi(gpr14)));
+    }
+
+    st->print("pc=" PTR_FORMAT " ", p2i(current_pc));
+    st->print(" ");
+
+    switch (frame_type) {
+      case 0: // C frame:
+        {
+          st->print(" ");
+          if (current_pc == 0) {
+            st->print("? ");
+          } else {
+            // name
+            int func_offset;
+            char demangled_name[256];
+            int demangled_name_len = 256;
+            if (os::dll_address_to_function_name(current_pc, demangled_name, demangled_name_len, &func_offset)) {
+              demangled_name[demangled_name_len-1] = '\0';
+              st->print(func_offset == -1 ? "%s " : "%s+0x%x", demangled_name, func_offset);
+            } else {
+              st->print("? ");
+            }
+          }
+        }
+        break;
+
+      case 1: // interpreter frame:
+        {
+          st->print(" i ");
+
+          if (last_frame_type != 1) last_num_jargs = 8;
+
+          // name
+          Method* method = *(Method**)((address)current_fp + _z_ijava_state_neg(method));
+          if (method) {
+            if (method->is_synchronized()) st->print("synchronized ");
+            if (method->is_static()) st->print("static ");
+            if (method->is_native()) st->print("native ");
+            method->name_and_sig_as_C_string(buf, sizeof(buf));
+            st->print("%s ", buf);
+          }
+          else
+            st->print("? ");
+
+          intptr_t* tos = (intptr_t*) *(intptr_t*)((address)current_fp + _z_ijava_state_neg(esp));
+          if (print_istate_pointers) {
+            st->cr();
+            st->print(" ");
+            st->print("ts=" PTR_FORMAT " ", p2i(tos));
+          }
+
+          // Dump some Java stack slots.
+          if (print_outgoing_arguments) {
+            if (method != NULL && method->is_native()) { // The method slot may be NULL (printed as "? " above).
+#ifdef ASSERT
+              intptr_t* cargs = (intptr_t*) (((address)current_sp) + _z_abi(carg_1));
+              for (int i = 0; i < last_num_jargs; i++) {
+                // Cargs is not prepushed.
+                st->cr();
+                st->print(" ");
+                st->print(PTR_FORMAT, *(cargs));
+                cargs++;
+              }
+#endif /* ASSERT */
+            }
+            else {
+              if (tos) {
+                for (int i = 0; i < last_num_jargs; i++) {
+                  // tos+0 is prepushed, ignore.
+                  tos++;
+                  if (tos >= (intptr_t *)((address)current_fp + _z_ijava_state_neg(monitors)))
+                    break;
+                  st->cr();
+                  st->print(" ");
+                  st->print(PTR_FORMAT " %+.3e %+.3le", *(tos), *(float*)(tos), *(double*)(tos));
+                }
+              }
+            }
+            if (method != NULL) last_num_jargs = method->size_of_parameters(); // Unknown method: keep the previous count.
+          }
+        }
+        break;
+
+      case 2: // entry frame:
+        {
+          st->print("v2i ");
+
+          // name
+          st->print("call stub");
+        }
+        break;
+
+      case 3: // compiled frame:
+        {
+          st->print(" c ");
+
+          // name
+          Method* method = ((nmethod *)blob)->method();
+          if (method) {
+            method->name_and_sig_as_C_string(buf, sizeof(buf));
+            st->print("%s ", buf);
+          }
+          else
+            st->print("? ");
+        }
+        break;
+
+      case 4: // named frames
+        {
+          st->print("%s ", type_name);
+
+          // name
+          if (function_name)
+            st->print("%s", function_name);
+        }
+        break;
+
+      default:
+        break;
+    }
+
+    st->cr();
+    st->flush();
+
+    current_sp = current_fp;
+    last_frame_type = frame_type;
+    num++;
+    // Check for maximum # of frames, and stop when reached.
+    if (max_frames > 0 && --max_frames == 0)
+      break;
+  }
+
+}
+
+// Convenience function for calls from the debugger.
+
+extern "C" void bt(intptr_t* start_sp,intptr_t* top_pc) {
+ frame::back_trace(tty,start_sp, top_pc, 0); // flags == 0: neither outgoing arguments nor istate pointers are printed.
+}
+
+extern "C" void bt_full(intptr_t* start_sp,intptr_t* top_pc) {
+ frame::back_trace(tty,start_sp, top_pc, (unsigned long)(long)-1); // All flag bits set: every optional detail is printed.
+}
+
+
+// Function for tracing a limited number of frames.
+// Use this one if you only need to see the "top of stack" frames.
+extern "C" void bt_max(intptr_t *start_sp, intptr_t *top_pc, int max_frames) {
+ frame::back_trace(tty, start_sp, top_pc, 0, max_frames); // Like bt(), but stops after max_frames frames when max_frames > 0.
+}
+
+#if !defined(PRODUCT)
+
+#define DESCRIBE_ADDRESS(name) \
+  values.describe(frame_no, (intptr_t*)&ijava_state()->name, #name);
+
+void frame::describe_pd(FrameValues& values, int frame_no) {
+  // Label the address of each z_ijava_state element with the element's name.
+  // Frames that are not interpreted get no platform-dependent description.
+  if (!is_interpreted_frame()) return;
+  DESCRIBE_ADDRESS(method);
+  DESCRIBE_ADDRESS(locals);
+  DESCRIBE_ADDRESS(monitors);
+  DESCRIBE_ADDRESS(cpoolCache);
+  DESCRIBE_ADDRESS(bcp);
+  DESCRIBE_ADDRESS(mdx);
+  DESCRIBE_ADDRESS(esp);
+  DESCRIBE_ADDRESS(sender_sp);
+  DESCRIBE_ADDRESS(top_frame_sp);
+  DESCRIBE_ADDRESS(oop_tmp);
+  DESCRIBE_ADDRESS(lresult);
+  DESCRIBE_ADDRESS(fresult);
+}
+
+#endif // !PRODUCT
+
+intptr_t *frame::initial_deoptimization_info() {
+ // Returns this frame's fp(). Used to reset the saved FP.
+ return fp();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/frame_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,552 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by ML, AHa.
+
+#ifndef CPU_S390_VM_FRAME_S390_HPP
+#define CPU_S390_VM_FRAME_S390_HPP
+
+#include "runtime/synchronizer.hpp"
+
+ // C frame layout on ZARCH_64.
+ //
+ // In this figure the stack grows upwards, while memory grows
+ // downwards. See "Linux for zSeries: ELF Application Binary Interface Supplement",
+ // IBM Corp. (LINUX-1107-01)
+ //
+ // Square brackets denote stack regions possibly larger
+ // than a single 64 bit slot.
+ //
+ // STACK:
+ // 0 [C_FRAME] <-- SP after prolog (mod 8 = 0)
+ // [C_FRAME] <-- SP before prolog
+ // ...
+ // [C_FRAME]
+ //
+ // C_FRAME:
+ // 0 [ABI_160]
+ //
+ // ABI_160:
+ // 0 [ABI_16]
+ // 16 CARG_1: spill slot for outgoing arg 1. used by next callee.
+ // 24 CARG_2: spill slot for outgoing arg 2. used by next callee.
+ // 32 CARG_3: spill slot for outgoing arg 3. used by next callee.
+ // 40 CARG_4: spill slot for outgoing arg 4. used by next callee.
+ // 48 GPR_6: spill slot for GPR_6. used by next callee.
+ // ... ...
+ // 120 GPR_15: spill slot for GPR_15. used by next callee.
+ // 128 CFARG_1: spill slot for outgoing fp arg 1. used by next callee.
+ // 136 CFARG_2: spill slot for outgoing fp arg 2. used by next callee.
+ // 144 CFARG_3: spill slot for outgoing fp arg 3. used by next callee.
+ // 152 CFARG_4: spill slot for outgoing fp arg 4. used by next callee.
+ // 160 [REMAINING CARGS]
+ //
+ // ABI_16:
+ // 0 callers_sp
+ // 8 return_pc
+
+ public:
+
+ // C frame layout
+
+ typedef enum {
+ // Stack alignment in bytes.
+ alignment_in_bytes = 8,
+ // log_2 of the same alignment expressed in bits: log_2(8*8 bits) = 6.
+ log_2_of_alignment_in_bits = 6
+ } frame_constants;
+
+ struct z_abi_16 {
+ uint64_t callers_sp; // Offset 0.
+ uint64_t return_pc; // Offset 8.
+ };
+ // Size of z_abi_16 in bytes (two 64-bit slots).
+ enum {
+ z_abi_16_size = sizeof(z_abi_16)
+ };
+ // Byte offset of _component within z_abi_16.
+ #define _z_abi16(_component) \
+ (offset_of(frame::z_abi_16, _component))
+
+ // ABI_160:
+
+ // REMARK: This structure should reflect the "minimal" ABI frame
+ // layout, but it doesn't. There is an extra field at the end of the
+ // structure that marks the area where arguments are passed, when
+ // the argument registers "overflow". Thus, sizeof(z_abi_160)
+ // doesn't yield the expected (and desired) result. Therefore, as
+ // long as we do not provide extra infrastructure, one should use
+ // either z_abi_160_size, or _z_abi(remaining_cargs) instead of
+ // sizeof(...).
+ struct z_abi_160 {
+ uint64_t callers_sp; // Offset 0.
+ uint64_t return_pc; // Offset 8.
+ uint64_t carg_1; // Offset 16: first of four spill slots for outgoing args.
+ uint64_t carg_2;
+ uint64_t carg_3;
+ uint64_t carg_4;
+ uint64_t gpr6; // Offset 48: first of ten spill slots for GPR 6..15.
+ uint64_t gpr7;
+ uint64_t gpr8;
+ uint64_t gpr9;
+ uint64_t gpr10;
+ uint64_t gpr11;
+ uint64_t gpr12;
+ uint64_t gpr13;
+ uint64_t gpr14;
+ uint64_t gpr15; // Offset 120.
+ uint64_t cfarg_1; // Offset 128: first of four spill slots for outgoing fp args.
+ uint64_t cfarg_2;
+ uint64_t cfarg_3;
+ uint64_t cfarg_4;
+ uint64_t remaining_cargs; // Offset 160: marker for the overflow argument area; makes sizeof(z_abi_160) exceed 160.
+ };
+ // The ABI size proper, excluding the trailing remaining_cargs marker. Use this instead of sizeof(z_abi_160).
+ enum {
+ z_abi_160_size = 160
+ };
+ // Byte offset of _component within z_abi_160.
+ #define _z_abi(_component) \
+ (offset_of(frame::z_abi_160, _component))
+
+ struct z_abi_160_spill : z_abi_160 {
+ // Additional spill slots. Use as 'offset_of(z_abi_160_spill, spill[n])'.
+ uint64_t spill[0]; // Zero-length array: only used to name offsets past the base struct.
+ // Aligned to frame::alignment_in_bytes (8).
+ };
+
+
+ // non-volatile GPRs:
+
+ struct z_spill_nonvolatiles { // One 64-bit slot per non-volatile GPR, r6 through r13.
+ uint64_t r6;
+ uint64_t r7;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ };
+ // Size of the spill area in bytes.
+ enum {
+ z_spill_nonvolatiles_size = sizeof(z_spill_nonvolatiles)
+ };
+ // Component offset minus struct size: a negative displacement from the first byte past the struct.
+ #define _z_spill_nonvolatiles_neg(_component) \
+ (-frame::z_spill_nonvolatiles_size + offset_of(frame::z_spill_nonvolatiles, _component))
+
+ // Frame layout for the Java template interpreter on z/Architecture.
+ //
+ // In these figures the stack grows upwards, while memory grows
+ // downwards. Square brackets denote regions possibly larger than
+ // single 64 bit slots.
+ //
+ // STACK (no JNI, no compiled code, no library calls, template interpreter is active):
+ //
+ // 0 [TOP_IJAVA_FRAME]
+ // [PARENT_IJAVA_FRAME]
+ // [PARENT_IJAVA_FRAME]
+ // ...
+ // [PARENT_IJAVA_FRAME]
+ // [ENTRY_FRAME]
+ // [C_FRAME]
+ // ...
+ // [C_FRAME]
+ //
+ // TOP_IJAVA_FRAME:
+ //
+ // 0 [TOP_IJAVA_FRAME_ABI]
+ // 16 [operand stack]
+ // [monitors] (optional)
+ // [IJAVA_STATE]
+ // note: Own locals are located in the caller frame.
+ //
+ // PARENT_IJAVA_FRAME:
+ //
+ // 0 [PARENT_IJAVA_FRAME_ABI]
+ // [callee's locals w/o arguments]
+ // [outgoing arguments]
+ // [used part of operand stack w/o arguments]
+ // [monitors] (optional)
+ // [IJAVA_STATE]
+ //
+ // ENTRY_FRAME:
+ //
+ // 0 [PARENT_IJAVA_FRAME_ABI]
+ // [callee's locals w/o arguments]
+ // [outgoing arguments]
+ // [ENTRY_FRAME_LOCALS]
+ //
+ // TOP_IJAVA_FRAME_ABI:
+ //
+ // 0 [ABI_160]
+ //
+ //
+ // PARENT_IJAVA_FRAME_ABI:
+ //
+ // 0 [ABI_16]
+ //
+ // IJAVA_STATE:
+ // 0 method (preceded by a 'magic' word in debug builds)
+ // 8 mirror
+ // locals
+ // monitors : monitor block top (i.e. lowest address)
+ // cpoolCache
+ // bcp
+ // mdx
+ // esp : Points to first slot above operands.
+ // sender_sp : See comment in z_ijava_state.
+ // top_frame_sp : Own SP before modification by i2c adapter.
+ // oop_tmp
+ // lresult
+ // fresult
+ //
+ // EXAMPLE:
+ // ---------
+ //
+ // 3 monitors, 5 operand stack slots max. / 3 allocated
+ //
+ // F0 callers_sp <- Z_SP (callers_sp == Z_fp (own fp))
+ // return_pc
+ // [rest of ABI_160]
+ // /slot 4: free
+ // oper. | slot 3: free <- Z_esp points to first free slot
+ // stack | slot 2: ref val v2 caches IJAVA_STATE.esp
+ // | slot 1: unused
+ // \slot 0: long val v1
+ // /slot 5 <- IJAVA_STATE.monitors = monitor block top
+ // | slot 4
+ // monitors| slot 3
+ // | slot 2
+ // | slot 1
+ // \slot 0
+ // [IJAVA_STATE] <- monitor block bot (points to first byte in IJAVA_STATE)
+ // F1 [PARENT_IJAVA_FRAME_ABI] <- Z_fp (== *Z_SP, points to slot just below IJAVA_STATE)
+ // [F0's locals] <- Z_locals, locals[i] := *(Z_locals - i*BytesPerWord)
+ // [F1's operand stack]
+ // [F1's monitors] (optional)
+ // [IJAVA_STATE]
+
+ public:
+
+ // PARENT_IJAVA_FRAME_ABI
+
+ struct z_parent_ijava_frame_abi : z_abi_16 { // Adds no fields: identical layout to z_abi_16.
+ };
+ // Size in bytes; equals sizeof(z_abi_16) because no fields are added.
+ enum {
+ z_parent_ijava_frame_abi_size = sizeof(z_parent_ijava_frame_abi)
+ };
+ // Byte offset of _component within z_parent_ijava_frame_abi.
+ #define _z_parent_ijava_frame_abi(_component) \
+ (offset_of(frame::z_parent_ijava_frame_abi, _component))
+
+ // TOP_IJAVA_FRAME_ABI
+
+ struct z_top_ijava_frame_abi : z_abi_160 { // Adds no fields: identical layout to z_abi_160.
+ };
+ // NOTE(review): sizeof() here counts the inherited remaining_cargs slot too, so this is larger than z_abi_160_size — confirm intended.
+ enum {
+ z_top_ijava_frame_abi_size = sizeof(z_top_ijava_frame_abi)
+ };
+ // Byte offset of _component within z_top_ijava_frame_abi.
+ #define _z_top_ijava_frame_abi(_component) \
+ (offset_of(frame::z_top_ijava_frame_abi, _component))
+
+ // IJAVA_STATE
+ // Interpreter state block of a frame; every member is one 64-bit slot. Located directly below fp().
+ struct z_ijava_state{
+ DEBUG_ONLY(uint64_t magic;) // wrong magic -> wrong state!
+ uint64_t method; // Exposed as Method* by interpreter_frame_method_addr().
+ uint64_t mirror; // Exposed as oop by interpreter_frame_mirror_addr().
+ uint64_t locals; // Z_locals
+ uint64_t monitors; // Monitor block top (lowest address); exposed as BasicObjectLock*.
+ uint64_t cpoolCache; // Exposed as ConstantPoolCache* by interpreter_frame_cache_addr().
+ uint64_t bcp; // Z_bcp
+ uint64_t mdx; // Exposed by interpreter_frame_mdp_addr().
+ uint64_t esp; // Z_esp
+ // Caller's original SP before modification by c2i adapter (if caller is compiled)
+ // and before top -> parent frame conversion by the interpreter entry.
+ // Note: for i2i calls a correct sender_sp is required, too, because there
+ // we cannot use the caller's top_frame_sp as sp when removing the callee
+ // frame (caller could be compiled or entry frame). Therefore the sender_sp
+ // has to be the interpreted caller's sp as TOP_IJAVA_FRAME. See also
+ // AbstractInterpreter::layout_activation() used by deoptimization.
+ uint64_t sender_sp;
+ // Own SP before modification by i2c adapter and top-2-parent-resize
+ // by interpreted callee.
+ uint64_t top_frame_sp;
+ // Slots only needed for native calls. Maybe better to move elsewhere.
+ uint64_t oop_tmp; // Address handed out by interpreter_frame_temp_oop_addr().
+ uint64_t lresult;
+ uint64_t fresult;
+ };
+
+ enum {
+ z_ijava_state_size = sizeof(z_ijava_state) // Size of the state block in bytes.
+ };
+ // Value expected in z_ijava_state.magic; like the magic field it exists in ASSERT builds only.
+#ifdef ASSERT
+ enum {
+ z_istate_magic_number = 0x900d // ~= good magic
+ };
+#endif
+// Negative byte offset of _component counted back from the end of z_ijava_state (used relative to _fp).
+#define _z_ijava_state_neg(_component) \
+ (int) (-frame::z_ijava_state_size + offset_of(frame::z_ijava_state, _component))
+
+ // ENTRY_FRAME
+ // Locals of an entry frame; entry_frame_locals() locates them directly below fp().
+ struct z_entry_frame_locals {
+ uint64_t call_wrapper_address; // Slot behind entry_frame_call_wrapper_addr().
+ uint64_t result_address;
+ uint64_t result_type;
+ uint64_t arguments_tos_address; // Base used by entry_frame_argument_at().
+ // Callee saved registers are spilled to caller frame.
+ // Caller must have z_abi_160.
+ };
+
+ enum {
+ z_entry_frame_locals_size = sizeof(z_entry_frame_locals)
+ };
+ // Negative byte offset of _component counted back from the end of z_entry_frame_locals.
+ #define _z_entry_frame_locals_neg(_component) \
+ (int) (-frame::z_entry_frame_locals_size + offset_of(frame::z_entry_frame_locals, _component))
+
+ // Frame layout for JIT generated methods
+ //
+ // In these figures the stack grows upwards, while memory grows
+ // downwards. Square brackets denote regions possibly larger than single
+ // 64 bit slots.
+ //
+ // STACK (interpreted Java calls JIT generated Java):
+ //
+ // [JIT_FRAME] <-- SP (mod 16 = 0)
+ // [TOP_IJAVA_FRAME]
+ // ...
+ //
+ //
+ // JIT_FRAME (is a C frame according to z/Architecture ABI):
+ //
+ // [out_preserve]
+ // [out_args]
+ // [spills]
+ // [monitor] (optional)
+ // ...
+ // [monitor] (optional)
+ // [in_preserve] added / removed by prolog / epilog
+
+ public:
+
+ struct z_top_jit_abi_32 { // Four 64-bit slots, 32 bytes in total.
+ uint64_t callers_sp;
+ uint64_t return_pc;
+ uint64_t toc;
+ uint64_t tmp;
+ };
+ // _z_top_jit_abi(c): byte offset of member c within z_top_jit_abi_32.
+ #define _z_top_jit_abi(_component) \
+ (offset_of(frame::z_top_jit_abi_32, _component))
+
+ struct jit_monitor { // One 64-bit word per monitor slot.
+ uint64_t monitor[1];
+ };
+
+ struct jit_in_preserve {
+ // Used to provide a z/Architecture ABI on top of a jit frame.
+ // nothing to add here!
+ };
+
+ struct jit_out_preserve : z_top_jit_abi_32 {
+ // Nothing to add here!
+ };
+ // Size in bytes of the out-preserve area (same members as z_top_jit_abi_32).
+ enum {
+ z_jit_out_preserve_size = sizeof(jit_out_preserve)
+ };
+
+ typedef enum {
+ jit_monitor_size_in_4_byte_units = sizeof(jit_monitor) / 4,
+
+ // Stack alignment requirement. Log_2 of alignment size in bits.
+ // log_2(16*8 bits) = 7.
+ jit_log_2_of_stack_alignment_in_bits = 7,
+ // jit_out_preserve consists of the four 8-byte slots of z_top_jit_abi_32.
+ jit_out_preserve_size_in_4_byte_units = sizeof(jit_out_preserve) / 4,
+ // jit_in_preserve is an empty struct (sizeof 1 in C++), so this evaluates to 0.
+ jit_in_preserve_size_in_4_byte_units = sizeof(jit_in_preserve) / 4
+ } jit_frame_constants;
+
+
+ // C2I adapter frames:
+ //
+ // STACK (interpreted called from compiled, on entry to frame manager):
+ //
+ // [TOP_C2I_FRAME]
+ // [JIT_FRAME]
+ // ...
+ //
+ //
+ // STACK (interpreted called from compiled, after interpreter has been pushed):
+ //
+ // [TOP_IJAVA_FRAME]
+ // [PARENT_C2I_FRAME]
+ // [JIT_FRAME]
+ // ...
+ //
+ //
+ // TOP_C2I_FRAME:
+ //
+ // [TOP_IJAVA_FRAME_ABI]
+ // [outgoing Java arguments]
+ // alignment (optional)
+ //
+ //
+ // PARENT_C2I_FRAME:
+ //
+ // [PARENT_IJAVA_FRAME_ABI]
+ // alignment (optional)
+ // [callee's locals w/o arguments]
+ // [outgoing Java arguments]
+ // alignment (optional)
+
+ private:
+
+ // STACK:
+ // ...
+ // [THIS_FRAME] <-- this._sp (stack pointer for this frame)
+ // [CALLER_FRAME] <-- this.fp() (_sp of caller's frame)
+ // ...
+ //
+
+ // NOTE: Stack pointer is now held in the base class, so remove it from here.
+
+ // Frame pointer for this frame; the constructors load it from own_abi()->callers_sp.
+ intptr_t* _fp;
+
+ // Needed by deoptimization. Equals sp unless a constructor is handed a distinct value.
+ intptr_t* _unextended_sp;
+
+ public:
+
+ // Interface for all frames:
+
+ // Accessors
+ // fp(): returns _fp; callers_abi() reinterprets the same address as the caller's z_abi_160.
+ inline intptr_t* fp() const { return _fp; }
+
+ private:
+
+ inline void find_codeblob_and_set_pc_and_deopt_state(address pc); // Fills in _cb, _pc, _fp and _deopt_state.
+
+ // Constructors
+ // frame(sp) takes the pc from own_abi()->return_pc; unless passed explicitly, _unextended_sp is set to sp.
+ public:
+ frame(intptr_t* sp);
+ // To be used, if sp was not extended to match callee's calling convention.
+ frame(intptr_t* sp, address pc);
+ frame(intptr_t* sp, address pc, intptr_t* unextended_sp);
+
+ // Slot access relative to the stack pointer: address of slot 'index', and its contents.
+ inline intptr_t* sp_addr_at(int index) const { return sp() + index; }
+ inline intptr_t sp_at( int index) const { return sp()[index]; }
+
+ // ABI views: this frame's own area starts at sp(), the caller's area at fp().
+ inline z_abi_16* own_abi() const { return reinterpret_cast<z_abi_16*>(sp()); }
+ inline z_abi_160* callers_abi() const { return reinterpret_cast<z_abi_160*>(fp()); }
+
+ private:
+
+ intptr_t* compiled_sender_sp(CodeBlob* cb) const; // NOTE(review): defined outside this header.
+ address* compiled_sender_pc_addr(CodeBlob* cb) const; // NOTE(review): defined outside this header.
+
+ address* sender_pc_addr(void) const; // Yields &callers_abi()->return_pc.
+
+ public:
+
+ // Additional interface for interpreter frames:
+ static int interpreter_frame_interpreterstate_size_in_bytes(); // Returns z_ijava_state_size.
+ static int interpreter_frame_monitor_size_in_bytes(); // interpreter_frame_monitor_size() * wordSize.
+
+ private:
+
+ // Template interpreter state: the z_ijava_state block directly below fp().
+ inline z_ijava_state* ijava_state() const;
+
+ // Where z_ijava_state.monitors is saved.
+ inline BasicObjectLock** interpreter_frame_monitors_addr() const;
+ // Where z_ijava_state.esp is saved.
+ inline intptr_t** interpreter_frame_esp_addr() const;
+
+ public:
+ inline intptr_t* interpreter_frame_top_frame_sp(); // Reads z_ijava_state.top_frame_sp.
+ inline void interpreter_frame_set_tos_address(intptr_t* x); // Stores x - 1 into z_ijava_state.esp.
+ inline void interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp); // Writes z_ijava_state.top_frame_sp.
+ inline void interpreter_frame_set_sender_sp(intptr_t* sender_sp); // Writes z_ijava_state.sender_sp.
+#ifdef ASSERT
+ inline void interpreter_frame_set_magic(); // Writes z_istate_magic_number into z_ijava_state.magic.
+#endif
+
+ // monitors:
+
+ // Next two functions read and write z_ijava_state.monitors.
+ private:
+ inline BasicObjectLock* interpreter_frame_monitors() const;
+ inline void interpreter_frame_set_monitors(BasicObjectLock* monitors);
+
+ public:
+
+ // Additional interface for entry frames: the locals end where fp() points.
+ inline z_entry_frame_locals* entry_frame_locals() const {
+   address const locals_start = ((address) fp()) - z_entry_frame_locals_size;
+   return (z_entry_frame_locals*) locals_start;
+ }
+ public:
+
+ // Get caller pc from stack slot of gpr14 (callers_abi()->gpr14).
+ address native_sender_pc() const;
+ // Get caller pc from stack slot of gpr10 (callers_abi()->gpr10).
+ address callstub_sender_pc() const;
+
+ // Dump all frames starting at a given C stack pointer.
+ // max_frames: Limit number of traced frames (defaults to 0).
+ // <= 0 --> full trace
+ // > 0 --> trace the #max_frames topmost frames
+ static void back_trace(outputStream* st, intptr_t* start_sp, intptr_t* top_pc,
+ unsigned long flags, int max_frames = 0);
+
+ enum {
+ // Offset from the pc remembered by call instructions to the location
+ // where control returns to after a normal return. Most architectures
+ // remember the return location directly, i.e. the offset is zero.
+ // This is the case for z/Architecture, too: the normal return address
+ // is the instruction following the branch.
+ //
+ // Hence pc_return_offset is 0.
+ pc_return_offset = 0,
+ };
+
+#endif // CPU_S390_VM_FRAME_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/frame_s390.inline.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_FRAME_S390_INLINE_HPP
+#define CPU_S390_VM_FRAME_S390_INLINE_HPP
+
+#include "code/codeCache.hpp"
+#include "code/vmreg.inline.hpp"
+
+// Inline functions for z/Architecture frames:
+
+inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) {
+  assert(pc != NULL, "precondition: must have PC");
+
+  // Record pc and blob first: get_deopt_original_pc() is handed this frame.
+  _pc = pc;
+  _cb = CodeCache::find_blob(pc);
+  // The caller's SP stored in our own ABI area becomes the frame pointer.
+  _fp = (intptr_t *) own_abi()->callers_sp;
+
+  // A non-NULL original pc replaces _pc and marks the frame as deoptimized.
+  address const deopt_pc = nmethod::get_deopt_original_pc(this);
+  _deopt_state = (deopt_pc != NULL) ? is_deoptimized : not_deoptimized;
+  if (deopt_pc != NULL) {
+    _pc = deopt_pc;
+  }
+
+  assert(((uint64_t)_sp & 0x7) == 0, "SP must be 8-byte aligned");
+}
+
+// Constructors
+
+// Default constructor: every pointer field becomes NULL and the deopt state 'unknown'; no code blob lookup happens.
+inline frame::frame() : _sp(NULL), _unextended_sp(NULL), _fp(NULL), _cb(NULL), _pc(NULL), _deopt_state(unknown) {}
+
+inline frame::frame(intptr_t* sp) : _sp(sp), _unextended_sp(sp) {
+ find_codeblob_and_set_pc_and_deopt_state((address)own_abi()->return_pc); // pc is read from the frame's own ABI area.
+}
+
+inline frame::frame(intptr_t* sp, address pc) : _sp(sp), _unextended_sp(sp) {
+ find_codeblob_and_set_pc_and_deopt_state(pc); // Also sets _cb, _fp and _deopt_state; _unextended_sp stays as initialized.
+}
+
+inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp), _unextended_sp(unextended_sp) {
+ find_codeblob_and_set_pc_and_deopt_state(pc); // Also sets _cb, _fp and _deopt_state; _unextended_sp stays as initialized.
+}
+
+// Generic constructor taking untyped pointers. Used by pns() in debug.cpp only; absent from PRODUCT builds.
+#ifndef PRODUCT
+inline frame::frame(void* sp, void* pc, void* unextended_sp) :
+ _sp((intptr_t*)sp), _unextended_sp((intptr_t*)unextended_sp), _cb(NULL), _pc(NULL) {
+ find_codeblob_and_set_pc_and_deopt_state((address)pc); // Overwrites _cb and _pc, and sets _fp and _deopt_state.
+}
+#endif
+
+// Template interpreter state: the block lies z_ijava_state_size bytes below fp().
+inline frame::z_ijava_state* frame::ijava_state() const {
+  z_ijava_state* state = (z_ijava_state*) (((address) fp()) - z_ijava_state_size);
+  assert(state->magic == (intptr_t) frame::z_istate_magic_number,
+         "wrong z_ijava_state in interpreter frame (no magic found)");
+  return state;
+}
+
+// Address of the 'monitors' slot inside this frame's z_ijava_state
+// (obtained through ijava_state(), so its debug-build magic check applies).
+inline BasicObjectLock** frame::interpreter_frame_monitors_addr() const { return (BasicObjectLock**) &ijava_state()->monitors; }
+
+// The next two functions read and write z_ijava_state.monitors
+// through the slot address supplied by interpreter_frame_monitors_addr().
+inline BasicObjectLock* frame::interpreter_frame_monitors() const { return interpreter_frame_monitors_addr()[0]; }
+
+inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) {
+  interpreter_frame_monitors_addr()[0] = monitors;
+}
+
+// Accessors
+
+// Unique id of this frame. Ids must allow testing for identity and for the
+// younger/older relationship; NULL stands for an invalid (incomparable) frame.
+// The id is the frame pointer: _sp or _unextended_sp wouldn't be correct due
+// to resizing.
+inline intptr_t* frame::id(void) const {
+  return fp();
+}
+
+// True if this frame is a more recent activation (younger) than the frame
+// denoted by id.
+inline bool frame::is_younger(intptr_t* id) const {
+  assert(this->id() != NULL && id != NULL, "NULL frame id");
+  // z/Architecture stacks grow towards smaller addresses: younger means smaller id.
+  return id > this->id();
+}
+
+// True if this frame is a less recent activation (older) than the frame
+// denoted by id.
+inline bool frame::is_older(intptr_t* id) const {
+  assert(this->id() != NULL && id != NULL, "NULL frame id");
+  // z/Architecture stacks grow towards smaller addresses: older means larger id.
+  return id < this->id();
+}
+
+// Size of this frame counted in intptr_t slots: the distance from sp() up to
+// sender_sp(). The sender is at the higher address because the stack grows
+// towards smaller addresses on z/Linux. The map argument is not used.
+inline int frame::frame_size(RegisterMap* map) const { return sender_sp() - sp(); }
+
+// Ignore c2i adapter frames.
+// Plain accessor: hands back _unextended_sp exactly as stored in this frame
+// object; nothing is recomputed here.
+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; }
+
+// Return pc of the caller, taken from the return_pc slot of the ABI area
+// that fp() points to (callers_abi()).
+inline address frame::sender_pc() const { return (address) callers_abi()->return_pc; }
+
+// Caller pc for a native caller: read from the gpr14 slot of the caller's
+// z_abi_160 area (callers_abi()).
+// NOTE(review): only meaningful if the caller really is native - confirm at call sites.
+inline address frame::native_sender_pc() const { return (address) callers_abi()->gpr14; }
+
+// Caller pc as saved by the call stub: read from the gpr10 slot of the
+// caller's z_abi_160 area (callers_abi()).
+// NOTE(review): which frames store a pc in gpr10 is not visible here.
+inline address frame::callstub_sender_pc() const { return (address) callers_abi()->gpr10; }
+
+// Address of the caller's return_pc slot inside callers_abi(), i.e. the
+// location sender_pc() reads from.
+inline address* frame::sender_pc_addr() const { return (address*) &callers_abi()->return_pc; }
+
+// SP of the sender: the address of the caller's ABI area, which is the
+// value fp() holds (callers_abi() is a cast of fp()).
+inline intptr_t* frame::sender_sp() const { return (intptr_t*) callers_abi(); }
+
+// The callers_sp word stored in the ABI area that fp() points to,
+// returned as a pointer.
+inline intptr_t* frame::link() const { return (intptr_t*) callers_abi()->callers_sp; }
+
+// Address of the 'locals' slot (Z_locals) in this frame's z_ijava_state;
+// the slot content is treated as intptr_t*.
+inline intptr_t** frame::interpreter_frame_locals_addr() const { return (intptr_t**) &ijava_state()->locals; }
+
+// Address of the 'bcp' slot (Z_bcp) in this frame's z_ijava_state,
+// typed as pointer to intptr_t.
+inline intptr_t* frame::interpreter_frame_bcp_addr() const { return (intptr_t*) &ijava_state()->bcp; }
+
+// Address of the 'mdx' slot in this frame's z_ijava_state,
+// typed as pointer to intptr_t.
+inline intptr_t* frame::interpreter_frame_mdp_addr() const { return (intptr_t*) &ijava_state()->mdx; }
+
+// Bottom (base) of the expression stack (highest address): the intptr_t
+// slot immediately below the address that interpreter_frame_monitor_end()
+// returns.
+inline intptr_t* frame::interpreter_frame_expression_stack() const { return ((intptr_t*) interpreter_frame_monitor_end()) - 1; }
+
+// Direction of expression stack growth: -1, matching a stack whose base is
+// at the highest and whose top is at the lowest address.
+inline jint frame::interpreter_frame_expression_stack_direction() { return -1; }
+
+// Address of the slot 'offset' entries away from the top-of-stack address
+// (offset 0 is interpreter_frame_tos_address() itself).
+inline intptr_t* frame::interpreter_frame_tos_at(jint offset) const { return interpreter_frame_tos_address() + offset; }
+
+
+// monitor elements
+
+// End is lower in memory than begin, and beginning element is oldest element.
+// Also begin is one past last monitor.
+
+// Reads the top_frame_sp slot of this frame's z_ijava_state and returns
+// it as a pointer.
+inline intptr_t* frame::interpreter_frame_top_frame_sp() { return (intptr_t*) ijava_state()->top_frame_sp; }
+
+// Stores top_frame_sp into the top_frame_sp slot of this frame's
+// z_ijava_state.
+inline void frame::interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp) { ijava_state()->top_frame_sp = (intptr_t) top_frame_sp; }
+
+// Stores sender_sp into the sender_sp slot of this frame's
+// z_ijava_state.
+inline void frame::interpreter_frame_set_sender_sp(intptr_t* sender_sp) { ijava_state()->sender_sp = (intptr_t) sender_sp; }
+
+#ifdef ASSERT
+// Writes z_istate_magic_number into the magic slot of this frame's z_ijava_state (ASSERT builds only).
+inline void frame::interpreter_frame_set_magic() { // Not via ijava_state(): its assert demands the magic written here.
+  z_ijava_state* state = (z_ijava_state*) ((uintptr_t)fp() - z_ijava_state_size);
+  state->magic = (intptr_t) frame::z_istate_magic_number;
+}
+#endif
+// Where z_ijava_state.esp is saved: address of the 'esp' slot (Z_esp) of
+// this frame's state block. The slot content is treated as intptr_t*; the
+// tos accessors read and write the slot through this address.
+inline intptr_t** frame::interpreter_frame_esp_addr() const { return (intptr_t**) &ijava_state()->esp; }
+
+// Top of expression stack (lowest address).
+// The saved esp is one slot below the top element, so the top-of-stack
+// address is the saved esp plus one slot.
+inline intptr_t* frame::interpreter_frame_tos_address() const { return interpreter_frame_esp_addr()[0] + 1; }
+
+// Counterpart of interpreter_frame_tos_address(): the esp slot receives
+// the address one slot below the given top-of-stack address x.
+inline void frame::interpreter_frame_set_tos_address(intptr_t* x) { interpreter_frame_esp_addr()[0] = x - 1; }
+
+// Stack slot needed for native calls and GC.
+// The oop_tmp slot is addressed relative to _fp using the negative
+// offset that _z_ijava_state_neg(oop_tmp) computes.
+inline oop * frame::interpreter_frame_temp_oop_addr() const { return (oop *) (((address) _fp) + _z_ijava_state_neg(oop_tmp)); }
+
+// In keeping with Intel side: end is lower in memory than begin.
+// Beginning element is oldest element. Also begin is one past last monitor.
+// On this platform 'begin' is the address of the z_ijava_state block, which
+// bounds the monitor block at its high-address end.
+inline BasicObjectLock * frame::interpreter_frame_monitor_begin() const { return (BasicObjectLock*) ijava_state(); }
+
+// 'End' of the monitor block: the value kept in z_ijava_state.monitors
+// (monitor block top, i.e. lowest address).
+inline BasicObjectLock * frame::interpreter_frame_monitor_end() const { return interpreter_frame_monitors(); }
+
+// Sets the 'end' of the monitor block by storing monitors into
+// z_ijava_state.monitors.
+inline void frame::interpreter_frame_set_monitor_end(BasicObjectLock* monitors) { interpreter_frame_set_monitors(monitors); }
+
+inline int frame::interpreter_frame_monitor_size() {
+  // Stack slots per monitor, rounded up to a multiple of the slot count of a Java long.
+  int const raw_slots = BasicObjectLock::size();
+  return round_to(raw_slots, WordsPerLong);
+}
+
+inline int frame::interpreter_frame_monitor_size_in_bytes() {
+  int const slots_per_monitor = frame::interpreter_frame_monitor_size();
+  return slots_per_monitor * wordSize; // Number of bytes for a monitor.
+}
+
+// Size in bytes of the interpreter state block of a frame;
+// z_ijava_state_size is defined as sizeof(z_ijava_state).
+inline int frame::interpreter_frame_interpreterstate_size_in_bytes() { return z_ijava_state_size; }
+
+// Address of the 'method' slot in this frame's z_ijava_state; the slot
+// content is treated as Method*.
+inline Method** frame::interpreter_frame_method_addr() const { return (Method**) &ijava_state()->method; }
+
+// Address of the 'mirror' slot in this frame's z_ijava_state; the slot
+// content is treated as oop.
+inline oop* frame::interpreter_frame_mirror_addr() const { return (oop*) &ijava_state()->mirror; }
+
+// Constant pool cache
+
+// Address of the 'cpoolCache' slot in this frame's z_ijava_state; the slot
+// content is treated as ConstantPoolCache*.
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { return (ConstantPoolCache**) &ijava_state()->cpoolCache; }
+
+// entry frames
+
+inline intptr_t* frame::entry_frame_argument_at(int offset) const {
+  // An entry frame always calls the interpreter first, so the parameters
+  // live on the stack and are addressed relative to the tos address that
+  // was recorded in the entry frame locals.
+  intptr_t* const recorded_tos = (intptr_t*) entry_frame_locals()->arguments_tos_address;
+  return recorded_tos + (offset + 1); // prepushed tos
+}
+
+// Address of the call_wrapper_address slot in the entry frame locals; the
+// slot content is treated as JavaCallWrapper*.
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { return (JavaCallWrapper**) &(entry_frame_locals()->call_wrapper_address); }
+
+// Loads the oop result from the location the register map records for
+// Z_R2. R2 is return register.
+inline oop frame::saved_oop_result(RegisterMap* map) const { return ((oop*) map->location(Z_R2->as_VMReg()))[0]; }
+
+// Stores obj into the location the register map records for Z_R2.
+// R2 is return register.
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ((oop*) map->location(Z_R2->as_VMReg()))[0] = obj; }
+
+// On this platform the "real" frame pointer is the ordinary one: the
+// result is always identical to fp().
+inline intptr_t* frame::real_fp() const { return _fp; }
+
+#endif // CPU_S390_VM_FRAME_S390_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/globalDefinitions_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP
+#define CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP
+
+#ifdef CC_INTERP
+#error "CC_INTERP is not supported on z/Architecture."
+#endif
+
+// Convenience macro that produces a string literal with the filename
+// and linenumber of the location where the macro was used.
+#ifndef FILE_AND_LINE
+#define FILE_AND_LINE __FILE__ ":" XSTR(__LINE__)
+#endif
+// NOTE(review): ShortenBranches is hard-wired to true; its consumers are not visible in this file.
+#define ShortenBranches true
+// 16 bytes, matching the "SP (mod 16 = 0)" requirement noted for JIT frames in frame_s390.hpp.
+const int StackAlignmentInBytes = 16;
+// NOTE(review): presumably tells shared code that a native 8-byte compare-and-exchange exists - confirm there.
+#define SUPPORTS_NATIVE_CX8
+
+// Indicates whether the C calling conventions require that
+// 32-bit integer argument values are extended to 64 bits.
+// This is the case on z/Architecture.
+const bool CCallingConventionRequiresIntsAsLongs = true;
+
+// Contended Locking reorder and cache line bucket.
+// This setting should be kept compatible with vm_version_s390.cpp.
+// The expected size in bytes of a cache line, used to pad data structures.
+#define DEFAULT_CACHE_LINE_SIZE 256
+
+#endif // CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/globals_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_GLOBALS_S390_HPP
+#define CPU_S390_VM_GLOBALS_S390_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+// Sorted according to sparc.
+
+// z/Architecture remembers branch targets, so don't share vtables.
+define_pd_global(bool, ShareVtableStubs, false);
+define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
+
+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks.
+define_pd_global(bool, TrapBasedNullChecks, true);
+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast.
+
+define_pd_global(uintx, CodeCacheSegmentSize, 256);
+// CodeEntryAlignment shall be at least 32 for proper branch target alignment.
+// Ideally, it would be 256 (cache line size), which keeps code and data
+// on separate lines. But we reduced it to 64 since 256 increased
+// code size significantly by padding nops between IVC and second UEP.
+define_pd_global(intx, CodeEntryAlignment, 64);
+define_pd_global(intx, OptoLoopAlignment, 2);
+define_pd_global(intx, InlineFrequencyCount, 100);
+define_pd_global(intx, InlineSmallCode, 2000);
+
+#define DEFAULT_STACK_YELLOW_PAGES (2)
+#define DEFAULT_STACK_RED_PAGES (1)
+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the
+// stack. To pass stack overflow tests we need 20 shadow pages (DEBUG_ONLY adds 2 more).
+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+2))
+#define DEFAULT_STACK_RESERVED_PAGES (0)
+// The minimum page counts below equal the defaults above (reserved pages: 0).
+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES
+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES
+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
+#define MIN_STACK_RESERVED_PAGES (0)
+// The flag defaults are the DEFAULT_STACK_* values.
+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES);
+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES);
+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES);
+
+define_pd_global(bool, RewriteBytecodes, true);
+define_pd_global(bool, RewriteFrequentPairs, true);
+
+define_pd_global(bool, UseMembar, false);
+
+define_pd_global(bool, PreserveFramePointer, false);
+
+// GC Ergo Flags
+define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread.
+
+define_pd_global(uintx, TypeProfileLevel, 111);
+
+define_pd_global(bool, CompactStrings, true);
+
+// 8146801 (Short Array Allocation): No performance work done here yet.
+define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong);
+// s390-specific flags. Descriptions split over lines are adjacent string literals: each piece must carry its own separating blank.
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint, writeable) \
+ \
+ /* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
+ /* indirect call by a direct call. */ \
+ product(bool, ReoptimizeCallSequences, true, \
+ "Reoptimize code-sequences of calls at runtime.") \
+ \
+ product(bool, UseCountLeadingZerosInstruction, true, \
+ "Use count leading zeros instruction.") \
+ \
+ product(bool, UseByteReverseInstruction, true, \
+ "Use byte reverse instruction.") \
+ \
+ product(bool, ExpandLoadingBaseDecode, true, "Expand the assembler " \
+ "instruction required to load the base from DecodeN nodes during " \
+ "matching.") \
+ product(bool, ExpandLoadingBaseDecode_NN, true, "Expand the assembler " \
+ "instruction required to load the base from DecodeN_NN nodes " \
+ "during matching.") \
+ product(bool, ExpandLoadingBaseEncode, true, "Expand the assembler " \
+ "instruction required to load the base from EncodeP nodes during " \
+ "matching.") \
+ product(bool, ExpandLoadingBaseEncode_NN, true, "Expand the assembler " \
+ "instruction required to load the base from EncodeP_NN nodes " \
+ "during matching.") \
+ \
+ /* Seems to pay off with 2 pages already. */ \
+ product(size_t, MVCLEThreshold, +2*(4*K), \
+ "Threshold above which page-aligned MVCLE copy/init is used.") \
+ \
+ product(bool, PreferLAoverADD, false, \
+ "Use LA/LAY instructions over ADD instructions (z/Architecture).") \
+ \
+ develop(bool, ZapEmptyStackFields, false, "Write 0x0101... to empty stack" \
+ " fields. Use this to ease stack debugging.") \
+ \
+ product(bool, TraceTraps, false, "Trace all traps the signal handler" \
+ " handles.")
+
+#endif // CPU_S390_VM_GLOBALS_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/icBuffer_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/oop.inline.hpp"
+
+#define __ masm. // Shorthand: '__ insn(...)' expands to 'masm.insn(...)'.
+
+// Size of an IC stub: one load_const sequence plus one BRUL, the two
+// things assemble_ic_buffer_code() emits.
+int InlineCacheBuffer::ic_stub_code_size() { return Assembler::z_brul_size() + MacroAssembler::load_const_size(); }
+
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_oop, address entry_point) {
+ ResourceMark rm;
+ CodeBuffer code(code_begin, ic_stub_code_size());
+ MacroAssembler masm(&code);
+ // Note: even though the code contains an embedded oop, we do not need reloc info
+ // because
+ // (1) the oop is old (i.e., doesn't matter for scavenges)
+ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear.
+ // The stub is emitted at code_begin into a buffer of ic_stub_code_size() bytes.
+ // Load the oop,
+ __ load_const(Z_method, (address) cached_oop); // inline cache reg = Z_method
+ // and do a tail-call (pc-relative).
+ __ z_brul((address) entry_point);
+ __ flush();
+}
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+  address const branch_pc = nativeMovConstReg_at(code_begin)->next_instruction_address(); // Creation also verifies the object.
+  return MacroAssembler::get_target_addr_pcrel(branch_pc); // Target of the pc-relative branch behind the load.
+}
+
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
+  // Creation of the NativeMovConstReg also verifies the object; its data() is the cached value.
+  return (void*) nativeMovConstReg_at(code_begin)->data();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/icache_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "runtime/icache.hpp"
+
+// interface (see ICache::flush_icache_stub_t):
+// address addr (Z_R2, ignored)
+// int lines (Z_R3, ignored)
+// int magic (Z_R4)
+//
+// returns: int (Z_R2)
+//
+// Note: z/Architecture doesn't need explicit flushing, so this is implemented as a nop.
+int z_flush_icache(address start, int lines, int magic) {
+  return magic;  // C function that does no work: start and lines are ignored.
+}
+
+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
+  // No stub code is generated: the C function z_flush_icache is installed as the flush stub.
+  *flush_icache_stub = (ICache::flush_icache_stub_t)z_flush_icache;
+  // First call to flush itself.
+  ICache::invalidate_range((address)(*flush_icache_stub), 0);
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/icache_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_ICACHE_S390_HPP
+#define CPU_S390_VM_ICACHE_S390_HPP
+
+// Interface for updating the instruction cache. Whenever the VM modifies
+// code, part of the processor instruction cache potentially has to be flushed.
+
+class ICache : public AbstractICache {
+ public:
+ enum {
+ stub_size = 0, // Size of the icache flush stub in bytes.
+ line_size = 2, // There is no explicit flushing on z/Architecture.
+ // This value is ignored by the flush stub (a nop !).
+ log2_line_size = 1 // log2(line_size).
+ };
+
+ // Use default implementation.
+};
+
+#endif // CPU_S390_VM_ICACHE_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/interp_masm_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,2127 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by AHa, AS, JL, ML.
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interp_masm_s390.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.inline.hpp"
+
+// Implementation of InterpreterMacroAssembler.
+// This file specializes the assembler with interpreter-specific macros.
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str)
+#define BIND(label) bind(label);
+#else // !PRODUCT
+#define BLOCK_COMMENT(str) block_comment(str)
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+#endif // PRODUCT
+
+void InterpreterMacroAssembler::jump_to_entry(address entry, Register Rscratch) { // Emit an unconditional branch to entry.
+ assert(entry != NULL, "Entry must have been generated by now");
+ assert(Rscratch != Z_R0, "Can't use R0 for addressing"); // Rscratch is only checked here; the branch below does not use it.
+ branch_optimized(Assembler::bcondAlways, entry);
+}
+
+void InterpreterMacroAssembler::empty_expression_stack(void) { // Reset Z_esp relative to the monitors pointer. Kills Z_R1_scratch.
+ get_monitors(Z_R1_scratch); // Z_R1_scratch = monitors slot of the ijava state.
+ add2reg(Z_esp, -Interpreter::stackElementSize, Z_R1_scratch); // Z_esp = monitors - one stack element.
+}
+
+// Dispatch code executed in the prolog of a bytecode which does not do its
+// own dispatch.
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int bcp_incr) {
+ // On z/Architecture we are short on registers, therefore we do not preload the
+ // dispatch address of the next bytecode. Intentionally emits no code.
+}
+
+// Dispatch code executed in the epilog of a bytecode which does not do its
+// own dispatch.
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
+ dispatch_next(state, step); // step is the bcp increment to the next bytecode.
+}
+
+void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) {
+ z_llgc(Z_bytecode, bcp_incr, Z_R0, Z_bcp); // Load next bytecode from Z_bcp + bcp_incr (before Z_bcp is advanced).
+ add2reg(Z_bcp, bcp_incr); // Advance bcp. Add2reg produces optimal code.
+ dispatch_base(state, Interpreter::dispatch_table(state)); // Jump via the active dispatch table for this tos state.
+}
+
+// Common code to dispatch and dispatch_only.
+// Dispatches on the bytecode value in Z_bytecode. Z_bcp is not modified here.
+
+void InterpreterMacroAssembler::dispatch_base(TosState state, address* table) {
+ verify_FPU(1, state);
+
+#ifdef ASSERT
+ address reentry = NULL;
+ { Label OK;
+ // Check if the frame pointer in Z_fp is correct.
+ z_cg(Z_fp, 0, Z_SP);
+ z_bre(OK);
+ reentry = stop_chain_static(reentry, "invalid frame pointer Z_fp: " FILE_AND_LINE);
+ bind(OK);
+ }
+ { Label OK;
+ // Check if the locals pointer in Z_locals is correct.
+ z_cg(Z_locals, _z_ijava_state_neg(locals), Z_fp);
+ z_bre(OK);
+ reentry = stop_chain_static(reentry, "invalid locals pointer Z_locals: " FILE_AND_LINE);
+ bind(OK);
+ }
+#endif
+
+ // TODO: Maybe implement +VerifyActivationFrameSize here.
+ // verify_thread(); // Too slow. We will just verify on method entry & exit.
+ verify_oop(Z_tos, state);
+#ifdef FAST_DISPATCH
+ if (table == Interpreter::dispatch_table(state)) {
+ // NOTE(review): this branch uses names (Lbyte_code, IdispatchTables, G3_scratch) that appear nowhere else in this file; presumably FAST_DISPATCH is never defined for this port - confirm.
+ add(Lbyte_code, Interpreter::distance_from_dispatch_table(state), Lbyte_code);
+ // Add offset to correct dispatch table.
+ sll(Lbyte_code, LogBytesPerWord, Lbyte_code); // Multiply by wordSize.
+ ld_ptr(IdispatchTables, Lbyte_code, G3_scratch); // Get entry addr.
+ } else
+#endif
+ {
+ // Dispatch table to use.
+ load_absolute_address(Z_tmp_1, (address) table); // Z_tmp_1 = table;
+
+ // 0 <= Z_bytecode < 256 => Use a 32 bit shift, because it is shorter than sllg.
+ // Z_bytecode must have been loaded zero-extended for this approach to be correct.
+ z_sll(Z_bytecode, LogBytesPerWord, Z_R0); // Multiply by wordSize. Z_bytecode is modified.
+ z_lg(Z_tmp_1, 0, Z_bytecode, Z_tmp_1); // Get entry addr.
+ }
+ z_br(Z_tmp_1); // Branch to the table entry loaded into Z_tmp_1.
+}
+
+void InterpreterMacroAssembler::dispatch_only(TosState state) { // Dispatch via the active table; does not load Z_bytecode itself.
+ dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { // Same as dispatch_only, but always uses the normal table.
+ dispatch_base(state, Interpreter::normal_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_via(TosState state, address *table) {
+ // Load current bytecode (the byte at Z_bcp; Z_bcp is not advanced).
+ z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t)0));
+ dispatch_base(state, table); // Dispatch through the caller-supplied table.
+}
+
+// The following call_VM*_base() methods overload and mask the respective
+// declarations/definitions in class MacroAssembler. They are meant as a "detour"
+// to perform additional, template interpreter specific tasks before actually
+// calling their MacroAssembler counterparts.
+
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point) {
+ bool allow_relocation = true; // Generally valid variant. Assume code is relocated.
+ // interpreter specific
+ // Note: No need to save/restore the bcp (Z_R13) pointer since it lives in a callee
+ // saved register and no blocking/ GC can happen in leaf calls.
+
+ // super call
+ MacroAssembler::call_VM_leaf_base(entry_point, allow_relocation);
+}
+
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
+ // interpreter specific
+ // Note: No need to save/restore the bcp (Z_R13) pointer since it lives in a callee
+ // saved register and no blocking/ GC can happen in leaf calls.
+
+ // super call (allow_relocation is passed through unchanged)
+ MacroAssembler::call_VM_leaf_base(entry_point, allow_relocation);
+}
+
+void InterpreterMacroAssembler::call_VM_base(Register oop_result, Register last_java_sp,
+ address entry_point, bool check_exceptions) {
+ bool allow_relocation = true; // Generally valid variant. Assume code is relocated.
+ // interpreter specific
+
+ save_bcp(); // Spill bcp and esp into the ijava state before the call.
+ save_esp();
+ // super call
+ MacroAssembler::call_VM_base(oop_result, last_java_sp,
+ entry_point, allow_relocation, check_exceptions);
+ restore_bcp(); // Only bcp is reloaded afterwards; esp is saved above but not restored here.
+}
+
+void InterpreterMacroAssembler::call_VM_base(Register oop_result, Register last_java_sp,
+ address entry_point, bool allow_relocation,
+ bool check_exceptions) {
+ // interpreter specific
+
+ save_bcp(); // Spill bcp and esp into the ijava state before the call.
+ save_esp();
+ // super call (allow_relocation is passed through unchanged)
+ MacroAssembler::call_VM_base(oop_result, last_java_sp,
+ entry_point, allow_relocation, check_exceptions);
+ restore_bcp(); // Only bcp is reloaded afterwards; esp is saved above but not restored here.
+}
+
+void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg) { // scratch_reg is not used in the body.
+ if (JvmtiExport::can_pop_frame()) { // Otherwise no code is emitted at all.
+ BLOCK_COMMENT("check_and_handle_popframe {");
+ Label L;
+ // Initiate popframe handling only if it is not already being
+ // processed. If the flag has the popframe_processing bit set, it
+ // means that this code is called *during* popframe handling - we
+ // don't want to reenter.
+ // TODO: Check if all four state combinations could be visible.
+ // If (processing and !pending) is an invisible/impossible state,
+ // there is optimization potential by testing both bits at once.
+ // Then, All_Zeroes and All_Ones means skip, Mixed means doit.
+ testbit(Address(Z_thread, JavaThread::popframe_condition_offset()),
+ exact_log2(JavaThread::popframe_pending_bit));
+ z_bfalse(L); // Pending bit not set: nothing to do.
+ testbit(Address(Z_thread, JavaThread::popframe_condition_offset()),
+ exact_log2(JavaThread::popframe_processing_bit));
+ z_btrue(L); // Already processing: do not reenter.
+
+ // Call Interpreter::remove_activation_preserving_args_entry() to get the
+ // address of the same-named entrypoint in the generated interpreter code.
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
+ // The above call should (as its only effect) return the contents of the field
+ // _remove_activation_preserving_args_entry in Z_RET.
+ // We just jump there to have the work done.
+ z_br(Z_RET);
+ // There is no way for control to fall thru here.
+
+ bind(L);
+ BLOCK_COMMENT("} check_and_handle_popframe");
+ }
+}
+
+
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) { // Kills Z_R1_scratch and Z_R0_scratch.
+ Register RjvmtiState = Z_R1_scratch;
+ int tos_off = in_bytes(JvmtiThreadState::earlyret_tos_offset());
+ int oop_off = in_bytes(JvmtiThreadState::earlyret_oop_offset());
+ int val_off = in_bytes(JvmtiThreadState::earlyret_value_offset());
+ int state_off = in_bytes(JavaThread::jvmti_thread_state_offset());
+
+ z_lg(RjvmtiState, state_off, Z_thread); // RjvmtiState = jvmti thread state field of the current thread.
+
+ switch (state) { // Load the early-return value into the tos register matching 'state'.
+ case atos: z_lg(Z_tos, oop_off, RjvmtiState);
+ store_const(Address(RjvmtiState, oop_off), 0L, 8, 8, Z_R0_scratch); // Clear the oop field after reading it.
+ break;
+ case ltos: z_lg(Z_tos, val_off, RjvmtiState); break;
+ case btos: // fall through
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: // fall through
+ case itos: z_llgf(Z_tos, val_off, RjvmtiState); break;
+ case ftos: z_le(Z_ftos, val_off, RjvmtiState); break;
+ case dtos: z_ld(Z_ftos, val_off, RjvmtiState); break;
+ case vtos: /* nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+
+ // Clean up tos value in the jvmti thread state (done for every state, including atos and vtos).
+ store_const(Address(RjvmtiState, val_off), 0L, 8, 8, Z_R0_scratch);
+ // Set tos state field to illegal value.
+ store_const(Address(RjvmtiState, tos_off), ilgl, 4, 1, Z_R0_scratch);
+}
+
+void InterpreterMacroAssembler::check_and_handle_earlyret(Register scratch_reg) { // scratch_reg is not used in the body.
+ if (JvmtiExport::can_force_early_return()) { // Otherwise no code is emitted at all.
+ BLOCK_COMMENT("check_and_handle_earlyret {");
+ Label L;
+ // arg regs are safe to use, because we are just behind the call in call_VM_base
+ Register jvmti_thread_state = Z_ARG2;
+ Register tmp = Z_ARG3;
+ load_and_test_long(jvmti_thread_state, Address(Z_thread, JavaThread::jvmti_thread_state_offset()));
+ z_bre(L); // if (thread->jvmti_thread_state() == NULL) exit;
+
+ // Initiate earlyret handling only if it is not already being processed.
+ // If the flag has the earlyret_processing bit set, it means that this code
+ // is called *during* earlyret handling - we don't want to reenter.
+
+ assert((JvmtiThreadState::earlyret_pending != 0) && (JvmtiThreadState::earlyret_inactive == 0),
+ "must fix this check, when changing the values of the earlyret enum");
+ assert(JvmtiThreadState::earlyret_pending == 1, "must fix this check, when changing the values of the earlyret enum");
+
+ load_and_test_int(tmp, Address(jvmti_thread_state, JvmtiThreadState::earlyret_state_offset()));
+ z_brz(L); // if (thread->jvmti_thread_state()->_earlyret_state != JvmtiThreadState::earlyret_pending) exit;
+
+ // Call Interpreter::remove_activation_early_entry() to get the address of the
+ // same-named entrypoint in the generated interpreter code.
+ assert(sizeof(TosState) == 4, "unexpected size");
+ z_l(Z_ARG1, Address(jvmti_thread_state, JvmtiThreadState::earlyret_tos_offset()));
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), Z_ARG1);
+ // The above call should (as its only effect) return, in Z_RET, the result of
+ // Interpreter::remove_activation_early_entry() for the tos state passed in Z_ARG1.
+ // We just jump there to have the work done.
+ z_br(Z_RET);
+ // There is no way for control to fall thru here.
+
+ bind(L);
+ BLOCK_COMMENT("} check_and_handle_earlyret");
+ }
+}
+
+void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { // Two-argument leaf call that goes straight to MacroAssembler.
+ lgr_if_needed(Z_ARG1, arg_1);
+ assert(arg_2 != Z_ARG1, "smashed argument"); // Z_ARG1 was just written above, so arg_2 must not live there.
+ lgr_if_needed(Z_ARG2, arg_2);
+ MacroAssembler::call_VM_leaf_base(entry_point, true); // true is passed as allow_relocation.
+}
+
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size) { // index = cache index operand of index_size bytes at Z_bcp + bcp_offset.
+ Address param(Z_bcp, bcp_offset);
+
+ BLOCK_COMMENT("get_cache_index_at_bcp {");
+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+ if (index_size == sizeof(u2)) {
+ load_sized_value(index, param, 2, false /*signed*/);
+ } else if (index_size == sizeof(u4)) {
+
+ load_sized_value(index, param, 4, false);
+
+ // Check if the secondary index definition is still ~x, otherwise
+ // we have to change the following assembler code to calculate the
+ // plain index.
+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
+ not_(index); // Convert to plain index.
+ } else if (index_size == sizeof(u1)) {
+ z_llgc(index, param);
+ } else {
+ ShouldNotReachHere(); // Only index sizes of 1, 2 and 4 bytes are supported.
+ }
+ BLOCK_COMMENT("}");
+}
+
+
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register cpe_offset,
+ int bcp_offset, size_t index_size) {
+ BLOCK_COMMENT("get_cache_and_index_at_bcp {");
+ assert_different_registers(cache, cpe_offset);
+ get_cache_index_at_bcp(cpe_offset, bcp_offset, index_size); // cpe_offset = entry index (scaled to bytes below).
+ z_lg(cache, Address(Z_fp, _z_ijava_state_neg(cpoolCache))); // cache = cpoolCache slot of the ijava state.
+ // Convert from field index to ConstantPoolCache offset in bytes.
+ z_sllg(cpe_offset, cpe_offset, exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord));
+ BLOCK_COMMENT("}");
+}
+
+// Kills Z_R0_scratch.
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+ Register cpe_offset,
+ Register bytecode,
+ int byte_no,
+ int bcp_offset,
+ size_t index_size) {
+ BLOCK_COMMENT("get_cache_and_index_and_bytecode_at_bcp {");
+ get_cache_and_index_at_bcp(cache, cpe_offset, bcp_offset, index_size);
+
+ // We want to load (from CP cache) the bytecode that corresponds to the passed-in byte_no.
+ // It is located at cache + cpe_offset + base_offset + indices_offset + (8-1) (last byte in DW) - (byte_no+1).
+ // Instead of loading, shifting and masking a DW, we just load that one byte of interest with an unsigned one-byte load.
+ const int base_ix_off = in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset());
+ const int off_in_DW = (8-1) - (1+byte_no);
+ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
+ assert(ConstantPoolCacheEntry::bytecode_1_mask == 0xff, "");
+ load_sized_value(bytecode, Address(cache, cpe_offset, base_ix_off+off_in_DW), 1, false /*signed*/);
+
+ BLOCK_COMMENT("}");
+}
+
+// Load object from cpool->resolved_references(index). Modifies index (scaled to a byte offset).
+void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
+ assert_different_registers(result, index);
+ get_constant_pool(result);
+
+ // Convert
+ // - from field index to resolved_references() index and
+ // - from word index to byte offset.
+ // Since this is a java object, it is potentially compressed.
+ Register tmp = index; // Alias of index; only used by the ASSERT bounds check below.
+ z_sllg(index, index, LogBytesPerHeapOop); // Offset into resolved references array.
+ // Load pointer for resolved_references[] objArray.
+ z_lg(result, ConstantPool::resolved_references_offset_in_bytes(), result);
+ // JNIHandles::resolve(result)
+ z_lg(result, 0, result); // Load resolved references array itself.
+#ifdef ASSERT
+ NearLabel index_ok;
+ z_lgf(Z_R0, Address(result, arrayOopDesc::length_offset_in_bytes())); // Z_R0 = array length.
+ z_sllg(Z_R0, Z_R0, LogBytesPerHeapOop); // Scale the length the same way as the index above.
+ compare64_and_branch(tmp, Z_R0, Assembler::bcondLow, index_ok);
+ stop("resolved reference index out of bounds", 0x09256);
+ bind(index_ok);
+#endif
+ z_agr(result, index); // Address of indexed array element.
+ load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
+}
+
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
+ Register tmp,
+ int bcp_offset,
+ size_t index_size) {
+ BLOCK_COMMENT("get_cache_entry_pointer_at_bcp {");
+ get_cache_and_index_at_bcp(cache, tmp, bcp_offset, index_size); // cache = cpool cache, tmp = entry offset in bytes.
+ add2reg_with_index(cache, in_bytes(ConstantPoolCache::base_offset()), tmp, cache); // Combine cache, tmp and base_offset into the entry pointer (result in cache).
+ BLOCK_COMMENT("}");
+}
+
+// Generate a subtype check: branch to ok_is_subtype if sub_klass is
+// a subtype of super_klass. Blows registers Rsuper_klass, Rsub_klass, Rtmp1, Rtmp2.
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
+ Register Rsuper_klass,
+ Register Rtmp1,
+ Register Rtmp2,
+ Label &ok_is_subtype) {
+ // Profile the not-null value's klass.
+ profile_typecheck(Rtmp1, Rsub_klass, Rtmp2);
+
+ // Do the check.
+ check_klass_subtype(Rsub_klass, Rsuper_klass, Rtmp1, Rtmp2, ok_is_subtype);
+
+ // Profile the failure of the check. Emitted on the path that did not branch to ok_is_subtype.
+ profile_typecheck_failed(Rtmp1, Rtmp2);
+}
+
+// Pop the topmost len elements from the stack. They just disappear.
+// Useful if consumed previously by access via stackTop().
+void InterpreterMacroAssembler::popx(int len) {
+ add2reg(Z_esp, len*Interpreter::stackElementSize); // No value is loaded; only Z_esp is adjusted.
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+// Get Address object of stack top. No checks. No pop.
+// Purpose: - Provide address of stack operand to exploit reg-mem operations.
+// - Avoid RISC-like mem2reg - reg-reg-op sequence.
+Address InterpreterMacroAssembler::stackTop() {
+ return Address(Z_esp, Interpreter::expr_offset_in_bytes(0)); // Same slot the pop_* helpers read from. Emits no code.
+}
+
+void InterpreterMacroAssembler::pop_i(Register r) { // Pop one stack element into r using z_l.
+ z_l(r, Interpreter::expr_offset_in_bytes(0), Z_esp);
+ add2reg(Z_esp, Interpreter::stackElementSize); // Release one stack element.
+ assert_different_registers(r, Z_R1_scratch); // Z_R1_scratch is the temp of verify_esp below.
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+void InterpreterMacroAssembler::pop_ptr(Register r) { // Pop one stack element into r using z_lg.
+ z_lg(r, Interpreter::expr_offset_in_bytes(0), Z_esp);
+ add2reg(Z_esp, Interpreter::stackElementSize); // Release one stack element.
+ assert_different_registers(r, Z_R1_scratch); // Z_R1_scratch is the temp of verify_esp below.
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+void InterpreterMacroAssembler::pop_l(Register r) { // Pop a long: value is read from slot 0, two stack elements are released.
+ z_lg(r, Interpreter::expr_offset_in_bytes(0), Z_esp);
+ add2reg(Z_esp, 2*Interpreter::stackElementSize);
+ assert_different_registers(r, Z_R1_scratch); // Z_R1_scratch is the temp of verify_esp below.
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+void InterpreterMacroAssembler::pop_f(FloatRegister f) { // Pop one stack element into f.
+ mem2freg_opt(f, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)), false); // 'false' presumably selects a single-precision load (pop_d passes true) - confirm.
+ add2reg(Z_esp, Interpreter::stackElementSize);
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+void InterpreterMacroAssembler::pop_d(FloatRegister f) { // Pop a double: value is read from slot 0, two stack elements are released.
+ mem2freg_opt(f, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)), true); // 'true' presumably selects a double-precision load (pop_f passes false) - confirm.
+ add2reg(Z_esp, 2*Interpreter::stackElementSize);
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+}
+
+void InterpreterMacroAssembler::push_i(Register r) { // Push r (stored with z_st) as one stack element.
+ assert_different_registers(r, Z_R1_scratch); // Z_R1_scratch is the temp of verify_esp below.
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+ z_st(r, Address(Z_esp)); // Store at the current Z_esp, then move Z_esp down.
+ add2reg(Z_esp, -Interpreter::stackElementSize);
+}
+
+void InterpreterMacroAssembler::push_ptr(Register r) { // NOTE(review): unlike the other push_* helpers there is no debug-only verify_esp here - confirm this is intentional.
+ z_stg(r, Address(Z_esp)); // Store at the current Z_esp, then move Z_esp down.
+ add2reg(Z_esp, -Interpreter::stackElementSize);
+}
+
+void InterpreterMacroAssembler::push_l(Register r) { // Push a long; it occupies two stack elements.
+ assert_different_registers(r, Z_R1_scratch); // Z_R1_scratch is the temp of verify_esp below.
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+ int offset = -Interpreter::stackElementSize;
+ z_stg(r, Address(Z_esp, offset)); // The value goes into the lower-addressed of the two slots.
+ clear_mem(Address(Z_esp), Interpreter::stackElementSize); // The other slot (at Z_esp) is cleared.
+ add2reg(Z_esp, 2 * offset); // Move Z_esp down by two elements.
+}
+
+void InterpreterMacroAssembler::push_f(FloatRegister f) { // Push f as one stack element.
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+ freg2mem_opt(f, Address(Z_esp), false); // 'false' presumably selects a single-precision store - confirm.
+ add2reg(Z_esp, -Interpreter::stackElementSize);
+}
+
+void InterpreterMacroAssembler::push_d(FloatRegister d) { // Push a double; it occupies two stack elements.
+ debug_only(verify_esp(Z_esp, Z_R1_scratch));
+ int offset = -Interpreter::stackElementSize;
+ freg2mem_opt(d, Address(Z_esp, offset)); // Stored into the lower-addressed slot; unlike push_l, the slot at Z_esp is not cleared.
+ add2reg(Z_esp, 2 * offset); // Move Z_esp down by two elements.
+}
+
+void InterpreterMacroAssembler::push(TosState state) { // Push the tos value for 'state'; the push_* helpers are called with their default arguments.
+ verify_oop(Z_tos, state);
+ switch (state) {
+ case atos: push_ptr(); break;
+ case btos: push_i(); break;
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: push_i(); break;
+ case itos: push_i(); break;
+ case ltos: push_l(); break;
+ case ftos: push_f(); break;
+ case dtos: push_d(); break;
+ case vtos: /* nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+}
+
+void InterpreterMacroAssembler::pop(TosState state) { // Pop the stack top into Z_tos or Z_ftos, depending on 'state'.
+ switch (state) {
+ case atos: pop_ptr(Z_tos); break;
+ case btos: pop_i(Z_tos); break;
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: pop_i(Z_tos); break;
+ case itos: pop_i(Z_tos); break;
+ case ltos: pop_l(Z_tos); break;
+ case ftos: pop_f(Z_ftos); break;
+ case dtos: pop_d(Z_ftos); break;
+ case vtos: /* nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+ verify_oop(Z_tos, state); // Verify after the pop, i.e. on the freshly loaded value.
+}
+
+// Helpers for swap and dup.
+void InterpreterMacroAssembler::load_ptr(int n, Register val) { // val = expression stack slot n; Z_esp is not changed.
+ z_lg(val, Address(Z_esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+void InterpreterMacroAssembler::store_ptr(int n, Register val) { // expression stack slot n = val; Z_esp is not changed.
+ z_stg(val, Address(Z_esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted(Register method) {
+ // Satisfy interpreter calling convention (see generate_normal_entry()).
+ z_lgr(Z_R10, Z_SP); // Set sender sp (aka initial caller sp, aka unextended sp).
+ // Record top_frame_sp, because the callee might modify it, if it's compiled.
+ z_stg(Z_SP, _z_ijava_state_neg(top_frame_sp), Z_fp);
+ save_bcp(); // Spill bcp and esp into the ijava state (both stores address via Z_fp).
+ save_esp();
+ z_lgr(Z_method, method); // Set Z_method (kills Z_fp!). Must come after the Z_fp-relative stores above.
+}
+
+// Jump to from_interpreted entry of a call unless single stepping is possible
+// in this thread in which case we must call the i2i entry.
+void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
+ assert_different_registers(method, Z_R10 /*used for initial_caller_sp*/, temp); // temp is only used in this check.
+ prepare_to_jump_from_interpreted(method);
+
+ if (JvmtiExport::can_post_interpreter_events()) {
+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+ // compiled code in threads for which the event is enabled. Check here for
+ // interp_only_mode if these events CAN be enabled.
+ z_lg(Z_R1_scratch, Address(method, Method::from_interpreted_offset()));
+ MacroAssembler::load_and_test_int(Z_R0_scratch, Address(Z_thread, JavaThread::interp_only_mode_offset()));
+ z_bcr(bcondEqual, Z_R1_scratch); // Run compiled code if zero.
+ // Run interpreted.
+ z_lg(Z_R1_scratch, Address(method, Method::interpreter_entry_offset()));
+ z_br(Z_R1_scratch);
+ } else {
+ // Run compiled code.
+ z_lg(Z_R1_scratch, Address(method, Method::from_interpreted_offset()));
+ z_br(Z_R1_scratch);
+ }
+}
+
+#ifdef ASSERT
+void InterpreterMacroAssembler::verify_esp(Register Resp, Register Rtemp) { // Rtemp is clobbered.
+ // About to read or write Resp[0].
+ // Make sure it is not in the monitors or the TOP_IJAVA_FRAME_ABI.
+ address reentry = NULL;
+
+ {
+ // Check if the frame pointer in Z_fp is correct.
+ NearLabel OK;
+ z_cg(Z_fp, 0, Z_SP);
+ z_bre(OK);
+ reentry = stop_chain_static(reentry, "invalid frame pointer Z_fp");
+ bind(OK);
+ }
+ {
+ // Resp must lie below the monitor area,
+ // i.e. IJAVA_STATE.monitors > Resp.
+ NearLabel OK;
+ Register Rmonitors = Rtemp;
+ z_lg(Rmonitors, _z_ijava_state_neg(monitors), Z_fp);
+ compareU64_and_branch(Rmonitors, Resp, bcondHigh, OK);
+ reentry = stop_chain_static(reentry, "too many pops: Z_esp points into monitor area");
+ bind(OK);
+ }
+ {
+ // Resp may point to the last word of TOP_IJAVA_FRAME_ABI, but not below
+ // i.e. !(Z_SP + frame::z_top_ijava_frame_abi_size - Interpreter::stackElementSize > Resp).
+ NearLabel OK;
+ Register Rabi_bottom = Rtemp;
+ add2reg(Rabi_bottom, frame::z_top_ijava_frame_abi_size - Interpreter::stackElementSize, Z_SP);
+ compareU64_and_branch(Rabi_bottom, Resp, bcondNotHigh, OK);
+ reentry = stop_chain_static(reentry, "too many pushes: Z_esp points into TOP_IJAVA_FRAME_ABI");
+ bind(OK);
+ }
+}
+
+void InterpreterMacroAssembler::asm_assert_ijava_state_magic(Register tmp) { // Stops if the magic slot of the ijava state is wrong. tmp is clobbered.
+ Label magic_ok;
+ load_const_optimized(tmp, frame::z_istate_magic_number); // tmp = expected magic number.
+ z_cg(tmp, Address(Z_fp, _z_ijava_state_neg(magic))); // Compare with the magic slot addressed via Z_fp.
+ z_bre(magic_ok);
+ stop_static("error: wrong magic number in ijava_state access");
+ bind(magic_ok);
+}
+#endif // ASSERT
+
+void InterpreterMacroAssembler::save_bcp() {
+ z_stg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp))); // Store Z_bcp into the bcp slot of the ijava state.
+ asm_assert_ijava_state_magic(Z_bcp); // Uses Z_bcp as temp (its value was just stored).
+ NOT_PRODUCT(z_lg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp)))); // Reload Z_bcp, which the magic check clobbered.
+}
+
+void InterpreterMacroAssembler::restore_bcp() {
+ asm_assert_ijava_state_magic(Z_bcp); // Z_bcp serves as temp; it is overwritten by the load below anyway.
+ z_lg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp))); // Z_bcp = bcp slot of the ijava state.
+}
+
+void InterpreterMacroAssembler::save_esp() { // Store Z_esp into the esp slot of the ijava state (no magic check here).
+ z_stg(Z_esp, Address(Z_fp, _z_ijava_state_neg(esp)));
+}
+
+void InterpreterMacroAssembler::restore_esp() {
+ asm_assert_ijava_state_magic(Z_esp); // Z_esp serves as temp; it is overwritten by the load below anyway.
+ z_lg(Z_esp, Address(Z_fp, _z_ijava_state_neg(esp))); // Z_esp = esp slot of the ijava state.
+}
+
+void InterpreterMacroAssembler::get_monitors(Register reg) {
+ asm_assert_ijava_state_magic(reg); // reg serves as temp; it is overwritten by the load below anyway.
+ mem2reg_opt(reg, Address(Z_fp, _z_ijava_state_neg(monitors))); // reg = monitors slot of the ijava state.
+}
+
+void InterpreterMacroAssembler::save_monitors(Register reg) { // Store reg into the monitors slot of the ijava state.
+ reg2mem_opt(reg, Address(Z_fp, _z_ijava_state_neg(monitors)));
+}
+
+void InterpreterMacroAssembler::get_mdp(Register mdp) { // mdp = mdx slot of the ijava state.
+ z_lg(mdp, _z_ijava_state_neg(mdx), Z_fp);
+}
+
+void InterpreterMacroAssembler::save_mdp(Register mdp) { // Store mdp into the mdx slot of the ijava state.
+ z_stg(mdp, _z_ijava_state_neg(mdx), Z_fp);
+}
+
+// Values that are only read (besides initialization).
+void InterpreterMacroAssembler::restore_locals() {
+ asm_assert_ijava_state_magic(Z_locals); // Z_locals serves as temp; it is overwritten by the load below anyway.
+ z_lg(Z_locals, Address(Z_fp, _z_ijava_state_neg(locals))); // Z_locals = locals slot of the ijava state.
+}
+
+void InterpreterMacroAssembler::get_method(Register reg) {
+ asm_assert_ijava_state_magic(reg); // reg serves as temp; it is overwritten by the load below anyway.
+ z_lg(reg, Address(Z_fp, _z_ijava_state_neg(method))); // reg = method slot of the ijava state.
+}
+
+void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register Rdst, int bcp_offset,
+ signedOrNot is_signed) {
+ // Rdst is an 8-byte return value!!!
+
+ // Unaligned loads incur only a small penalty on z/Architecture. The penalty
+ // is a few (2..3) ticks, even when the load crosses a cache line
+ // boundary. In case of a cache miss, the stall could, of course, be
+ // much longer.
+
+ switch (is_signed) { // Both variants load the two bytes at Z_bcp + bcp_offset.
+ case Signed:
+ z_lgh(Rdst, bcp_offset, Z_R0, Z_bcp);
+ break;
+ case Unsigned:
+ z_llgh(Rdst, bcp_offset, Z_R0, Z_bcp);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+
+void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register Rdst, int bcp_offset,
+ setCCOrNot set_cc) {
+ // Rdst is an 8-byte return value!!!
+
+ // Unaligned loads incur only a small penalty on z/Architecture. The penalty
+ // is a few (2..3) ticks, even when the load crosses a cache line
+ // boundary. In case of a cache miss, the stall could, of course, be
+ // much longer.
+
+ // Both variants implement a sign-extending int2long load from Z_bcp + bcp_offset.
+ if (set_cc == set_CC) {
+ load_and_test_int2long(Rdst, Address(Z_bcp, (intptr_t)bcp_offset)); // Load-and-test variant, chosen when the caller requests set_CC.
+ } else {
+ mem2reg_signed_opt( Rdst, Address(Z_bcp, (intptr_t)bcp_offset));
+ }
+}
+
+void InterpreterMacroAssembler::get_constant_pool(Register Rdst) { // Rdst = method -> const -> constants; Rdst doubles as the temp for each step.
+ get_method(Rdst);
+ mem2reg_opt(Rdst, Address(Rdst, Method::const_offset()));
+ mem2reg_opt(Rdst, Address(Rdst, ConstMethod::constants_offset()));
+}
+
+void InterpreterMacroAssembler::get_cpool_and_tags(Register Rcpool, Register Rtags) {
+ get_constant_pool(Rcpool); // Rcpool = constant pool of the current method.
+ mem2reg_opt(Rtags, Address(Rcpool, ConstantPool::tags_offset_in_bytes())); // Rtags = tags field of that constant pool.
+}
+
+// Unlock if synchronized method.
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+//
+// If there are locked Java monitors
+// If throw_monitor_exception
+// throws IllegalMonitorStateException
+// Else if install_monitor_exception
+// installs IllegalMonitorStateException
+// Else
+// no error processing
+void InterpreterMacroAssembler::unlock_if_synchronized_method(TosState state,
+ bool throw_monitor_exception,
+ bool install_monitor_exception) {
+ NearLabel unlocked, unlock, no_unlock;
+
+ {
+ Register R_method = Z_ARG2;
+ Register R_do_not_unlock_if_synchronized = Z_ARG3;
+
+ // Get the value of _do_not_unlock_if_synchronized into R_do_not_unlock_if_synchronized.
+ const Address do_not_unlock_if_synchronized(Z_thread,
+ JavaThread::do_not_unlock_if_synchronized_offset());
+ load_sized_value(R_do_not_unlock_if_synchronized, do_not_unlock_if_synchronized, 1, false /*unsigned*/);
+ z_mvi(do_not_unlock_if_synchronized, false); // Reset the flag.
+
+ // Check if synchronized method.
+ get_method(R_method);
+ verify_oop(Z_tos, state);
+ push(state); // Save tos/result. It is popped again at no_unlock.
+ testbit(method2_(R_method, access_flags), JVM_ACC_SYNCHRONIZED_BIT);
+ z_bfalse(unlocked);
+
+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag
+ // is set.
+ compareU64_and_branch(R_do_not_unlock_if_synchronized, (intptr_t)0L, bcondNotEqual, no_unlock);
+ }
+
+ // unlock monitor
+
+ // BasicObjectLock will be first in list, since this is a
+ // synchronized method. However, need to check that the object has
+ // not been unlocked by an explicit monitorexit bytecode.
+ const Address monitor(Z_fp, -(frame::z_ijava_state_size + (int) sizeof(BasicObjectLock)));
+ // We use Z_ARG2 so that if we go slow path it will be the correct
+ // register for unlock_object to pass to VM directly.
+ load_address(Z_ARG2, monitor); // Address of first monitor.
+ z_lg(Z_ARG3, Address(Z_ARG2, BasicObjectLock::obj_offset_in_bytes()));
+ compareU64_and_branch(Z_ARG3, (intptr_t)0L, bcondNotEqual, unlock);
+
+ if (throw_monitor_exception) {
+ // Entry already unlocked need to throw an exception.
+ MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
+ should_not_reach_here();
+ } else {
+ // Monitor already unlocked during a stack unroll.
+ // If requested, install an illegal_monitor_state_exception.
+ // Continue with stack unrolling.
+ if (install_monitor_exception) {
+ MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
+ }
+ z_bru(unlocked);
+ }
+
+ bind(unlock);
+
+ unlock_object(Z_ARG2);
+
+ bind(unlocked);
+
+ // The tos/result was pushed above (push(state)) and is popped again at no_unlock.
+
+ // Check that all monitors are unlocked.
+ {
+ NearLabel loop, exception, entry, restart;
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+ // We use Z_ARG2 so that if we go slow path it will be the correct
+ // register for unlock_object to pass to VM directly.
+ Register R_current_monitor = Z_ARG2;
+ Register R_monitor_block_bot = Z_ARG1;
+ const Address monitor_block_top(Z_fp, _z_ijava_state_neg(monitors));
+ const Address monitor_block_bot(Z_fp, -frame::z_ijava_state_size);
+
+ bind(restart);
+ // Starting with top-most entry.
+ z_lg(R_current_monitor, monitor_block_top);
+ // Points to word before bottom of monitor block.
+ load_address(R_monitor_block_bot, monitor_block_bot);
+ z_bru(entry);
+
+ // Entry already locked, need to throw exception.
+ bind(exception);
+
+ if (throw_monitor_exception) {
+ // Throw exception.
+ MacroAssembler::call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::
+ throw_illegal_monitor_state_exception));
+ should_not_reach_here();
+ } else {
+ // Stack unrolling. Unlock object and install illegal_monitor_exception.
+ // Unlock does not block, so don't have to worry about the frame.
+ // R_current_monitor need not be preserved: it is reloaded after the branch to restart.
+ unlock_object(R_current_monitor);
+ if (install_monitor_exception) {
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::
+ new_illegal_monitor_state_exception));
+ }
+ z_bru(restart);
+ }
+
+ bind(loop);
+ // Check if current entry is used.
+ load_and_test_long(Z_R0_scratch, Address(R_current_monitor, BasicObjectLock::obj_offset_in_bytes()));
+ z_brne(exception);
+
+ add2reg(R_current_monitor, entry_size); // Otherwise advance to next entry.
+ bind(entry);
+ compareU64_and_branch(R_current_monitor, R_monitor_block_bot, bcondNotEqual, loop);
+ }
+
+ bind(no_unlock);
+ pop(state); // Restore the tos/result saved by push(state) above.
+ verify_oop(Z_tos, state);
+}
+
+// remove activation
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+// If throw_monitor_exception
+// throws IllegalMonitorStateException
+// Else if install_monitor_exception
+// installs IllegalMonitorStateException
+// Else
+// no error processing
+void InterpreterMacroAssembler::remove_activation(TosState state,
+ Register return_pc,
+ bool throw_monitor_exception,
+ bool install_monitor_exception,
+ bool notify_jvmti) {
+ // Monitor handling, including the error policy described above, is delegated to unlock_if_synchronized_method.
+ unlock_if_synchronized_method(state, throw_monitor_exception, install_monitor_exception);
+
+ // Save result (push state before jvmti call and pop it afterwards) and notify jvmti.
+ notify_method_exit(false, state, notify_jvmti ? NotifyJVMTI : SkipNotifyJVMTI);
+
+ verify_oop(Z_tos, state);
+ verify_thread();
+ // Finally drop the interpreter frame; return_pc, Z_ARG2 and Z_ARG3 are handed to pop_interpreter_frame.
+ pop_interpreter_frame(return_pc, Z_ARG2, Z_ARG3);
+}
+
+// lock object
+//
+// Registers alive
+// monitor - Address of the BasicObjectLock to be used for locking,
+// which must be initialized with the object to lock.
+// object - Address of the object to be locked.
+void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
+ // With UseHeavyMonitors no fast path is emitted; every lock goes through the runtime call.
+ if (UseHeavyMonitors) {
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+ monitor, /*check_for_exceptions=*/false);
+ return;
+ }
+
+ // template code:
+ //
+ // markOop displaced_header = obj->mark().set_unlocked();
+ // monitor->lock()->set_displaced_header(displaced_header);
+ // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+ // // We stored the monitor address into the object's mark word.
+ // } else if (THREAD->is_lock_owned((address)displaced_header)) {
+ // // Simple recursive case.
+ // monitor->lock()->set_displaced_header(NULL);
+ // } else {
+ // // Slow path.
+ // InterpreterRuntime::monitorenter(THREAD, monitor);
+ // }
+
+ const Register displaced_header = Z_ARG5;
+ const Register object_mark_addr = Z_ARG4;
+ const Register current_header = Z_ARG5; // Deliberately the same register as displaced_header (asserted after z_csg).
+
+ NearLabel done;
+ NearLabel slow_case;
+
+ // markOop displaced_header = obj->mark().set_unlocked();
+
+ // Load markOop from object into displaced_header.
+ z_lg(displaced_header, oopDesc::mark_offset_in_bytes(), object);
+
+ if (UseBiasedLocking) {
+ biased_locking_enter(object, displaced_header, Z_R1, Z_R0, done, &slow_case);
+ }
+
+ // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
+ z_oill(displaced_header, markOopDesc::unlocked_value);
+
+ // monitor->lock()->set_displaced_header(displaced_header);
+
+ // Initialize the box (Must happen before we update the object mark!).
+ z_stg(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
+ BasicLock::displaced_header_offset_in_bytes(), monitor);
+
+ // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+
+ // Store stack address of the BasicObjectLock (this is monitor) into object.
+ add2reg(object_mark_addr, oopDesc::mark_offset_in_bytes(), object);
+
+ z_csg(displaced_header, monitor, 0, object_mark_addr);
+ assert(current_header==displaced_header, "must be same register"); // The code below reads the mark word seen by csg from this register.
+
+ z_bre(done);
+
+ // } else if (THREAD->is_lock_owned((address)displaced_header)) {
+ // // Simple recursive case.
+ // monitor->lock()->set_displaced_header(NULL);
+
+ // We did not see an unlocked object so try the fast recursive case.
+
+ // Check if owner is self by comparing the value in the markOop of object
+ // (current_header) with the stack pointer.
+ z_sgr(current_header, Z_SP);
+
+ assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+
+ // A single NGR against a preloaded mask replaces the prior sequence "LGR, NGR, LTGR".
+ // Z_R0 is the temp: it holds the mask first and the AND result afterwards.
+ load_const_optimized(Z_R0, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+ z_ngr(Z_R0, current_header); // AND sets CC (result eq/ne 0)
+
+ // If the AND result is zero we are done and hence we can store 0 in the displaced
+ // header indicating it is a recursive lock and be done. Otherwise take the slow path.
+ z_brne(slow_case);
+ z_release(); // Membar unnecessary on zarch because the above csg does a sync before and after.
+ z_stg(Z_R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
+ BasicLock::displaced_header_offset_in_bytes(), monitor);
+ z_bru(done);
+
+ // } else {
+ // // Slow path.
+ // InterpreterRuntime::monitorenter(THREAD, monitor);
+
+ // None of the above fast optimizations worked so we have to get into the
+ // slow case of monitor enter.
+ bind(slow_case);
+
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+ monitor, /*check_for_exceptions=*/false);
+
+ // }
+
+ bind(done);
+}
+
+// Unlocks an object. Used in monitorexit bytecode and remove_activation.
+//
+// Registers alive
+// monitor - Address of the BasicObjectLock that was used for locking the object.
+// object - The locked object; if noreg, it is loaded from the monitor into Z_ARG3.
+//
+// Throw IllegalMonitorStateException if object is not locked by current thread.
+void InterpreterMacroAssembler::unlock_object(Register monitor, Register object) {
+
+ if (UseHeavyMonitors) {
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+ monitor, /*check_for_exceptions=*/ true);
+ return;
+ }
+
+ // Otherwise (no UseHeavyMonitors) emit the lightweight unlocking sequence.
+ // template code:
+ //
+ // if ((displaced_header = monitor->displaced_header()) == NULL) {
+ // // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
+ // monitor->set_obj(NULL);
+ // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+ // // We swapped the unlocked mark in displaced_header into the object's mark word.
+ // monitor->set_obj(NULL);
+ // } else {
+ // // Slow path.
+ // InterpreterRuntime::monitorexit(THREAD, monitor);
+ // }
+
+ const Register displaced_header = Z_ARG4;
+ const Register current_header = Z_R1;
+ Address obj_entry(monitor, BasicObjectLock::obj_offset_in_bytes());
+ Label done;
+
+ if (object == noreg) {
+ // In the template interpreter, we must assure that the object
+ // entry in the monitor is cleared on all paths. Thus we move
+ // loading up to here, and clear the entry afterwards.
+ object = Z_ARG3; // Use Z_ARG3 if caller didn't pass object.
+ z_lg(object, obj_entry);
+ }
+
+ assert_different_registers(monitor, object, displaced_header, current_header);
+
+ // if ((displaced_header = monitor->displaced_header()) == NULL) {
+ // // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
+ // monitor->set_obj(NULL);
+ // The entry is cleared up front for all paths; only the slow path below restores it.
+ clear_mem(obj_entry, sizeof(oop));
+
+ if (UseBiasedLocking) {
+ // The object address from the monitor is in object.
+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+ biased_locking_exit(object, displaced_header, done);
+ }
+
+ // Test first if we are in the fast recursive case.
+ MacroAssembler::load_and_test_long(displaced_header,
+ Address(monitor, BasicObjectLock::lock_offset_in_bytes() +
+ BasicLock::displaced_header_offset_in_bytes()));
+ z_bre(done); // displaced_header == 0 -> goto done
+
+ // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+ // // We swapped the unlocked mark in displaced_header into the object's mark word.
+ // monitor->set_obj(NULL);
+
+ // If we still have a lightweight lock, unlock the object and be done.
+
+ // The markword is expected to be at offset 0.
+ assert(oopDesc::mark_offset_in_bytes() == 0, "unlock_object: review code below");
+
+ // We have the displaced header in displaced_header. If the lock is still
+ // lightweight, the object's mark word will contain the monitor address and
+ // we'll store the displaced header back into the object's mark word.
+ z_lgr(current_header, monitor);
+ z_csg(current_header, displaced_header, 0, object);
+ z_bre(done);
+
+ // } else {
+ // // Slow path.
+ // InterpreterRuntime::monitorexit(THREAD, monitor);
+
+ // The lock has been converted into a heavy lock and hence
+ // we need to get into the slow case.
+ z_stg(object, obj_entry); // Restore object entry, has been cleared above.
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+ monitor, /*check_for_exceptions=*/false);
+
+ // }
+
+ bind(done);
+}
+
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) { // Load the frame's mdp and branch away if it is zero.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ load_and_test_long(mdp, Address(Z_fp, _z_ijava_state_neg(mdx))); // mdp = mdx slot of the interpreter frame state (relative to Z_fp).
+ z_brz(zero_continue); // Zero: no method data pointer, the caller skips its profiling code.
+}
+
+// Set the method data pointer for the current bcp (saves zero if the method has no MDO).
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ Label set_mdp;
+ Register mdp = Z_ARG4;
+ Register method = Z_ARG5;
+
+ get_method(method);
+ // Test MDO to avoid the call if it is NULL.
+ load_and_test_long(mdp, method2_(method, method_data));
+ z_brz(set_mdp);
+
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), method, Z_bcp);
+ // Z_RET: mdi (result of bcp_to_di for the current Z_bcp).
+ // Mdo is guaranteed to be non-zero here, we checked for it before the call.
+ assert(method->is_nonvolatile(), "choose nonvolatile reg or reload from frame");
+ z_lg(mdp, method2_(method, method_data)); // Must reload, mdp is volatile reg.
+ add2reg_with_index(mdp, in_bytes(MethodData::data_offset()), Z_RET, mdp); // Combine MDO address, data_offset and the index in Z_RET.
+
+ bind(set_mdp);
+ save_mdp(mdp); // Reached with mdp == 0 as well when the MDO test above branched here.
+}
+
+void InterpreterMacroAssembler::verify_method_data_pointer() { // Consistency check of mdp against Z_bcp; emits code only in ASSERT builds.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+#ifdef ASSERT
+ NearLabel verify_continue;
+ Register bcp_expected = Z_ARG3;
+ Register mdp = Z_ARG4;
+ Register method = Z_ARG5;
+
+ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue
+ get_method(method);
+
+ // If the mdp is valid, it will point to a DataLayout header which is
+ // consistent with the bcp. The converse is highly probable also.
+ load_sized_value(bcp_expected, Address(mdp, DataLayout::bci_offset()), 2, false /*signed*/); // 2-byte unsigned bci from the DataLayout header.
+ z_ag(bcp_expected, Address(method, Method::const_offset())); // + ConstMethod address.
+ load_address(bcp_expected, Address(bcp_expected, ConstMethod::codes_offset())); // + codes offset = expected bcp.
+ compareU64_and_branch(bcp_expected, Z_bcp, bcondEqual, verify_continue);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, Z_bcp, mdp); // Mismatch: let the runtime check.
+ bind(verify_continue);
+#endif // ASSERT
+}
+
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int constant, Register value) { // Store value into the profile cell at mdp_in + constant.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ z_stg(value, constant, mdp_in); // constant is the byte displacement relative to mdp_in.
+}
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, // Base register of the profile data.
+ int constant, // Byte offset of the counter relative to mdp_in.
+ Register tmp, // Passed on to add2mem_64; must differ from mdp_in.
+ bool decrement) { // true: subtract counter_increment instead of adding it.
+ assert_different_registers(mdp_in, tmp);
+ // counter address
+ Address data(mdp_in, constant);
+ const int delta = decrement ? -DataLayout::counter_increment : DataLayout::counter_increment;
+ add2mem_64(data, delta, tmp); // Use the address built above instead of constructing it a second time.
+}
+
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
+ int flag_byte_constant) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ // Set the flag: OR flag_byte_constant into the flags field (DataLayout::flags_offset()) at mdp_in.
+ z_oi(Address(mdp_in, DataLayout::flags_offset()), flag_byte_constant);
+}
+
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, // Base register of the profile data.
+ int offset, // Byte offset of the cell to test.
+ Register value, // Value the cell is compared against.
+ Register test_value_out, // noreg, or register that receives the cell contents.
+ Label& not_equal_continue) { // Branch target when cell != value.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ if (test_value_out == noreg) {
+ z_cg(value, Address(mdp_in, offset)); // Compare directly against memory; nothing is loaded.
+ z_brne(not_equal_continue);
+ } else {
+ // Put the test value into a register, so caller can use it:
+ z_lg(test_value_out, Address(mdp_in, offset));
+ compareU64_and_branch(test_value_out, value, bcondNotEqual, not_equal_continue);
+ }
+}
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp) { // Convenience overload without index register.
+ update_mdp_by_offset(mdp_in, noreg, offset_of_disp); // Delegates to the three-argument overload with dataidx == noreg.
+}
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, // In/out: advanced by the displacement read from memory.
+ Register dataidx, // Index register for the displacement cell, or noreg.
+ int offset_of_disp) { // Byte offset of the displacement cell.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ Address disp_address(mdp_in, dataidx, offset_of_disp);
+ Assembler::z_ag(mdp_in, disp_address); // mdp_in += displacement stored at disp_address.
+ save_mdp(mdp_in); // Hand the advanced mdp to save_mdp.
+}
+
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) { // Advance mdp_in by a fixed number of bytes and save it.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ add2reg(mdp_in, constant); // mdp_in += constant.
+ save_mdp(mdp_in);
+}
+
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { // Let the runtime update the mdp for a ret to return_bci.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ assert(return_bci->is_nonvolatile(), "choose nonvolatile reg or save/restore");
+ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
+ return_bci); // return_bci is the only explicit argument of the VM call.
+}
+
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) { // Count a taken branch; bumped_count receives the new counter value.
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ // Otherwise, assign to mdp.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are taking a branch. Increment the taken count.
+ // We inline increment_mdp_data_at to return bumped_count in a register
+ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()));
+ Address data(mdp, JumpData::taken_offset());
+ z_lg(bumped_count, data);
+ // 64-bit overflow is very unlikely. Saturation to 32-bit values is
+ // performed when reading the counts.
+ add2reg(bumped_count, DataLayout::counter_increment);
+ z_stg(bumped_count, data); // Store back out
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
+ bind(profile_continue);
+ }
+}
+
+// Counts a branch that was not taken and advances the mdp past the BranchData. Kills Z_R1_scratch.
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are not taking the branch. Increment the not taken count.
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Z_R1_scratch);
+
+ // The method data pointer needs to be updated to correspond to
+ // the next bytecode.
+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
+ bind(profile_continue);
+ }
+}
+
+// Counts a call in the CounterData at mdp and advances the mdp past it. Kills: Z_R1_scratch.
+void InterpreterMacroAssembler::profile_call(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are making a call. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_final_call(Register mdp) { // Like profile_call, but skips a VirtualCallData-sized entry.
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are making a call. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+ // The method data pointer needs to be updated to reflect the new target.
+ // Only the count is bumped, but the mdp advances by the full virtual call data size.
+ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_virtual_call(Register receiver, // Value recorded in the receiver rows (tested against 0 if it may be null).
+ Register mdp, // Receives the method data pointer.
+ Register reg2, // Scratch for record_klass_in_profile.
+ bool receiver_can_be_null) { // true: emit the extra null test.
+ if (ProfileInterpreter) {
+ NearLabel profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ NearLabel skip_receiver_profile;
+ if (receiver_can_be_null) {
+ NearLabel not_null;
+ compareU64_and_branch(receiver, (intptr_t)0L, bcondNotEqual, not_null);
+ // We are making a call. Increment the count for null receiver.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+ z_bru(skip_receiver_profile);
+ bind(not_null);
+ }
+
+ // Record the receiver type.
+ record_klass_in_profile(receiver, mdp, reg2, true);
+ bind(skip_receiver_profile);
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+ bind(profile_continue);
+ }
+}
+
+// This routine creates a state machine for updating the multi-row
+// type profile at a virtual call site (or other type-sensitive bytecode).
+// The machine visits each row (of receiver/count) until the receiver type
+// is found, or until it runs out of rows. At the same time, it remembers
+// the location of the first empty row. (An empty row records null for its
+// receiver, and can be allocated for a newly-observed receiver type.)
+// Because there are two degrees of freedom in the state, a simple linear
+// search will not work; it must be a decision tree. Hence this helper
+// function is recursive, to generate the required tree structured code.
+// It's the interpreter, so we are trading off code space for speed.
+// See the example state machine code following this function.
+void InterpreterMacroAssembler::record_klass_in_profile_helper(
+ Register receiver, Register mdp,
+ Register reg2, int start_row,
+ Label& done, bool is_virtual_call) {
+ if (TypeProfileWidth == 0) {
+ if (is_virtual_call) {
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+ }
+ return;
+ }
+
+ int last_row = VirtualCallData::row_limit() - 1;
+ assert(start_row <= last_row, "must be work left to do");
+ // Test this row for both the receiver and for null.
+ // Take any of three different outcomes:
+ // 1. found receiver => increment count and goto done
+ // 2. found null => keep looking for case 1, maybe allocate this cell
+ // 3. found something else => keep looking for cases 1 and 2
+ // Case 3 is handled by a recursive call.
+ for (int row = start_row; row <= last_row; row++) {
+ NearLabel next_test;
+ bool test_for_null_also = (row == start_row);
+
+ // See if the receiver is receiver[n].
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
+ test_mdp_data_at(mdp, recvr_offset, receiver,
+ (test_for_null_also ? reg2 : noreg),
+ next_test);
+ // (Reg2 now contains the receiver from the CallData, if it was requested above.)
+
+ // The receiver is receiver[n]. Increment count[n].
+ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+ increment_mdp_data_at(mdp, count_offset);
+ z_bru(done);
+ bind(next_test);
+
+ if (test_for_null_also) {
+ Label found_null;
+ // Failed the equality check on receiver[n]... Test for null.
+ z_ltgr(reg2, reg2);
+ if (start_row == last_row) {
+ // The only thing left to do is handle the null case.
+ if (is_virtual_call) {
+ z_brz(found_null);
+ // Receiver did not match any saved receiver and there is no empty row for it.
+ // Increment total counter to indicate polymorphic case.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+ z_bru(done);
+ bind(found_null);
+ } else {
+ z_brnz(done);
+ }
+ break;
+ }
+ // Since null is rare, make it be the branch-taken case.
+ z_brz(found_null);
+
+ // Put all the "Case 3" tests here.
+ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call);
+
+ // Found a null. Keep searching for a matching receiver,
+ // but remember that this is an empty (unused) slot.
+ bind(found_null);
+ }
+ }
+
+ // In the fall-through case, we found no matching receiver, but we
+ // observed the receiver[start_row] is NULL.
+
+ // Fill in the receiver field and set the count to its initial value (one increment).
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
+ set_mdp_data_at(mdp, recvr_offset, receiver);
+ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+ load_const_optimized(reg2, DataLayout::counter_increment);
+ set_mdp_data_at(mdp, count_offset, reg2);
+ if (start_row > 0) { // The outermost invocation (row 0) simply falls through to its caller's code.
+ z_bru(done);
+ }
+}
+
+// Example state machine code for three profile rows:
+// // main copy of decision tree, rooted at row[0]
+// if (row[0].rec == rec) { row[0].incr(); goto done; }
+// if (row[0].rec != NULL) {
+// // inner copy of decision tree, rooted at row[1]
+// if (row[1].rec == rec) { row[1].incr(); goto done; }
+// if (row[1].rec != NULL) {
+// // degenerate decision tree, rooted at row[2]
+// if (row[2].rec == rec) { row[2].incr(); goto done; }
+// if (row[2].rec != NULL) { count.incr(); goto done; } // overflow
+// row[2].init(rec); goto done;
+// } else {
+// // remember row[1] is empty
+// if (row[2].rec == rec) { row[2].incr(); goto done; }
+// row[1].init(rec); goto done;
+// }
+// } else {
+// // remember row[0] is empty
+// if (row[1].rec == rec) { row[1].incr(); goto done; }
+// if (row[2].rec == rec) { row[2].incr(); goto done; }
+// row[0].init(rec); goto done;
+// }
+// done:
+
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, // Entry point: emits the row decision tree starting at row 0.
+ Register mdp, Register reg2,
+ bool is_virtual_call) {
+ assert(ProfileInterpreter, "must be profiling");
+ Label done;
+ // All exits of the generated decision tree branch (or fall through) to done.
+ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call);
+
+ bind (done);
+}
+
+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { // Profile a ret to return_bci using the RetData rows at mdp.
+ if (ProfileInterpreter) {
+ NearLabel profile_continue;
+ uint row;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Update the total ret count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+ for (row = 0; row < RetData::row_limit(); row++) {
+ NearLabel next_test;
+
+ // See if return_bci is equal to bci[n]:
+ test_mdp_data_at(mdp,
+ in_bytes(RetData::bci_offset(row)),
+ return_bci, noreg,
+ next_test);
+
+ // Return_bci is equal to bci[n]. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)));
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row)));
+ z_bru(profile_continue);
+ bind(next_test);
+ }
+ // No row matched return_bci: fall back to the runtime call.
+ update_mdp_for_ret(return_bci);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) { // Set the null_seen flag in the profile entry at mdp and advance past it.
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
+ // The method data pointer needs to be updated.
+ int mdp_delta = in_bytes(BitData::bit_data_size());
+ if (TypeProfileCasts) { // With TypeProfileCasts the entry to skip has VirtualCallData size.
+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+ }
+ update_mdp_by_constant(mdp, mdp_delta);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp, Register tmp) { // Decrement the count of the entry the mdp has just been moved past.
+ if (ProfileInterpreter && TypeProfileCasts) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ int count_offset = in_bytes(CounterData::count_offset());
+ // Back up the address, since we have already bumped the mdp.
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
+
+ // *Decrement* the counter. We expect to see zero or small negatives.
+ increment_mdp_data_at(mdp, count_offset, tmp, true);
+
+ bind (profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { // Profile a type check; records klass only if TypeProfileCasts.
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // The method data pointer needs to be updated.
+ int mdp_delta = in_bytes(BitData::bit_data_size());
+ if (TypeProfileCasts) {
+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+
+ // Record the object type (is_virtual_call == false).
+ record_klass_in_profile(klass, mdp, reg2, false);
+ }
+ update_mdp_by_constant(mdp, mdp_delta);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_switch_default(Register mdp) { // Count the default case of a switch and follow its displacement.
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Update the default case count.
+ increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset()));
+
+ // The method data pointer needs to be updated (by the stored default displacement).
+ update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset()));
+
+ bind(profile_continue);
+ }
+}
+
+// Counts switch case number 'index' and follows its displacement. Kills: index, scratch1, scratch2.
+void InterpreterMacroAssembler::profile_switch_case(Register index,
+ Register mdp,
+ Register scratch1,
+ Register scratch2) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+ assert_different_registers(index, mdp, scratch1, scratch2);
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Build the base (index * per_case_size_in_bytes()) +
+ // case_array_offset_in_bytes(). The shift requires per_case_size to be a power of two (exact_log2).
+ z_sllg(index, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));
+ add2reg(index, in_bytes(MultiBranchData::case_array_offset()));
+
+ // Add the calculated base to the mdp -> address of the case's data.
+ Address case_data_addr(mdp, index);
+ Register case_data = scratch1;
+ load_address(case_data, case_data_addr);
+
+ // Update the case count.
+ increment_mdp_data_at(case_data,
+ in_bytes(MultiBranchData::relative_count_offset()),
+ scratch2);
+
+ // The method data pointer needs to be updated.
+ update_mdp_by_offset(mdp,
+ index,
+ in_bytes(MultiBranchData::relative_displacement_offset()));
+
+ bind(profile_continue);
+ }
+}
+
+// Updates the type profile cell at mdo_addr for obj. Kills: R0, R1, flags, loads klass from obj (if not null)
+void InterpreterMacroAssembler::profile_obj_type(Register obj, Address mdo_addr, Register klass, bool cmp_done) {
+ NearLabel null_seen, init_klass, do_nothing, do_update;
+
+ // Klass = obj is allowed.
+ const Register tmp = Z_R1;
+ assert_different_registers(obj, mdo_addr.base(), tmp, Z_R0);
+ assert_different_registers(klass, mdo_addr.base(), tmp, Z_R0);
+
+ z_lg(tmp, mdo_addr); // tmp = current contents of the profile cell.
+ if (cmp_done) {
+ z_brz(null_seen); // Uses the condition code of the caller's compare of obj against 0.
+ } else {
+ compareU64_and_branch(obj, (intptr_t)0, Assembler::bcondEqual, null_seen);
+ }
+
+ verify_oop(obj);
+ load_klass(klass, obj);
+
+ // Klass seen before, nothing to do (regardless of unknown bit).
+ z_lgr(Z_R0, tmp);
+ assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction");
+ z_nill(Z_R0, TypeEntries::type_klass_mask & 0xFFFF);
+ compareU64_and_branch(Z_R0, klass, Assembler::bcondEqual, do_nothing);
+
+ // Already unknown. Nothing to do anymore.
+ z_tmll(tmp, TypeEntries::type_unknown);
+ z_brc(Assembler::bcondAllOne, do_nothing);
+
+ z_lgr(Z_R0, tmp);
+ assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction");
+ z_nill(Z_R0, TypeEntries::type_mask & 0xFFFF);
+ compareU64_and_branch(Z_R0, (intptr_t)0, Assembler::bcondEqual, init_klass);
+
+ // Different than before. Cannot keep accurate profile.
+ z_oill(tmp, TypeEntries::type_unknown);
+ z_bru(do_update);
+
+ bind(init_klass);
+ // Combine klass and null_seen bit (only used if (tmp & type_mask)==0).
+ z_ogr(tmp, klass);
+ z_bru(do_update);
+
+ bind(null_seen);
+ // Set null_seen if obj is 0.
+ z_oill(tmp, TypeEntries::null_seen);
+ // fallthru: z_bru(do_update);
+
+ bind(do_update);
+ z_stg(tmp, mdo_addr); // Write the modified cell back.
+
+ bind(do_nothing);
+}
+
+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { // Profile argument types at a call and advance the mdp.
+ if (!ProfileInterpreter) {
+ return;
+ }
+
+ assert_different_registers(mdp, callee, tmp);
+
+ if (MethodData::profile_arguments() || MethodData::profile_return()) {
+ Label profile_continue;
+
+ test_method_data_pointer(mdp, profile_continue);
+ // The tag is addressed off_to_start bytes before mdp; skip everything if the tag does not match.
+ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
+
+ z_cliy(in_bytes(DataLayout::tag_offset()) - off_to_start, mdp,
+ is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag);
+ z_brne(profile_continue);
+
+ if (MethodData::profile_arguments()) {
+ NearLabel done;
+ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
+ add2reg(mdp, off_to_args);
+ // off_to_args tracks how far mdp has been advanced; the offsets below subtract it again.
+ for (int i = 0; i < TypeProfileArgsLimit; i++) {
+ if (i > 0 || MethodData::profile_return()) {
+ // If return value type is profiled we may have no argument to profile.
+ z_lg(tmp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, mdp);
+ add2reg(tmp, -i*TypeStackSlotEntries::per_arg_count());
+ compare64_and_branch(tmp, TypeStackSlotEntries::per_arg_count(), Assembler::bcondLow, done);
+ }
+ z_lg(tmp, Address(callee, Method::const_offset()));
+ z_lgh(tmp, Address(tmp, ConstMethod::size_of_parameters_offset()));
+ // Stack offset o (zero based) from the start of the argument
+ // list. For n arguments translates into offset n - o - 1 from
+ // the end of the argument list. But there is an extra slot at
+ // the top of the stack. So the offset is n - o from Z_esp.
+ z_sg(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args));
+ z_sllg(tmp, tmp, Interpreter::logStackElementSize);
+ Address stack_slot_addr(tmp, Z_esp);
+ z_ltg(tmp, stack_slot_addr);
+
+ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
+ profile_obj_type(tmp, mdo_arg_addr, tmp, /*ltg did compare to 0*/ true);
+
+ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
+ add2reg(mdp, to_add);
+ off_to_args += to_add;
+ }
+
+ if (MethodData::profile_return()) {
+ z_lg(tmp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, mdp);
+ add2reg(tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
+ }
+
+ bind(done);
+
+ if (MethodData::profile_return()) {
+ // We're right after the type profile for the last
+ // argument. Tmp is the number of cells left in the
+ // CallTypeData/VirtualCallTypeData to reach its end. Non null
+ // if there's a return to profile.
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+ z_sllg(tmp, tmp, exact_log2(DataLayout::cell_size));
+ z_agr(mdp, tmp);
+ }
+ z_stg(mdp, _z_ijava_state_neg(mdx), Z_fp); // Store the advanced mdp into the frame's mdx slot.
+ } else {
+ assert(MethodData::profile_return(), "either profile call args or call ret");
+ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
+ }
+
+ // Mdp points right after the end of the
+ // CallTypeData/VirtualCallTypeData, right after the cells for the
+ // return value type if there's one.
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { // Record the type of the value in ret in the return type cell just before mdp.
+ assert_different_registers(mdp, ret, tmp);
+ if (ProfileInterpreter && MethodData::profile_return()) {
+ Label profile_continue;
+
+ test_method_data_pointer(mdp, profile_continue);
+
+ if (MethodData::profile_return_jsr292_only()) {
+ // If we don't profile all invoke bytecodes we must make sure
+ // it's a bytecode we indeed profile. We can't go back to the
+ // beginning of the ProfileData we intend to update to check its
+ // type because we're right after it and we don't know its
+ // length.
+ NearLabel do_profile;
+ Address bc(Z_bcp);
+ z_llgc(tmp, bc); // Zero-extending load: both opcodes tested below are > 127 and would never match a sign-extended byte.
+ compare32_and_branch(tmp, Bytecodes::_invokedynamic, Assembler::bcondEqual, do_profile);
+ compare32_and_branch(tmp, Bytecodes::_invokehandle, Assembler::bcondEqual, do_profile);
+ get_method(tmp); // Neither bytecode matched: profile only if the current method is a compiled lambda form.
+ // Supplement to 8139891: _intrinsic_id exceeded 1-byte size limit.
+ if (Method::intrinsic_id_size_in_bytes() == 1) {
+ z_cli(Method::intrinsic_id_offset_in_bytes(), tmp, vmIntrinsics::_compiledLambdaForm);
+ } else {
+ assert(Method::intrinsic_id_size_in_bytes() == 2, "size error: check Method::_intrinsic_id");
+ z_lh(tmp, Method::intrinsic_id_offset_in_bytes(), Z_R0, tmp);
+ z_chi(tmp, vmIntrinsics::_compiledLambdaForm);
+ }
+ z_brne(profile_continue);
+
+ bind(do_profile);
+ }
+
+ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); // The return type entry lies directly before mdp.
+ profile_obj_type(ret, mdo_ret_addr, tmp);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) {
+  if (ProfileInterpreter && MethodData::profile_parameters()) {
+    Label profile_continue;
+    // Emits code that records the type of each profiled parameter; mdp, tmp1 and tmp2 are all overwritten.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Load the offset of the area within the MDO used for
+    // parameters. If it's negative we're not profiling any parameters.
+    Address parm_di_addr(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()));
+    load_and_test_int2long(tmp1, parm_di_addr);
+    z_brl(profile_continue);
+
+    // Compute a pointer to the area for parameters from the offset
+    // and move the pointer to the slot for the last
+    // parameter. Collect profiling from the last parameter down.
+    // mdo start + parameters offset + array length - 1
+
+    // Pointer to the parameter area in the MDO.
+    z_agr(mdp, tmp1);
+
+    // Offset of the current profile entry to update (tmp1 is free again after the add above).
+    const Register entry_offset = tmp1;
+    // entry_offset = array len in number of cells.
+    z_lg(entry_offset, Address(mdp, ArrayData::array_len_offset()));
+    // entry_offset (number of cells) = array len - size of 1 entry
+    add2reg(entry_offset, -TypeStackSlotEntries::per_arg_count());
+    // entry_offset in bytes
+    z_sllg(entry_offset, entry_offset, exact_log2(DataLayout::cell_size));
+
+    Label loop;
+    bind(loop);
+
+    Address arg_off(mdp, entry_offset, ParametersTypeData::stack_slot_offset(0));
+    Address arg_type(mdp, entry_offset, ParametersTypeData::type_offset(0));
+
+    // Load offset on the stack from the slot for this parameter.
+    z_lg(tmp2, arg_off);
+    z_sllg(tmp2, tmp2, Interpreter::logStackElementSize);
+    z_lcgr(tmp2); // Negate: the parameter is addressed at Z_locals minus the scaled slot number.
+
+    // Profile the parameter. The load also sets the condition code, which is why cmp_done is passed as true.
+    z_ltg(tmp2, Address(Z_locals, tmp2));
+    profile_obj_type(tmp2, arg_type, tmp2, /*ltg did compare to 0*/ true);
+
+    // Go to next parameter; keep looping while entry_offset has not gone negative.
+    z_aghi(entry_offset, -TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size);
+    z_brnl(loop);
+
+    bind(profile_continue);
+  }
+}
+
+// Emit code for: branch to *where if ((*counter_addr += increment) & mask) satisfies cond.
+void InterpreterMacroAssembler::increment_mask_and_jump(Address          counter_addr,
+                                                        int              increment,
+                                                        Address          mask,
+                                                        Register         scratch,
+                                                        bool             preloaded,
+                                                        branch_condition cond,
+                                                        Label           *where) {
+  assert_different_registers(counter_addr.base(), scratch);
+  // With 'preloaded', scratch already holds the counter value and no load is emitted.
+  const bool update_in_memory = !preloaded &&
+                                VM_Version::has_MemWithImmALUOps() &&
+                                Immediate::is_simm8(increment) &&
+                                counter_addr.is_RSYform();
+  if (update_in_memory) {
+    z_alsi(counter_addr.disp20(), counter_addr.base(), increment);
+    mem2reg_signed_opt(scratch, counter_addr);
+  } else {
+    if (!preloaded) { mem2reg_signed_opt(scratch, counter_addr); }
+    add2reg(scratch, increment);
+    reg2mem_opt(scratch, counter_addr, false);
+  }
+  z_n(scratch, mask);
+  if (where != NULL) { z_brc(cond, *where); }
+}
+
+// Get MethodCounters object for given method. Lazily allocated if necessary.
+// Rmethod - Ptr to Method object.
+// Rcounters - Ptr to MethodCounters object associated with Method object.
+// skip - Exit point if MethodCounters object can't be created (OOM condition).
+void InterpreterMacroAssembler::get_method_counters(Register Rmethod,
+ Register Rcounters,
+ Label& skip) {
+ assert_different_registers(Rmethod, Rcounters);
+
+ BLOCK_COMMENT("get MethodCounters object {");
+
+ Label has_counters;
+ load_and_test_long(Rcounters, Address(Rmethod, Method::method_counters_offset()));
+ z_brnz(has_counters); // Already allocated: skip the runtime call.
+
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), Rmethod, false);
+ z_ltgr(Rcounters, Z_RET); // Runtime call returns MethodCounters object.
+ z_brz(skip); // No MethodCounters, out of memory.
+
+ bind(has_counters);
+
+ BLOCK_COMMENT("} get MethodCounters object");
+}
+
+// Increment invocation counter in MethodCounters object.
+// Return (invocation_counter+backedge_counter) as "result" in RctrSum.
+// Counter values are all unsigned.
+void InterpreterMacroAssembler::increment_invocation_counter(Register Rcounters, Register RctrSum) {
+ assert(UseCompiler || LogTouchedMethods, "incrementing must be useful");
+ assert_different_registers(Rcounters, RctrSum);
+
+ int increment = InvocationCounter::count_increment;
+ int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset());
+ int be_counter_offset = in_bytes(MethodCounters::backedge_counter_offset() + InvocationCounter::counter_offset());
+
+ BLOCK_COMMENT("Increment invocation counter {");
+
+ if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(increment)) {
+ // Increment the invocation counter in place,
+ // then add the incremented value to the (masked) backedge counter.
+ z_l(RctrSum, be_counter_offset, Rcounters);
+ z_alsi(inv_counter_offset, Rcounters, increment); // Atomic increment @no extra cost!
+ z_nilf(RctrSum, InvocationCounter::count_mask_value); // Mask off state bits.
+ z_al(RctrSum, inv_counter_offset, Z_R0, Rcounters);
+ } else {
+ // This path is optimized for low register consumption
+ // at the cost of somewhat higher operand delays.
+ // It does not need an extra temp register.
+
+ // Update the invocation counter.
+ z_l(RctrSum, inv_counter_offset, Rcounters);
+ if (RctrSum == Z_R0) {
+ z_ahi(RctrSum, increment);
+ } else {
+ add2reg(RctrSum, increment);
+ }
+ z_st(RctrSum, inv_counter_offset, Rcounters);
+
+ // Mask off the state bits.
+ z_nilf(RctrSum, InvocationCounter::count_mask_value);
+
+ // Add the backedge counter to the updated invocation counter to
+ // form the result.
+ z_al(RctrSum, be_counter_offset, Z_R0, Rcounters);
+ }
+
+ BLOCK_COMMENT("} Increment invocation counter");
+
+ // Note that this macro must leave the backedge_count + invocation_count in RctrSum!
+}
+
+
+// Increment backedge counter in MethodCounters object.
+// Return (invocation_counter+backedge_counter) as "result" in RctrSum.
+// Counter values are all unsigned.
+void InterpreterMacroAssembler::increment_backedge_counter(Register Rcounters, Register RctrSum) {
+ assert(UseCompiler, "incrementing must be useful");
+ assert_different_registers(Rcounters, RctrSum);
+
+ int increment = InvocationCounter::count_increment;
+ int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset());
+ int be_counter_offset = in_bytes(MethodCounters::backedge_counter_offset() + InvocationCounter::counter_offset());
+
+ BLOCK_COMMENT("Increment backedge counter {");
+
+ if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(increment)) {
+ // Increment the backedge counter in place,
+ // then add the incremented value to the (masked) invocation counter.
+ z_l(RctrSum, inv_counter_offset, Rcounters);
+ z_alsi(be_counter_offset, Rcounters, increment); // Atomic increment @no extra cost!
+ z_nilf(RctrSum, InvocationCounter::count_mask_value); // Mask off state bits.
+ z_al(RctrSum, be_counter_offset, Z_R0, Rcounters);
+ } else {
+ // This path is optimized for low register consumption
+ // at the cost of somewhat higher operand delays.
+ // It does not need an extra temp register.
+
+ // Update the backedge counter.
+ z_l(RctrSum, be_counter_offset, Rcounters);
+ if (RctrSum == Z_R0) {
+ z_ahi(RctrSum, increment);
+ } else {
+ add2reg(RctrSum, increment);
+ }
+ z_st(RctrSum, be_counter_offset, Rcounters);
+
+ // Mask off the state bits.
+ z_nilf(RctrSum, InvocationCounter::count_mask_value);
+
+ // Add the invocation counter to the updated backedge counter to
+ // form the result.
+ z_al(RctrSum, inv_counter_offset, Z_R0, Rcounters);
+ }
+
+ BLOCK_COMMENT("} Increment backedge counter");
+
+ // Note that this macro must leave the backedge_count + invocation_count in RctrSum!
+}
+
+// Add an InterpMonitorElem to stack (see frame_s390.hpp).
+void InterpreterMacroAssembler::add_monitor_to_stack(bool stack_is_empty,
+ Register Rtemp1,
+ Register Rtemp2,
+ Register Rtemp3) {
+ // stack_is_empty: when true, no code is emitted for copying expression stack slots. All three Rtemp registers are scratch.
+ const Register Rcurr_slot = Rtemp1;
+ const Register Rlimit = Rtemp2;
+ const jint delta = -frame::interpreter_frame_monitor_size() * wordSize; // Negative: size of one monitor entry in bytes.
+
+ assert((delta & LongAlignmentMask) == 0,
+ "sizeof BasicObjectLock must be even number of doublewords");
+ assert(2 * wordSize == -delta, "this works only as long as delta == -2*wordSize");
+ assert(Rcurr_slot != Z_R0, "Register must be usable as base register");
+ assert_different_registers(Rlimit, Rcurr_slot, Rtemp3);
+
+ get_monitors(Rlimit); // Rlimit := current monitors pointer; also the upper bound for the copy loop below.
+
+ // Adjust stack pointer for additional monitor entry.
+ resize_frame(RegisterOrConstant((intptr_t) delta), Z_fp, false);
+
+ if (!stack_is_empty) {
+ // Must copy stack contents down.
+ NearLabel next, done;
+
+ // Rcurr_slot := addr(Tos), Z_esp is pointing below it!
+ add2reg(Rcurr_slot, wordSize, Z_esp);
+
+ // Nothing to do, if already at monitor area.
+ compareU64_and_branch(Rcurr_slot, Rlimit, bcondNotLow, done);
+
+ bind(next);
+
+ // Move one stack slot: copy it to the address 'delta' bytes away (delta is negative), then advance.
+ mem2reg_opt(Rtemp3, Address(Rcurr_slot));
+ reg2mem_opt(Rtemp3, Address(Rcurr_slot, delta));
+ add2reg(Rcurr_slot, wordSize);
+ compareU64_and_branch(Rcurr_slot, Rlimit, bcondLow, next); // Are we done?
+
+ bind(done);
+ // Done copying stack.
+ }
+
+ // Adjust expression stack and monitor pointers.
+ add2reg(Z_esp, delta);
+ add2reg(Rlimit, delta);
+ save_monitors(Rlimit);
+}
+
+// Load local 'index' into dst. Afterwards index holds the byte offset, reusable (with Z_locals as base) for a store.
+void InterpreterMacroAssembler::access_local_int(Register index, Register dst) {
+  z_sllg(index, index, LogBytesPerWord);  // Slot number -> byte offset, in place.
+  const Address local_slot(Z_locals, index);
+  mem2reg_opt(dst, local_slot, false);
+}
+
+void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) {
+ if (state == atos) { MacroAssembler::verify_oop(reg); } // Delegates to MacroAssembler::verify_oop only for atos; other states emit nothing.
+}
+
+// Emits the equivalent of:
+//
+//   if (thread is in interp_only_mode) {
+//     InterpreterRuntime::post_method_entry();
+//   }
+
+void InterpreterMacroAssembler::notify_method_entry() {
+  // JVMTI: whenever a thread is put into interp_only_mode, method entry/exit
+  // events are sent for it to track stack depth. The check is only generated
+  // if it is possible to enter interp_only_mode at all.
+  if (!JvmtiExport::can_post_interpreter_events()) {
+    return;
+  }
+
+  Label skip_post;
+  const Address interp_only_mode(Z_thread, JavaThread::interp_only_mode_offset());
+  MacroAssembler::load_and_test_int(Z_R0, interp_only_mode);
+  z_bre(skip_post);  // Field is zero: no event to post.
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry), /*check_exceptions=*/false);
+  bind(skip_post);
+}
+
+// Inline assembly for:
+//
+// if (thread is in interp_only_mode) {
+// if (!native_method) save result
+// InterpreterRuntime::post_method_exit();
+// if (!native_method) restore result
+// }
+// if (DTraceMethodProbes) {
+// SharedRuntime::dtrace_method_exit(thread, method);
+// }
+// (The DTrace part is compiled out below via #if 0.)
+// For native methods their result is stored in z_ijava_state.lresult
+// and z_ijava_state.fresult before coming here.
+// Java methods have their result stored in the expression stack.
+//
+// Notice the dependency to frame::interpreter_frame_result().
+void InterpreterMacroAssembler::notify_method_exit(bool native_method,
+ TosState state,
+ NotifyMethodExitMode mode) {
+ // JVMTI
+ // Whenever JVMTI puts a thread in interp_only_mode, method
+ // entry/exit events are sent for that thread to track stack
+ // depth. If it is possible to enter interp_only_mode we add
+ // the code to check if the event should be sent.
+ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+ Label jvmti_post_done;
+ MacroAssembler::load_and_test_int(Z_R0, Address(Z_thread, JavaThread::interp_only_mode_offset()));
+ z_bre(jvmti_post_done); // interp_only_mode field is zero: skip the event.
+ if (!native_method) push(state); // see frame::interpreter_frame_result()
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), /*check_exceptions=*/false);
+ if (!native_method) pop(state); // Restore the result saved by the push above.
+ bind(jvmti_post_done);
+ }
+
+#if 0
+ // Dtrace currently not supported on z/Architecture.
+ {
+ SkipIfEqual skip(this, &DTraceMethodProbes, false);
+ push(state);
+ get_method(c_rarg1);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+ r15_thread, c_rarg1);
+ pop(state);
+ }
+#endif
+}
+
+void InterpreterMacroAssembler::skip_if_jvmti_mode(Label &Lskip, Register Rscratch) {
+  // Emit a branch to Lskip that is taken when the thread's interp_only_mode field
+  // is non-zero. Nothing is emitted if interpreter events cannot be posted.
+  if (JvmtiExport::can_post_interpreter_events()) {
+    const Address interp_only_mode(Z_thread, JavaThread::interp_only_mode_offset());
+    load_and_test_int(Rscratch, interp_only_mode);
+    z_brnz(Lskip);
+  }
+}
+
+// Pop the topmost TOP_IJAVA_FRAME and set its sender_sp as new Z_SP.
+// The return pc is loaded into the register return_pc.
+//
+// Registers updated:
+// return_pc - The return pc of the calling frame (only loaded if return_pc is a valid register).
+// tmp1, tmp2 - scratch
+void InterpreterMacroAssembler::pop_interpreter_frame(Register return_pc, Register tmp1, Register tmp2) {
+ // F0 Z_SP -> caller_sp (F1's)
+ // ...
+ // sender_sp (F1's)
+ // ...
+ // F1 Z_fp -> caller_sp (F2's)
+ // return_pc (Continuation after return from F0.)
+ // ...
+ // F2 caller_sp
+
+ // Remove F0's activation. Restoring Z_SP to sender_sp reverts modifications
+ // (a) by a c2i adapter and (b) by generate_fixed_frame().
+ // In case (a) the new top frame F1 is an unextended compiled frame.
+ // In case (b) F1 is converted from PARENT_IJAVA_FRAME to TOP_IJAVA_FRAME.
+
+ // Case (b) seems to be redundant when returning to an interpreted caller,
+ // because then the caller's top_frame_sp is installed as sp (see
+ // TemplateInterpreterGenerator::generate_return_entry_for ()). But
+ // pop_interpreter_frame() is also used in exception handling and there the
+ // frame type of the caller is unknown, therefore top_frame_sp cannot be used,
+ // so it is important that sender_sp is the caller's sp as TOP_IJAVA_FRAME.
+
+ Register R_f1_sender_sp = tmp1;
+ Register R_f2_sp = tmp2;
+
+ // First check for the interpreter frame's magic.
+ asm_assert_ijava_state_magic(R_f2_sp/*tmp*/);
+ z_lg(R_f2_sp, _z_parent_ijava_frame_abi(callers_sp), Z_fp);
+ z_lg(R_f1_sender_sp, _z_ijava_state_neg(sender_sp), Z_fp);
+ if (return_pc->is_valid())
+ z_lg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_fp);
+ // Pop F0 by resizing to R_f1_sender_sp and using R_f2_sp as fp.
+ resize_frame_absolute(R_f1_sender_sp, R_f2_sp, false/*load fp*/);
+
+#ifdef ASSERT
+ // The return_pc in the new top frame is dead... at least that's my
+ // current understanding; to assert this I overwrite it.
+ load_const_optimized(Z_ARG3, 0xb00b1);
+ z_stg(Z_ARG3, _z_parent_ijava_frame_abi(return_pc), Z_SP);
+#endif
+}
+
+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) {
+  // No FPU verification exists here; both arguments are unused.
+  if (!VerifyFPU) { return; }
+  unimplemented("verfiyFPU");
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/interp_masm_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP
+#define CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "interpreter/invocationCounter.hpp"
+
+// This file specializes the assembler with interpreter-specific macros.
+
+class InterpreterMacroAssembler: public MacroAssembler {
+ // MacroAssembler subclass that declares the interpreter's code-emission helpers (dispatch, stack, locals, profiling, JVMTI).
+ protected:
+ // Interpreter specific version of call_VM_base().
+ virtual void call_VM_leaf_base(address entry_point);
+ virtual void call_VM_leaf_base(address entry_point, bool allow_relocation);
+
+ virtual void call_VM_base(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ bool check_exceptions);
+ virtual void call_VM_base(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ bool allow_relocation,
+ bool check_exceptions);
+
+ virtual void check_and_handle_popframe(Register java_thread);
+ virtual void check_and_handle_earlyret(Register java_thread);
+
+ // Base routine for all dispatches.
+ void dispatch_base(TosState state, address* table);
+
+ public:
+ InterpreterMacroAssembler(CodeBuffer* c)
+ : MacroAssembler(c) {}
+
+ void jump_to_entry(address entry, Register Rscratch);
+
+ virtual void load_earlyret_value(TosState state);
+
+ static const Address l_tmp;
+ static const Address d_tmp;
+
+ // Handy address generation macros.
+#define thread_(field_name) Address(Z_thread, JavaThread::field_name ## _offset())
+#define method_(field_name) Address(Z_method, Method::field_name ## _offset())
+#define method2_(Rmethod, field_name) Address(Rmethod, Method::field_name ## _offset())
+
+ // Helper routine for frame allocation/deallocation.
+ // Compute the delta by which the caller's SP has to
+ // be adjusted to accommodate the non-argument locals.
+ void compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta);
+
+ // dispatch routines
+ void dispatch_prolog(TosState state, int step = 0);
+ void dispatch_epilog(TosState state, int step = 0);
+ void dispatch_only(TosState state);
+ // Dispatch normal table via Z_bytecode (assume Z_bytecode is loaded already).
+ void dispatch_only_normal(TosState state);
+ void dispatch_normal(TosState state);
+ void dispatch_next(TosState state, int step = 0);
+ void dispatch_next_noverify_oop(TosState state, int step = 0);
+ void dispatch_via(TosState state, address* table);
+
+ // Jump to an invoked target.
+ void prepare_to_jump_from_interpreted(Register method);
+ void jump_from_interpreted(Register method, Register temp);
+
+ // Removes the current activation (incl. unlocking of monitors).
+ // Additionally this code is used for earlyReturn in which case we
+ // want to skip throwing an exception and installing an exception.
+ void remove_activation(TosState state,
+ Register return_pc,
+ bool throw_monitor_exception = true,
+ bool install_monitor_exception = true,
+ bool notify_jvmti = true);
+
+ public:
+ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls.
+ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+ void super_call_VM(Register thread_cache, Register oop_result, Register last_java_sp,
+ address entry_point, Register arg_1, Register arg_2, bool check_exception = true);
+
+ // Generate a subtype check: branch to ok_is_subtype if sub_klass is
+ // a subtype of super_klass. Blows registers tmp1, tmp2 (NOTE(review): comment used to list a tmp3; no such parameter).
+ void gen_subtype_check(Register sub_klass, Register super_klass, Register tmp1, Register tmp2, Label &ok_is_subtype);
+
+ void get_cache_and_index_at_bcp(Register cache, Register cpe_offset, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register cpe_offset, Register bytecode,
+ int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2));
+ void load_resolved_reference_at_index(Register result, Register index);
+
+ // Pop topmost element from stack. It just disappears. Useful if
+ // consumed previously by access via stackTop().
+ void popx(int len);
+ void pop_i() { popx(1); }
+ void pop_ptr() { popx(1); }
+ void pop_l() { popx(2); }
+ void pop_f() { popx(1); }
+ void pop_d() { popx(2); }
+ // Get Address object of stack top. No checks. No pop.
+ // Purpose: provide address of stack operand to exploit reg-mem operations.
+ // Avoid RISC-like mem2reg - reg-reg-op sequence.
+ Address stackTop();
+
+ // Helpers for expression stack.
+ void pop_i( Register r);
+ void pop_ptr( Register r);
+ void pop_l( Register r);
+ void pop_f(FloatRegister f);
+ void pop_d(FloatRegister f);
+
+ void push_i( Register r = Z_tos);
+ void push_ptr( Register r = Z_tos);
+ void push_l( Register r = Z_tos);
+ void push_f(FloatRegister f = Z_ftos);
+ void push_d(FloatRegister f = Z_ftos);
+
+ // Helpers for swap and dup.
+ void load_ptr(int n, Register val);
+ void store_ptr(int n, Register val);
+
+ void pop (TosState state); // transition vtos -> state
+ void push(TosState state); // transition state -> vtos
+ void empty_expression_stack(void);
+
+#ifdef ASSERT
+ void verify_sp(Register Rsp, Register Rtemp);
+ void verify_esp(Register Resp, Register Rtemp); // Verify that Resp points to a word in the operand stack.
+#endif // ASSERT
+
+ public:
+ void if_cmp(Condition cc, bool ptr_compare);
+
+ // Accessors to the template interpreter state.
+
+ void asm_assert_ijava_state_magic(Register tmp) PRODUCT_RETURN;
+
+ void save_bcp();
+
+ void restore_bcp();
+
+ void save_esp();
+
+ void restore_esp();
+
+ void get_monitors(Register reg);
+
+ void save_monitors(Register reg);
+
+ void get_mdp(Register mdp);
+
+ void save_mdp(Register mdp);
+
+ // Values that are only read (besides initialization).
+ void restore_locals();
+
+ void get_method(Register reg);
+
+ // Load values from bytecode stream:
+
+ enum signedOrNot { Signed, Unsigned };
+ enum setCCOrNot { set_CC, dont_set_CC };
+
+ void get_2_byte_integer_at_bcp(Register Rdst,
+ int bcp_offset,
+ signedOrNot is_signed );
+
+ void get_4_byte_integer_at_bcp(Register Rdst,
+ int bcp_offset,
+ setCCOrNot should_set_CC = dont_set_CC);
+
+ // common code
+
+ void field_offset_at(int n, Register tmp, Register dest, Register base);
+ int field_offset_at(Register object, address bcp, int offset);
+ void fast_iaaccess(int n, address bcp);
+ void fast_iaputfield(address bcp, bool do_store_check);
+
+ void index_check(Register array, Register index, int index_shift, Register tmp, Register res);
+ void index_check_without_pop(Register array, Register index, int index_shift, Register tmp, Register res);
+
+ void get_constant_pool(Register Rdst);
+ void get_constant_pool_cache(Register Rdst);
+ void get_cpool_and_tags(Register Rcpool, Register Rtags);
+ void is_a(Label& L);
+
+
+ // --------------------------------------------------
+
+ void unlock_if_synchronized_method(TosState state, bool throw_monitor_exception = true, bool install_monitor_exception = true);
+
+ void add_monitor_to_stack(bool stack_is_empty,
+ Register Rtemp,
+ Register Rtemp2,
+ Register Rtemp3);
+
+ void access_local_int(Register index, Register dst);
+ void access_local_ptr(Register index, Register dst);
+ void access_local_long(Register index, Register dst);
+ void access_local_float(Register index, FloatRegister dst);
+ void access_local_double(Register index, FloatRegister dst);
+#ifdef ASSERT
+ void check_for_regarea_stomp(Register Rindex, int offset, Register Rlimit, Register Rscratch, Register Rscratch1);
+#endif // ASSERT
+ void store_local_int(Register index, Register src);
+ void store_local_ptr(Register index, Register src);
+ void store_local_long(Register index, Register src);
+ void store_local_float(Register index, FloatRegister src);
+ void store_local_double(Register index, FloatRegister src);
+
+
+ Address first_local_in_stack();
+ static int top_most_monitor_byte_offset(); // Offset in bytes to top of monitor block.
+ Address top_most_monitor();
+ void compute_stack_base(Register Rdest);
+
+ enum LoadOrStore { load, store };
+ void static_iload_or_store(int which_local, LoadOrStore direction, Register Rtmp);
+ void static_aload_or_store(int which_local, LoadOrStore direction, Register Rtmp);
+ void static_dload_or_store(int which_local, LoadOrStore direction);
+
+ void static_iinc( int which_local, jint increment, Register Rtmp, Register Rtmp2);
+
+ void get_method_counters(Register Rmethod, Register Rcounters, Label& skip);
+ void increment_invocation_counter(Register Rcounters, Register RctrSum);
+ void increment_backedge_counter(Register Rcounters, Register RctrSum);
+ void test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp);
+
+ void record_static_call_in_profile(Register Rentry, Register Rtmp);
+ void record_receiver_call_in_profile(Register Rklass, Register Rentry, Register Rtmp);
+
+ // Object locking
+ void lock_object (Register lock_reg, Register obj_reg);
+ void unlock_object(Register lock_reg, Register obj_reg=noreg);
+
+ // Interpreter profiling operations
+ void set_method_data_pointer_for_bcp();
+ void test_method_data_pointer(Register mdp, Label& zero_continue);
+ void verify_method_data_pointer();
+
+ void set_mdp_data_at(Register mdp_in, int constant, Register value);
+ void increment_mdp_data_at(Register mdp_in, int constant,
+ Register tmp = Z_R1_scratch, bool decrement = false);
+ void increment_mask_and_jump(Address counter_addr,
+ int increment, Address mask,
+ Register scratch, bool preloaded,
+ branch_condition cond, Label* where);
+ void set_mdp_flag_at(Register mdp_in, int flag_constant);
+ void test_mdp_data_at(Register mdp_in, int offset, Register value,
+ Register test_value_out,
+ Label& not_equal_continue);
+
+ void record_klass_in_profile(Register receiver, Register mdp,
+ Register reg2, bool is_virtual_call);
+ void record_klass_in_profile_helper(Register receiver, Register mdp,
+ Register reg2, int start_row,
+ Label& done, bool is_virtual_call);
+
+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
+ void update_mdp_by_offset(Register mdp_in, Register dataidx, int offset_of_disp);
+ void update_mdp_by_constant(Register mdp_in, int constant);
+ void update_mdp_for_ret(Register return_bci);
+
+ void profile_taken_branch(Register mdp, Register bumped_count);
+ void profile_not_taken_branch(Register mdp);
+ void profile_call(Register mdp);
+ void profile_final_call(Register mdp);
+ void profile_virtual_call(Register receiver, Register mdp,
+ Register scratch2,
+ bool receiver_can_be_null = false);
+ void profile_ret(Register return_bci, Register mdp);
+ void profile_null_seen(Register mdp);
+ void profile_typecheck(Register mdp, Register klass, Register scratch);
+ void profile_typecheck_failed(Register mdp, Register tmp);
+ void profile_switch_default(Register mdp);
+ void profile_switch_case(Register index_in_scratch, Register mdp,
+ Register scratch1, Register scratch2);
+
+ void profile_obj_type(Register obj, Address mdo_addr, Register klass, bool cmp_done = false);
+ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual);
+ void profile_return_type(Register mdp, Register ret, Register tmp);
+ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2);
+
+ // Debugging
+ void verify_oop(Register reg, TosState state = atos); // Only if +VerifyOops && state == atos.
+ void verify_oop_or_return_address(Register reg, Register rtmp); // for astore
+ void verify_FPU(int stack_depth, TosState state = ftos);
+
+ // JVMTI helpers
+ void skip_if_jvmti_mode(Label &Lskip, Register Rscratch = Z_R0);
+
+ // support for JVMTI/Dtrace
+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
+ void notify_method_entry();
+ void notify_method_exit(bool native_method, TosState state, NotifyMethodExitMode mode);
+
+ // Pop the topmost TOP_IJAVA_FRAME and set its sender_sp as new Z_SP.
+ // The return pc is loaded into the Register return_pc.
+ void pop_interpreter_frame(Register return_pc, Register tmp1, Register tmp2);
+};
+
+#endif // CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/interpreterRT_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/signature.hpp"
+
+// Access macro for Java arguments: slot 'index' is addressed at Z_R1 (locals) minus index * wordSize.
+// First Java argument is at index-1.
+#define locals_j_arg_at(index) Address(Z_R1/*locals*/, in_ByteSize((-(index)*wordSize)))
+// Shorthand for emitting through _masm in the generator code below; removed again by the #undef further down.
+#define __ _masm->
+
+static int sp_c_int_arg_offset(int arg_nr, int fp_arg_nr) {
+  // arg_nr and fp_arg_nr start with 1, so the integer argument index starts with 0.
+  const int int_idx = arg_nr - fp_arg_nr;
+  if (int_idx >= 5) {
+    // Slot within the remaining_cargs area, which also holds fp arguments beyond the fourth.
+    const int fp_overflow = (fp_arg_nr > 4) ? (fp_arg_nr - 4) : 0;
+    return (int_idx - 5 + fp_overflow) * wordSize + _z_abi(remaining_cargs);
+  }
+  return int_idx * wordSize + _z_abi(carg_1);
+}
+
+static int sp_c_fp_arg_offset(int arg_nr, int fp_arg_nr) {
+  // arg_nr and fp_arg_nr start with 1, so the integer argument index starts with 0.
+  if (fp_arg_nr >= 5) {
+    // Slot within the remaining_cargs area, which also holds integer arguments beyond index 4.
+    const int int_idx = arg_nr - fp_arg_nr;
+    const int int_overflow = (int_idx > 4) ? (int_idx - 4) : 0;
+    return (fp_arg_nr - 5 + int_overflow) * wordSize + _z_abi(remaining_cargs);
+  }
+  return (fp_arg_nr - 1) * wordSize + _z_abi(cfarg_1);
+}
+
+// Implementation of SignatureHandlerGenerator
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
+  int int_arg_nr = jni_offset() - _fp_arg_nr;
+  // Target is the register at encoding Z_ARG1 + int_arg_nr for the first five; later arguments go through Z_R0.
+  Register r = (int_arg_nr < 5 /*max_int_register_arguments*/) ?
+                 as_Register(int_arg_nr + Z_ARG1->encoding()) : Z_R0;
+  __ z_lgf(r, locals_j_arg_at(offset()));
+  if (DEBUG_ONLY(true ||) int_arg_nr >= 5) {  // Debug builds always write the stack slot as well.
+    __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP);
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
+  int int_arg_nr = jni_offset() - _fp_arg_nr;
+  // Target is the register at encoding Z_ARG1 + int_arg_nr for the first five; later arguments go through Z_R0.
+  Register r = (int_arg_nr < 5 /*max_int_register_arguments*/) ?
+                 as_Register(int_arg_nr + Z_ARG1->encoding()) : Z_R0;
+  __ z_lg(r, locals_j_arg_at(offset() + 1)); // Long resides in upper slot.
+  if (DEBUG_ONLY(true ||) int_arg_nr >= 5) {  // Debug builds always write the stack slot as well.
+    __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP);
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
+  FloatRegister fp_reg = Z_F1;  // Used once the four fp argument registers are taken.
+  if (_fp_arg_nr < 4 /*max_fp_register_arguments*/) { fp_reg = as_FloatRegister(Z_FARG1->encoding() + (_fp_arg_nr * 2)); }
+  _fp_arg_nr++;  // From here on _fp_arg_nr counts this argument too.
+  __ z_ley(fp_reg, locals_j_arg_at(offset()));
+  if (DEBUG_ONLY(true ||) _fp_arg_nr > 4) {
+    __ z_ste(fp_reg, sp_c_fp_arg_offset(jni_offset(), _fp_arg_nr) + 4, Z_SP);
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
+  FloatRegister fp_reg = Z_F1;  // Used once the four fp argument registers are taken.
+  if (_fp_arg_nr < 4 /*max_fp_register_arguments*/) { fp_reg = as_FloatRegister(Z_FARG1->encoding() + (_fp_arg_nr * 2)); }
+  _fp_arg_nr++;  // From here on _fp_arg_nr counts this argument too.
+  __ z_ldy(fp_reg, locals_j_arg_at(offset() + 1));
+  if (DEBUG_ONLY(true ||) _fp_arg_nr > 4) {
+    __ z_std(fp_reg, sp_c_fp_arg_offset(jni_offset(), _fp_arg_nr), Z_SP);
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
+  int int_arg_nr = jni_offset() - _fp_arg_nr;
+  Register r = (int_arg_nr < 5 /*max_int_register_arguments*/) ?
+                 as_Register(int_arg_nr + Z_ARG1->encoding()) : Z_R0;
+
+  // The handle for a receiver will never be null.
+  bool do_NULL_check = offset() != 0 || is_static();
+  // The object is passed as the address of its local slot, or as zero when that slot holds null.
+  Label do_null;
+  if (do_NULL_check) {
+    __ clear_reg(r, true, false);
+    __ load_and_test_long(Z_R0, locals_j_arg_at(offset()));
+    __ z_bre(do_null);  // Slot is null: keep the cleared r.
+  }
+  __ add2reg(r, -offset() * wordSize, Z_R1 /* locals */);
+  __ bind(do_null);
+  if (DEBUG_ONLY(true ||) int_arg_nr >= 5) {  // Debug builds always write the stack slot as well.
+    __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP);
+  }
+}
+
+
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
+ __ z_lgr(Z_R1, Z_ARG1); // Z_R1 is used in locals_j_arg_at(index) macro.
+ // Emitted handler: copy Z_ARG1 to Z_R1, handle each argument, load the result handler into Z_RET, branch via Z_R14.
+ // Generate code to handle arguments.
+ iterate(fingerprint); // NOTE(review): presumably invokes the pass_*() methods per argument - confirm in NativeSignatureIterator.
+ __ load_const_optimized(Z_RET, AbstractInterpreter::result_handler(method()->result_type()));
+ __ z_br(Z_R14); // Branch to the address held in Z_R14.
+ __ flush();
+}
+
+#undef __
+
+// Implementation of SignatureHandlerLibrary
+// pd_set_handler has an empty body here: nothing is done with the handler address.
+void SignatureHandlerLibrary::pd_set_handler(address handler) {}
+
+IRT_ENTRY(address, InterpreterRuntime::get_signature(JavaThread* thread, Method* method))
+  // Returns the base() of the native method's signature symbol, cast to address.
+  methodHandle native_method(thread, method);
+  assert(native_method->is_native(), "sanity check");
+  return (address) native_method->signature()->base();
+IRT_END
+
+IRT_ENTRY(address, InterpreterRuntime::get_result_handler(JavaThread* thread, Method* method))
+  methodHandle native_method(thread, method);  // Look up the result handler matching the method's result type.
+  assert(native_method->is_native(), "sanity check");
+  return AbstractInterpreter::result_handler(native_method->result_type());
+IRT_END
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/interpreterRT_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_INTERPRETERRT_S390_HPP
+#define CPU_S390_VM_INTERPRETERRT_S390_HPP
+
+#include "memory/allocation.hpp"
+
+// Declarations only; the definitions are not part of this header.
+// NOTE(review): presumably implemented by the shared interpreter runtime - confirm.
+static int binary_search(int key, LookupswitchPair* array, int n);
+
+// Per-bytecode helpers, each taking the current thread and returning an address.
+static address iload (JavaThread* thread);
+static address aload (JavaThread* thread);
+static address istore(JavaThread* thread);
+static address astore(JavaThread* thread);
+static address iinc (JavaThread* thread);
+
+// native method calls
+
+// Emits signature handler code for a native method. Argument traversal is
+// inherited from NativeSignatureIterator; the pass_* members are implemented
+// by this class and generate() emits the complete handler.
+class SignatureHandlerGenerator: public NativeSignatureIterator {
+ private:
+  MacroAssembler* _masm;       // Assembler created on the caller's CodeBuffer.
+  int             _fp_arg_nr;  // Starts at 0; incremented as floating-point arguments are passed.
+
+  // One emitter per argument kind.
+  void pass_int();
+  void pass_long();
+  void pass_double();
+  void pass_float();
+  void pass_object();
+
+ public:
+  // Creates a generator for 'method' that emits into 'buffer'.
+  SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer)
+    : NativeSignatureIterator(method),
+      _masm(new MacroAssembler(buffer)),
+      _fp_arg_nr(0) {
+  }
+
+  // Emits the handler code for the given signature fingerprint.
+  void generate(uint64_t fingerprint);
+};
+
+// Returns the interpreter result handler for the method's result type
+// (defined in interpreterRT_s390.cpp; the method must be native).
+static address get_result_handler(JavaThread* thread, Method* method);
+
+// Returns the address of the method's signature symbol data
+// (defined in interpreterRT_s390.cpp; the method must be native).
+static address get_signature(JavaThread* thread, Method* method);
+
+#endif // CPU_S390_VM_INTERPRETERRT_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/javaFrameAnchor_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP
+#define CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP
+
+ public:
+
+ // Each arch must define reset, save, restore.
+ // These are used by objects that only care about:
+ // 1 - initializing a new state (thread creation, javaCalls)
+ // 2 - saving a current state (javaCalls)
+ // 3 - restoring an old state (javaCalls).
+
+ // Resets the anchor: _last_Java_sp is set to NULL first (preceded by a
+ // release and followed by a full fence), and only then is _last_Java_pc
+ // set to NULL.
+ inline void clear(void) {
+ // Clearing _last_Java_sp must be first.
+ OrderAccess::release();
+ _last_Java_sp = NULL;
+ // Fence?
+ OrderAccess::fence();
+
+ _last_Java_pc = NULL;
+ }
+
+ // Records a new anchor. The pc is written first; the sp is written last,
+ // after a release, which is the reverse of the order used by clear().
+ inline void set(intptr_t* sp, address pc) {
+ _last_Java_pc = pc;
+
+ OrderAccess::release();
+ _last_Java_sp = sp;
+ }
+
+ // Copies the anchor state of 'src' into this anchor. If the sp differs,
+ // _last_Java_sp is set to NULL first (release before, fence after); then
+ // the pc is copied, and finally the sp is copied after a release.
+ void copy(JavaFrameAnchor* src) {
+ // In order to make sure the transition state is valid for "this"
+ // we must clear _last_Java_sp before copying the rest of the new data.
+ // Hack Alert: Temporary bugfix for 4717480/4721647
+ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp
+ // unless the value is changing.
+ //
+ if (_last_Java_sp != src->_last_Java_sp) {
+ OrderAccess::release();
+ _last_Java_sp = NULL;
+ OrderAccess::fence();
+ }
+ _last_Java_pc = src->_last_Java_pc;
+ // Must be last so profiler will always see valid frame if has_last_frame() is true.
+
+ OrderAccess::release();
+ _last_Java_sp = src->_last_Java_sp;
+ }
+
+  // We don't have to flush registers, so the stack is always walkable.
+  inline bool walkable(void) {
+    return true;
+  }
+
+  // Nothing to do, because walkable() is always true.
+  inline void make_walkable(JavaThread* thread) {
+  }
+
+ public:
+
+  // We don't have a frame pointer, so this always yields NULL.
+  intptr_t* last_Java_fp(void) {
+    return NULL;
+  }
+
+  // Current value of the recorded Java stack pointer.
+  intptr_t* last_Java_sp() const {
+    return _last_Java_sp;
+  }
+
+  // Stores a new Java stack pointer; a release is issued before the store.
+  void set_last_Java_sp(intptr_t* sp) {
+    OrderAccess::release();
+    _last_Java_sp = sp;
+  }
+
+  // Current value of the recorded Java pc.
+  address last_Java_pc(void) {
+    return _last_Java_pc;
+  }
+
+#endif // CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/jniFastGetField_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+
+// TSO ensures that loads are blocking and ordered with respect to
+// earlier loads, so we don't need LoadLoad membars.
+
+#define __ masm->
+
+#define BUFFER_SIZE 30*sizeof(jint)
+
+// Common generator for the integer-family accessors. No stub is generated
+// here: (address) -1 is returned regardless of 'type'.
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
+  // Don't use fast jni accessors.
+  const address no_fast_accessor = (address) -1;
+  return no_fast_accessor;
+}
+
+// Boolean accessor: delegates to the integer-family generator with T_BOOLEAN.
+address JNI_FastGetField::generate_fast_get_boolean_field() {
+  const BasicType field_type = T_BOOLEAN;
+  return generate_fast_get_int_field0(field_type);
+}
+
+// Byte accessor: delegates to the integer-family generator with T_BYTE.
+address JNI_FastGetField::generate_fast_get_byte_field() {
+  const BasicType field_type = T_BYTE;
+  return generate_fast_get_int_field0(field_type);
+}
+
+// Char accessor: delegates to the integer-family generator with T_CHAR.
+address JNI_FastGetField::generate_fast_get_char_field() {
+  const BasicType field_type = T_CHAR;
+  return generate_fast_get_int_field0(field_type);
+}
+
+// Short accessor: delegates to the integer-family generator with T_SHORT.
+address JNI_FastGetField::generate_fast_get_short_field() {
+  const BasicType field_type = T_SHORT;
+  return generate_fast_get_int_field0(field_type);
+}
+
+// Int accessor: delegates to the integer-family generator with T_INT.
+address JNI_FastGetField::generate_fast_get_int_field() {
+  const BasicType field_type = T_INT;
+  return generate_fast_get_int_field0(field_type);
+}
+
+// Long accessor. No stub is generated here: (address) -1 is returned.
+address JNI_FastGetField::generate_fast_get_long_field() {
+  // Don't use fast jni accessors.
+  const address no_fast_accessor = (address) -1;
+  return no_fast_accessor;
+}
+
+// Common generator for the floating-point accessors. No stub is generated
+// here: (address) -1 is returned regardless of 'type'.
+address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
+  // Don't use fast jni accessors.
+  const address no_fast_accessor = (address) -1;
+  return no_fast_accessor;
+}
+
+// Float accessor: delegates to the floating-point generator with T_FLOAT.
+address JNI_FastGetField::generate_fast_get_float_field() {
+  const BasicType field_type = T_FLOAT;
+  return generate_fast_get_float_field0(field_type);
+}
+
+// Double accessor: delegates to the floating-point generator with T_DOUBLE.
+address JNI_FastGetField::generate_fast_get_double_field() {
+  const BasicType field_type = T_DOUBLE;
+  return generate_fast_get_float_field0(field_type);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/jniTypes_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_JNITYPES_S390_HPP
+#define CPU_S390_VM_JNITYPES_S390_HPP
+
+// This file holds platform-dependent routines used to write primitive
+// jni types to the array of arguments passed into JavaCalls::call.
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+#include "prims/jni.h"
+
+class JNITypes : AllStatic {
+  // The put_xxx functions write a Java primitive value (in native format)
+  // into a Java stack slot array that is passed as argument list to
+  // JavaCalls::call. Overloads with a 'pos' parameter act like a push:
+  // they write relative to slot index 'pos' and then advance 'pos'.
+  // Note that jlongs and jdoubles are written _in reverse_ of the order in
+  // which they appear in the interpreter stack. This is because call stubs
+  // (see stubGenerator_s390.cpp) reverse the argument list constructed by
+  // JavaCallArguments (see javaCalls.hpp).
+
+ public:
+  // ----- jint: written at the slot address itself, 'pos' advances by one -----
+  static inline void put_int(jint from, intptr_t *to) {
+    jint* slot = (jint*) to;
+    *slot = from;
+  }
+
+  static inline void put_int(jint from, intptr_t *to, int& pos) {
+    put_int(from, to + pos);
+    pos++;
+  }
+
+  static inline void put_int(jint *from, intptr_t *to, int& pos) {
+    put_int(*from, to, pos);
+  }
+
+  // ----- jlong: written one slot further on (to + 1), 'pos' advances by two -----
+  static inline void put_long(jlong from, intptr_t *to) {
+    jlong* slot = (jlong*) (to + 1);
+    *slot = from;
+  }
+
+  static inline void put_long(jlong from, intptr_t *to, int& pos) {
+    put_long(from, to + pos);
+    pos += 2;
+  }
+
+  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
+    put_long(*from, to, pos);
+  }
+
+  // ----- oop: written at the slot address itself, 'pos' advances by one -----
+  static inline void put_obj(oop from, intptr_t *to) {
+    oop* slot = (oop*) to;
+    *slot = from;
+  }
+
+  static inline void put_obj(oop from, intptr_t *to, int& pos) {
+    oop* slot = (oop*) (to + pos);
+    pos++;
+    *slot = from;
+  }
+
+  static inline void put_obj(oop *from, intptr_t *to, int& pos) {
+    oop* slot = (oop*) (to + pos);
+    pos++;
+    *slot = *from;
+  }
+
+  // ----- jfloat: written at the slot address itself, 'pos' advances by one -----
+  static inline void put_float(jfloat from, intptr_t *to) {
+    jfloat* slot = (jfloat*) to;
+    *slot = from;
+  }
+
+  static inline void put_float(jfloat from, intptr_t *to, int& pos) {
+    put_float(from, to + pos);
+    pos++;
+  }
+
+  static inline void put_float(jfloat *from, intptr_t *to, int& pos) {
+    put_float(*from, to, pos);
+  }
+
+  // ----- jdouble: written one slot further on (to + 1), 'pos' advances by two -----
+  static inline void put_double(jdouble from, intptr_t *to) {
+    jdouble* slot = (jdouble*) (to + 1);
+    *slot = from;
+  }
+
+  static inline void put_double(jdouble from, intptr_t *to, int& pos) {
+    put_double(from, to + pos);
+    pos += 2;
+  }
+
+  static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
+    put_double(*from, to, pos);
+  }
+
+  // The get_xxx routines, on the other hand, actually _do_ fetch
+  // java primitive types from the interpreter stack. They mirror the
+  // put_xxx layout: jlong and jdouble are read from (from + 1), all
+  // other types from the slot address itself.
+  // No need to worry about alignment on z/Architecture.
+  static inline jint get_int(intptr_t *from) {
+    const jint* slot = (jint*) from;
+    return *slot;
+  }
+
+  static inline jlong get_long(intptr_t *from) {
+    const jlong* slot = (jlong*) (from + 1);
+    return *slot;
+  }
+
+  static inline oop get_obj(intptr_t *from) {
+    return *(oop*) from;
+  }
+
+  static inline jfloat get_float(intptr_t *from) {
+    const jfloat* slot = (jfloat*) from;
+    return *slot;
+  }
+
+  static inline jdouble get_double(intptr_t *from) {
+    const jdouble* slot = (jdouble*) (from + 1);
+    return *slot;
+  }
+};
+
+#endif // CPU_S390_VM_JNITYPES_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/jni_s390.h Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef _JAVASOFT_JNI_MD_H_
+#define _JAVASOFT_JNI_MD_H_
+
+// Export/import markers: GCC 4 and newer mark symbols with default
+// visibility; for any other compiler both macros expand to nothing.
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+ #define JNIEXPORT __attribute__((visibility("default")))
+ #define JNIIMPORT __attribute__((visibility("default")))
+#else
+ #define JNIEXPORT
+ #define JNIIMPORT
+#endif
+
+// No calling-convention decoration is applied: JNICALL expands to nothing.
+#define JNICALL
+
+// Platform mapping of the basic JNI integer types to C types.
+typedef int jint;
+
+typedef long int jlong;
+
+typedef signed char jbyte;
+
+#endif // _JAVASOFT_JNI_MD_H_
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/jvmciCodeInstaller_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// JVMCI (JEP 243):
+// So far, the JVMCI is not supported/implemented on SAP platforms.
+// This file just serves as a placeholder which may be filled with life
+// should the JVMCI ever be implemented.
+#if INCLUDE_JVMCI
+
+#include "jvmci/jvmciCodeInstaller.hpp"
+#include "jvmci/jvmciRuntime.hpp"
+#include "jvmci/jvmciCompilerToVM.hpp"
+#include "jvmci/jvmciJavaClasses.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_s390.inline.hpp"
+
+// Placeholder: not implemented on s390. Calls Unimplemented(); the trailing
+// return only satisfies the jint return type.
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+ Unimplemented();
+ return 0;
+}
+
+// Placeholder: patching of oop constants is not implemented on s390.
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+ Unimplemented();
+}
+
+// Placeholder: patching of metaspace constants is not implemented on s390.
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+ Unimplemented();
+}
+
+// Placeholder: patching of data section references is not implemented on
+// s390. This function must be defined exactly once; a second definition with
+// the same signature is a redefinition error when INCLUDE_JVMCI is enabled.
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
+  Unimplemented();
+}
+
+// Placeholder: code blob relocation is not implemented on s390.
+void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
+ Unimplemented();
+}
+
+// Placeholder: foreign call relocation is not implemented on s390.
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+ Unimplemented();
+}
+
+// Placeholder: Java method call relocation is not implemented on s390.
+void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+ Unimplemented();
+}
+
+// Placeholder: poll relocation is not implemented on s390.
+void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+ Unimplemented();
+}
+
+// Convert JVMCI register indices (as used in oop maps) to HotSpot registers.
+// Placeholder: no mapping exists on s390, so NULL is returned for every index.
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+ return NULL;
+}
+
+// Placeholder: reports false for every register on s390.
+bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
+ return false;
+}
+
+#endif // INCLUDE_JVMCI
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,6691 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/codeBuffer.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
+#include "oops/klass.inline.hpp"
+#include "opto/compile.hpp"
+#include "opto/intrinsicnode.hpp"
+#include "opto/matcher.hpp"
+#include "prims/methodHandles.hpp"
+#include "registerSaver_s390.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/events.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif
+
+#include <ucontext.h>
+
+#define BLOCK_COMMENT(str) block_comment(str)
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// 32-bit register move (z_lr) from rs to rd. Emits nothing when rd and rs
+// are the same register.
+void MacroAssembler::lr_if_needed(Register rd, Register rs) {
+  if (rd == rs) {
+    return;  // Same register: nothing to move.
+  }
+  z_lr(rd, rs);
+}
+
+// Full register move (z_lgr) from rs to rd. Emits nothing when rd and rs
+// are the same register.
+void MacroAssembler::lgr_if_needed(Register rd, Register rs) {
+  if (rd == rs) {
+    return;  // Same register: nothing to move.
+  }
+  z_lgr(rd, rs);
+}
+
+// Zero-extend the 32-bit value in rs into the 64-bit register rd (z_llgfr).
+// Emits nothing when rd and rs are the same register, so in that case no
+// zero-extension takes place.
+void MacroAssembler::llgfr_if_needed(Register rd, Register rs) {
+  if (rd == rs) {
+    return;  // Same register: no instruction emitted.
+  }
+  z_llgfr(rd, rs);
+}
+
+// Float register move (z_ldr) from rs to rd. Emits nothing when rd and rs
+// are the same register.
+void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) {
+  if (rd == rs) {
+    return;  // Same register: nothing to move.
+  }
+  z_ldr(rd, rs);
+}
+
+// Move integer register if destination and source are different.
+// It is assumed that shorter-than-int types are already
+// appropriately sign-extended.
+//
+// dst/src are the registers, dst_type/src_type the types of the value before
+// and after the move. Equal types result in a plain 64-bit move (if needed).
+// Differing types select a conversion by destination type:
+//   T_BYTE, T_CHAR, T_SHORT: only T_INT is accepted as source type,
+//   T_INT:                   32-bit move (if needed) from any integer-like type,
+//   T_LONG:                  sign extension from sub-long types, else 64-bit move,
+//   pointer-like types:      zero extension from T_INT, else 64-bit move.
+// Float types must not be passed (see move_freg_if_needed).
+void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src,
+ BasicType src_type) {
+ assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types");
+ assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types");
+
+ if (dst_type == src_type) {
+ lgr_if_needed(dst, src); // Just move all 64 bits.
+ return;
+ }
+
+ switch (dst_type) {
+ // Do not support these types for now.
+ // case T_BOOLEAN:
+ case T_BYTE: // signed byte
+ switch (src_type) {
+ case T_INT:
+ // Emitted unconditionally, also when dst == src.
+ z_lgbr(dst, src);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ return;
+
+ case T_CHAR:
+ case T_SHORT:
+ switch (src_type) {
+ case T_INT:
+ // T_CHAR uses z_llghr, T_SHORT uses z_lghr.
+ if (dst_type == T_CHAR) {
+ z_llghr(dst, src);
+ } else {
+ z_lghr(dst, src);
+ }
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ return;
+
+ case T_INT:
+ // Every path of the nested switch returns, so control never falls
+ // through into the T_LONG case below.
+ switch (src_type) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ case T_CHAR:
+ case T_SHORT:
+ case T_INT:
+ case T_LONG:
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_VOID:
+ case T_ADDRESS:
+ lr_if_needed(dst, src);
+ // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug).
+ return;
+
+ default:
+ assert(false, "non-integer src type");
+ return;
+ }
+ case T_LONG:
+ switch (src_type) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ case T_CHAR:
+ case T_SHORT:
+ case T_INT:
+ z_lgfr(dst, src); // sign extension
+ return;
+
+ case T_LONG:
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_VOID:
+ case T_ADDRESS:
+ lgr_if_needed(dst, src);
+ return;
+
+ default:
+ assert(false, "non-integer src type");
+ return;
+ }
+ return;
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_VOID:
+ case T_ADDRESS:
+ switch (src_type) {
+ // These types don't make sense to be converted to pointers:
+ // case T_BOOLEAN:
+ // case T_BYTE:
+ // case T_CHAR:
+ // case T_SHORT:
+
+ case T_INT:
+ z_llgfr(dst, src); // zero extension
+ return;
+
+ case T_LONG:
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_VOID:
+ case T_ADDRESS:
+ lgr_if_needed(dst, src);
+ return;
+
+ default:
+ assert(false, "non-integer src type");
+ return;
+ }
+ return;
+ default:
+ assert(false, "non-integer dst type");
+ return;
+ }
+}
+
+// Move or convert a floating-point value between float registers.
+// Equal types: plain register move, emitted only if dst and src differ.
+// Differing types: conversion instruction selected by the destination type
+// (z_ledbr for a T_FLOAT destination, z_ldebr for a T_DOUBLE destination).
+// Both types must be T_FLOAT or T_DOUBLE.
+void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type,
+                                         FloatRegister src, BasicType src_type) {
+  assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types");
+  assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types");
+
+  if (dst_type == src_type) {
+    ldr_if_needed(dst, src);  // Just move all 64 bits.
+    return;
+  }
+
+  if (dst_type == T_FLOAT) {
+    assert(src_type == T_DOUBLE, "invalid float type combination");
+    z_ledbr(dst, src);
+  } else if (dst_type == T_DOUBLE) {
+    assert(src_type == T_FLOAT, "invalid float type combination");
+    z_ldebr(dst, src);
+  } else {
+    assert(false, "non-float dst type");
+  }
+}
+
+// Optimized emitter for reg to mem operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// Data register (reg) cannot be used as work register.
+//
+// Selection as implemented below:
+//   - disp passes is_shortDisp: classic form,
+//   - disp passes is_validDisp: modern form,
+//   - otherwise the displacement is folded into an address register with the
+//     help of 'scratch', which must be Z_R0 or Z_R1 for this to work.
+// 'modern' and 'classic' are the member functions emitting the two forms.
+//
+// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
+// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
+void MacroAssembler::freg2mem_opt(FloatRegister reg,
+ int64_t disp,
+ Register index,
+ Register base,
+ void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+ void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+ Register scratch) {
+ // A missing index register is passed on as Z_R0.
+ index = (index == noreg) ? Z_R0 : index;
+ if (Displacement::is_shortDisp(disp)) {
+ (this->*classic)(reg, disp, index, base);
+ } else {
+ if (Displacement::is_validDisp(disp)) {
+ (this->*modern)(reg, disp, index, base);
+ } else {
+ if (scratch != Z_R0 && scratch != Z_R1) {
+ (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
+ } else {
+ if (scratch != Z_R0) { // scratch == Z_R1
+ if ((scratch == index) || (index == base)) {
+ (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
+ } else {
+ // scratch = base + disp, then access with scratch as base register.
+ add2reg(scratch, disp, base);
+ (this->*classic)(reg, 0, index, scratch);
+ if (base == scratch) {
+ add2reg(base, -disp); // Restore base.
+ }
+ }
+ } else { // scratch == Z_R0
+ // Save base in scratch, adjust base in place for the access, then restore it.
+ z_lgr(scratch, base);
+ add2reg(base, disp);
+ (this->*classic)(reg, 0, index, base);
+ z_lgr(base, scratch); // Restore base.
+ }
+ }
+ }
+ }
+}
+
+// Store a float register to the memory location described by 'a'.
+// is_double selects the z_stdy/z_std instruction pair, otherwise the
+// z_stey/z_ste pair is used.
+void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) {
+  if (!is_double) {
+    freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste));
+    return;
+  }
+  freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std));
+}
+
+// Optimized emitter for mem to reg operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// data register (reg) cannot be used as work register.
+//
+// Selection as implemented below:
+//   - disp passes is_shortDisp: classic form,
+//   - disp passes is_validDisp: modern form,
+//   - otherwise the displacement is folded into an address register with the
+//     help of 'scratch', which must be Z_R0 or Z_R1 for this to work.
+// 'modern' and 'classic' are the member functions emitting the two forms.
+//
+// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
+// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
+void MacroAssembler::mem2freg_opt(FloatRegister reg,
+ int64_t disp,
+ Register index,
+ Register base,
+ void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+ void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+ Register scratch) {
+ // A missing index register is passed on as Z_R0.
+ index = (index == noreg) ? Z_R0 : index;
+ if (Displacement::is_shortDisp(disp)) {
+ (this->*classic)(reg, disp, index, base);
+ } else {
+ if (Displacement::is_validDisp(disp)) {
+ (this->*modern)(reg, disp, index, base);
+ } else {
+ if (scratch != Z_R0 && scratch != Z_R1) {
+ (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
+ } else {
+ if (scratch != Z_R0) { // scratch == Z_R1
+ if ((scratch == index) || (index == base)) {
+ (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
+ } else {
+ // scratch = base + disp, then access with scratch as base register.
+ add2reg(scratch, disp, base);
+ (this->*classic)(reg, 0, index, scratch);
+ if (base == scratch) {
+ add2reg(base, -disp); // Restore base.
+ }
+ }
+ } else { // scratch == Z_R0
+ // Save base in scratch, adjust base in place for the access, then restore it.
+ z_lgr(scratch, base);
+ add2reg(base, disp);
+ (this->*classic)(reg, 0, index, base);
+ z_lgr(base, scratch); // Restore base.
+ }
+ }
+ }
+ }
+}
+
+// Load a float register from the memory location described by 'a'.
+// is_double selects the z_ldy/z_ld instruction pair, otherwise the
+// z_ley/z_le pair is used.
+void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
+  if (!is_double) {
+    mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
+    return;
+  }
+  mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
+}
+
+// Optimized emitter for reg to mem operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// Data register (reg) cannot be used as work register.
+//
+// Selection as implemented below:
+//   - disp passes is_shortDisp: classic form,
+//   - disp passes is_validDisp: modern form,
+//   - otherwise the displacement is folded into an address register with the
+//     help of 'scratch', which must be Z_R0 or Z_R1 for this to work.
+// 'modern' and 'classic' are the member functions emitting the two forms.
+//
+// Don't rely on register locking, instead pass a scratch register
+// (Z_R0 by default)
+// CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs!
+void MacroAssembler::reg2mem_opt(Register reg,
+ int64_t disp,
+ Register index,
+ Register base,
+ void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+ void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
+ Register scratch) {
+ // A missing index register is passed on as Z_R0.
+ index = (index == noreg) ? Z_R0 : index;
+ if (Displacement::is_shortDisp(disp)) {
+ (this->*classic)(reg, disp, index, base);
+ } else {
+ if (Displacement::is_validDisp(disp)) {
+ (this->*modern)(reg, disp, index, base);
+ } else {
+ if (scratch != Z_R0 && scratch != Z_R1) {
+ (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
+ } else {
+ if (scratch != Z_R0) { // scratch == Z_R1
+ if ((scratch == index) || (index == base)) {
+ (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
+ } else {
+ // scratch = base + disp, then access with scratch as base register.
+ add2reg(scratch, disp, base);
+ (this->*classic)(reg, 0, index, scratch);
+ if (base == scratch) {
+ add2reg(base, -disp); // Restore base.
+ }
+ }
+ } else { // scratch == Z_R0
+ // The save/adjust/restore sequence overwrites scratch and temporarily
+ // changes base, so it is not used when reg, base and scratch overlap.
+ if ((scratch == reg) || (scratch == base) || (reg == base)) {
+ (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
+ } else {
+ z_lgr(scratch, base);
+ add2reg(base, disp);
+ (this->*classic)(reg, 0, index, base);
+ z_lgr(base, scratch); // Restore base.
+ }
+ }
+ }
+ }
+ }
+}
+
+// Store a general register to the memory location described by 'a'.
+// is_double selects z_stg (passed for both the modern and the classic
+// slot), otherwise the z_sty/z_st pair is used.
+// Returns the code offset recorded before anything is emitted.
+int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
+  const int offset_before_store = offset();
+  if (!is_double) {
+    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
+    return offset_before_store;
+  }
+  reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
+  return offset_before_store;
+}
+
+// Optimized emitter for mem to reg operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// Data register (reg) will be used as work register where possible.
+//
+// Selection as implemented below:
+//   - disp passes is_shortDisp: classic form,
+//   - disp passes is_validDisp: modern form,
+//   - otherwise the displacement is folded into a register first; which one
+//     depends on how reg overlaps with index and base.
+void MacroAssembler::mem2reg_opt(Register reg,
+ int64_t disp,
+ Register index,
+ Register base,
+ void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+ void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
+ // A missing index register is passed on as Z_R0.
+ index = (index == noreg) ? Z_R0 : index;
+ if (Displacement::is_shortDisp(disp)) {
+ (this->*classic)(reg, disp, index, base);
+ } else {
+ if (Displacement::is_validDisp(disp)) {
+ (this->*modern)(reg, disp, index, base);
+ } else {
+ if ((reg == index) && (reg == base)) {
+ // Address is reg + reg + disp: double reg, add disp, then load through reg alone.
+ z_sllg(reg, reg, 1);
+ add2reg(reg, disp);
+ (this->*classic)(reg, 0, noreg, reg);
+ } else if ((reg == index) && (reg != Z_R0)) {
+ // reg is the index: fold disp into it and keep base.
+ add2reg(reg, disp);
+ (this->*classic)(reg, 0, reg, base);
+ } else if (reg == base) {
+ // reg is the base: fold disp into it and keep index.
+ add2reg(reg, disp);
+ (this->*classic)(reg, 0, index, reg);
+ } else if (reg != Z_R0) {
+ // reg is unrelated to the address: use it to hold base + disp.
+ add2reg(reg, disp, base);
+ (this->*classic)(reg, 0, index, reg);
+ } else { // reg == Z_R0 && reg != base here
+ // reg is not used as work register here: base is adjusted for the
+ // load and the adjustment is undone afterwards.
+ add2reg(base, disp);
+ (this->*classic)(reg, 0, index, base);
+ add2reg(base, -disp);
+ }
+ }
+ }
+}
+
+// Load a general register from the memory location described by 'a'.
+// is_double emits z_lg directly; otherwise the z_ly/z_l pair is handed to
+// the optimizing emitter.
+void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
+  if (!is_double) {
+    mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
+    return;
+  }
+  z_lg(reg, a);
+}
+
+// Load from the memory location described by 'a' through the optimizing
+// emitter, with z_lgf passed for both the modern and the classic slot.
+void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
+  mem2reg_opt(reg,
+              a.disp20(),
+              a.indexOrR0(),
+              a.baseOrR0(),
+              MODERN_IFUN(z_lgf),
+              CLASSIC_IFUN(z_lgf));
+}
+
+// AND register r with the immediate 'mask'.
+//   wide == false: a single z_nilf is emitted; mask must fit is_simm32.
+//   wide == true:  mask is loaded into tmp and z_ngr is emitted; tmp must
+//                  differ from r.
+void MacroAssembler::and_imm(Register r, long mask,
+                             Register tmp /* = Z_R0 */,
+                             bool wide /* = false */) {
+  assert(wide || Immediate::is_simm32(mask), "mask value too large");
+
+  if (wide) {
+    assert(r != tmp, " need a different temporary register !");
+    load_const_optimized(tmp, mask);
+    z_ngr(r, tmp);
+  } else {
+    z_nilf(r, mask);
+  }
+}
+
+// Calculate the 1's complement of r2 into r1 (in place if r2 is noreg or r1).
+// Note: The condition code is neither preserved nor correctly set by this code!!!
+// Note: (wide == false) does not protect the high order half of the target register
+//       from alteration. It only serves as optimization hint for 32-bit results.
+void MacroAssembler::not_(Register r1, Register r2, bool wide) {
+  const bool in_place = (r2 == noreg) || (r2 == r1);
+
+  if (in_place) {
+    // Flip the bits with XOR immediates: z_xilf always, z_xihf only if wide.
+    z_xilf(r1, -1);
+    if (wide) {
+      z_xihf(r1, -1);
+    }
+    return;
+  }
+
+  // Distinct src and dst registers.
+  if (VM_Version::has_DistinctOpnds()) {
+    // Load -1 into r1, then XOR r2 with it into r1.
+    load_const_optimized(r1, -1);
+    z_xgrk(r1, r2, r1);
+    return;
+  }
+
+  // Copy the source first, then complement in place.
+  if (wide) {
+    z_lgr(r1, r2);
+  } else {
+    z_lr(r1, r2);
+  }
+  z_xilf(r1, -1);
+  if (wide) {
+    z_xihf(r1, -1);
+  }
+}
+
+unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) { // Returns a mask with bits lBitPos..rBitPos set, where bit 0 is the leftmost and bit 63 the rightmost bit.
+ assert(lBitPos >= 0, "zero is leftmost bit position");
+ assert(rBitPos <= 63, "63 is rightmost bit position");
+ assert(lBitPos <= rBitPos, "inverted selection interval");
+ return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1)); // (Ones from lBitPos through 63) & (ones from 0 through rBitPos).
+}
+
+// Helper function for the "Rotate_then_<logicalOP>" emitters.
+// Rotate src by nRotate into dst, then mask dst such that only the bits in range lBitPos..rBitPos survive.
+// For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
+// For oneBits == true, all bits not in range are set to 1. Useful for preserving all bits outside range.
+// The caller must ensure that the selected range only contains bits with defined value.
+void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
+ int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
+ assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
+ bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
+ bool srl4rll = (nRotate < 0) && (-nRotate <= lBitPos); // Substitute SRL(G) for RLL(G).
+ // Pre-determine which parts of dst will be zero after shift/rotate.
+ bool llZero = sll4rll && (nRotate >= 16); // ll/lh/hl/hh: the four halfwords of dst, low to high.
+ bool lhZero = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
+ bool lfZero = llZero && lhZero; // Entire low word known to be zero.
+ bool hlZero = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
+ bool hhZero = (srl4rll && (nRotate <= -16));
+ bool hfZero = hlZero && hhZero; // Entire high word known to be zero.
+
+ // rotate then mask src operand.
+ // if oneBits == true, all bits outside selected range are 1s.
+ // if oneBits == false, all bits outside selected range are 0s.
+ if (src32bit) { // There might be garbage in the upper 32 bits which will get masked away.
+ if (dst32bit) {
+ z_rll(dst, src, nRotate); // Copy and rotate, upper half of reg remains undisturbed.
+ } else {
+ if (sll4rll) { z_sllg(dst, src, nRotate); }
+ else if (srl4rll) { z_srlg(dst, src, -nRotate); }
+ else { z_rllg(dst, src, nRotate); }
+ }
+ } else { // 64-bit source: same shift/rotate selection as the 64-bit destination case above.
+ if (sll4rll) { z_sllg(dst, src, nRotate); }
+ else if (srl4rll) { z_srlg(dst, src, -nRotate); }
+ else { z_rllg(dst, src, nRotate); }
+ }
+
+ unsigned long range_mask = create_mask(lBitPos, rBitPos);
+ unsigned int range_mask_h = (unsigned int)(range_mask >> 32);
+ unsigned int range_mask_l = (unsigned int)range_mask;
+ unsigned short range_mask_hh = (unsigned short)(range_mask >> 48); // NOTE(review): the four halfword masks are computed but not used in this function.
+ unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
+ unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
+ unsigned short range_mask_ll = (unsigned short)range_mask;
+ // Works for z9 and newer H/W.
+ if (oneBits) {
+ if ((~range_mask_l) != 0) { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
+ if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); } // High word is left alone for a 32-bit destination.
+ } else {
+ // All bits outside range become 0s
+ if (((~range_mask_l) != 0) && !lfZero) { // Skip the AND if the mask is all ones or the low word is already zero.
+ z_nilf(dst, range_mask_l);
+ }
+ if (((~range_mask_h) != 0) && !dst32bit && !hfZero) { // Likewise for the high word; never touched for a 32-bit destination.
+ z_nihf(dst, range_mask_h);
+ }
+ }
+}
+
+// Rotate src, then insert the selected range (lBitPos..rBitPos) of the rotated src into dst.
+// If clear_dst is set, dst is cleared before the insertion.
+void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
+                                        int nRotate, bool clear_dst) {
+  // This version does not depend on src being zero-extended int2long.
+  const int rotate_amount = nRotate & 0x003f; // For risbg, pretend it's an unsigned (6-bit) value.
+  z_risbg(dst, src, lBitPos, rBitPos, rotate_amount, clear_dst); // Rotate, then insert selected, clear the rest.
+}
+
+// Rotate src, then AND the selected range (lBitPos..rBitPos) of the rotated src into dst.
+// Set condition code only if so requested. Otherwise it is unpredictable.
+// See performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
+                                     int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For rnsbg, pretend it's an unsigned (6-bit) value.
+  z_rnsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then AND selected (was RXSBG, i.e. XOR).
+}
+
+// Rotate src, then OR the selected range (lBitPos..rBitPos) of the rotated src into dst.
+// The condition code is set only if so requested; otherwise it is unpredictable.
+// See the performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
+                                    int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  const int rotate_amount = nRotate & 0x003f; // For rosbg, pretend it's an unsigned (6-bit) value.
+  z_rosbg(dst, src, lBitPos, rBitPos, rotate_amount, test_only); // Rotate, then or selected.
+}
+
+// Rotate src, then XOR the selected range (lBitPos..rBitPos) of the rotated src into dst.
+// The condition code is set only if so requested; otherwise it is unpredictable.
+// See the performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
+                                     int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  const int rotate_amount = nRotate & 0x003f; // For rxsbg, pretend it's an unsigned (6-bit) value.
+  z_rxsbg(dst, src, lBitPos, rBitPos, rotate_amount, test_only); // Rotate, then xor selected.
+}
+
+void MacroAssembler::add64(Register r1, RegisterOrConstant inc) { // r1 := r1 + inc, where inc is a register or a constant.
+  if (!inc.is_register()) {
+    // Constant increment: delegate to add2reg, which selects the instruction.
+    add2reg(r1, inc.as_constant());
+    return;
+  }
+  z_agr(r1, inc.as_register());
+}
+// Helper function to multiply the 64bit contents of a register (rval) by a 16bit constant (cval).
+// The optimization tries to avoid the mghi instruction, since it uses the FPU for
+// calculation and is thus rather slow.
+//
+// There is no handling for special cases, e.g. cval==0 or cval==1.
+//
+// Returns len of generated code block. The work register is written only in the two-shift case.
+unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
+ int block_start = offset();
+
+ bool sign_flip = cval < 0;
+ cval = sign_flip ? -cval : cval; // Work with the magnitude; the sign is re-applied at the end.
+
+ BLOCK_COMMENT("Reg64*Con16 {");
+
+ int bit1 = cval & -cval; // Lowest set bit of cval.
+ if (bit1 == cval) { // Exactly one bit set: a single shift does the multiplication.
+ z_sllg(rval, rval, exact_log2(bit1));
+ if (sign_flip) { z_lcgr(rval, rval); }
+ } else {
+ int bit2 = (cval-bit1) & -(cval-bit1); // Second-lowest set bit of cval.
+ if ((bit1+bit2) == cval) { // Exactly two bits set: two shifts and an add.
+ z_sllg(work, rval, exact_log2(bit1));
+ z_sllg(rval, rval, exact_log2(bit2));
+ z_agr(rval, work);
+ if (sign_flip) { z_lcgr(rval, rval); }
+ } else { // More than two bits set: fall back to mghi with the original signed constant.
+ if (sign_flip) { z_mghi(rval, -cval); }
+ else { z_mghi(rval, cval); }
+ }
+ }
+ BLOCK_COMMENT("} Reg64*Con16");
+
+ int block_end = offset();
+ return block_end - block_start;
+}
+
+// Generic operation r1 := r2 + imm.
+//
+// Should produce the best code for each supported CPU version.
+// r2 == noreg yields r1 := r1 + imm
+// imm == 0 emits either no instruction or r1 := r2 !
+// NOTES: 1) Don't use this function where fixed sized
+// instruction sequences are required!!!
+// 2) Don't use this function if condition code
+// setting is required!
+// 3) Despite being declared as int64_t, the parameter imm
+// must be a simm_32 value (= signed 32-bit integer).
+void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
+ assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");
+
+ if (r2 == noreg) { r2 = r1; }
+
+ // Handle special case imm == 0.
+ if (imm == 0) {
+ lgr_if_needed(r1, r2);
+ // Nothing else to do.
+ return;
+ }
+
+ if (!PreferLAoverADD || (r2 == Z_R0)) { // Arithmetic path: also taken whenever r2 is Z_R0.
+ bool distinctOpnds = VM_Version::has_DistinctOpnds();
+
+ // Can we encode imm in 16 bits signed?
+ if (Immediate::is_simm16(imm)) {
+ if (r1 == r2) {
+ z_aghi(r1, imm);
+ return;
+ }
+ if (distinctOpnds) {
+ z_aghik(r1, r2, imm);
+ return;
+ }
+ z_lgr(r1, r2); // No three-operand form available: copy first, then add in place.
+ z_aghi(r1, imm);
+ return;
+ }
+ } else { // Load-address path.
+ // Can we encode imm in 12 bits unsigned?
+ if (Displacement::is_shortDisp(imm)) {
+ z_la(r1, imm, r2);
+ return;
+ }
+ // Can we encode imm in 20 bits signed?
+ if (Displacement::is_validDisp(imm)) {
+ // Always use LAY instruction, so we don't need the tmp register.
+ z_lay(r1, imm, r2);
+ return;
+ }
+
+ }
+
+ // Can handle it (all possible values) with long immediates.
+ lgr_if_needed(r1, r2); // Reached from either path when no shorter encoding applied.
+ z_agfi(r1, imm);
+}
+
+// Generic operation r := b + x + d
+//
+// Addition of several operands with address generation semantics - sort of:
+// - no restriction on the registers. Any register will do for any operand.
+// - x == noreg: operand will be disregarded.
+// - b == noreg: will use (contents of) result reg as operand (r := r + d).
+// - x == Z_R0: just disregard
+// - b == Z_R0: use as operand. This is not address generation semantics!!!
+//
+// The same restrictions as on add2reg() are valid!!!
+void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
+ assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");
+
+ if (x == noreg) { x = Z_R0; }
+ if (b == noreg) { b = r; }
+
+ // Handle special case x == R0.
+ if (x == Z_R0) {
+ // Can simply add the immediate value to the base register.
+ add2reg(r, d, b);
+ return;
+ }
+
+ if (!PreferLAoverADD || (b == Z_R0)) { // Arithmetic path: also taken whenever b is Z_R0.
+ bool distinctOpnds = VM_Version::has_DistinctOpnds();
+ // Handle special case d == 0.
+ if (d == 0) {
+ if (b == x) { z_sllg(r, b, 1); return; } // b + x with b == x: shift left by one.
+ if (r == x) { z_agr(r, b); return; }
+ if (r == b) { z_agr(r, x); return; }
+ if (distinctOpnds) { z_agrk(r, x, b); return; }
+ z_lgr(r, b);
+ z_agr(r, x);
+ } else { // Same register-sum selection as for d == 0, followed by adding d.
+ if (x == b) { z_sllg(r, x, 1); }
+ else if (r == x) { z_agr(r, b); }
+ else if (r == b) { z_agr(r, x); }
+ else if (distinctOpnds) { z_agrk(r, x, b); }
+ else {
+ z_lgr(r, b);
+ z_agr(r, x);
+ }
+ add2reg(r, d);
+ }
+ } else { // Load-address path.
+ // Can we encode imm in 12 bits unsigned?
+ if (Displacement::is_shortDisp(d)) {
+ z_la(r, d, x, b);
+ return;
+ }
+ // Can we encode imm in 20 bits signed?
+ if (Displacement::is_validDisp(d)) {
+ z_lay(r, d, x, b);
+ return;
+ }
+ z_la(r, 0, x, b); // d fits neither displacement form: form b + x first, then add d.
+ add2reg(r, d);
+ }
+}
+
+// Generic emitter (32bit) for direct memory increment by imm at address a.
+// For optimal code, do not specify Z_R0 as temp register.
+void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
+  if (!VM_Version::has_MemWithImmALUOps() || !Immediate::is_simm8(imm)) {
+    z_lgf(tmp, a); // No add-immediate-to-memory possible: load, add, store back via tmp.
+    add2reg(tmp, imm);
+    z_st(tmp, a);
+    return;
+  }
+  z_asi(a, imm);
+}
+
+void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) { // 64bit counterpart of add2mem_32.
+  if (!VM_Version::has_MemWithImmALUOps() || !Immediate::is_simm8(imm)) {
+    z_lg(tmp, a); // No add-immediate-to-memory possible: load, add, store back via tmp.
+    add2reg(tmp, imm);
+    z_stg(tmp, a);
+    return;
+  }
+  z_agsi(a, imm);
+}
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
+  // Load 1, 2, 4, or 8 bytes from src into dst. For the narrow sizes, is_signed selects
+  // the signed load (lgf/lgh/lgb); otherwise the logical load (llgf/llgh/llgc) is used.
+  if      (size_in_bytes == 8) { z_lg(dst, src); }
+  else if (size_in_bytes == 4) { if (is_signed) { z_lgf(dst, src); } else { z_llgf(dst, src); } }
+  else if (size_in_bytes == 2) { if (is_signed) { z_lgh(dst, src); } else { z_llgh(dst, src); } }
+  else if (size_in_bytes == 1) { if (is_signed) { z_lgb(dst, src); } else { z_llgc(dst, src); } }
+  else                         { ShouldNotReachHere(); }
+}
+
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
+  // Store 1, 2, 4, or 8 bytes from src to dst.
+  // Any other size is a programming error.
+  if      (size_in_bytes == 8) { z_stg(src, dst); }
+  else if (size_in_bytes == 4) { z_st(src, dst); }
+  else if (size_in_bytes == 2) { z_sth(src, dst); }
+  else if (size_in_bytes == 1) { z_stc(src, dst); }
+  else                         { ShouldNotReachHere(); }
+}
+
+// Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
+// a high-order summand in register tmp (added to tmp if accumulate, loaded into tmp otherwise).
+//
+// return value: < 0: No split required, si20 actually has property uimm12 (only with !fixed_codelen).
+// >= 0: Split performed. Use return value as uimm12 displacement and
+// tmp as index register.
+int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
+ assert(Immediate::is_simm20(si20_offset), "sanity");
+ int lg_off = (int)si20_offset & 0x0fff; // Punch out low-order 12 bits, always positive.
+ int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
+ assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
+ !Displacement::is_shortDisp(si20_offset), "unexpected offset values");
+ assert((lg_off+ll_off) == si20_offset, "offset splitup error");
+
+ Register work = accumulate? Z_R0 : tmp; // With accumulate, the summand is built in Z_R0 and added to tmp at the end.
+
+ if (fixed_codelen) { // Len of code = 10 = 4 + 6.
+ z_lghi(work, ll_off>>12); // Implicit sign extension.
+ z_slag(work, work, 12);
+ } else { // Len of code = 0..10.
+ if (ll_off == 0) { return -1; } // Nothing to split off; no code emitted.
+ // ll_off has 8 significant bits (at most) plus sign.
+ if ((ll_off & 0x0000f000) == 0) { // Non-zero bits only in upper halfbyte.
+ z_llilh(work, ll_off >> 16);
+ if (ll_off < 0) { // Sign-extension required.
+ z_lgfr(work, work);
+ }
+ } else {
+ if ((ll_off & 0x000f0000) == 0) { // Non-zero bits only in lower halfbyte.
+ z_llill(work, ll_off);
+ } else { // Non-zero bits in both halfbytes.
+ z_lghi(work, ll_off>>12); // Implicit sign extension.
+ z_slag(work, work, 12);
+ }
+ }
+ }
+ if (accumulate) { z_algr(tmp, work); } // len of code += 4
+ return lg_off;
+}
+
+void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { // Load float t from a + si20; tmp is overwritten on the fallback path.
+ if (Displacement::is_validDisp(si20)) {
+ z_ley(t, si20, a);
+ } else {
+ // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
+ // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
+ // pool loads).
+ bool accumulate = true;
+ bool fixed_codelen = true;
+ Register work;
+
+ if (fixed_codelen) {
+ z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen.
+ } else { // Not taken as long as fixed_codelen is hard-wired to true above.
+ accumulate = (a == tmp);
+ }
+ work = tmp;
+
+ int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate); // NOTE(review): split_largeoffset asserts is_simm20(si20); confirm that holds when is_validDisp(si20) failed.
+ if (disp12 < 0) { // split_largeoffset returns -1 only on its !fixed_codelen path.
+ z_le(t, si20, work);
+ } else {
+ if (accumulate) { // work already holds a + high part: base-only addressing.
+ z_le(t, disp12, work);
+ } else { // work holds only the high part: use it as index on top of base a.
+ z_le(t, disp12, work, a);
+ }
+ }
+ }
+}
+
+void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) { // Load double t from a + si20; tmp is overwritten on the fallback path.
+ if (Displacement::is_validDisp(si20)) {
+ z_ldy(t, si20, a);
+ } else {
+ // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
+ // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
+ // pool loads).
+ bool accumulate = true;
+ bool fixed_codelen = true;
+ Register work;
+
+ if (fixed_codelen) {
+ z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen.
+ } else { // Not taken as long as fixed_codelen is hard-wired to true above.
+ accumulate = (a == tmp);
+ }
+ work = tmp;
+
+ int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate); // NOTE(review): split_largeoffset asserts is_simm20(si20); confirm that holds when is_validDisp(si20) failed.
+ if (disp12 < 0) { // split_largeoffset returns -1 only on its !fixed_codelen path.
+ z_ld(t, si20, work);
+ } else {
+ if (accumulate) { // work already holds a + high part: base-only addressing.
+ z_ld(t, disp12, work);
+ } else { // work holds only the high part: use it as index on top of base a.
+ z_ld(t, disp12, work, a);
+ }
+ }
+ }
+}
+
+// PCrelative TOC access.
+// Returns the distance (in bytes) from the current pc to the start of the consts section,
+// or 0 (zero) if code()->consts() yields NULL.
+long MacroAssembler::toc_distance() {
+  CodeSection* consts_section = code()->consts();
+  return (consts_section == NULL) ? 0L : (long)(consts_section->start() - pc());
+}
+
+// Implementation on x86/sparc assumes that constant and instruction section are
+// adjacent, but this doesn't hold. Two special situations may occur, that we must
+// be able to handle:
+// 1. const section may be located apart from the inst section.
+// 2. const section may be empty
+// In both cases, we use the const section's start address to compute the "TOC",
+// this seems to occur only temporarily; in the final step we always seem to end up
+// with the pc-relative variant.
+//
+// PC-relative offset could be +/-2**32 -> use long for disp
+// Furthermore: makes no sense to have special code for
+// adjacent const and inst sections.
+void MacroAssembler::load_toc(Register Rtoc) { // Load the address of the consts section start into Rtoc, pc-relative.
+ // Simply use distance from start of const section (should be patched in the end).
+ long disp = toc_distance();
+
+ RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp); // Relocation target is the absolute address pc + disp.
+ relocate(rspec);
+ z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords.
+}
+
+// PCrelative TOC access.
+// Load from dataLocation, addressed pc-relative (the load instruction gets a relocation).
+void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
+  const address    here     = this->pc();
+  const ptrdiff_t  distance = dataLocation - here;
+  RelocationHolder rspec    = internal_word_Relocation::spec(dataLocation);
+
+  assert((distance & 0x01L) == 0, "halfword alignment is mandatory");
+  assert(distance != 0, "sanity");
+
+  // Some extra safety net: unlike the asserts above, this range check
+  // is a guarantee and therefore not limited to debug builds.
+  guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away");
+
+  // Attach the relocation to the load instruction emitted next.
+  relocate(rspec, relocInfo::pcrel_addr_format);
+  z_lgrl(Rdst, RelAddr::pcrel_off32(distance));
+}
+
+
+// PCrelative TOC access.
+// Load from addrLocation, addressed pc-relative (the load instruction gets a relocation).
+// The loaded addr has to be relocated when added to the constant pool.
+void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
+  const address    here     = this->pc();
+  const ptrdiff_t  distance = addrLocation - here;
+  RelocationHolder rspec    = internal_word_Relocation::spec(addrLocation);
+
+  assert((distance & 0x01L) == 0, "halfword alignment is mandatory");
+
+  // Some extra safety net: unlike the assert above, this range check
+  // is a guarantee and therefore not limited to debug builds.
+  guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away");
+
+  // Attach the relocation to the load instruction emitted next.
+  relocate(rspec, relocInfo::pcrel_addr_format);
+  z_lgrl(Rdst, RelAddr::pcrel_off32(distance));
+}
+
+// Generic operation: load a value from memory and test.
+// CondCode indicates the sign (<0, ==0, >0) of the loaded value.
+void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
+ z_lb(dst, a); // Load the byte ...
+ z_ltr(dst, dst); // ... then load-and-test dst onto itself to set the condition code.
+}
+
+void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
+  const int64_t displacement = a.disp20();
+  const bool    use_short    = Displacement::is_shortDisp(displacement);
+  const bool    use_long     = !use_short && Displacement::is_longDisp(displacement);
+  if (use_short)     { z_lh(dst, a); }
+  else if (use_long) { z_lhy(dst, a); }
+  else               { guarantee(false, "displacement out of range"); }
+  // The halfword load is followed by a load-and-test of dst onto itself,
+  // which sets the condition code.
+  z_ltr(dst, dst);
+}
+
+void MacroAssembler::load_and_test_int(Register dst, const Address &a) { // Load-and-test in a single instruction (LT).
+ z_lt(dst, a);
+}
+
+void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) { // Load-and-test in a single instruction (LTGF).
+ z_ltgf(dst, a);
+}
+
+void MacroAssembler::load_and_test_long(Register dst, const Address &a) { // Load-and-test in a single instruction (LTG).
+ z_ltg(dst, a);
+}
+
+// Test a bit (0..31) of the 4-byte field at address a.
+void MacroAssembler::testbit(const Address &a, unsigned int bit) {
+  assert(a.index() == noreg, "no index reg allowed in testbit");
+  if (bit > 31) {
+    ShouldNotReachHere();
+    return;
+  }
+
+  // TM tests within a single byte. Bits 0..7 are tested in the byte at disp + 3,
+  // bits 8..15 at disp + 2, bits 16..23 at disp + 1, and bits 24..31 at disp + 0.
+  const int byte_offset = 3 - (int)(bit / 8);
+  const int bit_in_byte = (int)(bit % 8);
+
+  z_tm(a.disp() + byte_offset, a.base(), 1 << bit_in_byte);
+}
+
+// Test a bit (0..63) in a register. Result is reflected in CC.
+void MacroAssembler::testbit(Register r, unsigned int bitPos) {
+  // Each emitter handles one group of 16 bit positions:
+  // z_tmll for 0..15, z_tmlh for 16..31, z_tmhl for 32..47, z_tmhh for 48..63.
+  // Within the selected group, the bit is addressed by bitPos modulo 16.
+  const unsigned int group_mask = 1U << (bitPos % 16);
+  switch (bitPos / 16) {
+    case 0:  z_tmll(r, group_mask); break;
+    case 1:  z_tmlh(r, group_mask); break;
+    case 2:  z_tmhl(r, group_mask); break;
+    case 3:  z_tmhh(r, group_mask); break;
+    default: ShouldNotReachHere();
+  }
+}
+
+// Clear a register, i.e. load const zero into reg.
+// Returns the length (in bytes) of the generated instruction(s).
+// whole_reg: Clear 64 bits if true, 32 bits otherwise.
+// set_cc: Use an instruction that sets the condition code, if true.
+int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
+  const unsigned int begin_offset = offset();
+  // Pick one of four emitters, depending on width and on whether the CC is to be set.
+  if (whole_reg && set_cc) { z_xgr(r, r); }
+  else if (whole_reg)      { z_laz(r, 0, Z_R0); }
+  else if (set_cc)         { z_xr(r, r); }
+  else                     { z_lhi(r, 0); }
+  return offset() - begin_offset;
+}
+
+#ifdef ASSERT
+int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) { // Load r with a pattern of pattern_len (1, 2, 4, or 8) bytes, replicated to fill 8 bytes.
+ switch (pattern_len) {
+ case 1:
+ pattern = (pattern & 0x000000ff) | ((pattern & 0x000000ff)<<8); // Replicate byte into halfword, then fall through.
+ case 2:
+ pattern = (pattern & 0x0000ffff) | ((pattern & 0x0000ffff)<<16); // Replicate halfword into word, then fall through.
+ case 4:
+ pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32); // Replicate word into doubleword, then fall through.
+ case 8:
+ return load_const_optimized_rtn_len(r, pattern, true); // Returns whatever load_const_optimized_rtn_len returns.
+ break;
+ default:
+ guarantee(false, "preset_reg: bad len");
+ }
+ return 0;
+}
+#endif
+
+// addr: Address descriptor of memory to clear. The index register will not be used!
+// size: Number of bytes to clear (at most 256).
+// !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
+// !!! Use store_const() instead !!!
+void MacroAssembler::clear_mem(const Address& addr, unsigned size) {
+  guarantee(size <= 256, "MacroAssembler::clear_mem: size too large");
+
+  switch (size) {
+    case 1:
+      z_mvi(addr, 0);
+      return;
+    case 2:
+      z_mvhhi(addr, 0);
+      return;
+    case 4:
+      z_mvhi(addr, 0);
+      return;
+    case 8:
+      z_mvghi(addr, 0);
+      return;
+    default: break; // All other sizes are cleared with xc below.
+  }
+  z_xc(addr, size, addr);
+}
+
+void MacroAssembler::align(int modulus) { // Emit nops until the code offset is a multiple of modulus.
+ while (offset() % modulus != 0) z_nop();
+}
+
+// Special version for non-relocatable code if required alignment
+// is larger than CodeEntryAlignment. Aligns the absolute pc rather than the code offset.
+void MacroAssembler::align_address(int modulus) {
+ while ((uintptr_t)pc() % modulus != 0) z_nop();
+}
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         Register temp_reg,
+                                         int64_t extra_slot_offset) {
+  // On Z, we can have index and disp in an Address. So don't call argument_offset,
+  // which issues an unnecessary add instruction.
+  const int      slot_size   = Interpreter::stackElementSize;
+  const int64_t  base_offset = extra_slot_offset * slot_size;
+  const Register argbase     = Z_esp;
+  if (!arg_slot.is_constant()) {
+    assert(temp_reg != noreg, "must specify");
+    assert(temp_reg != Z_ARG1, "base and index are conflicting");
+    z_sllg(temp_reg, arg_slot.as_register(), exact_log2(slot_size)); // temp_reg = arg_slot scaled by the slot size
+    return Address(argbase, temp_reg, base_offset);
+  }
+  // Constant slot number: fold it into the displacement, no code is emitted.
+  const int64_t const_offset = base_offset + arg_slot.as_constant() * slot_size;
+  return Address(argbase, const_offset);
+}
+
+
+//===================================================================
+//=== START C O N S T A N T S I N C O D E S T R E A M ===
+//===================================================================
+//=== P A T CH A B L E C O N S T A N T S ===
+//===================================================================
+
+
+//---------------------------------------------------
+// Load (patchable) constant into register
+//---------------------------------------------------
+
+
+// Load absolute address addr into d (and try to optimize).
+// Note: This method is usable only for position-fixed code,
+// referring to a position-fixed target location.
+// If not so, relocations and patching must be used.
+void MacroAssembler::load_absolute_address(Register d, address addr) {
+ assert(addr != NULL, "should not happen");
+ BLOCK_COMMENT("load_absolute_address:");
+ if (addr == NULL) { // Only reachable when asserts are compiled out; the assert above rejects NULL.
+ z_larl(d, pc()); // Dummy emit for size calc.
+ return;
+ }
+
+ if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) { // Target reachable pc-relative: a single LARL suffices.
+ z_larl(d, addr);
+ return;
+ }
+
+ load_const_optimized(d, (long)addr); // Otherwise load the address as a 64-bit constant.
+}
+
+// Load a 64bit constant x into register t.
+// Patchable code sequence, but not atomically patchable.
+// Make sure to keep code size constant -> no value-dependent optimizations.
+// Do not kill condition code.
+void MacroAssembler::load_const(Register t, long x) {
+ Assembler::z_iihf(t, (int)(x >> 32)); // Upper 32 bits of x.
+ Assembler::z_iilf(t, (int)(x & 0xffffffff)); // Lower 32 bits of x.
+}
+
+// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
+// Patchable code sequence, but not atomically patchable.
+// The code size must stay constant, hence no value-dependent optimizations.
+// Does not kill the condition code.
+void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
+  if (!sign_extend) { Assembler::z_llilf(t, x); } // Variant for zero extension.
+  else              { Assembler::z_lgfi(t, x);  } // Variant for sign extension.
+}
+
+// Load narrow oop constant a into register t, no decompression.
+void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
+ assert(UseCompressedOops, "must be on to call this method");
+ load_const_32to64(t, a, false /*sign_extend*/); // Emits the patchable, zero-extending load.
+}
+
+// Load narrow klass constant into register t, compression required.
+void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
+ assert(UseCompressedClassPointers, "must be on to call this method");
+ narrowKlass encoded_k = Klass::encode_klass(k); // Compress k before embedding it in the code.
+ load_const_32to64(t, encoded_k, false /*sign_extend*/);
+}
+
+//------------------------------------------------------
+// Compare (patchable) constant with register.
+//------------------------------------------------------
+
+// Compare narrow oop in reg oop1 with narrow oop constant oop2, no decompression.
+void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
+ assert(UseCompressedOops, "must be on to call this method");
+
+ Assembler::z_clfi(oop1, oop2); // CLFI is the instruction is_compare_immediate32() detects.
+}
+
+// Compare narrow klass in reg klass1 with the narrow (compressed) form of klass2.
+void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
+ assert(UseCompressedClassPointers, "must be on to call this method");
+ narrowKlass encoded_k = Klass::encode_klass(klass2); // Compress klass2 before embedding it in the code.
+
+ Assembler::z_clfi(klass1, encoded_k); // CLFI is the instruction is_compare_immediate32() detects.
+}
+
+//----------------------------------------------------------
+// Check which kind of load_constant we have here.
+//----------------------------------------------------------
+
+// Detection of CPU version dependent load_const sequence (IIHF followed by IILF) at address a.
+// The detection is valid only for code sequences generated by load_const,
+// not load_const_optimized.
+bool MacroAssembler::is_load_const(address a) {
+  unsigned long first_inst;
+  unsigned long second_inst;
+
+  const unsigned int first_len = get_instruction(a, &first_inst);
+  (void) get_instruction(a + first_len, &second_inst); // Length of the second instruction is not needed.
+
+  return is_z_iihf(first_inst) && is_z_iilf(second_inst);
+}
+
+// Detection of CPU version dependent load_const_32to64 sequence.
+// Mostly used for narrow oops and narrow Klass pointers.
+// The detection is valid only for code sequences generated by load_const_32to64.
+bool MacroAssembler::is_load_const_32to64(address pos) {
+  unsigned long inst;
+
+  // Only the instruction itself is inspected; its length is not needed.
+  (void) get_instruction(pos, &inst);
+  return is_z_llilf(inst);
+}
+
+// Detection of compare_immediate_narrow sequence: checks for a CLFI instruction at pos.
+// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
+bool MacroAssembler::is_compare_immediate32(address pos) {
+ return is_equal(pos, CLFI_ZOPC, RIL_MASK);
+}
+
+// Detection of compare_immediate_narrow sequence (delegates to is_compare_immediate32).
+// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
+bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
+ return is_compare_immediate32(pos);
+ }
+
+// Detection of compare_immediate_narrow sequence (delegates to is_compare_immediate32).
+// The detection is valid only for code sequences generated by compare_immediate_narrow_klass.
+bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
+ return is_compare_immediate32(pos);
+}
+
+//-----------------------------------
+// patch the load_constant
+//-----------------------------------
+
+// CPU-version dependent patching of load_const: replace the 64-bit constant loaded at a with x.
+void MacroAssembler::patch_const(address a, long x) {
+ assert(is_load_const(a), "not a load of a constant");
+ set_imm32((address)a, (int) ((x >> 32) & 0xffffffff)); // Upper 32 bits go into the first instruction.
+ set_imm32((address)(a + 6), (int)(x & 0xffffffff)); // Lower 32 bits go into the instruction at a + 6 (presumably the length of the first one).
+}
+
+// Patching the value of CPU version dependent load_const_32to64 sequence.
+// The passed ptr MUST be in compressed format!
+int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
+ assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");
+
+ set_imm32(pos, np);
+ return 6; // Always returns 6 (presumably the length in bytes of the patched instruction).
+}
+
+// Patching the value of CPU version dependent compare_immediate_narrow sequence.
+// The passed ptr MUST be in compressed format!
+int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
+ assert(is_compare_immediate32(pos), "not a compressed ptr compare");
+
+ set_imm32(pos, np);
+ return 6; // Always returns 6 (presumably the length in bytes of the patched instruction).
+}
+
+// Patch the immediate value of a CPU version dependent load_narrow_oop sequence.
+// The passed oop must NOT be in compressed format; it is encoded here.
+int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
+  assert(UseCompressedOops, "Can only patch compressed oops");
+
+  // Compress the oop, then patch it into the load instruction at pos.
+  return patch_load_const_32to64(pos, oopDesc::encode_heap_oop(o));
+}
+
+// Patch the immediate value of a CPU version dependent load_narrow_klass sequence.
+// The passed Klass* must NOT be in compressed format; it is encoded here.
+int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
+  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
+
+  // Compress the klass pointer, then patch it into the load instruction at pos.
+  return patch_load_const_32to64(pos, Klass::encode_klass(k));
+}
+
+// Patch the immediate value of a CPU version dependent compare_immediate_narrow_oop sequence.
+// The passed oop must NOT be in compressed format; it is encoded here.
+int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
+  assert(UseCompressedOops, "Can only patch compressed oops");
+
+  // Compress the oop, then patch it into the compare instruction at pos.
+  return patch_compare_immediate_32(pos, oopDesc::encode_heap_oop(o));
+}
+
+// Patch the immediate value of a CPU version dependent compare_immediate_narrow_klass sequence.
+// The passed Klass* must NOT be in compressed format; it is encoded here.
+int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
+  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
+
+  // Compress the klass pointer, then patch it into the compare instruction at pos.
+  return patch_compare_immediate_32(pos, Klass::encode_klass(k));
+}
+
+//------------------------------------------------------------------------
+// Extract the constant from a load_constant instruction stream.
+//------------------------------------------------------------------------
+
+// Get the 64-bit constant from a load_const sequence located at a.
+long MacroAssembler::get_const(address a) {
+  assert(is_load_const(a), "not a load of a constant");
+  // Immediate #0 supplies the upper 32 bits, immediate #1 the lower 32 bits.
+  const unsigned long upper_half = (unsigned long) (get_imm32(a,0) & 0xffffffff);
+  const unsigned long lower_half = (unsigned long) (get_imm32(a,1) & 0xffffffff);
+  return (long) ((upper_half << 32) | lower_half);
+}
+
+//--------------------------------------
+// Store a constant in memory.
+//--------------------------------------
+
+// General emitter to move a constant (imm, lc bytes wide) to memory (dest, lm bytes wide).
+// The store is atomic.
+// o Address must be given in RS format (no index register)
+// o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
+// o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
+// o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
+// o Memory slot must be at least as wide as constant, will assert otherwise.
+// o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
+int MacroAssembler::store_const(const Address &dest, long imm,
+ unsigned int lm, unsigned int lc,
+ Register scratch) { // Returns the code offset of the store instruction; scratch is required only if no store-immediate form applies.
+ int64_t disp = dest.disp();
+ Register base = dest.base();
+ assert(!dest.has_index(), "not supported");
+ assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported");
+ assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported");
+ assert(lm>=lc, "memory slot too small");
+ assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range");
+ assert(Displacement::is_validDisp(disp), "displacement out of range");
+
+ bool is_shortDisp = Displacement::is_shortDisp(disp);
+ int store_offset = -1;
+
+ // For target len == 1 it's easy.
+ if (lm == 1) {
+ store_offset = offset();
+ if (is_shortDisp) {
+ z_mvi(disp, base, imm);
+ return store_offset;
+ } else {
+ z_mviy(disp, base, imm);
+ return store_offset;
+ }
+ }
+
+ // All the "good stuff" takes an unsigned displacement.
+ if (is_shortDisp) {
+ // NOTE: Cannot use clear_mem for imm==0, because it is not atomic.
+
+ store_offset = offset();
+ switch (lm) {
+ case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening.
+ z_mvhhi(disp, base, imm);
+ return store_offset;
+ case 4:
+ if (Immediate::is_simm16(imm)) {
+ z_mvhi(disp, base, imm);
+ return store_offset;
+ }
+ break; // imm does not fit 16 bits signed: use the load-and-store path below.
+ case 8:
+ if (Immediate::is_simm16(imm)) {
+ z_mvghi(disp, base, imm);
+ return store_offset;
+ }
+ break; // imm does not fit 16 bits signed: use the load-and-store path below.
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ }
+
+ // Can't optimize, so load value and store it.
+ guarantee(scratch != noreg, " need a scratch register here !");
+ if (imm != 0) {
+ load_const_optimized(scratch, imm); // Preserves CC anyway.
+ } else {
+ // Leave CC alone!!
+ (void) clear_reg(scratch, true, false); // Indicate unused result.
+ }
+
+ store_offset = offset(); // Taken after the constant load, so it marks the store instruction itself.
+ if (is_shortDisp) {
+ switch (lm) {
+ case 2:
+ z_sth(scratch, disp, Z_R0, base);
+ return store_offset;
+ case 4:
+ z_st(scratch, disp, Z_R0, base);
+ return store_offset;
+ case 8:
+ z_stg(scratch, disp, Z_R0, base);
+ return store_offset;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ } else { // Displacement does not pass is_shortDisp: use the "y" store variants where they differ.
+ switch (lm) {
+ case 2:
+ z_sthy(scratch, disp, Z_R0, base);
+ return store_offset;
+ case 4:
+ z_sty(scratch, disp, Z_R0, base);
+ return store_offset;
+ case 8:
+ z_stg(scratch, disp, Z_R0, base); // STG is used for both displacement forms.
+ return store_offset;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ }
+ return -1; // should not reach here
+}
+
+//===================================================================
+//=== N O T P A T CH A B L E C O N S T A N T S ===
+//===================================================================
+
+// Load constant x into register t with a fast instruction sequence
+// depending on the bits in x. Preserves CC under all circumstances.
+int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) {
+  // Returns the length (in bytes) of the instruction sequence selected for x.
+  // Instructions are only emitted if emit is true; otherwise just the length
+  // is computed.
+  if (x == 0) {
+    int len;
+    if (!emit) {
+      len = 4;
+    } else {
+      len = clear_reg(t, true, false);
+    }
+    return len;
+  }
+
+  if (Immediate::is_simm16(x)) {
+    if (emit) { z_lghi(t, x); }
+    return 4;
+  }
+
+  // 64 bit value: | hh | hl | lh | ll |
+  // At least one part is not zero!
+  const int hh = ((x >> 32) & 0xffff0000) >> 16;
+  const int hl = (x >> 32) & 0x0000ffff;
+  const int lh = (x & 0xffff0000) >> 16;
+  const int ll = (x & 0x0000ffff);
+
+  // Lower word only (unsigned).
+  if ((hh == 0) && (hl == 0)) {
+    if (lh == 0) {
+      if (emit) { z_llill(t, ll); }
+      return 4;
+    }
+    if (ll == 0) {
+      if (emit) { z_llilh(t, lh); }
+      return 4;
+    }
+    if (emit) { z_llilf(t, (int)(x & 0xffffffff)); }
+    return 6;
+  }
+
+  // Upper word only.
+  if ((lh == 0) && (ll == 0)) {
+    if (hh == 0) {
+      if (emit) { z_llihl(t, hl); }
+      return 4;
+    }
+    if (hl == 0) {
+      if (emit) { z_llihh(t, hh); }
+      return 4;
+    }
+    if (emit) { z_llihf(t, (int)(x >> 32)); }
+    return 6;
+  }
+
+  // Lower word only (signed).
+  if ((hh == 0x0000ffff) && (hl == 0x0000ffff) && ((lh & 0x00008000) != 0)) {
+    if (emit) { z_lgfi(t, (int)(x & 0xffffffff)); }
+    return 6;
+  }
+
+  // General case: one instruction for the upper word, followed by one
+  // instruction for the lower word. Each is 4 bytes if one of its halfwords
+  // is zero, 6 bytes otherwise.
+  int upper_len;
+  if (hh == 0) {
+    if (emit) { z_llihl(t, hl); }
+    upper_len = 4;
+  } else if (hl == 0) {
+    if (emit) { z_llihh(t, hh); }
+    upper_len = 4;
+  } else {
+    if (emit) { z_llihf(t, (int)(x >> 32)); }
+    upper_len = 6;
+  }
+
+  int lower_len;
+  if (lh == 0) {
+    if (emit) { z_iill(t, ll); }
+    lower_len = 4;
+  } else if (ll == 0) {
+    if (emit) { z_iilh(t, lh); }
+    lower_len = 4;
+  } else {
+    if (emit) { z_iilf(t, (int)(x & 0xffffffff)); }
+    lower_len = 6;
+  }
+  return upper_len + lower_len;
+}
+
+//=====================================================================
+//=== H I G H E R L E V E L B R A N C H E M I T T E R S ===
+//=====================================================================
+
+// Note: In the worst case, one of the scratch registers is destroyed!!!
+void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
+  // Signed 32-bit compare of r1 with x2, then branch to lbl on cond.
+  if (!x2.is_constant()) {
+    // Right operand is in register.
+    compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true);
+  } else {
+    // Right operand is constant.
+    const jlong imm = x2.as_constant();
+    compare_and_branch_optimized(r1, imm, cond, lbl, /*len64=*/false, /*has_sign=*/true);
+  }
+}
+
+// Note: In the worst case, one of the scratch registers is destroyed!!!
+void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
+  // Unsigned 32-bit compare of r1 with x2, then branch to lbl on cond.
+  if (!x2.is_constant()) {
+    // Right operand is in register.
+    compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false);
+  } else {
+    // Right operand is constant.
+    const jlong imm = x2.as_constant();
+    compare_and_branch_optimized(r1, imm, cond, lbl, /*len64=*/false, /*has_sign=*/false);
+  }
+}
+
+// Note: In the worst case, one of the scratch registers is destroyed!!!
+void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
+  // Signed 64-bit compare of r1 with x2, then branch to lbl on cond.
+  if (!x2.is_constant()) {
+    // Right operand is in register.
+    compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true);
+  } else {
+    // Right operand is constant.
+    const jlong imm = x2.as_constant();
+    compare_and_branch_optimized(r1, imm, cond, lbl, /*len64=*/true, /*has_sign=*/true);
+  }
+}
+
+void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
+  // Unsigned 64-bit compare of r1 with x2, then branch to lbl on cond.
+  if (!x2.is_constant()) {
+    // Right operand is in register.
+    compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false);
+  } else {
+    // Right operand is constant.
+    const jlong imm = x2.as_constant();
+    compare_and_branch_optimized(r1, imm, cond, lbl, /*len64=*/true, /*has_sign=*/false);
+  }
+}
+
+// Generate an optimal branch to the branch target.
+// Optimal means that a relative branch (brc or brcl) is used if the
+// branch distance is short enough. Loading the target address into a
+// register and branching via reg is used as fallback only.
+//
+// Used registers:
+// Z_R1 - work reg. Holds branch target address.
+// Used in fallback case only.
+//
+// This version of branch_optimized is good for cases where the target address is known
+// and constant, i.e. is never changed (no relocation, no patching).
+void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) {
+  const address here = pc();
+
+  // Shortest form first: 16-bit relative branch.
+  if (RelAddr::is_in_range_of_RelAddr16(branch_addr, here)) {
+    z_brc(cond, branch_addr);
+    return;
+  }
+
+  // Next best: 32-bit relative branch.
+  if (RelAddr::is_in_range_of_RelAddr32(branch_addr, here)) {
+    z_brcl(cond, branch_addr);
+    return;
+  }
+
+  // Fallback: branch via register Z_R1.
+  load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized.
+  z_bcr(cond, Z_R1);
+}
+
+// This version of branch_optimized is good for cases where the target address
+// is potentially not yet known at the time the code is emitted.
+//
+// One very common case is a branch to an unbound label which is handled here.
+// The caller might know (or hope) that the branch distance is short enough
+// to be encoded in a 16bit relative address. In this case he will pass a
+// NearLabel branch_target.
+// Care must be taken with unbound labels. Each call to target(label) creates
+// an entry in the patch queue for that label to patch all references of the label
+// once it gets bound. Those recorded patch locations must be patchable. Otherwise,
+// an assertion fires at patch time.
+void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) {
+  if (!branch_target.is_bound()) {
+    // Target address not known yet: always emit the long relative branch.
+    z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time.
+    return;
+  }
+
+  // Target is known: let the address variant select the best encoding.
+  const address resolved = target(branch_target);
+  branch_optimized(cond, resolved);
+}
+
+// Generate an optimal compare and branch to the branch target.
+// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
+// branch distance is short enough. Loading the target address into a
+// register and branching via reg is used as fallback only.
+//
+// Input:
+// r1 - left compare operand
+// r2 - right compare operand
+void MacroAssembler::compare_and_branch_optimized(Register r1,
+                                                  Register r2,
+                                                  Assembler::branch_condition cond,
+                                                  address branch_addr,
+                                                  bool len64,
+                                                  bool has_sign) {
+  // Variant selector: 0 = 32-bit signed, 1 = 32-bit unsigned,
+  //                   2 = 64-bit signed, 3 = 64-bit unsigned.
+  const unsigned int variant = (len64?2:0)+(has_sign?0:1);
+
+  const address here = pc();
+  const bool use_fused = VM_Version::has_CompareBranch() &&
+                         RelAddr::is_in_range_of_RelAddr16(branch_addr, here);
+
+  if (use_fused) {
+    // Single compare-and-branch instruction.
+    switch (variant) {
+      case 0: z_crj(  r1, r2, cond, branch_addr); break;
+      case 1: z_clrj (r1, r2, cond, branch_addr); break;
+      case 2: z_cgrj( r1, r2, cond, branch_addr); break;
+      case 3: z_clgrj(r1, r2, cond, branch_addr); break;
+      default: ShouldNotReachHere(); break;
+    }
+    return;
+  }
+
+  // Separate compare, followed by the best available branch.
+  switch (variant) {
+    case 0: z_cr(  r1, r2); break;
+    case 1: z_clr( r1, r2); break;
+    case 2: z_cgr( r1, r2); break;
+    case 3: z_clgr(r1, r2); break;
+    default: ShouldNotReachHere(); break;
+  }
+  branch_optimized(cond, branch_addr);
+}
+
+// Generate an optimal compare and branch to the branch target.
+// Optimal means that a relative branch (clgij, brc or brcl) is used if the
+// branch distance is short enough. Loading the target address into a
+// register and branching via reg is used as fallback only.
+//
+// Input:
+// r1 - left compare operand (in register)
+// x2 - right compare operand (immediate)
+void MacroAssembler::compare_and_branch_optimized(Register r1,
+                                                  jlong x2,
+                                                  Assembler::branch_condition cond,
+                                                  Label& branch_target,
+                                                  bool len64,
+                                                  bool has_sign) {
+  // Compares r1 with the immediate x2 and branches to branch_target on cond.
+  // len64 selects a 64-bit (true) or 32-bit (false) compare, has_sign selects
+  // a signed (true) or unsigned (false) compare.
+  // If x2 fits no immediate form, it is loaded into Z_R0 (or Z_R1 if r1 is Z_R0).
+  address branch_origin = pc();
+  bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2));
+  bool is_RelAddr16 = (branch_target.is_bound() &&
+                       RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin));
+  // Variant selector: 0 = 32-bit signed, 1 = 32-bit unsigned,
+  //                   2 = 64-bit signed, 3 = 64-bit unsigned.
+  unsigned int casenum = (len64?2:0)+(has_sign?0:1);
+
+  // Best case: one compare-immediate-and-branch instruction. Requires a bound
+  // label within 16-bit relative range and an 8-bit immediate.
+  if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) {
+    switch (casenum) {
+      case 0: z_cij(  r1, x2, cond, branch_target); break;
+      case 1: z_clij( r1, x2, cond, branch_target); break;
+      case 2: z_cgij( r1, x2, cond, branch_target); break;
+      case 3: z_clgij(r1, x2, cond, branch_target); break;
+      default: ShouldNotReachHere(); break;
+    }
+    return;
+  }
+
+  if (x2 == 0) {
+    switch (casenum) {
+      case 0: z_ltr(r1, r1); break;
+      case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
+      case 2: z_ltgr(r1, r1); break;
+      case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
+      default: ShouldNotReachHere(); break;
+    }
+  } else {
+    if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) {
+      // NOTE(review): the unsigned cases (1 and 3) use the same compare
+      // instructions as the signed cases here. Presumably the result is only
+      // reliable for unsigned ordering if r1 does not have its sign bit set -
+      // confirm against callers.
+      switch (casenum) {
+        case 0: z_chi(r1, x2); break;
+        case 1: z_chi(r1, x2); break; // positive immediate < 2**15
+        case 2: z_cghi(r1, x2); break;
+        case 3: z_cghi(r1, x2); break; // positive immediate < 2**15
+        default: ShouldNotReachHere(); break; // Consistent with all other switches on casenum.
+      }
+    } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) {
+      switch (casenum) {
+        case 0: z_cfi(  r1, x2); break;
+        case 1: z_clfi( r1, x2); break;
+        case 2: z_cgfi( r1, x2); break;
+        case 3: z_clgfi(r1, x2); break;
+        default: ShouldNotReachHere(); break;
+      }
+    } else {
+      // No instruction with immediate operand possible, so load into register.
+      Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1;
+      load_const_optimized(scratch, x2);
+      switch (casenum) {
+        case 0: z_cr(  r1, scratch); break;
+        case 1: z_clr( r1, scratch); break;
+        case 2: z_cgr( r1, scratch); break;
+        case 3: z_clgr(r1, scratch); break;
+        default: ShouldNotReachHere(); break;
+      }
+    }
+  }
+  // The compare above only set the condition code; now emit the branch.
+  branch_optimized(cond, branch_target);
+}
+
+// Generate an optimal compare and branch to the branch target.
+// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
+// branch distance is short enough. Loading the target address into a
+// register and branching via reg is used as fallback only.
+//
+// Input:
+// r1 - left compare operand
+// r2 - right compare operand
+void MacroAssembler::compare_and_branch_optimized(Register r1,
+                                                  Register r2,
+                                                  Assembler::branch_condition cond,
+                                                  Label& branch_target,
+                                                  bool len64,
+                                                  bool has_sign) {
+  // Variant selector: 0 = 32-bit signed, 1 = 32-bit unsigned,
+  //                   2 = 64-bit signed, 3 = 64-bit unsigned.
+  const unsigned int variant = (len64?2:0)+(has_sign?0:1);
+
+  if (!branch_target.is_bound()) {
+    // Target not known yet: emit a plain compare followed by a branch to the label.
+    switch (variant) {
+      case 0: z_cr(  r1, r2); break;
+      case 1: z_clr( r1, r2); break;
+      case 2: z_cgr( r1, r2); break;
+      case 3: z_clgr(r1, r2); break;
+      default: ShouldNotReachHere(); break;
+    }
+    branch_optimized(cond, branch_target);
+    return;
+  }
+
+  // Target is known: delegate to the address variant.
+  const address resolved = target(branch_target);
+  compare_and_branch_optimized(r1, r2, cond, resolved, len64, has_sign);
+}
+
+//===========================================================================
+//=== END H I G H E R L E V E L B R A N C H E M I T T E R S ===
+//===========================================================================
+
+AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
+  // Allocates a metadata index for obj in the OopRecorder and returns an
+  // AddressLiteral for obj carrying the matching metadata relocation.
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int md_index = oop_recorder()->allocate_metadata_index(obj);
+  return AddressLiteral((address)obj, metadata_Relocation::spec(md_index));
+}
+
+AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
+  // Looks up the index of obj in the OopRecorder and returns an
+  // AddressLiteral for obj carrying the matching metadata relocation.
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int md_index = oop_recorder()->find_index(obj);
+  return AddressLiteral((address)obj, metadata_Relocation::spec(md_index));
+}
+
+AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
+  // Allocates an oop index for obj in the OopRecorder and returns an
+  // AddressLiteral for obj carrying the matching oop relocation.
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int idx = oop_recorder()->allocate_oop_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(idx);
+  return AddressLiteral(address(obj), rspec);
+}
+
+AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
+  // Looks up the index of obj in the OopRecorder and returns an
+  // AddressLiteral for obj carrying the matching oop relocation.
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int idx = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(idx);
+  return AddressLiteral(address(obj), rspec);
+}
+
+// NOTE: Overwrites r with the result (0 or 1) and clobbers t.
+void MacroAssembler::c2bool(Register r, Register t) {
+ // Normalizes r in place to 0 or 1 (see the final shift); t serves as temporary.
+ // The three instructions must stay in this order.
+ z_lcr(t, r); // t = -r
+ z_or(r, t); // r = -r OR r
+ z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise.
+}
+
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  // If the value is already available (non-zero) at code generation time,
+  // hand it back as a constant. Otherwise emit code that loads it at run
+  // time into tmp and return tmp.
+  const intptr_t current = *delayed_value_addr;
+  if (current != 0) {
+    return RegisterOrConstant(current + offset);
+  }
+
+  BLOCK_COMMENT("delayed_value {");
+  // Load indirectly to solve generation ordering problem.
+  load_absolute_address(tmp, (address) delayed_value_addr); // tmp = a;
+  z_lg(tmp, 0, tmp);                                        // tmp = *tmp;
+
+#ifdef ASSERT
+  // Debug builds trap if the loaded value is still zero at run time.
+  NearLabel value_is_set;
+  compare64_and_branch(tmp, (intptr_t)0L, Assembler::bcondNotEqual, value_is_set);
+  z_illtrap();
+  bind(value_is_set);
+#endif
+
+  if (offset != 0) {
+    z_agfi(tmp, offset); // tmp = tmp + offset;
+  }
+
+  BLOCK_COMMENT("} delayed_value");
+  return RegisterOrConstant(tmp);
+}
+
+// Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos'
+// and return the resulting instruction.
+// Dest_pos and inst_pos are 32 bit only. These parms can only designate
+// relative positions.
+// Use correct argument types. Do not pre-calculate distance.
+unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) {
+ // c records which instruction class matched; it is only used in the
+ // diagnostic output below.
+ int c = 0;
+ unsigned long patched_inst = 0;
+ // In every case: m masks the offset field of the instruction, v is the new
+ // offset positioned in that field. The old field is cleared, then v is or-ed in.
+ if (is_call_pcrelative_short(inst) ||
+ is_branch_pcrelative_short(inst) ||
+ is_branchoncount_pcrelative_short(inst) ||
+ is_branchonindex32_pcrelative_short(inst)) {
+ c = 1;
+ int m = fmask(15, 0); // simm16(-1, 16, 32);
+ int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32);
+ patched_inst = (inst & ~m) | v;
+ } else if (is_compareandbranch_pcrelative_short(inst)) {
+ c = 2;
+ long m = fmask(31, 16); // simm16(-1, 16, 48);
+ long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
+ patched_inst = (inst & ~m) | v;
+ } else if (is_branchonindex64_pcrelative_short(inst)) {
+ c = 3;
+ long m = fmask(31, 16); // simm16(-1, 16, 48);
+ long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
+ patched_inst = (inst & ~m) | v;
+ } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) {
+ c = 4;
+ long m = fmask(31, 0); // simm32(-1, 16, 48);
+ long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
+ patched_inst = (inst & ~m) | v;
+ } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions.
+ c = 5;
+ long m = fmask(31, 0); // simm32(-1, 16, 48);
+ long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
+ patched_inst = (inst & ~m) | v;
+ } else {
+ // Not a pc-relative instruction at all: dump diagnostics and stop.
+ print_dbg_msg(tty, inst, "not a relative branch", 0);
+ dump_code_range(tty, inst_pos, 32, "not a pcrelative branch");
+ ShouldNotReachHere();
+ }
+
+ // Sanity check: the offset decoded from the patched instruction must equal
+ // the requested distance.
+ long new_off = get_pcrel_offset(patched_inst);
+ if (new_off != (dest_pos-inst_pos)) {
+ tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off);
+ print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0);
+ print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0);
+#ifdef LUCY_DBG
+ VM_Version::z_SIGSEGV();
+#endif
+ ShouldNotReachHere();
+ }
+ return patched_inst;
+}
+
+// Only called when binding labels (share/vm/asm/assembler.cpp)
+// Pass arguments as intended. Do not pre-calculate distance.
+void MacroAssembler::pd_patch_instruction(address branch, address target) {
+  // Read the instruction at branch, redirect it to target, and write it back
+  // with its original length.
+  unsigned long old_inst;
+  const int len = get_instruction(branch, &old_inst);
+
+  const unsigned long new_inst = patched_branch(target, old_inst, branch);
+  set_instruction(branch, new_inst, len);
+}
+
+
+// Extract relative address (aka offset).
+// inv_simm16 works for 4-byte instructions only.
+// compare and branch instructions are 6-byte and have a 16bit offset "in the middle".
+long MacroAssembler::get_pcrel_offset(unsigned long inst) {
+
+  if (MacroAssembler::is_pcrelative_short(inst)) {
+    // Distinguish the 4-byte form (nothing above bit 31, upper halfword of the
+    // low word non-zero) from the 6-byte form with the offset "in the middle".
+    const bool high_word_clear    = (inst&0xFFFFffff00000000UL) == 0;
+    const bool opcode_halfword_set = (inst&0x00000000FFFF0000UL) != 0;
+    if (high_word_clear && opcode_halfword_set) {
+      return RelAddr::inv_pcrel_off16(inv_simm16(inst));
+    } else {
+      return RelAddr::inv_pcrel_off16(inv_simm16_48(inst));
+    }
+  }
+
+  if (MacroAssembler::is_pcrelative_long(inst)) {
+    return RelAddr::inv_pcrel_off32(inv_simm32(inst));
+  }
+
+  // Neither short nor long pc-relative: report and stop.
+  print_dbg_msg(tty, inst, "not a pcrelative instruction", 6);
+#ifdef LUCY_DBG
+  VM_Version::z_SIGSEGV();
+#else
+  ShouldNotReachHere();
+#endif
+  return -1;
+}
+
+long MacroAssembler::get_pcrel_offset(address pc) {
+  // Returns the pc-relative offset encoded in the instruction at pc.
+  // Debug builds additionally dump the code range and stop if the
+  // instruction is not pc-relative.
+  unsigned long inst;
+  get_instruction(pc, &inst); // Only the instruction bits are needed, not the length.
+
+#ifdef ASSERT
+  long offset;
+  if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) {
+    offset = get_pcrel_offset(inst);
+  } else {
+    offset = -1; // Marks "not a pc-relative instruction".
+  }
+
+  if (offset == -1) {
+    dump_code_range(tty, pc, 32, "not a pcrelative instruction");
+#ifdef LUCY_DBG
+    VM_Version::z_SIGSEGV();
+#else
+    ShouldNotReachHere();
+#endif
+  }
+  return offset;
+#else
+  return get_pcrel_offset(inst);
+#endif // ASSERT
+}
+
+// Get target address from pc-relative instructions.
+address MacroAssembler::get_target_addr_pcrel(address pc) {
+  assert(is_pcrelative_long(pc), "not a pcrelative instruction");
+  // Target = instruction address + offset encoded in the instruction.
+  const long rel_offset = get_pcrel_offset(pc);
+  return pc + rel_offset;
+}
+
+// Patch pc relative load address.
+void MacroAssembler::patch_target_addr_pcrel(address pc, address con) {
+  // Rewrites the relative offset of the pc-relative instruction at pc so that
+  // it refers to con. The offset field is written at pc+2 in both forms.
+  unsigned long inst;
+  get_instruction(pc, &inst);
+
+  // Offset is +/- 2**32 -> use long.
+  const ptrdiff_t distance = con - pc;
+
+  const bool short_form = is_pcrelative_short(inst);
+  if (short_form) {
+    *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required.
+
+    // Some extra safety net.
+    if (!RelAddr::is_in_range_of_RelAddr16(distance)) {
+      print_dbg_msg(tty, inst, "distance out of range (16bit)", 4);
+      dump_code_range(tty, pc, 32, "distance out of range (16bit)");
+      guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16");
+    }
+    return;
+  }
+
+  const bool long_form = is_pcrelative_long(inst);
+  if (long_form) {
+    *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc);
+
+    // Some Extra safety net.
+    if (!RelAddr::is_in_range_of_RelAddr32(distance)) {
+      print_dbg_msg(tty, inst, "distance out of range (32bit)", 6);
+      dump_code_range(tty, pc, 32, "distance out of range (32bit)");
+      guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32");
+    }
+    return;
+  }
+
+  guarantee(false, "not a pcrelative instruction to patch!");
+}
+
+// "Current PC" here means the address just behind the basr instruction.
+address MacroAssembler::get_PC(Register result) {
+ // Emits a basr that stores the address of the following instruction in result.
+ // Returns the assembler's pc() after the basr has been emitted.
+ z_basr(result, Z_R0); // Don't branch, just save next instruction address in result.
+ return pc();
+}
+
+// Get current PC + offset.
+// Offset given in bytes, must be even!
+// "Current PC" here means the address of the larl instruction plus the given offset.
+address MacroAssembler::get_PC(Register result, int64_t offset) {
+  // The returned address is computed relative to the position of the larl itself.
+  const address larl_pos = pc();
+  const address resulting_pc = larl_pos + offset;
+  z_larl(result, offset/2); // Save target instruction address in result.
+  return resulting_pc;
+}
+
+// Resize_frame with SP(new) = SP(old) - [offset].
+void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp)
+{
+ // offset, fp and Z_SP must be three different registers.
+ assert_different_registers(offset, fp, Z_SP);
+ // If requested, first load the callers_sp slot of the current frame into fp.
+ if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }
+
+ // Move Z_SP down by offset, then store fp into the callers_sp slot of the resized frame.
+ z_sgr(Z_SP, offset);
+ z_stg(fp, _z_abi(callers_sp), Z_SP);
+}
+
+// Resize_frame with SP(new) = [addr].
+void MacroAssembler::resize_frame_absolute(Register addr, Register fp, bool load_fp) {
+  assert_different_registers(addr, fp, Z_SP);
+  // If requested, first load the callers_sp slot of the current frame into fp.
+  if (load_fp) {
+    z_lg(fp, _z_abi(callers_sp), Z_SP);
+  }
+
+  if (addr == Z_R0) {
+    // addr is not used as store base in this case: set Z_SP first, then
+    // store relative to Z_SP.
+    z_lgr(Z_SP, addr);
+    z_stg(fp, _z_abi(callers_sp), Z_SP);
+  } else {
+    // Minimize stalls by not using Z_SP immediately after update.
+    z_stg(fp, _z_abi(callers_sp), addr);
+    z_lgr(Z_SP, addr);
+  }
+}
+
+// Resize_frame with SP(new) = SP(old) + offset.
+void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) {
+  assert_different_registers(fp, Z_SP);
+  // If requested, first load the callers_sp slot of the current frame into fp.
+  if (load_fp) {
+    z_lg(fp, _z_abi(callers_sp), Z_SP);
+  }
+
+  if (!Displacement::is_validDisp((int)_z_abi(callers_sp) + offset.constant_or_zero())) {
+    // Displacement not encodable: update Z_SP first, then store relative to the new Z_SP.
+    add64(Z_SP, offset);
+    z_stg(fp, _z_abi(callers_sp), Z_SP);
+  } else {
+    // Minimize stalls by first using, then updating Z_SP.
+    // Do that only if we have a small positive offset or if ExtImm are available.
+    z_stg(fp, Address(Z_SP, offset, _z_abi(callers_sp)));
+    add64(Z_SP, offset);
+  }
+}
+
+void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) {
+ // Pushes a frame whose size is given in register bytes and stores old_sp at
+ // offset 0 of the new frame.
+ // copy_sp: if true, old_sp is first loaded from Z_SP; if false, debug
+ // builds emit a check that old_sp already equals Z_SP.
+ // bytes_with_inverted_sign: if true, bytes is added to Z_SP, otherwise it is subtracted.
+#ifdef ASSERT
+ assert_different_registers(bytes, old_sp, Z_SP);
+ if (!copy_sp) {
+ z_cgr(old_sp, Z_SP);
+ asm_assert_eq("[old_sp]!=[Z_SP]", 0x211);
+ }
+#endif
+ if (copy_sp) { z_lgr(old_sp, Z_SP); }
+ if (bytes_with_inverted_sign) {
+ // Store at Z_SP + bytes first, then move Z_SP there.
+ z_stg(old_sp, 0, bytes, Z_SP);
+ add2reg_with_index(Z_SP, 0, bytes, Z_SP);
+ } else {
+ z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster.
+ z_stg(old_sp, 0, Z_SP);
+ }
+}
+
+unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) {
+  // Pushes a frame of bytes rounded up to frame::alignment_in_bytes and
+  // returns the rounded size.
+  long offset = Assembler::align(bytes, frame::alignment_in_bytes);
+
+  // The old SP is saved via scratch only if -offset is not a valid
+  // displacement and scratch is Z_R0 or Z_R1.
+  // NOTE(review): if -offset is not a valid displacement and scratch is some
+  // other register, the store is still emitted with that displacement -
+  // confirm that z_stg copes with this.
+  const bool store_before_update = Displacement::is_validDisp(-offset) ||
+                                   (scratch != Z_R0 && scratch != Z_R1);
+
+  if (store_before_update) {
+    // Minimize stalls by first using, then updating Z_SP.
+    z_stg(Z_SP, -offset, Z_SP);
+    add2reg(Z_SP, -offset);
+  } else { // scratch == Z_R0 || scratch == Z_R1
+    z_lgr(scratch, Z_SP);
+    add2reg(Z_SP, -offset);
+    z_stg(scratch, 0, Z_SP);
+  }
+  return offset;
+}
+
+// Push a frame of size `bytes' plus abi160 on top.
+unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) {
+  // Requested size plus the abi160 area; the size actually pushed is returned.
+  const unsigned int total_bytes = bytes + frame::z_abi_160_size;
+  BLOCK_COMMENT("push_frame_abi160 {");
+  const unsigned int pushed = push_frame(total_bytes);
+  BLOCK_COMMENT("} push_frame_abi160");
+  return pushed;
+}
+
+// Pop current C frame.
+void MacroAssembler::pop_frame() {
+ BLOCK_COMMENT("pop_frame:");
+ // Reload Z_SP from the callers_sp slot of the current frame.
+ Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
+  // Select the call flavor: static call if relocation is not allowed,
+  // regular call otherwise.
+  if (!allow_relocation) {
+    call_c_static(entry_point);
+  } else {
+    call_c(entry_point);
+  }
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point) {
+ // Convenience variant: relocation is allowed.
+ bool allow_relocation = true;
+ call_VM_leaf_base(entry_point, allow_relocation);
+}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+                                  Register last_java_sp,
+                                  address  entry_point,
+                                  bool     allow_relocation,
+                                  bool     check_exceptions) { // Defaults to true.
+  // Allow_relocation indicates, if true, that the generated code shall
+  // be fit for code relocation or referenced data relocation. In other
+  // words: all addresses must be considered variable. PC-relative addressing
+  // is not possible then.
+  // On the other hand, if (allow_relocation == false), addresses and offsets
+  // may be considered stable, enabling us to take advantage of some PC-relative
+  // addressing tweaks. These might improve performance and reduce code size.
+
+  // Determine last_java_sp register: fall back to Z_SP if none was given.
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = Z_SP;
+  }
+
+  set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation);
+
+  // ARG1 must hold thread address.
+  z_lgr(Z_ARG1, Z_thread);
+
+  // Emit the call and remember its return pc.
+  const address return_pc = allow_relocation ? call_c(entry_point)
+                                             : call_c_static(entry_point);
+
+  reset_last_Java_frame(allow_relocation);
+
+  // C++ interp handles this in the interpreter.
+  check_and_handle_popframe(Z_thread);
+  check_and_handle_earlyret(Z_thread);
+
+  // Check for pending exceptions.
+  if (check_exceptions) {
+    // Check for pending exceptions (java_thread is set upon return).
+    load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));
+
+    // This used to conditionally jump to forward_exception however it is
+    // possible if we relocate that the branch will not reach. So we must jump
+    // around so we can always reach.
+
+    Label no_pending_exception;
+    z_bre(no_pending_exception); // Bcondequal is the same as bcondZero.
+    call_stub(StubRoutines::forward_exception_entry());
+    bind(no_pending_exception);
+  }
+
+  // Get oop result if there is one and reset the value in the thread.
+  if (oop_result->is_valid()) {
+    get_vm_result(oop_result);
+  }
+
+  _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls.
+}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ bool check_exceptions) { // Defaults to true.
+ // Convenience variant: forwards to the five-argument call_VM_base with
+ // relocation allowed.
+ bool allow_relocation = true;
+ call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions);
+}
+
+// VM calls without explicit last_java_sp.
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
+ // VM call without arguments: forwards to call_VM_base with noreg as
+ // last_java_sp and allow_relocation == true.
+ // Call takes possible detour via InterpreterMacroAssembler.
+ call_VM_base(oop_result, noreg, entry_point, true, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
+ // VM call with one argument, passed in Z_ARG2.
+ // Z_ARG1 is reserved for the thread.
+ lgr_if_needed(Z_ARG2, arg_1);
+ call_VM(oop_result, entry_point, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+ // VM call with two arguments, passed in Z_ARG2 and Z_ARG3.
+ // Z_ARG1 is reserved for the thread.
+ lgr_if_needed(Z_ARG2, arg_1);
+ // Z_ARG2 has been set up above, so arg_2 must not live there.
+ assert(arg_2 != Z_ARG2, "smashed argument");
+ lgr_if_needed(Z_ARG3, arg_2);
+ call_VM(oop_result, entry_point, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
+ Register arg_3, bool check_exceptions) {
+ // VM call with three arguments, passed in Z_ARG2, Z_ARG3 and Z_ARG4.
+ // Z_ARG1 is reserved for the thread.
+ lgr_if_needed(Z_ARG2, arg_1);
+ // Z_ARG2 has been set up above, so arg_2 must not live there.
+ assert(arg_2 != Z_ARG2, "smashed argument");
+ lgr_if_needed(Z_ARG3, arg_2);
+ // Z_ARG2 and Z_ARG3 have been set up above, so arg_3 must not live there.
+ assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
+ lgr_if_needed(Z_ARG4, arg_3);
+ call_VM(oop_result, entry_point, check_exceptions);
+}
+
+// VM static calls without explicit last_java_sp.
+
+void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) {
+ // Static VM call without arguments: forwards to call_VM_base with noreg as
+ // last_java_sp and allow_relocation == false.
+ // Call takes possible detour via InterpreterMacroAssembler.
+ call_VM_base(oop_result, noreg, entry_point, false, check_exceptions);
+}
+
+void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2,
+ Register arg_3, bool check_exceptions) {
+ // Static VM call with three arguments, passed in Z_ARG2, Z_ARG3 and Z_ARG4.
+ // Z_ARG1 is reserved for the thread.
+ lgr_if_needed(Z_ARG2, arg_1);
+ // Z_ARG2 has been set up above, so arg_2 must not live there.
+ assert(arg_2 != Z_ARG2, "smashed argument");
+ lgr_if_needed(Z_ARG3, arg_2);
+ // Z_ARG2 and Z_ARG3 have been set up above, so arg_3 must not live there.
+ assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
+ lgr_if_needed(Z_ARG4, arg_3);
+ call_VM_static(oop_result, entry_point, check_exceptions);
+}
+
+// VM calls with explicit last_java_sp.
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) {
+ // VM call with explicit last_java_sp and no arguments: forwards to
+ // call_VM_base with allow_relocation == true.
+ // Call takes possible detour via InterpreterMacroAssembler.
+ call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
+ // VM call with explicit last_java_sp and one argument, passed in Z_ARG2.
+ // Z_ARG1 is reserved for the thread.
+ lgr_if_needed(Z_ARG2, arg_1);
+ call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
+ Register arg_2, bool check_exceptions) {
+ // VM call with explicit last_java_sp and two arguments, passed in Z_ARG2 and Z_ARG3.
+ // Z_ARG1 is reserved for the thread.
+ lgr_if_needed(Z_ARG2, arg_1);
+ // Z_ARG2 has been set up above, so arg_2 must not live there.
+ assert(arg_2 != Z_ARG2, "smashed argument");
+ lgr_if_needed(Z_ARG3, arg_2);
+ call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
+ Register arg_2, Register arg_3, bool check_exceptions) {
+ // VM call with explicit last_java_sp and three arguments, passed in
+ // Z_ARG2, Z_ARG3 and Z_ARG4.
+ // Z_ARG1 is reserved for the thread.
+ lgr_if_needed(Z_ARG2, arg_1);
+ // Z_ARG2 has been set up above, so arg_2 must not live there.
+ assert(arg_2 != Z_ARG2, "smashed argument");
+ lgr_if_needed(Z_ARG3, arg_2);
+ // Z_ARG2 and Z_ARG3 have been set up above, so arg_3 must not live there.
+ assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
+ lgr_if_needed(Z_ARG4, arg_3);
+ call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
+}
+
+// VM leaf calls.
+
+void MacroAssembler::call_VM_leaf(address entry_point) {
+ // Leaf call without arguments: forwards to call_VM_leaf_base with
+ // allow_relocation == true.
+ // Call takes possible detour via InterpreterMacroAssembler.
+ call_VM_leaf_base(entry_point, true);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
+  // Leaf call with one argument in Z_ARG1. A noreg argument is skipped.
+  if (arg_1 != noreg) {
+    lgr_if_needed(Z_ARG1, arg_1);
+  }
+  call_VM_leaf(entry_point);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
+  // Leaf call with two arguments in Z_ARG1 and Z_ARG2. noreg arguments are skipped.
+  if (arg_1 != noreg) {
+    lgr_if_needed(Z_ARG1, arg_1);
+  }
+  // Z_ARG1 has been handled above, so arg_2 must not live there.
+  assert(arg_2 != Z_ARG1, "smashed argument");
+  if (arg_2 != noreg) {
+    lgr_if_needed(Z_ARG2, arg_2);
+  }
+  call_VM_leaf(entry_point);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+  // Leaf call with three arguments in Z_ARG1..Z_ARG3. noreg arguments are skipped.
+  if (arg_1 != noreg) {
+    lgr_if_needed(Z_ARG1, arg_1);
+  }
+  // Z_ARG1 has been handled above, so arg_2 must not live there.
+  assert(arg_2 != Z_ARG1, "smashed argument");
+  if (arg_2 != noreg) {
+    lgr_if_needed(Z_ARG2, arg_2);
+  }
+  // Z_ARG1 and Z_ARG2 have been handled above, so arg_3 must not live there.
+  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
+  if (arg_3 != noreg) {
+    lgr_if_needed(Z_ARG3, arg_3);
+  }
+  call_VM_leaf(entry_point);
+}
+
+// Static VM leaf calls.
+// Really static VM leaf calls are never patched.
+
+void MacroAssembler::call_VM_leaf_static(address entry_point) {
+ // Static leaf call without arguments: forwards to call_VM_leaf_base with
+ // allow_relocation == false.
+ // Call takes possible detour via InterpreterMacroAssembler.
+ call_VM_leaf_base(entry_point, false);
+}
+
+void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) {
+  // Static leaf call with one argument in Z_ARG1. A noreg argument is skipped.
+  if (arg_1 != noreg) {
+    lgr_if_needed(Z_ARG1, arg_1);
+  }
+  call_VM_leaf_static(entry_point);
+}
+
+void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) {
+  // arg_2 must not sit in Z_ARG1: the first move below may overwrite that register.
+  assert(arg_2 != Z_ARG1, "smashed argument");
+
+  // noreg means: nothing to move into the respective argument slot.
+  if (arg_1 != noreg) {
+    lgr_if_needed(Z_ARG1, arg_1);
+  }
+  if (arg_2 != noreg) {
+    lgr_if_needed(Z_ARG2, arg_2);
+  }
+
+  call_VM_leaf_static(entry_point);
+}
+
+void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+  // Later arguments must not live in registers that earlier moves may overwrite.
+  assert(arg_2 != Z_ARG1, "smashed argument");
+  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
+
+  // noreg means: nothing to move into the respective argument slot.
+  if (arg_1 != noreg) {
+    lgr_if_needed(Z_ARG1, arg_1);
+  }
+  if (arg_2 != noreg) {
+    lgr_if_needed(Z_ARG2, arg_2);
+  }
+  if (arg_3 != noreg) {
+    lgr_if_needed(Z_ARG3, arg_3);
+  }
+
+  call_VM_leaf_static(entry_point);
+}
+
+// Don't use detour via call_c(reg).
+address MacroAssembler::call_c(address function_entry) {
+  // Z_R1 carries the callee address into the register-based call.
+  const Register target_reg = Z_R1;
+  load_const(target_reg, function_entry);
+  return call(target_reg);
+}
+
+// Variant for really static (non-relocatable) calls which are never patched.
+address MacroAssembler::call_c_static(address function_entry) {
+  // Z_R1 carries the callee address. Unlike call_c, the address is
+  // materialized with load_absolute_address instead of load_const.
+  const Register target_reg = Z_R1;
+  load_absolute_address(target_reg, function_entry);
+#if 0 // def ASSERT
+  // Verify that call site did not move.
+  load_const_optimized(Z_R0, function_entry);
+  z_cgr(Z_R1, Z_R0);
+  z_brc(bcondEqual, 3);
+  z_illtrap(0xba);
+#endif
+  return call(target_reg);
+}
+
+address MacroAssembler::call_c_opt(address function_entry) {
+  const bool emitted = call_far_patchable(function_entry, -2 /* emit relocation + constant */);
+
+  // Remember the pc following the emitted call; NULL signals that
+  // call_far_patchable reported failure.
+  if (emitted) {
+    _last_calls_return_pc = pc();
+  } else {
+    _last_calls_return_pc = NULL;
+  }
+  return _last_calls_return_pc;
+}
+
+// Identify a call_far_patchable instruction: LARL + LG + BASR
+//
+// nop ; optionally, if required for alignment
+// lgrl rx,A(TOC entry) ; PC-relative access into constant pool
+// basr Z_R14,rx ; end of this instruction must be aligned to a word boundary
+//
+// Code pattern will eventually get patched into variant2 (see below for detection code).
+//
+bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) {
+  // The sequence has to start with the load of the call target from the TOC ...
+  if (!is_load_const_from_toc(instruction_addr)) {
+    return false;
+  }
+
+  // ... immediately followed by the call-by-register (BASR) instruction.
+  const address call_pos = instruction_addr + load_const_from_toc_size();
+  assert(call_pos-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch");
+  return is_call_byregister(call_pos);
+}
+
+// Identify a call_far_patchable instruction: BRASL
+//
+// Code pattern that suits atomic patching:
+// nop ; Optionally, if required for alignment.
+// nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer).
+// nop ; For code pattern detection: Prepend each BRASL with a nop.
+// brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned !
+bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) {
+  // Position where the pc-relative call (nop + BRASL) has to start so that
+  // the whole sequence ends call_far_patchable_size() bytes after instruction_addr.
+  const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size());
+
+  // Everything in front of the call must consist of filler nops.
+  address cursor = instruction_addr;
+  while (cursor < call_addr) {
+    if (!is_z_nop(cursor)) {
+      return false;
+    }
+    cursor += nop_size();
+  }
+  assert(cursor == call_addr, "sanity");
+
+  // The sequence has to end with the pc-relative call itself.
+  if (!is_call_far_pcrelative(call_addr)) {
+    return false;
+  }
+  assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch");
+  return true;
+}
+
+// Emit a NOT mt-safely patchable 64 bit absolute call.
+// If toc_offset == -2, then the destination of the call (= target) is emitted
+// to the constant pool and a runtime_call relocation is added
+// to the code buffer.
+// If toc_offset != -2, target must already be in the constant pool at
+// _ctableStart+toc_offset (a caller can retrieve toc_offset
+// from the runtime_call relocation).
+// Special handling of emitting to scratch buffer when there is no constant pool.
+// Slightly changed code pattern. We emit an additional nop if we would
+// not end emitting at a word aligned address. This is to ensure
+// an atomically patchable displacement in brasl instructions.
+//
+// A call_far_patchable comes in different flavors:
+// - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register)
+// - LGRL(CP) / BR (address in constant pool, pc-relative access)
+// - BRASL (relative address of call target coded in instruction)
+// All flavors occupy the same amount of space. Length differences are compensated
+// by leading nops, such that the instruction sequence always ends at the same
+// byte offset. This is required to keep the return offset constant.
+// Furthermore, the return address (the end of the instruction sequence) is forced
+// to be on a 4-byte boundary. This is required for atomic patching, should we ever
+// need to patch the call target of the BRASL flavor.
+// RETURN value: false, if no constant pool entry could be allocated, true otherwise.
+bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) {
+ // Get current pc and ensure word alignment for end of instr sequence.
+ const address start_pc = pc();
+ const intptr_t start_off = offset();
+ assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address");
+ // Distance used for the range check of the relative call form.
+ const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop.
+ // A new constant pool entry is created only if the caller asks for it
+ // (tocOffset == -2) and we are not emitting into a scratch buffer.
+ const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit();
+ // The short BRASL form is chosen only if no pool entry is being created,
+ // the target is within 32-bit relative range, ReoptimizeCallSequences is set,
+ // and we are not emitting into a scratch buffer.
+ const bool emit_relative_call = !emit_target_to_pool &&
+ RelAddr::is_in_range_of_RelAddr32(dist) &&
+ ReoptimizeCallSequences &&
+ !code_section()->scratch_emit();
+
+ if (emit_relative_call) {
+ // Add padding to get the same size as below.
+ const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size();
+ unsigned int current_padding;
+ for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); }
+ // Padding must be a whole number of nops.
+ assert(current_padding == padding, "sanity");
+
+ // relative call: len = 2(nop) + 6 (brasl)
+ // CodeBlob resize cannot occur in this case because
+ // this call is emitted into pre-existing space.
+ z_nop(); // Prepend each BRASL with a nop.
+ z_brasl(Z_R14, target);
+ } else {
+ // absolute call: Get address from TOC.
+ // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8}
+ if (emit_target_to_pool) {
+ // When emitting the call for the first time, we do not need to use
+ // the pc-relative version. It will be patched anyway, when the code
+ // buffer is copied.
+ // Relocation is not needed when !ReoptimizeCallSequences.
+ relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none;
+ AddressLiteral dest(target, rt);
+ // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills
+ // inst_mark(). Reset if possible.
+ bool reset_mark = (inst_mark() == pc());
+ tocOffset = store_oop_in_toc(dest);
+ if (reset_mark) { set_inst_mark(); }
+ // -1 is the failure indication of store_oop_in_toc().
+ if (tocOffset == -1) {
+ return false; // Couldn't create constant pool entry.
+ }
+ }
+ assert(offset() == start_off, "emit no code before this point!");
+
+ // For a caller-supplied tocOffset, the pool slot is addressed relative to
+ // the current pc; for a freshly created entry, relative to the start of
+ // the constant section.
+ address tocPos = pc() + tocOffset;
+ if (emit_target_to_pool) {
+ tocPos = code()->consts()->start() + tocOffset;
+ }
+ // Load the call target from the pool slot into Z_R14 and call through it.
+ load_long_pcrelative(Z_R14, tocPos);
+ z_basr(Z_R14, Z_R14);
+ }
+
+#ifdef ASSERT
+ // Assert that we can identify the emitted call.
+ assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call");
+ assert(offset() == start_off+call_far_patchable_size(), "wrong size");
+
+ // For a freshly created pool entry, the decoded destination must round-trip.
+ if (emit_target_to_pool) {
+ assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target,
+ "wrong encoding of dest address");
+ }
+#endif
+ return true; // success
+}
+
+// Identify a call_far_patchable instruction.
+// For more detailed information see header comment of call_far_patchable.
+bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) {
+  // Short version first: BRASL.
+  if (is_call_far_patchable_variant2_at(instruction_addr)) {
+    return true;
+  }
+  // Otherwise it has to be the long version, which loads the target from the
+  // constant pool and calls via BASR.
+  return is_call_far_patchable_variant0_at(instruction_addr);
+}
+
+// Does the call_far_patchable instruction use a pc-relative encoding
+// of the call destination?
+bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) {
+  // Variant 2 is the pc-relative flavor, so detecting it answers the question.
+  const bool is_pcrelative = is_call_far_patchable_variant2_at(instruction_addr);
+  return is_pcrelative;
+}
+
+bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) {
+  // Each BRASL is prepended with a nop; require that nop first.
+  if (!is_z_nop(instruction_addr)) {
+    return false;
+  }
+  // The BRASL has to follow directly after that one nop.
+  return is_z_brasl(instruction_addr + nop_size());
+}
+
+// Set destination address of a call_far_patchable instruction.
+void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) {
+  ResourceMark rm;
+
+  // Re-emit the complete call sequence on top of the existing one, using a
+  // code buffer that covers exactly the patchable call at instruction_addr.
+  // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit).
+  const int patch_size = MacroAssembler::call_far_patchable_size();
+  CodeBuffer patch_buf(instruction_addr, patch_size);
+  MacroAssembler patcher(&patch_buf);
+  patcher.call_far_patchable(dest, tocOffset);
+
+  ICache::invalidate_range(instruction_addr, patch_size); // Empty on z.
+}
+
+// Get dest address of a call_far_patchable instruction.
+// Decodes the call destination of the call_far_patchable sequence starting at
+// instruction_addr.
+//   instruction_addr - start of the sequence (including any leading nops).
+//   ctable           - not used by this implementation.
+// Returns the destination address. If the code at instruction_addr is not a
+// recognized call_far_patchable sequence, diagnostics are printed to stderr
+// and ShouldNotReachHere() is raised.
+address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) {
+  // Dynamic TOC: absolute address in constant pool.
+  // Check variant2 first, it is more frequent.
+
+  // Relative address encoded in call instruction.
+  if (is_call_far_patchable_variant2_at(instruction_addr)) {
+    // Locate the BRASL the same way is_call_far_patchable_variant2_at() does:
+    // skip the filler nops (if any) in front of the pc-relative call, then
+    // the one nop that is prepended to each BRASL. Using a fixed
+    // instruction_addr + nop_size() would only be right without filler nops.
+    const address call_addr = instruction_addr + call_far_patchable_size() - call_far_pcrelative_size();
+    return MacroAssembler::get_target_addr_pcrel(call_addr + nop_size());
+  }
+
+  // Absolute address in constant pool.
+  if (is_call_far_patchable_variant0_at(instruction_addr)) {
+    // The load instruction encodes the pc-relative offset of the pool slot.
+    long tocOffset = get_load_const_from_toc_offset(instruction_addr);
+    address tocLoc = instruction_addr + tocOffset;
+    return *(address *)(tocLoc);
+  }
+
+  // Neither variant matched: dump what we found and bail out.
+  fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr);
+  fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n",
+          *(unsigned long*)instruction_addr,
+          *(unsigned long*)(instruction_addr+8),
+          call_far_patchable_size());
+  Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size());
+  ShouldNotReachHere();
+  return NULL;
+}
+
+void MacroAssembler::align_call_far_patchable(address pc) {
+  // Nothing to emit if a call_far_patchable at pc needs no alignment nop.
+  if (!call_far_patchable_requires_alignment_nop(pc)) {
+    return;
+  }
+  z_nop();
+}
+
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
+  // This implementation emits no code.
+}
+
+void MacroAssembler::check_and_handle_popframe(Register java_thread) {
+  // This implementation emits no code.
+}
+
+// Read from the polling page.
+// Use TM or TMY instruction, depending on read offset.
+// offset = 0: Use TM, safepoint polling.
+// offset < 0: Use TMY, profiling safepoint polling.
+void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) {
+  if (!Immediate::is_uimm12(offset)) {
+    // Offset does not fit an unsigned 12-bit displacement: use TMY
+    // with the profiling mask.
+    z_tmy(offset, polling_page_address, mask_profiling);
+    return;
+  }
+  // Offset fits: use TM with the safepoint mask.
+  z_tm(offset, polling_page_address, mask_safepoint);
+}
+
+// Check whether z_instruction is a read access to the polling page
+// which was emitted by load_from_polling_page(..).
+// Returns true if the instruction at instr_loc has one of the two forms
+// emitted by load_from_polling_page():
+//   - a 4-byte TM with displacement 0, a base register other than 0, and
+//     mask_safepoint, or
+//   - a 6-byte TMY (operands are not inspected).
+bool MacroAssembler::is_load_from_polling_page(address instr_loc) {
+  unsigned long z_instruction;
+  unsigned int ilen = get_instruction(instr_loc, &z_instruction);
+
+  if (ilen == 2) { return false; } // It's none of the allowed instructions.
+
+  if (ilen == 4) {
+    if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail.
+
+    int ms = inv_mask(z_instruction,8,32);  // mask
+    int ra = inv_reg(z_instruction,16,32);  // base register
+    int ds = inv_uimm12(z_instruction);     // displacement
+
+    // Only z_tm(0, ra, mask_safepoint) with a real base register qualifies.
+    return ds == 0 && ra != 0 && ms == mask_safepoint;
+  }
+
+  /* ilen == 6 */
+  assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y).");
+
+  // Any TMY is accepted; mask, base register and displacement are not
+  // validated for this form (the former decode of these fields was unused).
+  return is_z_tmy(z_instruction);
+}
+
+// Extract poll address from instruction and ucontext.
+address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) {
+  assert(ucontext != NULL, "must have ucontext");
+  ucontext_t* uc = (ucontext_t*) ucontext;
+  unsigned long z_instruction;
+  unsigned int ilen = get_instruction(instr_loc, &z_instruction);
+
+  // Decode base register number and displacement from the TM or TMY instruction.
+  int base_reg_num;
+  int displacement;
+  if (ilen == 4 && is_z_tm(z_instruction)) {
+    base_reg_num = inv_reg(z_instruction, 16, 32);
+    displacement = inv_uimm12(z_instruction);
+  } else if (ilen == 6 && is_z_tmy(z_instruction)) {
+    base_reg_num = inv_reg(z_instruction, 16, 48);
+    displacement = inv_simm20(z_instruction);
+  } else {
+    // Neither of the two supported instruction forms.
+    ShouldNotReachHere();
+    return NULL;
+  }
+
+  // Poll address = saved content of the base register + displacement.
+  address base = (address)uc->uc_mcontext.gregs[base_reg_num];
+  return base + displacement;
+}
+
+// Extract poll register from instruction.
+uint MacroAssembler::get_poll_register(address instr_loc) {
+  unsigned long z_instruction;
+  unsigned int ilen = get_instruction(instr_loc, &z_instruction);
+
+  // Width (in bits) of the recognized instruction; 0 if not recognized.
+  int insn_bits = 0;
+  if (ilen == 4 && is_z_tm(z_instruction)) {
+    insn_bits = 32;
+  } else if (ilen == 6 && is_z_tmy(z_instruction)) {
+    insn_bits = 48;
+  }
+
+  if (insn_bits == 0) {
+    // Neither TM nor TMY.
+    ShouldNotReachHere();
+    return 0;
+  }
+
+  // The base register field sits at bit 16 in both instruction forms.
+  return (uint)inv_reg(z_instruction, 16, insn_bits);
+}
+
+bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) {
+  // Not expected to be called here; any call is flagged.
+  ShouldNotCallThis();
+  return false;  // Only reached if ShouldNotCallThis() returns.
+}
+
+// Write serialization page so VM thread can do a pseudo remote membar
+// We use the current thread pointer to calculate a thread specific
+// offset to write to within the page. This minimizes bus traffic
+// due to cache line collision.
+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
+ // Both temps are written below and used together in the final store.
+ assert_different_registers(tmp1, tmp2);
+ // tmp2 = thread pointer shifted LEFT by the serialize page shift count.
+ // NOTE(review): shared code presumably derives the page offset with a right
+ // shift of the thread pointer - confirm that the left shift is intended.
+ z_sllg(tmp2, thread, os::get_serialize_page_shift_count());
+ // tmp1 = address of the memory serialization page.
+ load_const_optimized(tmp1, (long) os::get_memory_serialize_page());
+
+ // Reduce tmp2 to an offset within the page: AND with the page mask, using
+ // the 16-bit or 32-bit immediate form depending on the mask's size, then
+ // zero-extend the masked part to 64 bits.
+ int mask = os::get_serialize_page_mask();
+ if (Immediate::is_uimm16(mask)) {
+ z_nill(tmp2, mask);
+ z_llghr(tmp2, tmp2);
+ } else {
+ z_nilf(tmp2, mask);
+ z_llgfr(tmp2, tmp2);
+ }
+
+ z_release();
+ // Store to page + offset. The stored value (content of Z_R0) is not set up here.
+ z_st(Z_R0, 0, tmp2, tmp1);
+}
+
+// Don't rely on register locking, always use Z_R1 as scratch register instead.
+void MacroAssembler::bang_stack_with_offset(int offset) {
+  // Stack grows down, caller passes positive offset.
+  assert(offset > 0, "must bang with positive offset");
+
+  if (!Displacement::is_validDisp(-offset)) {
+    // Displacement cannot be encoded: compute the address into Z_R1 first.
+    add2reg(Z_R1, -offset, Z_SP);   // Do not destroy Z_SP!!!
+    z_tm(0, Z_R1, mask_stackbang);  // Just banging.
+    return;
+  }
+
+  // Displacement is encodable: bang relative to Z_SP directly.
+  z_tmy(-offset, Z_SP, mask_stackbang);
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
+void MacroAssembler::tlab_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register t1,
+                                   Label& slow_case) {
+  assert_different_registers(obj, var_size_in_bytes, t1);
+  Register new_top = t1;  // Receives the prospective new TLAB top.
+
+  // obj = current TLAB top of this thread.
+  z_lg(obj, Address(Z_thread, JavaThread::tlab_top_offset()));
+
+  // new_top = obj + size; the size is taken from the register if one is
+  // given, from the constant otherwise.
+  if (var_size_in_bytes != noreg) {
+    z_lay(new_top, Address(obj, var_size_in_bytes));
+  } else {
+    z_lay(new_top, Address(obj, con_size_in_bytes));
+  }
+
+  // Allocation does not fit if new_top is beyond the TLAB end.
+  z_cg(new_top, Address(Z_thread, JavaThread::tlab_end_offset()));
+  branch_optimized(bcondHigh, slow_case);
+
+  // Update the tlab top pointer.
+  z_stg(new_top, Address(Z_thread, JavaThread::tlab_top_offset()));
+
+  // Recover var_size_in_bytes if necessary.
+  // NOTE(review): the assert at the top presumably already excludes
+  // var_size_in_bytes == t1 - confirm whether this case can occur.
+  if (var_size_in_bytes == new_top) {
+    z_sgr(var_size_in_bytes, obj);
+  }
+}
+
+// Emitter for interface method lookup.
+// input: recv_klass, intf_klass, itable_index
+// output: method_result
+// kills: itable_index, temp1_reg, Z_R0, Z_R1
+// TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs.
+// If the register is still not needed then, remove it.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+ Register intf_klass,
+ RegisterOrConstant itable_index,
+ Register method_result,
+ Register temp1_reg,
+ Register temp2_reg,
+ Label& no_such_interface) {
+
+ // Register roles. Z_R1 and Z_R0 are used as scratch registers in addition
+ // to temp1_reg; temp2_reg is not referenced in this function.
+ const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr.
+ const Register itable_entry_addr = Z_R1_scratch;
+ const Register itable_interface = Z_R0_scratch;
+
+ BLOCK_COMMENT("lookup_interface_method {");
+
+ // Load start of itable entries into itable_entry_addr.
+ // First: vtable length, scaled to bytes.
+ z_llgf(vtable_len, Address(recv_klass, InstanceKlass::vtable_length_offset()));
+ z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes()));
+
+ // Loop over all itable entries until desired interfaceOop(Rinterface) found.
+ const int vtable_base_offset = in_bytes(InstanceKlass::vtable_start_offset());
+
+ // itable_entry_addr = recv_klass + vtable size in bytes + vtable start offset
+ // + offset of the interface field, i.e. the address of the
+ // interface field of the first itable offset entry.
+ add2reg_with_index(itable_entry_addr,
+ vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(),
+ recv_klass, vtable_len);
+
+ // Byte distance between two consecutive itable offset entries.
+ const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize;
+ Label search;
+
+ bind(search);
+
+ // Handle IncompatibleClassChangeError.
+ // If the entry is NULL then we've reached the end of the table
+ // without finding the expected interface, so throw an exception.
+ load_and_test_long(itable_interface, Address(itable_entry_addr));
+ z_bre(no_such_interface);
+
+ // Advance to the next entry BEFORE comparing. After a match,
+ // itable_entry_addr is therefore one entry past the matching one;
+ // vtable_offset_offset below compensates for that.
+ add2reg(itable_entry_addr, itable_offset_search_inc);
+ z_cgr(itable_interface, intf_klass);
+ z_brne(search);
+
+ // Entry found and itable_entry_addr points to it, get offset of vtable for interface.
+
+ // Displacement from itable_entry_addr (interface field of the NEXT entry)
+ // back to the offset field of the matching entry.
+ const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() -
+ itableOffsetEntry::interface_offset_in_bytes()) -
+ itable_offset_search_inc;
+
+ // Compute itableMethodEntry and get method and entry point
+ // we use addressing with index and displacement, since the formula
+ // for computing the entry's offset has a fixed and a dynamic part,
+ // the latter depending on the matched interface entry and on the case,
+ // that the itable index has been passed as a register, not a constant value.
+ int method_offset = itableMethodEntry::method_offset_in_bytes();
+ // Fixed part (displacement), common operand.
+ Register itable_offset; // Dynamic part (index register).
+
+ if (itable_index.is_register()) {
+ // Compute the method's offset in that register, for the formula, see the
+ // else-clause below.
+ // This overwrites the caller's itable_index register.
+ itable_offset = itable_index.as_register();
+
+ // Scale the index to bytes, then add the offset loaded from the matching entry.
+ z_sllg(itable_offset, itable_offset, exact_log2(itableMethodEntry::size() * wordSize));
+ z_agf(itable_offset, vtable_offset_offset, itable_entry_addr);
+ } else {
+ // itable_offset aliases itable_entry_addr (both are Z_R1_scratch); the load
+ // below replaces the entry address with the loaded offset.
+ itable_offset = Z_R1_scratch;
+ // Displacement increases.
+ method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant();
+
+ // Load index from itable.
+ z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr);
+ }
+
+ // Finally load the method's oop.
+ // Address = recv_klass + itable_offset (dynamic part) + method_offset (fixed part).
+ z_lg(method_result, method_offset, itable_offset, recv_klass);
+ BLOCK_COMMENT("} lookup_interface_method");
+}
+
+// Lookup for virtual method invocation.
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  assert_different_registers(recv_klass, vtable_index.register_or_noreg());
+  assert(vtableEntry::size() * wordSize == wordSize,
+         "else adjust the scaling in the code below");
+
+  BLOCK_COMMENT("lookup_virtual_method {");
+
+  // Displacement of the method field of vtable entry 0, relative to the klass.
+  const int first_method_disp = in_bytes(Klass::vtable_start_offset()) +
+                                vtableEntry::method_offset_in_bytes();
+
+  if (!vtable_index.is_constant()) {
+    // Index given in a register: scale it to bytes in place (this modifies
+    // the caller's index register), then load with base + index + disp.
+    Register index_reg = vtable_index.as_register();
+    z_sllg(index_reg, index_reg, exact_log2(wordSize));
+    z_lg(method_result, Address(recv_klass, index_reg, first_method_disp));
+  } else {
+    // Constant index: fold it into the displacement, load with base + disp.
+    z_lg(method_result,
+         Address(recv_klass, vtable_index.as_constant() * wordSize + first_method_disp));
+  }
+
+  BLOCK_COMMENT("} lookup_virtual_method");
+}
+
+// Factor out code to call ic_miss_handler.
+// Generate code to call the inline cache miss handler.
+//
+// In most cases, this code will be generated out-of-line.
+// The method parameters are intended to provide some variability.
+// ICM - Label which has to be bound to the start of useful code (past any traps).
+// trapMarker - Marking byte for the generated illtrap instructions (if any).
+// Any value except 0x00 is supported.
+// = 0x00 - do not generate illtrap instructions.
+// use nops to fill unused space.
+// requiredSize - required size of the generated code. If the actually
+// generated code is smaller, use padding instructions to fill up.
+// = 0 - no size requirement, no padding.
+// scratch - scratch register to hold branch target address.
+//
+// The method returns the code offset of the bound label.
+unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) {
+  intptr_t code_start = offset();
+
+  // Prevent entry at content_begin().
+  if (trapMarker != 0) {
+    z_illtrap(trapMarker);
+  }
+
+  BLOCK_COMMENT("IC miss handler {");
+  BIND(ICM);
+  unsigned int bound_at = offset();  // Code offset of the label, returned to the caller.
+
+  // Materialize the address of the inline cache miss stub in the scratch
+  // register and branch to it.
+  AddressLiteral miss_stub(SharedRuntime::get_ic_miss_stub());
+  load_const_optimized(scratch, miss_stub);
+  z_br(scratch);
+
+  // Pad up to the requested size, with nops or with illtraps carrying the marker.
+  if (requiredSize > 0) {
+    const bool pad_with_nops = (trapMarker == 0);
+    while ((offset() - code_start) < requiredSize) {
+      if (pad_with_nops) {
+        z_nop();
+      } else {
+        z_illtrap(trapMarker);
+      }
+    }
+  }
+
+  BLOCK_COMMENT("} IC miss handler");
+  return bound_at;
+}
+
+void MacroAssembler::nmethod_UEP(Label& ic_miss) {
+  Register cached_klass_reg = as_Register(Matcher::inline_cache_reg_encode());
+  int klass_offset = oopDesc::klass_offset_in_bytes();
+
+  // An explicit test of the receiver (Z_ARG1) against NULL is emitted unless
+  // implicit null checks are enabled and cover the klass offset.
+  const bool explicit_null_check = !ImplicitNullChecks ||
+                                   MacroAssembler::needs_explicit_null_check(klass_offset);
+  if (explicit_null_check) {
+    if (!VM_Version::has_CompareBranch()) {
+      // Separate test and branch.
+      z_ltgr(Z_ARG1, Z_ARG1);
+      z_bre(ic_miss);
+    } else {
+      // Fused compare-and-branch.
+      z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss);
+    }
+  }
+
+  // Compare cached class against klass from receiver; mismatch is a miss.
+  compare_klass_ptr(cached_klass_reg, klass_offset, Z_ARG1, false);
+  z_brne(ic_miss);
+}
+
+// Emits the fast part of a subtype check of sub_klass against super_klass.
+// sub_klass, super_klass - klasses to check; must differ from each other and from temp1_reg.
+// temp1_reg - receives the super check offset if it has to be loaded.
+// L_success, L_failure,
+// L_slow_path - branch targets for the three outcomes. A NULL label means
+// "fall through" for that outcome.
+// super_check_offset - register or constant; a constant of -1 requests loading
+// the offset from super_klass.
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register temp1_reg,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path,
+ RegisterOrConstant super_check_offset) {
+
+ const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ const int sco_offset = in_bytes(Klass::super_check_offset_offset());
+
+ // A constant offset of -1 means the offset must be loaded from super_klass.
+ bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+ // A slow path may be needed if the offset is unknown here or if it
+ // designates the secondary super cache.
+ bool need_slow_path = (must_load_sco ||
+ super_check_offset.constant_or_zero() == sc_offset);
+
+ // Input registers must not overlap.
+ assert_different_registers(sub_klass, super_klass, temp1_reg);
+ if (super_check_offset.is_register()) {
+ assert_different_registers(sub_klass, super_klass,
+ super_check_offset.as_register());
+ } else if (must_load_sco) {
+ assert(temp1_reg != noreg, "supply either a temp or a register offset");
+ }
+
+ const Register Rsuper_check_offset = temp1_reg;
+
+ // Replace each NULL label by the local fallthrough label and count them.
+ NearLabel L_fallthrough;
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+ assert(label_nulls <= 1 ||
+ (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+ "at most one NULL in the batch, usually");
+
+ BLOCK_COMMENT("check_klass_subtype_fast_path {");
+ // If the pointers are equal, we are done (e.g., String[] elements).
+ // This self-check enables sharing of secondary supertype arrays among
+ // non-primary types such as array-of-interface. Otherwise, each such
+ // type would need its own customized SSA.
+ // We move this check to the front of the fast path because many
+ // type checks are in fact trivially successful in this manner,
+ // so we get a nicely predicted branch right at the start of the check.
+ compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success);
+
+ // Check the supertype display, which is uint.
+ if (must_load_sco) {
+ z_llgf(Rsuper_check_offset, sco_offset, super_klass);
+ super_check_offset = RegisterOrConstant(Rsuper_check_offset);
+ }
+ // Compare super_klass with the word at sub_klass + super_check_offset.
+ // The branches below consume the condition code set here.
+ Address super_check_addr(sub_klass, super_check_offset, 0);
+ z_cg(super_klass, super_check_addr); // compare w/ displayed supertype
+
+ // This check has worked decisively for primary supers.
+ // Secondary supers are sought in the super_cache ('super_cache_addr').
+ // (Secondary supers are interfaces and very deeply nested subtypes.)
+ // This works in the same check above because of a tricky aliasing
+ // between the super_cache and the primary super display elements.
+ // (The 'super_check_addr' can address either, as the case requires.)
+ // Note that the cache is updated below if it does not help us find
+ // what we need immediately.
+ // So if it was a primary super, we can just fail immediately.
+ // Otherwise, it's the slow path for us (no success at this point).
+
+ // Hacked jmp, which may only be used just before L_fallthrough.
+ // It emits nothing if the target is the fallthrough label itself.
+#define final_jmp(label) \
+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \
+ else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/
+
+ if (super_check_offset.is_register()) {
+ // Offset only known at run time: on mismatch, test whether the offset
+ // designates the secondary super cache to pick slow path vs. failure.
+ branch_optimized(Assembler::bcondEqual, *L_success);
+ z_cfi(super_check_offset.as_register(), sc_offset);
+ if (L_failure == &L_fallthrough) {
+ branch_optimized(Assembler::bcondEqual, *L_slow_path);
+ } else {
+ branch_optimized(Assembler::bcondNotEqual, *L_failure);
+ final_jmp(*L_slow_path);
+ }
+ } else if (super_check_offset.as_constant() == sc_offset) {
+ // Need a slow path; fast failure is impossible.
+ if (L_slow_path == &L_fallthrough) {
+ branch_optimized(Assembler::bcondEqual, *L_success);
+ } else {
+ branch_optimized(Assembler::bcondNotEqual, *L_slow_path);
+ final_jmp(*L_success);
+ }
+ } else {
+ // No slow path; it's a fast decision.
+ if (L_failure == &L_fallthrough) {
+ branch_optimized(Assembler::bcondEqual, *L_success);
+ } else {
+ branch_optimized(Assembler::bcondNotEqual, *L_failure);
+ final_jmp(*L_success);
+ }
+ }
+
+ bind(L_fallthrough);
+ // Note: no local_brc macro is defined in this function.
+#undef local_brc
+#undef final_jmp
+ BLOCK_COMMENT("} check_klass_subtype_fast_path");
+ // fallthru (to slow path)
+}
+
+// Emits the slow part of a subtype check: a linear search for Rsuperklass in
+// the secondary supers array of Rsubklass. On a hit, Rsuperklass is stored
+// into the secondary super cache of Rsubklass.
+// Rarray_ptr, Rlength - temps, killed. Z_R1 is used as well.
+// L_success, L_failure - branch targets; at most one of them may be NULL,
+// which means "fall through" for that outcome.
+void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
+ Register Rsuperklass,
+ Register Rarray_ptr, // tmp
+ Register Rlength, // tmp
+ Label* L_success,
+ Label* L_failure) {
+ // Input registers must not overlap.
+ // Also check for R1 which is explicitly used here.
+ assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength);
+ NearLabel L_fallthrough, L_loop;
+ // Replace a NULL label by the local fallthrough label.
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+ assert(label_nulls <= 1, "at most one NULL in the batch");
+
+ const int ss_offset = in_bytes(Klass::secondary_supers_offset());
+ const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+
+ const int length_offset = Array<Klass*>::length_offset_in_bytes();
+ const int base_offset = Array<Klass*>::base_offset_in_bytes();
+
+ // Hacked jmp, which may only be used just before L_fallthrough.
+ // It emits nothing if the target is the fallthrough label itself.
+#define final_jmp(label) \
+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \
+ else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/
+
+ NearLabel loop_iterate, loop_count, match;
+
+ BLOCK_COMMENT("check_klass_subtype_slow_path {");
+ // Rarray_ptr = secondary supers array of the subklass.
+ z_lg(Rarray_ptr, ss_offset, Rsubklass);
+
+ // Rlength = number of array elements; an empty array means failure.
+ load_and_test_int(Rlength, Address(Rarray_ptr, length_offset));
+ branch_optimized(Assembler::bcondZero, *L_failure);
+
+ // Oops in table are NO MORE compressed.
+ z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match.
+ z_bre(match); // Shortcut for array length = 1.
+
+ // No match yet, so we must walk the array's elements.
+ // The walk uses a negative byte index in Rlength that is incremented by
+ // Z_R1 (= BytesPerWord) per iteration, relative to an adjusted Rarray_ptr.
+ z_lngfr(Rlength, Rlength);
+ z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array
+ z_llill(Z_R1, BytesPerWord); // Set increment/end index.
+ add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord
+ z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord
+ z_bru(loop_count);
+
+ BIND(loop_iterate);
+ z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match.
+ z_bre(match);
+ BIND(loop_count);
+ // Advance the index by Z_R1 and continue with loop_iterate while elements remain.
+ z_brxlg(Rlength, Z_R1, loop_iterate);
+
+ // Rsuperklass not found among secondary super classes -> failure.
+ branch_optimized(Assembler::bcondAlways, *L_failure);
+
+ // Got a hit. Return success (zero result). Set cache.
+ // Cache load doesn't happen here. For speed it is directly emitted by the compiler.
+
+ BIND(match);
+
+ z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.
+
+ final_jmp(*L_success);
+
+ // Exit to the surrounding code.
+ BIND(L_fallthrough);
+ // Note: no local_brc macro is defined in this function.
+#undef local_brc
+#undef final_jmp
+ BLOCK_COMMENT("} check_klass_subtype_slow_path");
+}
+
+// Emitter for combining fast and slow path.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp1_reg,
+                                         Register temp2_reg,
+                                         Label& L_success) {
+  NearLabel failure;
+  BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name()));
+
+  // Fast path: no slow-path label is supplied (NULL), so an undecided check
+  // falls through into the slow path emitted right below.
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg,
+                                &L_success, &failure, NULL);
+
+  // Slow path: no failure label is supplied (NULL), so failure falls through
+  // to the end of this sequence.
+  check_klass_subtype_slow_path(sub_klass, super_klass,
+                                temp1_reg, temp2_reg, &L_success, NULL);
+
+  BIND(failure);
+  BLOCK_COMMENT("} check_klass_subtype");
+}
+
+// Increment a counter at counter_address when the eq condition code is
+// set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
+void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) {
+  Label skip_increment;
+
+  // Condition code is not "equal": leave the counter untouched.
+  z_brne(skip_increment);
+
+  // Bump the 32-bit counter in memory by one.
+  load_const(tmp1_reg, counter_address);
+  add2mem_32(Address(tmp1_reg), 1, tmp2_reg);
+
+  // Comparing a register with itself re-establishes cc == eq.
+  z_cr(tmp1_reg, tmp1_reg); // Set cc to eq.
+
+  bind(skip_increment);
+}
+
+// Semantics are dependent on the slow_case label:
+// If the slow_case label is not NULL, failure to biased-lock the object
+// transfers control to the location of the slow_case label. If the
+// object could be biased-locked, control is transferred to the done label.
+// The condition code is unpredictable.
+//
+// If the slow_case label is NULL, failure to biased-lock the object results
+// in a transfer of control to the done label with a condition code of not_equal.
+// If the biased-lock could be successfully obtained, control is transferred to
+// the done label with a condition code of equal.
+// It is mandatory to react on the condition code at the done label.
+//
+void MacroAssembler::biased_locking_enter(Register obj_reg,
+ Register mark_reg,
+ Register temp_reg,
+ Register temp2_reg, // May be Z_RO!
+ Label &done,
+ Label *slow_case) {
+ assert(UseBiasedLocking, "why call this otherwise?");
+ assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg);
+
+ Label cas_label; // Try, if implemented, CAS locking. Fall thru to slow path otherwise.
+
+ BLOCK_COMMENT("biased_locking_enter {");
+
+ // Biased locking
+ // See whether the lock is currently biased toward our thread and
+ // whether the epoch is still valid.
+ // Note that the runtime guarantees sufficient alignment of JavaThread
+ // pointers to allow age to be placed into low bits.
+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits,
+ "biased locking makes assumptions about bit layout");
+ z_lr(temp_reg, mark_reg);
+ z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place);
+ z_chi(temp_reg, markOopDesc::biased_lock_pattern);
+ z_brne(cas_label); // Try cas if object is not biased, i.e. cannot be biased locked.
+
+ load_prototype_header(temp_reg, obj_reg);
+ load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place));
+
+ z_ogr(temp_reg, Z_thread);
+ z_xgr(temp_reg, mark_reg);
+ z_ngr(temp_reg, temp2_reg);
+ if (PrintBiasedLockingStatistics) {
+ increment_counter_eq((address) BiasedLocking::biased_lock_entry_count_addr(), mark_reg, temp2_reg);
+ // Restore mark_reg.
+ z_lg(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
+ }
+ branch_optimized(Assembler::bcondEqual, done); // Biased lock obtained, return success.
+
+ Label try_revoke_bias;
+ Label try_rebias;
+ Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
+
+ //----------------------------------------------------------------------------
+ // At this point we know that the header has the bias pattern and
+ // that we are not the bias owner in the current epoch. We need to
+ // figure out more details about the state of the header in order to
+ // know what operations can be legally performed on the object's
+ // header.
+
+ // If the low three bits in the xor result aren't clear, that means
+ // the prototype header is no longer biased and we have to revoke
+ // the bias on this object.
+ z_tmll(temp_reg, markOopDesc::biased_lock_mask_in_place);
+ z_brnaz(try_revoke_bias);
+
+ // Biasing is still enabled for this data type. See whether the
+ // epoch of the current bias is still valid, meaning that the epoch
+ // bits of the mark word are equal to the epoch bits of the
+ // prototype header. (Note that the prototype header's epoch bits
+ // only change at a safepoint.) If not, attempt to rebias the object
+ // toward the current thread. Note that we must be absolutely sure
+ // that the current epoch is invalid in order to do this because
+ // otherwise the manipulations it performs on the mark word are
+ // illegal.
+ z_tmll(temp_reg, markOopDesc::epoch_mask_in_place);
+ z_brnaz(try_rebias);
+
+ //----------------------------------------------------------------------------
+ // The epoch of the current bias is still valid but we know nothing
+ // about the owner; it might be set or it might be clear. Try to
+ // acquire the bias of the object using an atomic operation. If this
+ // fails we will go in to the runtime to revoke the object's bias.
+ // Note that we first construct the presumed unbiased header so we
+ // don't accidentally blow away another thread's valid bias.
+ z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place |
+ markOopDesc::epoch_mask_in_place);
+ z_lgr(temp_reg, Z_thread);
+ z_llgfr(mark_reg, mark_reg);
+ z_ogr(temp_reg, mark_reg);
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+ z_csg(mark_reg, temp_reg, 0, obj_reg);
+
+ // If the biasing toward our thread failed, this means that
+ // another thread succeeded in biasing it toward itself and we
+ // need to revoke that bias. The revocation will occur in the
+ // interpreter runtime in the slow case.
+
+ if (PrintBiasedLockingStatistics) {
+ increment_counter_eq((address) BiasedLocking::anonymously_biased_lock_entry_count_addr(),
+ temp_reg, temp2_reg);
+ }
+ if (slow_case != NULL) {
+ branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way.
+ }
+ branch_optimized(Assembler::bcondAlways, done); // Biased lock status given in condition code.
+
+ //----------------------------------------------------------------------------
+ bind(try_rebias);
+ // At this point we know the epoch has expired, meaning that the
+ // current "bias owner", if any, is actually invalid. Under these
+ // circumstances _only_, we are allowed to use the current header's
+ // value as the comparison value when doing the cas to acquire the
+ // bias in the current epoch. In other words, we allow transfer of
+ // the bias from one thread to another directly in this situation.
+
+ z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+ load_prototype_header(temp_reg, obj_reg);
+ z_llgfr(mark_reg, mark_reg);
+
+ z_ogr(temp_reg, Z_thread);
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+ z_csg(mark_reg, temp_reg, 0, obj_reg);
+
+ // If the biasing toward our thread failed, this means that
+ // another thread succeeded in biasing it toward itself and we
+ // need to revoke that bias. The revocation will occur in the
+ // interpreter runtime in the slow case.
+
+ if (PrintBiasedLockingStatistics) {
+ increment_counter_eq((address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg, temp2_reg);
+ }
+ if (slow_case != NULL) {
+ branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way.
+ }
+ z_bru(done); // Biased lock status given in condition code.
+
+ //----------------------------------------------------------------------------
+ bind(try_revoke_bias);
+ // The prototype mark in the klass doesn't have the bias bit set any
+ // more, indicating that objects of this data type are not supposed
+ // to be biased any more. We are going to try to reset the mark of
+ // this object to the prototype value and fall through to the
+ // CAS-based locking scheme. Note that if our CAS fails, it means
+ // that another thread raced us for the privilege of revoking the
+ // bias of this particular object, so it's okay to continue in the
+ // normal locking code.
+ load_prototype_header(temp_reg, obj_reg);
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+ z_csg(mark_reg, temp_reg, 0, obj_reg);
+
+ // Fall through to the normal CAS-based lock, because no matter what
+ // the result of the above CAS, some thread must have succeeded in
+ // removing the bias bit from the object's header.
+ if (PrintBiasedLockingStatistics) {
+ // z_cgr(mark_reg, temp2_reg);
+ increment_counter_eq((address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg, temp2_reg);
+ }
+
+ bind(cas_label);
+ BLOCK_COMMENT("} biased_locking_enter");
+}
+
+// Emit the biased-locking check for the unlock path: load the mark word from
+// the address held in mark_addr (offset 0) and branch to done if its
+// biased-lock bits show the bias pattern. Otherwise execution falls through.
+// temp_reg is killed.
+void MacroAssembler::biased_locking_exit(Register mark_addr, Register temp_reg, Label& done) {
+  // Unlocking a biased lock is a no-op.
+  // The thread ID does not have to be checked, for two reasons:
+  //  1) the interpreter checks for IllegalMonitorStateException at a higher level;
+  //  2) if the bias was revoked while we held the lock, the object could not
+  //     be rebiased toward another thread, so the bias bit would be clear.
+  BLOCK_COMMENT("biased_locking_exit {");
+
+  // temp_reg = mark word & biased_lock_mask
+  z_lg(temp_reg, 0, mark_addr);
+  z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place);
+
+  // Bias pattern present: nothing left to do.
+  z_chi(temp_reg, markOopDesc::biased_lock_pattern);
+  z_bre(done);
+
+  BLOCK_COMMENT("} biased_locking_exit");
+}
+
+// Emit the inline fast path for locking an object from compiled code.
+//
+//   oop      - the object to lock.
+//   box      - address of the BasicLock (its displaced header slot is written here).
+//   temp1    - scratch; used for both the displaced and the current header.
+//   temp2    - scratch.
+//   try_bias - if true, a biased-locking attempt is emitted first.
+//
+// On exit the condition code tells the caller whether locking succeeded (EQ)
+// or whether the runtime has to be called (see the comment at the end).
+void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
+ // displacedHeader and currentHeader deliberately share temp1 (asserted below).
+ Register displacedHeader = temp1;
+ Register currentHeader = temp1;
+ Register temp = temp2;
+ NearLabel done, object_has_monitor;
+
+ BLOCK_COMMENT("compiler_fast_lock_object {");
+
+ // Load markOop from oop into mark.
+ z_lg(displacedHeader, 0, oop);
+
+ if (try_bias) {
+ // No slow_case label is passed, so the outcome is reported via the condition code at done.
+ biased_locking_enter(oop, displacedHeader, temp, Z_R0, done);
+ }
+
+ // Handle existing monitor.
+ if ((EmitSync & 0x01) == 0) {
+ // The object has an existing monitor iff (mark & monitor_value) != 0.
+ guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
+ z_lr(temp, displacedHeader);
+ z_nill(temp, markOopDesc::monitor_value);
+ z_brne(object_has_monitor);
+ }
+
+ // Set mark to markOop | markOopDesc::unlocked_value.
+ z_oill(displacedHeader, markOopDesc::unlocked_value);
+
+ // NOTE(review): some of the comments below mention names (cmpxchgd, scratch1,
+ // "Compare Value application register") that do not occur in this function.
+ // Load Compare Value application register.
+
+ // Initialize the box (must happen before we update the object mark).
+ z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box);
+
+ // Memory Fence (in cmpxchgd)
+ // Compare object markOop with mark and if equal exchange scratch1 with object markOop.
+
+ // If the compare-and-swap succeeded, then we found an unlocked object and we
+ // have now locked it.
+ // Expected value: displacedHeader (unlocked mark); new value: address of the box.
+ z_csg(displacedHeader, box, 0, oop);
+ assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture.
+ z_bre(done);
+
+ // We did not see an unlocked object so try the fast recursive case.
+
+ // currentHeader = (mark - SP) & (~(page_size-1) | lock_mask).
+ // The and-operation sets the condition code that is seen at done.
+ z_sgr(currentHeader, Z_SP);
+ load_const_optimized(temp, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+
+ z_ngr(currentHeader, temp);
+ // z_brne(done);
+ // z_release();
+ z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box);
+
+ z_bru(done);
+
+ if ((EmitSync & 0x01) == 0) {
+ Register zero = temp;
+ Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
+ bind(object_has_monitor);
+ // The object's monitor m is unlocked iff m->owner == NULL,
+ // otherwise m->owner may contain a thread or a stack address.
+ //
+ // Try to CAS m->owner from NULL to current thread.
+ z_lghi(zero, 0);
+ // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
+ z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
+ // Store a non-null value into the box.
+ z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
+#ifdef ASSERT
+ z_brne(done);
+ // We've acquired the monitor, check some invariants.
+ // Invariant 1: _recursions should be 0.
+ asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
+ "monitor->_recursions should be 0", -1);
+ z_ltgr(zero, zero); // Set CR=EQ.
+#endif
+ }
+ bind(done);
+
+ BLOCK_COMMENT("} compiler_fast_lock_object");
+ // If locking was successful, CR should indicate 'EQ'.
+ // The compiler or the native wrapper generates a branch to the runtime call
+ // _complete_monitor_locking_Java.
+}
+
+// Emit the inline fast path for unlocking an object from compiled code.
+//
+//   oop      - the object to unlock.
+//   box      - address of the BasicLock holding the displaced header.
+//   temp1    - scratch (displaced header / general temp).
+//   temp2    - scratch (current header).
+//   try_bias - if true, the biased-locking exit check is emitted first.
+//
+// On exit the condition code reports the outcome: EQ for success, NE for
+// failure (see the comment at the end).
+void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
+ // displacedHeader and temp share temp1; currentHeader and monitor share temp2.
+ Register displacedHeader = temp1;
+ Register currentHeader = temp2;
+ Register temp = temp1;
+ Register monitor = temp2; // NOTE(review): not referenced in this function.
+
+ Label done, object_has_monitor;
+
+ BLOCK_COMMENT("compiler_fast_unlock_object {");
+
+ if (try_bias) {
+ biased_locking_exit(oop, currentHeader, done);
+ }
+
+ // Find the lock address and load the displaced header from the stack.
+ // if the displaced header is zero, we have a recursive unlock.
+ load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+ z_bre(done);
+
+ // Handle existing monitor.
+ if ((EmitSync & 0x02) == 0) {
+ // The object has an existing monitor iff (mark & monitor_value) != 0.
+ z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
+ guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
+ z_nill(currentHeader, markOopDesc::monitor_value);
+ z_brne(object_has_monitor);
+ }
+
+ // Check if it is still a light weight lock, this is true if we see
+ // the stack address of the basicLock in the markOop of the object
+ // copy box to currentHeader such that csg does not kill it.
+ z_lgr(currentHeader, box);
+ z_csg(currentHeader, displacedHeader, 0, oop);
+ z_bru(done); // Csg sets CR as desired.
+
+ // Handle existing monitor.
+ if ((EmitSync & 0x02) == 0) {
+ bind(object_has_monitor);
+ // The mark word has to be reloaded because the monitor test above destroyed currentHeader.
+ z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set.
+ // Each of the following tests leaves with CC == NE (failure) as soon as
+ // one of the inspected monitor fields is non-zero.
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+ z_brne(done);
+ // NOTE(review): for a monitor that is currently held, owner is presumably
+ // non-zero, in which case this branch is always taken and the store below
+ // is never reached — confirm whether this test is intended.
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+ z_brne(done);
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+ z_brne(done);
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+ z_brne(done);
+ z_release();
+ // temp is zero here (last test was EQ); storing it clears the owner field.
+ z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
+ }
+
+ bind(done);
+
+ BLOCK_COMMENT("} compiler_fast_unlock_object");
+ // flag == EQ indicates success
+ // flag == NE indicates failure
+}
+
+// Card-table post barrier: mark the card covering store_addr by storing a
+// zero byte into the card table.
+// store_addr is destroyed (it ends up holding the card's address), tmp is
+// killed (it receives the card table base).
+void MacroAssembler::card_write_barrier_post(Register store_addr, Register tmp) {
+  CardTableModRefBS* card_table = (CardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(card_table->kind() == BarrierSet::CardTableForRS ||
+         card_table->kind() == BarrierSet::CardTableExtension, "wrong barrier");
+  assert_different_registers(store_addr, tmp);
+
+  // Card index = address >> card_shift.
+  z_srlg(store_addr, store_addr, CardTableModRefBS::card_shift);
+
+  // Card address = card table base + card index.
+  address table_base = (address)card_table->byte_map_base;
+  load_absolute_address(tmp, table_base);
+  z_agr(store_addr, tmp);
+
+  // Store byte 0 into the card.
+  z_mvi(0, store_addr, 0);
+}
+
+#if INCLUDE_ALL_GCS
+
+//------------------------------------------------------
+// General G1 pre-barrier generator.
+// Purpose: record the previous value if it is not null.
+// All non-tmps are preserved.
+//
+// Parameters:
+//   Robj           - base of the field to read the previous value from, or
+//                    noreg if the previous value is already in Rpre_val.
+//   offset         - register or constant offset of the field relative to Robj.
+//   Rpre_val       - receives/holds the previous value; must be a real register.
+//   Rval           - not used by the barrier itself, but spilled and restored
+//                    around the runtime call if it is a volatile register.
+//   Rtmp1, Rtmp2   - temps; neither may be Z_R0.
+//   pre_val_needed - if true, Rpre_val is preserved across the runtime call.
+//
+// The previous value is recorded in the thread's SATB buffer if there is room
+// (index != 0), otherwise via a call to SharedRuntime::g1_wb_pre.
+//------------------------------------------------------
+void MacroAssembler::g1_write_barrier_pre(Register Robj,
+ RegisterOrConstant offset,
+ Register Rpre_val, // Ideally, this is a non-volatile register.
+ Register Rval, // Will be preserved.
+ Register Rtmp1, // If Rpre_val is volatile, either Rtmp1
+ Register Rtmp2, // or Rtmp2 has to be non-volatile..
+ bool pre_val_needed // Save Rpre_val across runtime call, caller uses it.
+ ) {
+ Label callRuntime, filtered;
+ const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active());
+ const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
+ const int index_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index());
+ assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp<i> must be Z_R0!!
+
+ BLOCK_COMMENT("g1_write_barrier_pre {");
+
+ // Is marking active?
+ // Note: value is loaded for test purposes only. No further use here.
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
+ } else {
+ guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
+ }
+ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
+
+ // Do we need to load the previous value into Rpre_val?
+ if (Robj != noreg) {
+ // Load the previous value...
+ // Z_R0 in the index position is used when there is no offset register.
+ Register ixReg = offset.is_register() ? offset.register_or_noreg() : Z_R0;
+ if (UseCompressedOops) {
+ z_llgf(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
+ } else {
+ z_lg(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
+ }
+ }
+ assert(Rpre_val != noreg, "must have a real register");
+
+ // Is the previous value NULL?
+ // Note: pre_val is loaded, decompressed and stored (directly or via runtime call).
+ // Register contents is preserved across runtime call if caller requests to do so.
+ z_ltgr(Rpre_val, Rpre_val);
+ z_bre(filtered); // previous value is NULL, so we don't need to record it.
+
+ // Decode the oop now. We know it's not NULL.
+ if (Robj != noreg && UseCompressedOops) {
+ oop_decoder(Rpre_val, Rpre_val, /*maybeNULL=*/false);
+ }
+
+ // OK, it's not filtered, so we'll need to call enqueue.
+
+ // We can store the original value in the thread's buffer
+ // only if index > 0. Otherwise, we need runtime to handle.
+ // (The index field is typed as size_t.)
+ Register Rbuffer = Rtmp1, Rindex = Rtmp2;
+
+ z_lg(Rbuffer, buffer_offset, Z_thread);
+
+ load_and_test_long(Rindex, Address(Z_thread, index_offset));
+ z_bre(callRuntime); // If index == 0, goto runtime.
+
+ add2reg(Rindex, -wordSize); // Decrement index.
+ z_stg(Rindex, index_offset, Z_thread);
+
+ // Record the previous value.
+ z_stg(Rpre_val, 0, Rbuffer, Rindex);
+ z_bru(filtered); // We are done.
+
+ Rbuffer = noreg; // end of life
+ Rindex = noreg; // end of life
+
+ bind(callRuntime);
+
+ // Save Rpre_val (result) over runtime call.
+ // Requires Rtmp1, Rtmp2, or Rpre_val to be non-volatile.
+ Register Rpre_save = Rpre_val;
+ if (pre_val_needed && Rpre_val->is_volatile()) {
+ guarantee(!Rtmp1->is_volatile() || !Rtmp2->is_volatile(), "oops!");
+ Rpre_save = !Rtmp1->is_volatile() ? Rtmp1 : Rtmp2;
+ }
+ lgr_if_needed(Rpre_save, Rpre_val);
+
+ // Preserve inputs by spilling them into the top frame.
+ // Each register is stored at offset (register number * BytesPerWord) from Z_SP.
+ if (Robj != noreg && Robj->is_volatile()) {
+ z_stg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
+ }
+ if (offset.is_register() && offset.as_register()->is_volatile()) {
+ Register Roff = offset.as_register();
+ z_stg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
+ }
+ if (Rval != noreg && Rval->is_volatile()) {
+ z_stg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
+ }
+
+ // Push frame to protect top frame with return pc and spilled register values.
+ save_return_pc();
+ push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
+
+ // NOTE(review): Rpre_val is passed to the runtime after push_frame_abi160(),
+ // which is commented as using Z_R0 as tmp. A caller passing Z_R0 as Rpre_val
+ // would therefore look unsafe — confirm that no caller does so.
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, Z_thread);
+
+ pop_frame();
+ restore_return_pc();
+
+ // Restore spilled values.
+ if (Robj != noreg && Robj->is_volatile()) {
+ z_lg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
+ }
+ if (offset.is_register() && offset.as_register()->is_volatile()) {
+ Register Roff = offset.as_register();
+ z_lg(Roff, Roff->encoding()*BytesPerWord, Z_SP);
+ }
+ if (Rval != noreg && Rval->is_volatile()) {
+ z_lg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
+ }
+
+ // Restore Rpre_val (result) after runtime call.
+ lgr_if_needed(Rpre_val, Rpre_save);
+
+ bind(filtered);
+ BLOCK_COMMENT("} g1_write_barrier_pre");
+}
+
+// General G1 post-barrier generator.
+// Purpose: Store cross-region card.
+//
+// Parameters:
+//   Rstore_addr - address the new value was stored to.
+//   Rnew_val    - the value that was stored (only inspected, not modified).
+//   Rtmp1..3    - temps, must be pairwise different. The register selection
+//                 below copes with one of them being Z_R0_scratch.
+//
+// The barrier is a no-op ("filtered") if the store does not cross heap
+// regions (when G1RSBarrierRegionFilter is set), if the card is a young
+// card, or if the card is already dirty. Otherwise the card is dirtied and
+// its address is enqueued in the thread's dirty card queue, or handed to
+// SharedRuntime::g1_wb_post if the queue index is zero.
+void MacroAssembler::g1_write_barrier_post(Register Rstore_addr,
+                                           Register Rnew_val,
+                                           Register Rtmp1,
+                                           Register Rtmp2,
+                                           Register Rtmp3) {
+  Label callRuntime, filtered;
+
+  assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3.
+
+  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
+
+  BLOCK_COMMENT("g1_write_barrier_post {");
+
+  // Does store cross heap regions?
+  // It does if the two addresses specify different grain addresses.
+  if (G1RSBarrierRegionFilter) {
+    if (VM_Version::has_DistinctOpnds()) {
+      z_xgrk(Rtmp1, Rstore_addr, Rnew_val);
+    } else {
+      z_lgr(Rtmp1, Rstore_addr);
+      z_xgr(Rtmp1, Rnew_val);
+    }
+    // The shift sets the condition code tested by the branch.
+    z_srag(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes);
+    z_bre(filtered);
+  }
+
+  // Crosses regions, storing NULL?
+#ifdef ASSERT
+  z_ltgr(Rnew_val, Rnew_val);
+  asm_assert_ne("null oop not allowed (G1)", 0x255); // TODO: also on z? Checked by caller on PPC64, so following branch is obsolete:
+  z_bre(filtered);  // Safety net: don't break if we have a NULL oop.
+#endif
+  Rnew_val = noreg; // end of lifetime
+
+  // Storing region crossing non-NULL, is card already dirty?
+  assert(sizeof(*bs->byte_map_base) == sizeof(jbyte), "adjust this code");
+  assert_different_registers(Rtmp1, Rtmp2, Rtmp3);
+  // Make sure not to use Z_R0 for any of these registers.
+  Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3;
+  Register Rbase      = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3;
+
+  // calculate address of card
+  load_const_optimized(Rbase, (address)bs->byte_map_base);        // Card table base.
+  z_srlg(Rcard_addr, Rstore_addr, CardTableModRefBS::card_shift); // Index into card table.
+  add2reg_with_index(Rcard_addr, 0, Rcard_addr, Rbase);           // Explicit calculation needed for cli.
+  Rbase = noreg; // end of lifetime
+
+  // Filter young.
+  assert((unsigned int)G1SATBCardTableModRefBS::g1_young_card_val() <= 255, "otherwise check this code");
+  z_cli(0, Rcard_addr, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+  z_bre(filtered);
+
+  // Check the card value. If dirty, we're done.
+  // This also avoids false sharing of the (already dirty) card.
+  z_sync(); // Required to support concurrent cleaning.
+  assert((unsigned int)CardTableModRefBS::dirty_card_val() <= 255, "otherwise check this code");
+  z_cli(0, Rcard_addr, CardTableModRefBS::dirty_card_val()); // Reload after membar.
+  z_bre(filtered);
+
+  // Storing a region crossing, non-NULL oop, card is clean.
+  // Dirty card and log.
+  z_mvi(0, Rcard_addr, CardTableModRefBS::dirty_card_val());
+
+  Register Rcard_addr_x = Rcard_addr;
+  Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
+  Register Rqueue_buf   = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1;
+  const int qidx_off    = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_index());
+  const int qbuf_off    = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
+  if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) {
+    Rcard_addr_x = Z_R0_scratch;  // Register shortage. We have to use Z_R0.
+  }
+  lgr_if_needed(Rcard_addr_x, Rcard_addr);
+
+  load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off));
+  z_bre(callRuntime); // Index == 0 then jump to runtime.
+
+  z_lg(Rqueue_buf, qbuf_off, Z_thread);
+
+  add2reg(Rqueue_index, -wordSize); // Decrement index.
+  z_stg(Rqueue_index, qidx_off, Z_thread);
+
+  z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card.
+  z_bru(filtered);
+
+  bind(callRuntime);
+
+  // In the register shortage case the card address lives in Z_R0, but
+  // push_frame_abi160() may use Z_R0 as tmp. Copy the card address back into
+  // Rcard_addr before the frame is pushed. Rcard_addr is free again here: the
+  // queue index/buffer registers it may alias are no longer needed on this
+  // path. No instruction is emitted if both names denote the same register.
+  lgr_if_needed(Rcard_addr, Rcard_addr_x);
+
+  // TODO: do we need a frame? Introduced to be on the safe side.
+  bool needs_frame = true;
+
+  // Push a frame to protect the caller's frame and return pc across the call.
+  if (needs_frame) {
+    save_return_pc();
+    push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
+  }
+
+  // Hand the card address to the runtime for enqueueing.
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, Z_thread);
+
+  if (needs_frame) {
+    pop_frame();
+    restore_return_pc();
+  }
+
+  bind(filtered);
+
+  BLOCK_COMMENT("} g1_write_barrier_post");
+}
+#endif // INCLUDE_ALL_GCS
+
+// Last_Java_sp must comply to the rules in frame_s390.hpp.
+//
+// Record the last Java frame in the current thread's frame anchor:
+// last_Java_pc is stored first (unless it is noreg), last_Java_sp last.
+// allow_relocation selects which flavor of the debug check is emitted.
+void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
+  BLOCK_COMMENT("set_last_Java_frame {");
+
+  // Store order matters: as soon as last_Java_sp becomes visible,
+  // has_last_Java_frame is true and users will look at the remaining
+  // fields. Therefore last_Java_pc and flags have to be set before.
+  // (Flags should always be zero at this point, so they need no store.)
+
+  // Debug check: last_Java_pc must have been zeroed on return to Java.
+  const int pc_slot_offset = in_bytes(JavaThread::last_Java_pc_offset());
+  if (!allow_relocation) {
+    asm_assert_mem8_is_zero_static(pc_slot_offset,
+                                   Z_thread,
+                                   "last_Java_pc not zeroed before leaving Java",
+                                   0x200);
+  } else {
+    asm_assert_mem8_is_zero(pc_slot_offset,
+                            Z_thread,
+                            "last_Java_pc not zeroed before leaving Java",
+                            0x200);
+  }
+
+  // The frame anchor's last_Java_pc is always NULL when returning from a
+  // call out of Java mode. It is set here so that, for a call to native
+  // (not VM) code, the known pc is captured and we do not depend on the
+  // native call having a standard frame linkage to find the pc.
+  if (last_Java_pc != noreg) {
+    z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
+  }
+
+  // A membar release is not required on z/Architecture here, since the
+  // sequence of stores is maintained. The (commented-out) call is kept to
+  // document the required ordering.
+  // The implementation of z_release() should be empty.
+  // z_release();
+
+  z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
+  BLOCK_COMMENT("} set_last_Java_frame");
+}
+
+// Clear the current thread's frame anchor: last_Java_sp is zeroed first,
+// then last_Java_pc. allow_relocation selects which flavor of the debug
+// check is emitted.
+void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
+  BLOCK_COMMENT("reset_last_Java_frame {");
+
+  // Debug check: a last Java frame must have been set before.
+  const int sp_slot_offset = in_bytes(JavaThread::last_Java_sp_offset());
+  if (!allow_relocation) {
+    asm_assert_mem8_isnot_zero_static(sp_slot_offset,
+                                      Z_thread,
+                                      "SP was not set, still zero",
+                                      0x202);
+  } else {
+    asm_assert_mem8_isnot_zero(sp_slot_offset,
+                               Z_thread,
+                               "SP was not set, still zero",
+                               0x202);
+  }
+
+  // _last_Java_sp = 0
+  // The slot must be cleared atomically, hence store_const() and not clear_mem().
+  store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
+
+  // _last_Java_pc = 0
+  store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
+
+  BLOCK_COMMENT("} reset_last_Java_frame");
+}
+
+// Record the frame at sp as last Java frame, using the current PC as
+// last_Java_pc. tmp1 is killed (it receives the PC).
+void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
+  assert_different_registers(sp, tmp1);
+
+  // Code generated by the C++ compiler cannot be trusted to save R14 to
+  // z_abi_160.return_pc: sometimes it spills R14 using stmg at
+  // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
+  // So the PC is materialized in tmp1 here, and set_last_Java_frame()
+  // stores it into the frame anchor.
+  Register pc_reg = tmp1;
+  get_PC(pc_reg);
+  set_last_Java_frame(/*sp=*/sp, /*pc=*/pc_reg, allow_relocation);
+}
+
+// Store new_state into the current thread's thread-state field.
+// A z_release() is issued ahead of the store.
+void MacroAssembler::set_thread_state(JavaThreadState new_state) {
+  assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction");
+  assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int");
+
+  z_release();
+
+  // Z_R0 is handed in as scratch register; the final 'false' is passed through unchanged.
+  const Address state_slot(Z_thread, JavaThread::thread_state_offset());
+  store_const(state_slot, new_state, Z_R0, false);
+}
+
+// Fetch the oop result of a VM call from the current thread's vm_result
+// field into oop_result, then clear the field.
+void MacroAssembler::get_vm_result(Register oop_result) {
+  const Address result_slot(Z_thread, JavaThread::vm_result_offset());
+
+  verify_thread();
+
+  z_lg(oop_result, result_slot);            // Pick up the result ...
+  clear_mem(result_slot, sizeof(void*));    // ... and reset the slot.
+
+  verify_oop(oop_result);
+}
+
+// Fetch the second result of a VM call from the current thread's
+// vm_result_2 field into result, then clear the field.
+void MacroAssembler::get_vm_result_2(Register result) {
+  const Address result_slot(Z_thread, JavaThread::vm_result_2_offset());
+
+  verify_thread();
+
+  z_lg(result, result_slot);                // Pick up the result ...
+  clear_mem(result_slot, sizeof(void*));    // ... and reset the slot.
+}
+
+// Store oop_result into the current thread's vm_result field.
+// C code which does not return a value in vm_result is required to leave
+// the field undisturbed.
+void MacroAssembler::set_vm_result(Register oop_result) {
+  const Address result_slot(Z_thread, JavaThread::vm_result_offset());
+  z_stg(oop_result, result_slot);
+}
+
+// Explicit null checks (used for method handle code).
+//
+//   reg    - the register to check for NULL.
+//   tmp    - target of the probing load in the implicit-check case (Z_R0 is allowed).
+//   offset - offset of the later access relative to reg.
+//
+// With ImplicitNullChecks, a probing load from M[reg] is emitted only if the
+// later access at the given offset would not reliably fault. Without
+// ImplicitNullChecks, reg is compared against zero and control is
+// transferred to the interpreter's NullPointerException entry if it is NULL.
+void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) {
+  if (ImplicitNullChecks) {
+    if (needs_explicit_null_check((intptr_t)offset)) {
+      // Provoke OS NULL exception if reg = NULL by
+      // accessing M[reg] w/o changing any registers.
+      z_lg(tmp, 0, reg);
+    }
+    // Otherwise there is nothing to do: the (later) access of M[reg + offset]
+    // will provoke the OS NULL exception if reg = NULL.
+    return;
+  }
+
+  NearLabel not_null;
+
+  compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, not_null);
+
+  // reg is known to be 0 here, so it can be reused to hold the branch target
+  // (tmp==Z_R0 is allowed so we can't use it for the address).
+  address npe_entry = Interpreter::throw_NullPointerException_entry();
+  load_absolute_address(reg, npe_entry);
+  z_br(reg);
+
+  bind(not_null);
+}
+
+//-------------------------------------
+// Compressed Klass Pointers
+//-------------------------------------
+
+// Klass oop manipulations if compressed.
+//
+// Encode (compress) a non-NULL Klass pointer:
+//   dst = (klass - narrow_klass_base) >> narrow_klass_shift
+// The Klass pointer is taken from src, or from dst if src == noreg.
+// dst == src is allowed. Z_R0 is killed if the base has to be loaded as a
+// full 64-bit constant.
+void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+  Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible.
+  address  base    = Universe::narrow_klass_base();
+  int      shift   = Universe::narrow_klass_shift();
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+
+  BLOCK_COMMENT("cKlass encoder {");
+
+#ifdef ASSERT
+  Label ok;
+  z_tmll(current, KlassAlignmentInBytes-1); // Check alignment.
+  z_brc(Assembler::bcondAllZero, ok);
+  // The plain disassembler does not recognize illtrap. It instead displays
+  // a 32-bit value. Issuing two illtraps assures the disassembler finds
+  // the proper beginning of the next instruction.
+  z_illtrap(0xee);
+  z_illtrap(0xee);
+  bind(ok);
+#endif
+
+  if (base != NULL) {
+    unsigned int base_h = ((unsigned long)base)>>32;
+    unsigned int base_l = (unsigned int)((unsigned long)base);
+    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+      lgr_if_needed(dst, current);
+      z_aih(dst, -((int)base_h));     // Base has no set bits in lower half.
+    } else if ((base_h == 0) && (base_l != 0) && (base_l <= 0x7fffffffU)) {
+      // Base has no set bits in upper half and fits into a positive int.
+      // agfi takes a signed 32-bit immediate. For base_l >= 0x80000000 the
+      // negated value would not be representable (it would wrap to a
+      // non-negative immediate), so such bases take the generic path below.
+      lgr_if_needed(dst, current);
+      z_agfi(dst, -(int)base_l);
+    } else {
+      // Generic case: full 64-bit subtraction of the base.
+      load_const(Z_R0, base);
+      lgr_if_needed(dst, current);
+      z_sgr(dst, Z_R0);
+    }
+    current = dst;
+  }
+  if (shift != 0) {
+    assert (LogKlassAlignmentInBytes == shift, "decode alg wrong");
+    z_srlg(dst, current, shift);
+    current = dst;
+  }
+  lgr_if_needed(dst, current); // Move may be required (if neither base nor shift != 0).
+
+  BLOCK_COMMENT("} cKlass encoder");
+}
+
+// Returns the size, in bytes, of the code generated by
+//   decode_klass_not_null(Register dst)
+// when (Universe::heap() != NULL). This function mirrors the instruction
+// selection of that generator, so it has to be updated whenever the
+// generated instructions change.
+int MacroAssembler::instr_size_for_decode_klass_not_null() {
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+
+  address base = Universe::narrow_klass_base();
+  int     size = 0;
+
+  // Shift step.
+  if (Universe::narrow_klass_shift() != 0) {
+    size += 6;                       /* sllg */
+  }
+
+  // Add-base step.
+  if (base != NULL) {
+    unsigned int base_h = ((unsigned long)base)>>32;
+    unsigned int base_l = (unsigned int)((unsigned long)base);
+    const bool high_half_only = (base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr();
+    const bool low_half_only  = (base_h == 0) && (base_l != 0);
+    if (high_half_only || low_half_only) {
+      size += 6;                     /* aih or algfi */
+    } else {
+      size += load_const_size() + 4; /* load_const + algr */
+    }
+  }
+
+#ifdef ASSERT
+  // Alignment check emitted in debug builds.
+  size += 10;
+  size += 2; // Extra sigill.
+#endif
+
+  return size;
+}
+
+// Decode (uncompress) a non-NULL narrow Klass pointer in place:
+//   dst = (dst << narrow_klass_shift) + narrow_klass_base
+// Z_R0 is killed if the base has to be loaded as a full 64-bit constant.
+//
+// !!! If the instructions that get generated here change
+// then function instr_size_for_decode_klass_not_null()
+// needs to get updated.
+// This variant of decode_klass_not_null() must generate predictable code!
+// The code must only depend on globally known parameters.
+void MacroAssembler::decode_klass_not_null(Register dst) {
+ address base = Universe::narrow_klass_base();
+ int shift = Universe::narrow_klass_shift();
+ int beg_off = offset(); // Code offset at entry, used for the size check at the end.
+ assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+
+ BLOCK_COMMENT("cKlass decoder (const size) {");
+
+ if (shift != 0) { // Shift required?
+ z_sllg(dst, dst, shift);
+ }
+ if (base != NULL) {
+ unsigned int base_h = ((unsigned long)base)>>32;
+ unsigned int base_l = (unsigned int)((unsigned long)base);
+ if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+ z_aih(dst, base_h); // Base has no set bits in lower half.
+ } else if ((base_h == 0) && (base_l != 0)) {
+ z_algfi(dst, base_l); // Base has no set bits in upper half.
+ } else {
+ // load_const (not load_const_optimized) is used in this constant-size variant.
+ load_const(Z_R0, base); // Base has set bits everywhere.
+ z_algr(dst, Z_R0);
+ }
+ }
+
+#ifdef ASSERT
+ Label ok;
+ z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
+ z_brc(Assembler::bcondAllZero, ok);
+ // The plain disassembler does not recognize illtrap. It instead displays
+ // a 32-bit value. Issuing two illtraps assures the disassembler finds
+ // the proper beginning of the next instruction.
+ z_illtrap(0xd1);
+ z_illtrap(0xd1);
+ bind(ok);
+#endif
+ // Verify that the emitted code has exactly the advertised size.
+ assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch.");
+
+ BLOCK_COMMENT("} cKlass decoder (const size)");
+}
+
+// Decode (uncompress) a non-NULL narrow Klass pointer:
+//   dst = (src << narrow_klass_shift) + narrow_klass_base
+// This variant of decode_klass_not_null() is for cases where
+//   1) the size of the generated instructions may vary
+//   2) the result is (potentially) stored in a register different from the source.
+// If src == noreg, the narrow Klass pointer is taken from dst.
+// Z_R0 is killed if the base has to be loaded as a constant.
+void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+
+  address  base   = Universe::narrow_klass_base();
+  int      shift  = Universe::narrow_klass_shift();
+  Register source = (src == noreg) ? dst : src;
+
+  BLOCK_COMMENT("cKlass decoder {");
+
+  // Shift, or at least move, the narrow value into dst.
+  if (shift == 0) {
+    lgr_if_needed(dst, source);
+  } else {
+    z_sllg(dst, source, shift);
+  }
+
+  // Add the base, picking the cheapest instruction for its bit pattern.
+  if (base != NULL) {
+    unsigned int base_h = ((unsigned long)base)>>32;
+    unsigned int base_l = (unsigned int)((unsigned long)base);
+    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+      z_aih(dst, base_h);               // Base has no set bits in lower half.
+    } else if ((base_h == 0) && (base_l != 0)) {
+      z_algfi(dst, base_l);             // Base has no set bits in upper half.
+    } else {
+      load_const_optimized(Z_R0, base); // Base has set bits everywhere.
+      z_algr(dst, Z_R0);
+    }
+  }
+
+#ifdef ASSERT
+  Label ok;
+  z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
+  z_brc(Assembler::bcondAllZero, ok);
+  // The plain disassembler does not recognize illtrap and displays a 32-bit
+  // value instead. Two illtraps in a row make sure the disassembler finds
+  // the proper beginning of the next instruction.
+  z_illtrap(0xd2);
+  z_illtrap(0xd2);
+  bind(ok);
+#endif
+  BLOCK_COMMENT("} cKlass decoder");
+}
+
+// Load the klass pointer stored at mem into klass.
+// With compressed class pointers, the narrow klass is loaded and then decoded in place.
+void MacroAssembler::load_klass(Register klass, Address mem) {
+  if (!UseCompressedClassPointers) {
+    z_lg(klass, mem);   // Klass pointer is stored uncompressed.
+    return;
+  }
+
+  z_llgf(klass, mem);
+  // Attention: no null check here!
+  decode_klass_not_null(klass);
+}
+
+// Load the klass pointer of the object referenced by src_oop into klass.
+// With compressed class pointers, the narrow klass is loaded and then decoded in place.
+void MacroAssembler::load_klass(Register klass, Register src_oop) {
+  const int klass_offset = oopDesc::klass_offset_in_bytes();
+
+  if (!UseCompressedClassPointers) {
+    z_lg(klass, klass_offset, src_oop);   // Klass pointer is stored uncompressed.
+    return;
+  }
+
+  z_llgf(klass, klass_offset, src_oop);
+  // Attention: no null check here!
+  decode_klass_not_null(klass);
+}
+
+// Load the prototype header of the klass of the object in Rsrc_oop into Rheader.
+// Rheader also serves as the temp for the klass pointer.
+void MacroAssembler::load_prototype_header(Register Rheader, Register Rsrc_oop) {
+  // Both registers have to be distinct.
+  assert_different_registers(Rheader, Rsrc_oop);
+
+  // Step 1: klass of the object.
+  load_klass(Rheader, Rsrc_oop);
+
+  // Step 2: prototype header found in that klass.
+  const Address prototype_header(Rheader, Klass::prototype_header_offset());
+  z_lg(Rheader, prototype_header);
+}
+
+// Store the klass pointer in klass into the klass field of the object at dst_oop.
+// With compressed class pointers, the klass is encoded into ck first. If ck is
+// noreg, the encoding is done in place (in klass).
+void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
+  const Address klass_field(dst_oop, oopDesc::klass_offset_in_bytes());
+
+  if (!UseCompressedClassPointers) {
+    z_stg(klass, klass_field);   // Full 64-bit klass pointer.
+    return;
+  }
+
+  assert_different_registers(dst_oop, klass, Z_R0);
+  if (ck == noreg) { ck = klass; }
+  encode_klass_not_null(ck, klass);
+  z_st(ck, klass_field);         // 32-bit narrow klass.
+}
+
+// Store the 32-bit value in s into the klass gap of the object at d.
+// Nothing is emitted unless compressed class pointers are in use.
+void MacroAssembler::store_klass_gap(Register s, Register d) {
+  if (!UseCompressedClassPointers) {
+    return;
+  }
+
+  assert(s != d, "not enough registers");
+  const Address klass_gap(d, oopDesc::klass_gap_offset_in_bytes());
+  z_st(s, klass_gap);
+}
+
+// Compare klass ptr in memory against klass ptr in register.
+//
+// Rop1 - klass in register, always uncompressed.
+// disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag.
+// Rbase - Base address of cKlass in memory.
+// maybeNULL - True if Rop1 possibly is a NULL. Only consulted in the heap-based case.
+void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) { // The emitted sequence ends with the compare (z_cl or z_clg).
+
+ BLOCK_COMMENT("compare klass ptr {");
+
+ if (UseCompressedClassPointers) {
+ const int shift = Universe::narrow_klass_shift();
+ address base = Universe::narrow_klass_base();
+
+ assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift");
+ assert_different_registers(Rop1, Z_R0);
+ assert_different_registers(Rop1, Rbase, Z_R1);
+
+ // First encode the klass ptr in the register and then compare with the cKlass in memory.
+ // This sequence saves an unnecessary cKlass load and decode.
+ if (base == NULL) {
+ if (shift == 0) {
+ z_cl(Rop1, disp, Rbase); // Unscaled
+ } else {
+ z_srlg(Z_R0, Rop1, shift); // ZeroBased
+ z_cl(Z_R0, disp, Rbase);
+ }
+ } else { // HeapBased
+#ifdef ASSERT
+ bool used_R0 = true;
+ bool used_R1 = true;
+#endif
+ Register current = Rop1; // Register currently holding the klass ptr to encode.
+ Label done;
+
+ if (maybeNULL) { // NULL ptr must be preserved!
+ z_ltgr(Z_R0, current);
+ z_bre(done);
+ current = Z_R0; // The klass ptr was copied to Z_R0 by the load-and-test above.
+ }
+
+ unsigned int base_h = ((unsigned long)base)>>32; // Upper 32 bits of the base address.
+ unsigned int base_l = (unsigned int)((unsigned long)base); // Lower 32 bits of the base address.
+ if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+ lgr_if_needed(Z_R0, current);
+ z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half.
+ } else if ((base_h == 0) && (base_l != 0)) {
+ lgr_if_needed(Z_R0, current);
+ z_agfi(Z_R0, -(int)base_l); // Base has no set bits in upper half.
+ } else {
+ int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
+ add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement. Reads Rop1 (not current); both hold the same value here.
+ }
+
+ if (shift != 0) {
+ z_srlg(Z_R0, Z_R0, shift);
+ }
+ bind(done);
+ z_cl(Z_R0, disp, Rbase); // Compare the encoded klass (or NULL) in Z_R0 with the cKlass in memory.
+#ifdef ASSERT
+ if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
+ if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
+#endif
+ }
+ } else {
+ z_clg(Rop1, disp, Z_R0, Rbase); // Klass ptrs are not compressed: 64-bit compare against memory.
+ }
+ BLOCK_COMMENT("} compare klass ptr");
+}
+
+//---------------------------
+// Compressed oops
+//---------------------------
+
+// Compress the oop in place. The oop may be NULL.
+void MacroAssembler::encode_heap_oop(Register oop) {
+  const bool maybe_null = true;
+  oop_encoder(oop, oop, maybe_null);
+}
+
+// Compress the oop in place. The caller asserts the oop is not NULL.
+void MacroAssembler::encode_heap_oop_not_null(Register oop) {
+  const bool maybe_null = false;
+  oop_encoder(oop, oop, maybe_null);
+}
+
+// Called with something derived from the oop base. e.g. oop_base>>3.
+//
+// Returns 0 if oop_base already has exactly one non-zero 16-bit part, or if
+// adding less than 0x8000 cannot turn it into such a value. Otherwise, the
+// result is the negated distance from oop_base up to the next multiple of 0x10000.
+int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) {
+  assert(oop_base != 0, "This is for HeapBased cOops only");
+
+  // Count the non-zero 16-bit parts of the base.
+  unsigned int parts_in_use = 0;
+  for (int bit_pos = 0; bit_pos < 64; bit_pos += 16) {
+    if (((oop_base >> bit_pos) & 0xffff) != 0) { parts_in_use++; }
+  }
+  if (parts_in_use == 1) {
+    return 0;                     // Base is simple enough as is.
+  }
+
+  // Check if oop_base is just a few pages shy of a power of 2.
+  const uint64_t pow2_offset = 0x10000 - (oop_base & 0xffff);
+  if (pow2_offset >= 0x8000) {    // This might not be necessary.
+    return 0;
+  }
+
+  // Re-count the non-zero parts for the rounded-up base.
+  const uint64_t rounded_base = oop_base + pow2_offset;
+  parts_in_use = 0;
+  for (int bit_pos = 0; bit_pos < 64; bit_pos += 16) {
+    if (((rounded_base >> bit_pos) & 0xffff) != 0) { parts_in_use++; }
+  }
+  if (parts_in_use != 1) {
+    return 0;                     // Rounding does not help.
+  }
+
+  assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register");
+  return -pow2_offset;
+}
+
+// If base address is offset from a straight power of two by just a few pages,
+// return this offset to the caller for a possible later composite add.
+// Rbase is loaded with oop_base minus the returned offset.
+// TODO/FIX: will only work correctly for 4k pages.
+int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) {
+  const int      offset_to_base = get_oop_base_pow2_offset(oop_base);
+  const uint64_t base_to_load   = oop_base - offset_to_base;
+
+  load_const_optimized(Rbase, base_to_load); // Best job possible.
+
+  return offset_to_base;
+}
+
+// Same as get_oop_base(), but with negated results: Rbase holds the complement
+// of the loaded base, and the returned offset has its sign flipped as well.
+int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) {
+  const int base_offset = get_oop_base(Rbase, oop_base);
+
+  z_lcgr(Rbase, Rbase);   // Load complement, in place.
+
+  return -base_offset;
+}
+
+// Compare compressed oop in memory against oop in register.
+// Rop1 - Oop in register.
+// mem - Address (base, index, displacement) of the cOop in memory.
+// maybeNULL - True if Rop1 possibly is a NULL. Only consulted in the heap-based case.
+// Z_R0 is overwritten whenever the oop has to be encoded; in the heap-based case, Z_R1 is overwritten as well.
+// The emitted sequence ends with the compare (z_cl) of the encoded oop against memory.
+void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) {
+ Register Rbase = mem.baseOrR0();
+ Register Rindex = mem.indexOrR0();
+ int64_t disp = mem.disp();
+
+ const int shift = Universe::narrow_oop_shift();
+ address base = Universe::narrow_oop_base();
+
+ assert(UseCompressedOops, "must be on to call this method");
+ assert(Universe::heap() != NULL, "java heap must be initialized to call this method");
+ assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
+ assert_different_registers(Rop1, Z_R0);
+ assert_different_registers(Rop1, Rbase, Z_R1);
+ assert_different_registers(Rop1, Rindex, Z_R1);
+
+ BLOCK_COMMENT("compare heap oop {");
+
+ // First encode register oop and then compare with cOop in memory.
+ // This sequence saves an unnecessary cOop load and decode.
+ if (base == NULL) {
+ if (shift == 0) {
+ z_cl(Rop1, disp, Rindex, Rbase); // Unscaled
+ } else {
+ z_srlg(Z_R0, Rop1, shift); // ZeroBased
+ z_cl(Z_R0, disp, Rindex, Rbase);
+ }
+ } else { // HeapBased
+#ifdef ASSERT
+ bool used_R0 = true;
+ bool used_R1 = true;
+#endif
+ Label done;
+ int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); // Z_R1 = complement of the (rounded) heap base.
+
+ if (maybeNULL) { // NULL ptr must be preserved!
+ z_ltgr(Z_R0, Rop1);
+ z_bre(done);
+ }
+
+ add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement.
+ z_srlg(Z_R0, Z_R0, shift); // Emitted unconditionally, i.e. also for shift == 0.
+
+ bind(done);
+ z_cl(Z_R0, disp, Rindex, Rbase); // Compare the encoded oop (or NULL) in Z_R0 with the cOop in memory.
+#ifdef ASSERT
+ if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
+ if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
+#endif
+ }
+ BLOCK_COMMENT("} compare heap oop");
+}
+
+// Load the heap oop found at address a into dest and decompress it, if necessary.
+// The loaded oop may be NULL.
+void MacroAssembler::load_heap_oop(Register dest, const Address &a) {
+  const int64_t  disp  = a.disp();
+  const Register index = a.indexOrR0();
+  const Register base  = a.baseOrR0();
+
+  if (!UseCompressedOops) {
+    z_lg(dest, disp, index, base);
+    return;
+  }
+
+  z_llgf(dest, disp, index, base);
+  oop_decoder(dest, dest, true);
+}
+
+// Load the heap oop found at disp(base) into dest and decompress it, if necessary.
+// The loaded oop may be NULL.
+void MacroAssembler::load_heap_oop(Register dest, int64_t disp, Register base) {
+  if (!UseCompressedOops) {
+    z_lg(dest, disp, base);
+    return;
+  }
+
+  const bool maybe_null = true;
+  z_llgf(dest, disp, base);
+  oop_decoder(dest, dest, maybe_null);
+}
+
+// Load the heap oop found at disp(base) into dest and decompress it, if necessary.
+// The decoder is told that the oop is not NULL.
+void MacroAssembler::load_heap_oop_not_null(Register dest, int64_t disp, Register base) {
+  if (!UseCompressedOops) {
+    z_lg(dest, disp, base);
+    return;
+  }
+
+  const bool maybe_null = false;
+  z_llgf(dest, disp, base);
+  oop_decoder(dest, dest, maybe_null);
+}
+
+// Compress the oop in Roop, if necessary, and store it to the heap at base + offset.
+// With compressed oops, Roop is encoded in place, i.e. its contents change.
+void MacroAssembler::store_heap_oop(Register Roop, RegisterOrConstant offset, Register base) {
+  // A register offset is used as index; otherwise Z_R0 is passed as index.
+  Register Ridx = Z_R0;
+  if (offset.is_register()) { Ridx = offset.register_or_noreg(); }
+
+  if (!UseCompressedOops) {
+    z_stg(Roop, offset.constant_or_zero(), Ridx, base);
+    return;
+  }
+
+  assert_different_registers(Roop, offset.register_or_noreg(), base);
+  encode_heap_oop(Roop);
+  z_st(Roop, offset.constant_or_zero(), Ridx, base);
+}
+
+// Compress the oop in Roop, if necessary, and store it to the heap at base + offset.
+// Oop is guaranteed to be not NULL.
+// With compressed oops, Roop is encoded in place, i.e. its contents change.
+void MacroAssembler::store_heap_oop_not_null(Register Roop, RegisterOrConstant offset, Register base) {
+  // A register offset is used as index; otherwise Z_R0 is passed as index.
+  Register Ridx = Z_R0;
+  if (offset.is_register()) { Ridx = offset.register_or_noreg(); }
+
+  if (!UseCompressedOops) {
+    z_stg(Roop, offset.constant_or_zero(), Ridx, base);
+    return;
+  }
+
+  assert_different_registers(Roop, offset.register_or_noreg(), base);
+  encode_heap_oop_not_null(Roop);
+  z_st(Roop, offset.constant_or_zero(), Ridx, base);
+}
+
+// Store NULL oop to heap at base + offset.
+// The contents of register zero are stored as is: 32 bit wide with compressed
+// oops, 64 bit wide otherwise.
+void MacroAssembler::store_heap_oop_null(Register zero, RegisterOrConstant offset, Register base) {
+  // A register offset is used as index; otherwise Z_R0 is passed as index.
+  Register Ridx = Z_R0;
+  if (offset.is_register()) { Ridx = offset.register_or_noreg(); }
+
+  if (!UseCompressedOops) {
+    z_stg(zero, offset.constant_or_zero(), Ridx, base);
+    return;
+  }
+
+  z_st(zero, offset.constant_or_zero(), Ridx, base);
+}
+
+//-------------------------------------------------
+// Encode compressed oop. Generally usable encoder.
+//-------------------------------------------------
+// Rsrc        - contains regular oop on entry. It remains unchanged.
+// Rdst        - contains compressed oop on exit.
+// Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged.
+// maybeNULL   - true if the oop in Rsrc possibly is NULL. NULL is encoded as NULL.
+// Rbase       - register to hold the complemented heap base. Must differ from Rsrc.
+// pow2_offset - offset belonging to a preloaded Rbase. If -1, the base is loaded here.
+//
+// Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality.
+// Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance.
+//
+// If the base has to be loaded here and Rdst == Rbase (maybeNULL case), the base
+// is loaded into a scratch register instead (Z_R1, or Z_R0 if Z_R1 is taken).
+// That way, the result always ends up in the register the caller passed as Rdst.
+//
+// only32bitValid is set, if later code only uses the lower 32 bits. In this
+// case we need not fix the upper 32 bits.
+void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL,
+                                 Register Rbase, int pow2_offset, bool only32bitValid) {
+
+  const address oop_base  = Universe::narrow_oop_base();
+  const int     oop_shift = Universe::narrow_oop_shift();
+  const bool    disjoint  = Universe::narrow_oop_base_disjoint();
+
+  assert(UseCompressedOops, "must be on to call this method");
+  assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder");
+  assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
+
+  if (disjoint || (oop_base == NULL)) {
+    // No base subtraction required: shifting (and clearing the upper bits) is all it takes.
+    BLOCK_COMMENT("cOop encoder zeroBase {");
+    if (oop_shift == 0) {
+      if (oop_base != NULL && !only32bitValid) {
+        z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again.
+      } else {
+        lgr_if_needed(Rdst, Rsrc);
+      }
+    } else {
+      z_srlg(Rdst, Rsrc, oop_shift);
+      if (oop_base != NULL && !only32bitValid) {
+        z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
+      }
+    }
+    BLOCK_COMMENT("} cOop encoder zeroBase");
+    return;
+  }
+
+  bool used_R0 = false;
+  bool used_R1 = false;
+
+  BLOCK_COMMENT("cOop encoder general {");
+  assert_different_registers(Rdst, Z_R1);
+  assert_different_registers(Rsrc, Rbase);
+  if (maybeNULL) {
+    Label done;
+    // We reorder shifting and subtracting, so that we can compare
+    // and shift in parallel:
+    //
+    // cycle 0:  potential LoadN, base = <const>
+    // cycle 1:  base = !base     dst = src >> 3,    cmp cr = (src != 0)
+    // cycle 2:  if (cr) br,      dst = dst + base + offset
+
+    // Get oop_base components.
+    if (pow2_offset == -1) {
+      if (Rdst == Rbase) {
+        // Loading the base into Rbase would clash with the result register.
+        // Redirect the base to a scratch register. Rdst must remain the
+        // register the caller passed, it is where the result is expected.
+        if (Rdst == Z_R1 || Rsrc == Z_R1) {
+          Rbase = Z_R0;
+          used_R0 = true;
+        } else {
+          Rbase = Z_R1;
+          used_R1 = true;
+        }
+      }
+      if (Rbase == Z_R1) {
+        used_R1 = true;
+      }
+      // The oop is shifted before the base is subtracted, hence the shifted base.
+      pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift);
+    }
+    assert_different_registers(Rdst, Rbase);
+
+    // Check for NULL oop (must be left alone) and shift.
+    if (oop_shift != 0) {  // Shift out alignment bits
+      if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set.
+        z_srag(Rdst, Rsrc, oop_shift);  // Arithmetic shift sets the condition code.
+      } else {
+        z_srlg(Rdst, Rsrc, oop_shift);
+        z_ltgr(Rsrc, Rsrc);  // This is the recommended way of testing for zero.
+        // This probably is faster, as it does not write a register. No!
+        // z_cghi(Rsrc, 0);
+      }
+    } else {
+      z_ltgr(Rdst, Rsrc);   // Move NULL to result register.
+    }
+    z_bre(done);
+
+    // Subtract oop_base components.
+    if ((Rdst == Z_R0) || (Rbase == Z_R0)) {
+      // Z_R0 is involved: add in separate steps instead of a composite add.
+      z_algr(Rdst, Rbase);
+      if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); }
+    } else {
+      add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst);
+    }
+    if (!only32bitValid) {
+      z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
+    }
+    bind(done);
+
+  } else {  // not null
+    // Get oop_base components.
+    if (pow2_offset == -1) {
+      pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base);
+    }
+
+    // Subtract oop_base components and shift.
+    if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) {
+      // Don't use lay instruction.
+      if (Rdst == Rsrc) {
+        z_algr(Rdst, Rbase);
+      } else {
+        lgr_if_needed(Rdst, Rbase);
+        z_algr(Rdst, Rsrc);
+      }
+      if (pow2_offset != 0) add2reg(Rdst, pow2_offset);
+    } else {
+      add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc);
+    }
+    if (oop_shift != 0) {   // Shift out alignment bits.
+      z_srlg(Rdst, Rdst, oop_shift);
+    }
+    if (!only32bitValid) {
+      z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
+    }
+  }
+#ifdef ASSERT
+  // Debug builds overwrite scratch registers that were used here.
+  if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); }
+  if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); }
+#endif
+  BLOCK_COMMENT("} cOop encoder general");
+}
+
+//-------------------------------------------------
+// decode compressed oop. Generally usable decoder.
+//-------------------------------------------------
+// Rsrc - contains compressed oop on entry.
+// Rdst - contains regular oop on exit.
+// Rdst and Rsrc may indicate same register.
+// Rdst must not be the same register as Rbase, if Rbase was preloaded (before call).
+// Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch.
+// Rbase - register to use for the base
+// pow2_offset - offset of base to nice value. If -1, base must be loaded.
+// For performance, it is good to
+// - avoid Z_R0 for any of the argument registers.
+// - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance.
+// - avoid Z_R1 for Rdst if Rdst == Rbase.
+void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) {
+
+ const address oop_base = Universe::narrow_oop_base();
+ const int oop_shift = Universe::narrow_oop_shift();
+ const bool disjoint = Universe::narrow_oop_base_disjoint();
+
+ assert(UseCompressedOops, "must be on to call this method");
+ assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder");
+ assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes),
+ "cOop encoder detected bad shift");
+
+ // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary.
+
+ if (oop_base != NULL) {
+ unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff; // Bits 32..47 of the base.
+ unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff; // Bits 48..63 of the base.
+ unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff; // Entire upper half (bits 32..63) of the base.
+ if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) {
+ BLOCK_COMMENT("cOop decoder disjointBase {");
+ // We do not need to load the base. Instead, we can install the upper bits
+ // with an OR instead of an ADD.
+ Label done;
+
+ // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
+ if (maybeNULL) { // NULL ptr must be preserved!
+ z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
+ z_bre(done);
+ } else {
+ z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone.
+ }
+ if ((oop_base_hl != 0) && (oop_base_hh != 0)) { // NOTE(review): cannot be true here, the enclosing condition requires one of the two to be zero.
+ z_oihf(Rdst, oop_base_hf);
+ } else if (oop_base_hl != 0) {
+ z_oihl(Rdst, oop_base_hl);
+ } else {
+ assert(oop_base_hh != 0, "not heapbased mode");
+ z_oihh(Rdst, oop_base_hh);
+ }
+ bind(done);
+ BLOCK_COMMENT("} cOop decoder disjointBase");
+ } else {
+ BLOCK_COMMENT("cOop decoder general {");
+ // There are three decode steps:
+ // scale oop offset (shift left)
+ // get base (in reg) and pow2_offset (constant)
+ // add base, pow2_offset, and oop offset
+ // The following register overlap situations may exist:
+ // Rdst == Rsrc, Rbase any other
+ // not a problem. Scaling in-place leaves Rbase undisturbed.
+ // Loading Rbase does not impact the scaled offset.
+ // Rdst == Rbase, Rsrc any other
+ // scaling would destroy a possibly preloaded Rbase. Loading Rbase
+ // would destroy the scaled offset.
+ // Remedy: use Rdst_tmp if Rbase has been preloaded.
+ // use Rbase_tmp if base has to be loaded.
+ // Rsrc == Rbase, Rdst any other
+ // Only possible without preloaded Rbase.
+ // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before.
+ // Rsrc == Rbase, Rdst == Rbase
+ // Only possible without preloaded Rbase.
+ // Loading Rbase would destroy compressed oop. Scaling in-place is ok.
+ // Remedy: use Rbase_tmp.
+ //
+ Label done;
+ Register Rdst_tmp = Rdst; // Register the oop is decoded in; copied to Rdst at the end, if different.
+ Register Rbase_tmp = Rbase; // Register actually holding the base during the add.
+ bool used_R0 = false; // NOTE(review): never set to true in this function, so the Z_R0 preset below is never emitted.
+ bool used_R1 = false; // NOTE(review): never set to true in this function, so the Z_R1 preset below is never emitted.
+ bool base_preloaded = pow2_offset >= 0; // A non-negative pow2_offset signals that the caller preloaded Rbase.
+ guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller");
+ assert(oop_shift != 0, "room for optimization");
+
+ // Check if we need to use scratch registers.
+ if (Rdst == Rbase) {
+ assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg");
+ if (Rdst != Rsrc) {
+ if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
+ else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
+ } else {
+ Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1;
+ }
+ }
+ if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase); // NOTE(review): Rbase_tmp appears to equal Rbase whenever the base is preloaded - confirm.
+
+ // Scale oop and check for NULL.
+ // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
+ if (maybeNULL) { // NULL ptr must be preserved!
+ z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
+ z_bre(done);
+ } else {
+ z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone.
+ }
+
+ // Get oop_base components.
+ if (!base_preloaded) {
+ pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base);
+ }
+
+ // Add up all components.
+ if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) {
+ z_algr(Rdst_tmp, Rbase_tmp); // Z_R0 is involved: add base and offset in separate steps.
+ if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); }
+ } else {
+ add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp);
+ }
+
+ bind(done);
+ lgr_if_needed(Rdst, Rdst_tmp); // Also reached for a NULL oop, so that Rdst receives the NULL.
+#ifdef ASSERT
+ if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); }
+ if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); }
+#endif
+ BLOCK_COMMENT("} cOop decoder general");
+ }
+ } else {
+ BLOCK_COMMENT("cOop decoder zeroBase {");
+ if (oop_shift == 0) {
+ lgr_if_needed(Rdst, Rsrc);
+ } else {
+ z_sllg(Rdst, Rsrc, oop_shift);
+ }
+ BLOCK_COMMENT("} cOop decoder zeroBase");
+ }
+}
+
+// Load the java mirror of the holder class of method into mirror.
+// The chain followed is: Method -> ConstMethod -> ConstantPool -> pool holder -> java mirror.
+// Register mirror serves as temp for all intermediate pointers.
+void MacroAssembler::load_mirror(Register mirror, Register method) {
+  const Address const_method(method, Method::const_offset());
+  mem2reg_opt(mirror, const_method);
+
+  const Address constant_pool(mirror, ConstMethod::constants_offset());
+  mem2reg_opt(mirror, constant_pool);
+
+  const Address pool_holder(mirror, ConstantPool::pool_holder_offset_in_bytes());
+  mem2reg_opt(mirror, pool_holder);
+
+  const Address java_mirror(mirror, Klass::java_mirror_offset());
+  mem2reg_opt(mirror, java_mirror);
+}
+
+//---------------------------------------------------------------
+//--- Operations on arrays.
+//---------------------------------------------------------------
+
+// Compiler ensures base is doubleword aligned and cnt is #doublewords.
+// Emitter does not KILL cnt and base arguments, since they need to be copied to
+// work registers anyway.
+// Actually, only r0, r1, and r5 are killed.
+unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len) { // Returns the length of the emitted code (difference of offset() values).
+ // Src_addr is evenReg.
+ // Src_len is odd_Reg.
+
+ int block_start = offset();
+ Register tmp_reg = src_len; // Holds target instr addr for EX.
+ Register dst_len = Z_R1; // Holds dst len for MVCLE.
+ Register dst_addr = Z_R0; // Holds dst addr for MVCLE.
+
+ Label doXC, doMVCLE, done;
+
+ BLOCK_COMMENT("Clear_Array {");
+
+ // Check for zero len and convert to long.
+ z_ltgfr(src_len, cnt_arg); // The converted count is kept in src_len.
+ z_bre(done); // Nothing to do if len == 0.
+
+ // Prefetch data to be cleared.
+ if (VM_Version::has_Prefetch()) {
+ z_pfd(0x02, 0, Z_R0, base_pointer_arg);
+ z_pfd(0x02, 256, Z_R0, base_pointer_arg);
+ }
+
+ z_sllg(dst_len, src_len, 3); // #bytes to clear.
+ z_cghi(src_len, 32); // Check for len <= 256 bytes (<=32 DW).
+ z_brnh(doXC); // If so, use executed XC to clear.
+
+ // MVCLE: initialize long arrays (general case).
+ bind(doMVCLE); // Only reached by fall-through; no branch to this label is emitted here.
+ z_lgr(dst_addr, base_pointer_arg);
+ clear_reg(src_len, true, false); // Src len of MVCLE is zero.
+
+ MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
+ z_bru(done);
+
+ // XC: initialize short arrays.
+ Label XC_template; // Instr template, never exec directly!
+ bind(XC_template);
+ z_xc(0,0,base_pointer_arg,0,base_pointer_arg);
+
+ bind(doXC);
+ add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE.
+ if (VM_Version::has_ExecuteExtensions()) {
+ z_exrl(dst_len, XC_template); // Execute XC with var. len.
+ } else {
+ z_larl(tmp_reg, XC_template); // tmp_reg is src_len, which is overwritten here.
+ z_ex(dst_len,0,Z_R0,tmp_reg); // Execute XC with var. len.
+ }
+ // z_bru(done); // fallthru
+
+ bind(done);
+
+ BLOCK_COMMENT("} Clear_Array");
+
+ int block_end = offset();
+ return block_end - block_start;
+}
+
+// Compiler ensures base is doubleword aligned and cnt is count of doublewords.
+// Emitter does not KILL any arguments nor work registers.
+// Emitter generates up to 16 XC instructions, depending on the array length.
+unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) { // Returns the length of the emitted code (difference of offset() values).
+ int block_start = offset();
+ int off; // Loop counter: index of the current XC chunk / prefetched line.
+ int lineSize_Bytes = AllocatePrefetchStepSize;
+ int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord; // NOTE(review): not used in this function.
+ bool doPrefetch = VM_Version::has_Prefetch();
+ int XC_maxlen = 256; // Max #bytes cleared by one XC.
+ int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0; // #bytes / XC_maxlen, rounded up.
+
+ BLOCK_COMMENT("Clear_Array_Const {");
+ assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only");
+
+ // Do less prefetching for very short arrays.
+ if (numXCInstr > 0) {
+ // Prefetch only some cache lines, then begin clearing.
+ if (doPrefetch) {
+ if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear,
+ z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line.
+ } else {
+ assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines");
+ for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) {
+ z_pfd(0x02, off*lineSize_Bytes, Z_R0, base);
+ }
+ }
+ }
+
+ for (off=0; off<(numXCInstr-1); off++) { // All chunks but the last one are cleared with full-length XCs.
+ z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base);
+
+ // Prefetch some cache lines in advance.
+ if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) {
+ z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base);
+ }
+ }
+ if (off*XC_maxlen < cnt*BytesPerWord) { // Last chunk: clear the remaining bytes.
+ z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base);
+ }
+ }
+ BLOCK_COMMENT("} Clear_Array_Const");
+
+ int block_end = offset();
+ return block_end - block_start;
+}
+
+// Compiler ensures base is doubleword aligned and cnt is #doublewords.
+// Emitter does not KILL cnt and base arguments, since they need to be copied to
+// work registers anyway.
+// Actually, only r0, r1, r4, and r5 (which are work registers) are killed.
+//
+// For very large arrays, exploit MVCLE H/W support.
+// MVCLE instruction automatically exploits H/W-optimized page mover.
+// - Bytes up to next page boundary are cleared with a series of XC to self.
+// - All full pages are cleared with the page mover H/W assist.
+// - Remaining bytes are again cleared by a series of XC to self.
+//
+unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len) { // Returns the length of the emitted code (difference of offset() values).
+ // Src_addr is evenReg.
+ // Src_len is odd_Reg.
+
+ int block_start = offset();
+ Register dst_len = Z_R1; // Holds dst len for MVCLE.
+ Register dst_addr = Z_R0; // Holds dst addr for MVCLE.
+
+ BLOCK_COMMENT("Clear_Array_Const_Big {");
+
+ // Get len to clear.
+ load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8
+
+ // Prepare other args to MVCLE.
+ z_lgr(dst_addr, base_pointer_arg);
+ // Indicate unused result.
+ (void) clear_reg(src_len, true, false); // Src len of MVCLE is zero.
+
+ // Clear.
+ MacroAssembler::move_long_ext(dst_addr, src_addr, 0); // Same setup as the MVCLE path of Clear_Array: zero source length, last argument 0.
+ BLOCK_COMMENT("} Clear_Array_Const_Big");
+
+ int block_end = offset();
+ return block_end - block_start;
+}
+
+// Allocator. Copies cnt_reg doublewords from src_reg to dst_reg. Writes cnt_reg and Z_R1; Z_R0, tmp1_reg, and tmp2_reg may be written as well. Returns the length of the emitted code.
+unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
+ Register cnt_reg,
+ Register tmp1_reg, Register tmp2_reg) {
+ // Tmp1 is oddReg.
+ // Tmp2 is evenReg.
+
+ int block_start = offset();
+ Label doMVC, doMVCLE, done, MVC_template;
+
+ BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {");
+
+ // Check for zero len and convert to long.
+ z_ltgfr(cnt_reg, cnt_reg); // Conversion is done in place.
+ z_bre(done); // Nothing to do if len == 0.
+
+ z_sllg(Z_R1, cnt_reg, 3); // Dst len in bytes. calc early to have the result ready.
+
+ z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW).
+ z_brnh(doMVC); // If so, use executed MVC to copy.
+
+ bind(doMVCLE); // A lot of data (more than 256 bytes).
+ // Prep dest reg pair.
+ z_lgr(Z_R0, dst_reg); // dst addr
+ // Dst len already in Z_R1.
+ // Prep src reg pair.
+ z_lgr(tmp2_reg, src_reg); // src addr
+ z_lgr(tmp1_reg, Z_R1); // Src len same as dst len.
+
+ // Do the copy.
+ move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache.
+ z_bru(done); // All done.
+
+ bind(MVC_template); // Just some data (not more than 256 bytes).
+ z_mvc(0, 0, dst_reg, 0, src_reg); // Instr template, only run via EX/EXRL below.
+
+ bind(doMVC);
+
+ if (VM_Version::has_ExecuteExtensions()) {
+ add2reg(Z_R1, -1); // Get #bytes-1 for EXRL.
+ } else {
+ add2reg(tmp1_reg, -1, Z_R1); // tmp1_reg = #bytes-1 for EX, ...
+ z_larl(Z_R1, MVC_template); // ... Z_R1 = address of the MVC template.
+ }
+
+ if (VM_Version::has_Prefetch()) {
+ z_pfd(1, 0,Z_R0,src_reg);
+ z_pfd(2, 0,Z_R0,dst_reg);
+ // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy.
+ // z_pfd(2,256,Z_R0,dst_reg);
+ }
+
+ if (VM_Version::has_ExecuteExtensions()) {
+ z_exrl(Z_R1, MVC_template); // Execute MVC with var. len.
+ } else {
+ z_ex(tmp1_reg, 0, Z_R0, Z_R1); // Execute MVC with var. len.
+ }
+
+ bind(done);
+
+ BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint");
+
+ int block_end = offset();
+ return block_end - block_start;
+}
+
+//------------------------------------------------------
+// Special String Intrinsics. Implementation
+//------------------------------------------------------
+
+// Intrinsics for CompactStrings
+
+// Compress char[] to byte[]. odd_reg contains cnt. Kills dst. Early clobber: result
+// The result is the number of characters copied before the first incompatible character was found.
+// If tmp2 is provided and the compression fails, the compression stops exactly at this point and the result is precise.
+//
+// Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
+// - Different number of characters may have been written to dead array (if tmp2 not provided).
+// - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
+unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register odd_reg,
+ Register even_reg, Register tmp, Register tmp2) { // The C++ return value is the length of the emitted code.
+ int block_start = offset();
+ Label Lloop1, Lloop2, Lslow, Ldone;
+ const Register addr2 = dst, ind1 = result, mask = tmp; // Aliases: write address, read index, high-byte mask.
+ const bool precise = (tmp2 != noreg);
+
+ BLOCK_COMMENT("string_compress {");
+
+ z_sll(odd_reg, 1); // Number of bytes to read. (Must be a positive simm32.)
+ clear_reg(ind1); // Index to read.
+ z_llilf(mask, 0xFF00FF00); // Lower half of the mask.
+ z_ahi(odd_reg, -16); // Last possible index for fast loop.
+ z_brl(Lslow); // Less than 8 characters: slow loop only.
+
+ // ind1: index, even_reg: index increment, odd_reg: index limit
+ z_iihf(mask, 0xFF00FF00); // Upper half of the mask.
+ z_lhi(even_reg, 16);
+
+ bind(Lloop1); // 8 Characters per iteration.
+ z_lg(Z_R0, Address(src, ind1));
+ z_lg(Z_R1, Address(src, ind1, 8));
+ if (precise) { // Test all 8 characters before anything is stored.
+ if (VM_Version::has_DistinctOpnds()) {
+ z_ogrk(tmp2, Z_R0, Z_R1);
+ } else {
+ z_lgr(tmp2, Z_R0);
+ z_ogr(tmp2, Z_R1);
+ }
+ z_ngr(tmp2, mask);
+ z_brne(Lslow); // Failed fast case, retry slowly.
+ }
+ z_stcmh(Z_R0, 5, 0, addr2); // Mask 5 (0b0101) stores every other byte of the word.
+ z_stcm(Z_R0, 5, 2, addr2);
+ if (!precise) { z_ogr(Z_R0, Z_R1); }
+ z_stcmh(Z_R1, 5, 4, addr2);
+ z_stcm(Z_R1, 5, 6, addr2);
+ if (!precise) {
+ z_ngr(Z_R0, mask);
+ z_brne(Ldone); // Failed (more than needed was written).
+ }
+ z_aghi(addr2, 8);
+ z_brxle(ind1, even_reg, Lloop1);
+
+ bind(Lslow);
+ // Compute index limit and skip if negative.
+ z_ahi(odd_reg, 16-2); // Last possible index for slow loop.
+ z_lhi(even_reg, 2);
+ z_cr(ind1, odd_reg);
+ z_brh(Ldone);
+
+ bind(Lloop2); // 1 Character per iteration.
+ z_llh(Z_R0, Address(src, ind1));
+ z_tmll(Z_R0, 0xFF00); // Any bit set in the high byte?
+ z_brnaz(Ldone); // Failed slow case: Return number of written characters.
+ z_stc(Z_R0, Address(addr2));
+ z_aghi(addr2, 1);
+ z_brxle(ind1, even_reg, Lloop2);
+
+ bind(Ldone); // result = ind1 = 2*cnt
+ z_srl(ind1, 1); // Convert the byte index into a character count.
+
+ BLOCK_COMMENT("} string_compress");
+
+ return offset() - block_start;
+}
+
+// Inflate byte[] to char[] by means of translate_ot. dst/cnt must form an even/odd register pair. Overwrites Z_R0, Z_R1, and tmp. Returns the length of the emitted code.
+unsigned int MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) {
+ int block_start = offset();
+
+ BLOCK_COMMENT("string_inflate {");
+
+ Register stop_char = Z_R0;
+ Register table = Z_R1;
+ Register src_addr = tmp;
+
+ assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
+ assert(dst->encoding()%2 == 0, "must be even reg");
+ assert(cnt->encoding()%2 == 1, "must be odd reg");
+ assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");
+
+ StubRoutines::zarch::generate_load_trot_table_addr(this, table); // kills Z_R0 (if ASSERT)
+ clear_reg(stop_char); // Stop character. Not used here, but initialized to have a defined value.
+ lgr_if_needed(src_addr, src); // The translation works on the copy in tmp.
+ z_llgfr(cnt, cnt); // # src characters, must be a positive simm32.
+
+ translate_ot(dst, src_addr, /* mask = */ 0x0001);
+
+ BLOCK_COMMENT("} string_inflate");
+
+ return offset() - block_start;
+}
+
+// Inflate byte[] to char[]. odd_reg contains cnt. Kills src. Also overwrites odd_reg, even_reg, tmp, Z_R0 and (fast loop only) Z_R1.
+unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register odd_reg,
+ Register even_reg, Register tmp) {
+ int block_start = offset(); // Code offset at entry; the difference to the final offset is returned.
+
+ BLOCK_COMMENT("string_inflate {");
+
+ Label Lloop1, Lloop2, Lslow, Ldone;
+ const Register addr1 = src, ind2 = tmp; // addr1: running source address, ind2: byte index into dst.
+
+ z_sll(odd_reg, 1); // Number of bytes to write. (Must be a positive simm32.)
+ clear_reg(ind2); // Index to write.
+ z_ahi(odd_reg, -16); // Last possible index for fast loop.
+ z_brl(Lslow); // Negative: fewer than 16 bytes to write, skip the fast loop.
+
+ // ind2: index, even_reg: index increment, odd_reg: index limit
+ clear_reg(Z_R0); // The fast loop inserts source bytes into every other byte only,
+ clear_reg(Z_R1); // so the bytes in between have to be zero beforehand.
+ z_lhi(even_reg, 16);
+
+ bind(Lloop1); // 8 Characters per iteration.
+ z_icmh(Z_R0, 5, 0, addr1); // Source bytes 0..1 (mask 5 = 0b0101: every other byte of the register half).
+ z_icmh(Z_R1, 5, 4, addr1); // Source bytes 4..5.
+ z_icm(Z_R0, 5, 2, addr1); // Source bytes 2..3.
+ z_icm(Z_R1, 5, 6, addr1); // Source bytes 6..7.
+ z_aghi(addr1, 8); // Consumed 8 source bytes.
+ z_stg(Z_R0, Address(dst, ind2)); // Store characters 0..3.
+ z_stg(Z_R1, Address(dst, ind2, 8)); // Store characters 4..7.
+ z_brxle(ind2, even_reg, Lloop1); // ind2 += 16; loop while ind2 <= limit.
+
+ bind(Lslow);
+ // Compute index limit and skip if negative.
+ z_ahi(odd_reg, 16-2); // Last possible index for slow loop.
+ z_lhi(even_reg, 2);
+ z_cr(ind2, odd_reg);
+ z_brh(Ldone); // Nothing left to do.
+
+ bind(Lloop2); // 1 Character per iteration.
+ z_llc(Z_R0, Address(addr1)); // Zero-extend the source byte ...
+ z_sth(Z_R0, Address(dst, ind2)); // ... and store it as a 2-byte character.
+ z_aghi(addr1, 1);
+ z_brxle(ind2, even_reg, Lloop2);
+
+ bind(Ldone);
+
+ BLOCK_COMMENT("} string_inflate");
+
+ return offset() - block_start;
+}
+
+// Kills src. Sets result to 1 if any of the cnt bytes starting at src has its most significant bit set, to 0 otherwise.
+unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt,
+ Register odd_reg, Register even_reg, Register tmp) {
+ int block_start = offset();
+ Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
+ const Register addr = src, mask = tmp; // addr: running read address, mask: 0x80 in every byte.
+
+ BLOCK_COMMENT("has_negatives {");
+
+ z_llgfr(Z_R1, cnt); // Number of bytes to read. (Must be a positive simm32.)
+ z_llilf(mask, 0x80808080);
+ z_lhi(result, 1); // Assume true.
+ // Last possible addr for fast loop.
+ z_lay(odd_reg, -16, Z_R1, src);
+ z_chi(cnt, 16);
+ z_brl(Lslow); // Fewer than 16 bytes: skip the fast loop.
+
+ // addr: current address, even_reg: address increment, odd_reg: address limit
+ z_iihf(mask, 0x80808080); // Extend the mask to all 8 bytes.
+ z_lghi(even_reg, 16);
+
+ bind(Lloop1); // 16 bytes per iteration.
+ z_lg(Z_R0, Address(addr));
+ z_lg(Z_R1, Address(addr, 8));
+ z_ogr(Z_R0, Z_R1); // Combine both doublewords so that one test covers all 16 bytes.
+ z_ngr(Z_R0, mask);
+ z_brne(Ldone); // If found return 1.
+ z_brxlg(addr, even_reg, Lloop1);
+
+ bind(Lslow);
+ z_aghi(odd_reg, 16-1); // Last possible addr for slow loop.
+ z_lghi(even_reg, 1);
+ z_cgr(addr, odd_reg);
+ z_brh(Lnotfound); // No bytes left.
+
+ bind(Lloop2); // 1 byte per iteration.
+ z_cli(Address(addr), 0x80); // Unsigned byte compare against 0x80.
+ z_brnl(Ldone); // If found return 1.
+ z_brxlg(addr, even_reg, Lloop2);
+
+ bind(Lnotfound);
+ z_lhi(result, 0);
+
+ bind(Ldone);
+
+ BLOCK_COMMENT("} has_negatives");
+
+ return offset() - block_start;
+}
+
+// kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result. Z_R0 and Z_R1 are used as well. ae selects the encoding pair (StrIntrinsicNode::LL/UU/LU/UL).
+unsigned int MacroAssembler::string_compare(Register str1, Register str2,
+ Register cnt1, Register cnt2,
+ Register odd_reg, Register even_reg, Register result, int ae) {
+ int block_start = offset(); // Code offset at entry; the difference to the final offset is returned.
+
+ assert_different_registers(str1, cnt1, cnt2, odd_reg, even_reg, result);
+ assert_different_registers(str2, cnt1, cnt2, odd_reg, even_reg, result);
+
+ // If strings are equal up to min length, return the length difference.
+ const Register diff = result, // Pre-set result with length difference.
+ min = cnt1, // min number of bytes
+ tmp = cnt2;
+
+ // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
+ // we interchange str1 and str2 in the UL case and negate the result.
+ // Like this, str1 is always latin1 encoded, except for the UU case.
+ // In addition, we need 0 (or sign which is 0) extend when using 64 bit register.
+ const bool used_as_LU = (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL);
+
+ BLOCK_COMMENT("string_compare {");
+
+ if (used_as_LU) {
+ z_srl(cnt2, 1); // Halve cnt2 - presumably converts a byte count into a character count; TODO confirm against the caller.
+ }
+
+ // See if the lengths are different, and calculate min in cnt1.
+ // Save diff in case we need it for a tie-breaker.
+
+ // diff = cnt1 - cnt2
+ if (VM_Version::has_DistinctOpnds()) {
+ z_srk(diff, cnt1, cnt2);
+ } else {
+ z_lr(diff, cnt1);
+ z_sr(diff, cnt2);
+ }
+ if (str1 != str2) { // Same register for both strings: min is not needed, only diff is returned.
+ if (VM_Version::has_LoadStoreConditional()) {
+ z_locr(min, cnt2, Assembler::bcondHigh); // Condition code still stems from the subtraction: cnt1 > cnt2 -> min = cnt2.
+ } else {
+ Label Lskip;
+ z_brl(Lskip); // min ok if cnt1 < cnt2
+ z_lr(min, cnt2); // min = cnt2
+ bind(Lskip);
+ }
+ }
+
+ if (ae == StrIntrinsicNode::UU) {
+ z_sra(diff, 1); // Halve the difference - presumably bytes -> characters; TODO confirm against the caller.
+ }
+ if (str1 != str2) {
+ Label Ldone;
+ if (used_as_LU) {
+ // Loop which searches the first difference character by character.
+ Label Lloop;
+ const Register ind1 = Z_R1,
+ ind2 = min;
+ int stride1 = 1, stride2 = 2; // See comment above.
+
+ // ind1: index, even_reg: index increment, odd_reg: index limit
+ z_llilf(ind1, (unsigned int)(-stride1)); // Start one stride before 0; the first z_brxh advances it to 0.
+ z_lhi(even_reg, stride1);
+ add2reg(odd_reg, -stride1, min); // Limit = min - stride1, i.e. the last valid index into str1.
+ clear_reg(ind2); // kills min
+
+ bind(Lloop);
+ z_brxh(ind1, even_reg, Ldone); // ind1 += stride1; beyond the limit: no difference found, result keeps diff.
+ z_llc(tmp, Address(str1, ind1)); // Zero-extended byte from str1.
+ z_llh(Z_R0, Address(str2, ind2)); // Zero-extended halfword from str2.
+ z_ahi(ind2, stride2);
+ z_sr(tmp, Z_R0);
+ z_bre(Lloop); // Characters are equal: continue.
+
+ z_lr(result, tmp); // Mismatch: result = difference of the two characters.
+
+ } else {
+ // Use clcle in fast loop (only for same encoding).
+ z_lgr(Z_R0, str1); // Z_R0/Z_R1: address/length pair of the left operand.
+ z_lgr(even_reg, str2); // even_reg/odd_reg: address/length pair of the right operand.
+ z_llgfr(Z_R1, min);
+ z_llgfr(odd_reg, min);
+
+ if (ae == StrIntrinsicNode::LL) {
+ compare_long_ext(Z_R0, even_reg, 0);
+ } else {
+ compare_long_uni(Z_R0, even_reg, 0);
+ }
+ z_bre(Ldone); // Equal up to min: result keeps the length difference.
+ z_lgr(Z_R1, Z_R0); // Copy the left address to Z_R1 for the loads below (Z_R0 cannot serve as a base register).
+ if (ae == StrIntrinsicNode::LL) {
+ z_llc(Z_R0, Address(even_reg));
+ z_llc(result, Address(Z_R1));
+ } else {
+ z_llh(Z_R0, Address(even_reg));
+ z_llh(result, Address(Z_R1));
+ }
+ z_sr(result, Z_R0); // result = left character - right character.
+ }
+
+ // Otherwise, return the difference between the first mismatched chars.
+ bind(Ldone);
+ }
+
+ if (ae == StrIntrinsicNode::UL) {
+ z_lcr(result, result); // Negate result (see note above).
+ }
+
+ BLOCK_COMMENT("} string_compare");
+
+ return offset() - block_start;
+}
+
+unsigned int MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, // Sets result to 1 if both operands are equal, to 0 otherwise.
+ Register odd_reg, Register even_reg, Register result, bool is_byte) { // Overwrites odd_reg, even_reg, Z_R0, Z_R1.
+ int block_start = offset(); // Code offset at entry; the difference to the final offset is returned.
+
+ BLOCK_COMMENT("array_equals {");
+
+ assert_different_registers(ary1, limit, odd_reg, even_reg);
+ assert_different_registers(ary2, limit, odd_reg, even_reg);
+
+ Label Ldone, Ldone_true, Ldone_false, Lclcle, CLC_template;
+ int base_offset = 0; // Stays 0 unless is_array_equ: ary1/ary2 are then used as data addresses directly.
+
+ if (ary1 != ary2) { // Same register for both operands: only "result = 1" is emitted.
+ if (is_array_equ) {
+ base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
+
+ // Return true if the same array.
+ compareU64_and_branch(ary1, ary2, Assembler::bcondEqual, Ldone_true);
+
+ // Return false if one of them is NULL.
+ compareU64_and_branch(ary1, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
+ compareU64_and_branch(ary2, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
+
+ // Load the lengths of arrays.
+ z_llgf(odd_reg, Address(ary1, arrayOopDesc::length_offset_in_bytes()));
+
+ // Return false if the two arrays are not equal length.
+ z_c(odd_reg, Address(ary2, arrayOopDesc::length_offset_in_bytes()));
+ z_brne(Ldone_false);
+
+ // string len in bytes (right operand)
+ if (!is_byte) {
+ z_chi(odd_reg, 128); // More than 128 elements of 2 bytes each, i.e. more than 256 bytes?
+ z_sll(odd_reg, 1); // preserves flags
+ z_brh(Lclcle);
+ } else {
+ compareU32_and_branch(odd_reg, (intptr_t)256, Assembler::bcondHigh, Lclcle);
+ }
+ } else {
+ z_llgfr(odd_reg, limit); // Need to zero-extend prior to using the value.
+ compareU32_and_branch(limit, (intptr_t)256, Assembler::bcondHigh, Lclcle); // limit is used as a length in bytes here.
+ }
+
+
+ // Use clc instruction for up to 256 bytes.
+ {
+ Register str1_reg = ary1,
+ str2_reg = ary2;
+ if (is_array_equ) {
+ str1_reg = Z_R1;
+ str2_reg = even_reg;
+ add2reg(str1_reg, base_offset, ary1); // string addr (left operand)
+ add2reg(str2_reg, base_offset, ary2); // string addr (right operand)
+ }
+ z_ahi(odd_reg, -1); // Clc uses decremented limit. Also compare result to 0.
+ z_brl(Ldone_true); // Length was 0: nothing to compare, operands are equal.
+ // Note: We could jump to the template if equal.
+
+ assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
+ z_exrl(odd_reg, CLC_template); // Execute the CLC at CLC_template with its length taken from odd_reg.
+ z_bre(Ldone_true);
+ // fall through
+
+ bind(Ldone_false);
+ clear_reg(result);
+ z_bru(Ldone);
+
+ bind(CLC_template); // Placed behind the unconditional branch: only ever run through z_exrl above.
+ z_clc(0, 0, str1_reg, 0, str2_reg);
+ }
+
+ // Use clcle instruction.
+ {
+ bind(Lclcle);
+ add2reg(even_reg, base_offset, ary2); // string addr (right operand)
+ add2reg(Z_R0, base_offset, ary1); // string addr (left operand)
+
+ z_lgr(Z_R1, odd_reg); // string len in bytes (left operand)
+ if (is_byte) {
+ compare_long_ext(Z_R0, even_reg, 0);
+ } else {
+ compare_long_uni(Z_R0, even_reg, 0);
+ }
+ z_lghi(result, 0); // Preserve flags.
+ z_brne(Ldone); // Mismatch: leave with result == 0.
+ }
+ }
+ // fall through
+
+ bind(Ldone_true);
+ z_lghi(result, 1); // All characters are equal.
+ bind(Ldone);
+
+ BLOCK_COMMENT("} array_equals");
+
+ return offset() - block_start;
+}
+
+// kill: haycnt, needlecnt, odd_reg, even_reg; early clobber: result. Z_R0 and Z_R1 are used as well. result: index of the first match, or -1.
+unsigned int MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
+ Register needle, Register needlecnt, int needlecntval,
+ Register odd_reg, Register even_reg, int ae) {
+ int block_start = offset(); // Code offset at entry; the difference to the final offset is returned.
+
+ // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+ const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2; // Bytes per haystack character.
+ const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1; // Bytes per needle character.
+ Label L_needle1, L_Found, L_NotFound;
+
+ BLOCK_COMMENT("string_indexof {");
+
+ if (needle == haystack) { // Same register for needle and haystack: match at index 0.
+ z_lhi(result, 0);
+ } else {
+
+ // Load first character of needle (R0 used by search_string instructions).
+ if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); }
+
+ // Compute last haystack addr to use if no match gets found.
+ if (needlecnt != noreg) { // variable needlecnt
+ z_ahi(needlecnt, -1); // Remaining characters after first one.
+ z_sr(haycnt, needlecnt); // Compute index succeeding last element to compare.
+ if (n_csize == 2) { z_sll(needlecnt, 1); } // In bytes.
+ } else { // constant needlecnt
+ assert((needlecntval & 0x7fff) == needlecntval, "must be positive simm16 immediate");
+ // Compute index succeeding last element to compare.
+ if (needlecntval != 1) { z_ahi(haycnt, 1 - needlecntval); }
+ }
+
+ z_llgfr(haycnt, haycnt); // Clear high half.
+ z_lgr(result, haystack); // Final result will be computed from needle start pointer.
+ if (h_csize == 2) { z_sll(haycnt, 1); } // Scale to number of bytes.
+ z_agr(haycnt, haystack); // Point to address succeeding last element (haystack+scale*(haycnt-needlecnt+1)).
+
+ if (h_csize != n_csize) {
+ assert(ae == StrIntrinsicNode::UL, "Invalid encoding");
+
+ if (needlecnt != noreg || needlecntval != 1) {
+ if (needlecnt != noreg) {
+ compare32_and_branch(needlecnt, (intptr_t)0, Assembler::bcondEqual, L_needle1); // No remaining characters: single-character search.
+ }
+
+ // Main Loop: UL version (now we have at least 2 characters).
+ Label L_OuterLoop, L_InnerLoop, L_Skip;
+ bind(L_OuterLoop); // Search for 1st 2 characters.
+ z_lgr(Z_R1, haycnt); // Z_R1: end address of the search range.
+ MacroAssembler::search_string_uni(Z_R1, result);
+ z_brc(Assembler::bcondNotFound, L_NotFound);
+ z_lgr(result, Z_R1); // Continue at the address handed back in Z_R1.
+
+ z_lghi(Z_R1, n_csize); // Z_R1: byte offset into needle, starting at its 2nd character.
+ z_lghi(even_reg, h_csize); // even_reg: byte offset into haystack relative to result.
+ bind(L_InnerLoop);
+ z_llgc(odd_reg, Address(needle, Z_R1)); // Zero-extended needle byte ...
+ z_ch(odd_reg, Address(result, even_reg)); // ... compared against the haystack halfword.
+ z_brne(L_Skip);
+ if (needlecnt != noreg) { z_cr(Z_R1, needlecnt); } else { z_chi(Z_R1, needlecntval - 1); }
+ z_brnl(L_Found); // Last needle character matched as well.
+ z_aghi(Z_R1, n_csize);
+ z_aghi(even_reg, h_csize);
+ z_bru(L_InnerLoop);
+
+ bind(L_Skip);
+ z_aghi(result, h_csize); // This is the new address we want to use for comparing.
+ z_bru(L_OuterLoop);
+ }
+
+ } else {
+ const intptr_t needle_bytes = (n_csize == 2) ? ((needlecntval - 1) << 1) : (needlecntval - 1); // Bytes following the first needle character (constant needlecnt case).
+ Label L_clcle;
+
+ if (needlecnt != noreg || (needlecntval != 1 && needle_bytes <= 256)) {
+ if (needlecnt != noreg) {
+ compare32_and_branch(needlecnt, 256, Assembler::bcondHigh, L_clcle);
+ z_ahi(needlecnt, -1); // remaining bytes -1 (for CLC)
+ z_brl(L_needle1); // No remaining bytes: single-character search.
+ }
+
+ // Main Loop: clc version (now we have at least 2 characters).
+ Label L_OuterLoop, CLC_template;
+ bind(L_OuterLoop); // Search for 1st 2 characters.
+ z_lgr(Z_R1, haycnt); // Z_R1: end address of the search range.
+ if (h_csize == 1) {
+ MacroAssembler::search_string(Z_R1, result);
+ } else {
+ MacroAssembler::search_string_uni(Z_R1, result);
+ }
+ z_brc(Assembler::bcondNotFound, L_NotFound);
+ z_lgr(result, Z_R1); // Continue at the address handed back in Z_R1.
+
+ if (needlecnt != noreg) {
+ assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
+ z_exrl(needlecnt, CLC_template); // Execute the CLC at CLC_template with its length taken from needlecnt.
+ } else {
+ z_clc(h_csize, needle_bytes -1, Z_R1, n_csize, needle);
+ }
+ z_bre(L_Found);
+ z_aghi(result, h_csize); // This is the new address we want to use for comparing.
+ z_bru(L_OuterLoop);
+
+ if (needlecnt != noreg) {
+ bind(CLC_template); // Placed behind the unconditional branch: only ever run through z_exrl above.
+ z_clc(h_csize, 0, Z_R1, n_csize, needle);
+ }
+ }
+
+ if (needlecnt != noreg || needle_bytes > 256) {
+ bind(L_clcle);
+
+ // Main Loop: clcle version (now we have more than 256 remaining bytes).
+ Label L_OuterLoop, CLC_template;
+ bind(L_OuterLoop); // Search for 1st 2 characters.
+ z_lgr(Z_R1, haycnt); // Z_R1: end address of the search range.
+ if (h_csize == 1) {
+ MacroAssembler::search_string(Z_R1, result);
+ } else {
+ MacroAssembler::search_string_uni(Z_R1, result);
+ }
+ z_brc(Assembler::bcondNotFound, L_NotFound);
+
+ add2reg(Z_R0, n_csize, needle); // Left operand: needle behind its first character.
+ add2reg(even_reg, h_csize, Z_R1); // Right operand: haystack behind the matching character.
+ z_lgr(result, Z_R1);
+ if (needlecnt != noreg) {
+ z_llgfr(Z_R1, needlecnt); // needle len in bytes (left operand)
+ z_llgfr(odd_reg, needlecnt);
+ } else {
+ load_const_optimized(Z_R1, needle_bytes);
+ if (Immediate::is_simm16(needle_bytes)) { z_lghi(odd_reg, needle_bytes); } else { z_lgr(odd_reg, Z_R1); }
+ }
+ if (h_csize == 1) {
+ compare_long_ext(Z_R0, even_reg, 0);
+ } else {
+ compare_long_uni(Z_R0, even_reg, 0);
+ }
+ z_bre(L_Found);
+
+ if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); } // Reload first needle character (Z_R0 served as compare operand).
+ z_aghi(result, h_csize); // This is the new address we want to use for comparing.
+ z_bru(L_OuterLoop);
+ }
+ }
+
+ if (needlecnt != noreg || needlecntval == 1) {
+ bind(L_needle1);
+
+ // Single needle character version.
+ if (h_csize == 1) {
+ MacroAssembler::search_string(haycnt, result);
+ } else {
+ MacroAssembler::search_string_uni(haycnt, result);
+ }
+ z_lgr(result, haycnt); // Take over the address handed back in haycnt; only meaningful if found.
+ z_brc(Assembler::bcondFound, L_Found);
+ }
+
+ bind(L_NotFound);
+ add2reg(result, -1, haystack); // Return -1.
+
+ bind(L_Found); // Return index (or -1 in fallthrough case).
+ z_sgr(result, haystack); // Address -> byte offset within haystack.
+ if (h_csize == 2) { z_srag(result, result, exact_log2(sizeof(jchar))); } // Byte offset -> character index; the arithmetic shift keeps -1.
+ }
+ BLOCK_COMMENT("} string_indexof");
+
+ return offset() - block_start;
+}
+
+// early clobber: result. Overwrites odd_reg, even_reg and Z_R0. The needle is taken from register needle, or from needleChar if needle == noreg.
+unsigned int MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
+ Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte) {
+ int block_start = offset(); // Code offset at entry; the difference to the final offset is returned.
+
+ BLOCK_COMMENT("string_indexof_char {");
+
+ if (needle == haystack) { // Same register for needle and haystack: result is 0.
+ z_lhi(result, 0);
+ } else {
+
+ Label Ldone;
+
+ z_llgfr(odd_reg, haycnt); // Preset loop ctr/searchrange end.
+ if (needle == noreg) {
+ load_const_optimized(Z_R0, (unsigned long)needleChar);
+ } else {
+ if (is_byte) {
+ z_llgcr(Z_R0, needle); // First (and only) needle char.
+ } else {
+ z_llghr(Z_R0, needle); // First (and only) needle char.
+ }
+ }
+
+ if (!is_byte) {
+ z_agr(odd_reg, odd_reg); // Calc #bytes to be processed with SRSTU.
+ }
+
+ z_lgr(even_reg, haystack); // haystack addr
+ z_agr(odd_reg, haystack); // First char after range end.
+ z_lghi(result, -1); // Assume not found.
+
+ if (is_byte) {
+ MacroAssembler::search_string(odd_reg, even_reg);
+ } else {
+ MacroAssembler::search_string_uni(odd_reg, even_reg);
+ }
+ z_brc(Assembler::bcondNotFound, Ldone);
+ if (is_byte) { // Found: turn the address handed back in odd_reg into an index.
+ if (VM_Version::has_DistinctOpnds()) {
+ z_sgrk(result, odd_reg, haystack);
+ } else {
+ z_sgr(odd_reg, haystack);
+ z_lgr(result, odd_reg);
+ }
+ } else {
+ z_slgr(odd_reg, haystack);
+ z_srlg(result, odd_reg, exact_log2(sizeof(jchar))); // Byte offset -> character index.
+ }
+
+ bind(Ldone);
+ }
+ BLOCK_COMMENT("} string_indexof_char");
+
+ return offset() - block_start;
+}
+
+
+//-------------------------------------------------
+// Constants (scalar and oop) in constant pool
+//-------------------------------------------------
+
+// Add a non-relocated constant to the CP. Returns its TOC offset, or -1 on failure.
+int MacroAssembler::store_const_in_toc(AddressLiteral& val) {
+  const long    toc_value = val.value();
+  const address entry     = long_constant(toc_value);
+
+  if (entry == NULL) {
+    // long_constant returned NULL, so no constant entry has been created.
+    // In that case, we return a "fatal" offset, just in case that subsequently
+    // generated access code is executed.
+    return -1;
+  }
+  // Distance of the new entry from the start of the constants section.
+  return (int)(entry - code()->consts()->start());
+}
+
+// Returns the TOC offset where the address is stored, or -1 if no entry could be created.
+// Add a relocated constant to the CP.
+int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) {
+  // Use RelocationHolder::none for the constant pool entry.
+  // Otherwise we will end up with a failing NativeCall::verify(x),
+  // where x is the address of the constant pool entry.
+  const address entry = address_constant((address)oop.value(), RelocationHolder::none);
+  if (entry == NULL) {
+    // Address_constant returned NULL, so no constant entry has been created.
+    // In that case, we return a "fatal" offset, just in case that subsequently
+    // generated access code is executed.
+    return -1;
+  }
+
+  const int toc_off = (int)(entry - code()->consts()->start());
+  RelocationHolder holder = oop.rspec();
+  Relocation* reloc = holder.reloc();
+
+  // Store toc_offset in relocation, used by call_far_patchable.
+  if ((relocInfo::relocType)reloc->type() == relocInfo::runtime_call_w_cp_type) {
+    ((runtime_call_w_cp_Relocation *)(reloc))->set_constant_pool_offset(toc_off);
+  }
+  // Relocate at the load's pc.
+  relocate(holder);
+
+  return toc_off;
+}
+
+bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
+  const int entry_off = store_const_in_toc(a); // -1: no TOC entry could be created.
+  if (entry_off != -1) {
+    address entry = code()->consts()->start() + entry_off;
+    assert((address)code()->consts()->start() != NULL, "Please add CP address");
+    load_long_pcrelative(dst, entry); // Rtoc is not referenced: the entry is addressed pc-relative.
+  }
+  return entry_off != -1;
+}
+
+bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
+  const int entry_off = store_oop_in_toc(a); // -1: no TOC entry could be created.
+  if (entry_off != -1) {
+    address entry = code()->consts()->start() + entry_off;
+    assert((address)code()->consts()->start() != NULL, "Please add CP address");
+    load_addr_pcrelative(dst, entry); // Rtoc is not referenced: the entry is addressed pc-relative.
+  }
+  return entry_off != -1;
+}
+
+// If the instruction sequence at the given pc is a load_const_from_toc
+// sequence, return the value currently stored at the referenced position
+// in the TOC.
+intptr_t MacroAssembler::get_const_from_toc(address pc) {
+  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
+
+  const long toc_disp = get_load_const_from_toc_offset(pc);
+
+  if (is_load_const_from_toc_pcrelative(pc)) {
+    // Displacement is relative to the load instruction itself.
+    return *(intptr_t *)(pc + toc_disp);
+  }
+
+  // Displacement is relative to the constant table of the enclosing nmethod.
+  CodeBlob* blob = CodeCache::find_blob_unsafe(pc); // Else we get assertion if nmethod is zombie.
+  assert(blob && blob->is_nmethod(), "sanity");
+  nmethod* nm = (nmethod*)blob;
+  return *(intptr_t *)(nm->ctable_begin() + toc_disp);
+}
+
+// If the instruction sequence at the given pc is a load_const_from_toc
+// sequence, copy the passed-in new_data value into the referenced
+// position in the TOC.
+void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) {
+  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
+
+  const long toc_disp = MacroAssembler::get_load_const_from_toc_offset(pc);
+  unsigned long* slot;
+  if (!is_load_const_from_toc_pcrelative(pc)) {
+    nmethod* nm = CodeCache::find_nmethod(pc);
+    assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob");
+    slot = (unsigned long *)(nm->ctable_begin() + toc_disp);
+  } else {
+    slot = (unsigned long *)(pc + toc_disp);
+  }
+  // Prevent cache invalidation: update only if necessary.
+  if (*slot != new_data) {
+    *slot = new_data;
+  }
+}
+
+// Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc
+// site. Verify by calling is_load_const_from_toc() before!!
+// Offset is +/- 2**32 -> use long. Returns the pc-relative offset encoded in the instruction at a.
+long MacroAssembler::get_load_const_from_toc_offset(address a) {
+  assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
+  // expected code sequence:
+  //   z_lgrl(t, simm32); len = 6
+  unsigned long inst;
+  (void)get_instruction(a, &inst); // Only the instruction bits are needed; the returned length is not used.
+  return get_pcrel_offset(inst);
+}
+
+//**********************************************************************************
+// inspection of generated instruction sequences for a particular pattern
+//**********************************************************************************
+
+bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
+#ifdef ASSERT
+  // Debug-only: dump the code and trap if the bytes at a+2 decode as a pc-relative call as well.
+  unsigned long next_inst;
+  const unsigned int next_len = get_instruction(a+2, &next_inst);
+  if ((next_len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(next_inst)) {
+    Assembler::dump_code_range(tty, a, 128, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
+    VM_Version::z_SIGSEGV();
+  }
+#endif
+  // expected code sequence:
+  //   z_lgrl(t, relAddr32); len = 6
+  // TODO: verify accessed data is in CP, if possible.
+  return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used.
+}
+
+bool MacroAssembler::is_load_const_from_toc_call(address a) { // TOC load at a, directly followed by a call via register?
+  return is_load_const_from_toc(a) ? is_call_byregister(a + load_const_from_toc_size()) : false;
+}
+
+bool MacroAssembler::is_load_const_call(address a) { // load_const at a, directly followed by a call via register?
+  return is_load_const(a) ? is_call_byregister(a + load_const_size()) : false;
+}
+
+//-------------------------------------------------
+// Emitters for some really CISC instructions
+//-------------------------------------------------
+
+void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
+  assert((dst->encoding() & 1) == 0, "must be an even/odd register pair");
+  assert((src->encoding() & 1) == 0, "must be an even/odd register pair");
+  assert(pad <= 0xff, "must be a padding BYTE");
+  // Emit MVCLE inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_mvcle(dst, src, pad);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
+  assert((left->encoding() & 1) == 0, "must be an even/odd register pair");
+  assert((right->encoding() & 1) == 0, "must be an even/odd register pair");
+  assert(pad <= 0xff, "must be a padding BYTE");
+  // Emit CLCLE inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_clcle(left, right, pad, Z_R0);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
+  assert((left->encoding() & 1) == 0, "must be an even/odd register pair");
+  assert((right->encoding() & 1) == 0, "must be an even/odd register pair");
+  assert(pad < 0x1000, "must be a padding HALFWORD");
+  assert(VM_Version::has_ETF2(), "instruction must be available");
+  // Emit CLCLU inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_clclu(left, right, pad, Z_R0);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::search_string(Register end, Register start) {
+  assert(end->encoding() != 0, "end address must not be in R0");
+  assert(start->encoding() != 0, "start address must not be in R0");
+  // Emit SRST inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_srst(end, start);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::search_string_uni(Register end, Register start) {
+  assert(end->encoding() != 0, "end address must not be in R0");
+  assert(start->encoding() != 0, "start address must not be in R0");
+  assert(VM_Version::has_ETF3(), "instruction must be available");
+  // Emit SRSTU inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_srstu(end, start);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::kmac(Register srcBuff) {
+  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
+  assert((srcBuff->encoding() & 1) == 0, "src buffer/len must be an even/odd register pair");
+  // Emit KMAC inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_kmac(Z_R0, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::kimd(Register srcBuff) {
+  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
+  assert((srcBuff->encoding() & 1) == 0, "src buffer/len must be an even/odd register pair");
+  // Emit KIMD inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_kimd(Z_R0, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::klmd(Register srcBuff) {
+  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
+  assert((srcBuff->encoding() & 1) == 0, "src buffer/len must be an even/odd register pair");
+  // Emit KLMD inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_klmd(Z_R0, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::km(Register dstBuff, Register srcBuff) {
+  // The same register may be passed for dstBuff and srcBuff (encryption in-place).
+  // Their storage must not overlap destructively, and neither may overlap the parameter block.
+  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
+  assert((dstBuff->encoding() & 1) == 0, "dst buffer addr must be an even register");
+  assert((srcBuff->encoding() & 1) == 0, "src buffer addr/len must be an even/odd register pair");
+  // Emit KM inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_km(dstBuff, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::kmc(Register dstBuff, Register srcBuff) {
+  // The same register may be passed for dstBuff and srcBuff (encryption in-place).
+  // Their storage must not overlap destructively, and neither may overlap the parameter block.
+  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
+  assert((dstBuff->encoding() & 1) == 0, "dst buffer addr must be an even register");
+  assert((srcBuff->encoding() & 1) == 0, "src buffer addr/len must be an even/odd register pair");
+  // Emit KMC inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_kmc(dstBuff, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::cksm(Register crcBuff, Register srcBuff) {
+  assert((srcBuff->encoding() & 1) == 0, "src buffer addr/len must be an even/odd register pair");
+  // Emit CKSM inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_cksm(crcBuff, srcBuff);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) {
+  assert((r1->encoding() & 1) == 0, "dst addr/src len must be an even/odd register pair");
+  assert((m3 & 0xe) == 0, "Unused mask bits must be zero");
+  // Emit TROO inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_troo(r1, r2, m3);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) {
+  assert((r1->encoding() & 1) == 0, "dst addr/src len must be an even/odd register pair");
+  assert((m3 & 0xe) == 0, "Unused mask bits must be zero");
+  // Emit TROT inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_trot(r1, r2, m3);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::translate_to(Register r1, Register r2, uint m3) {
+  assert((r1->encoding() & 1) == 0, "dst addr/src len must be an even/odd register pair");
+  assert((m3 & 0xe) == 0, "Unused mask bits must be zero");
+  // Emit TRTO inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_trto(r1, r2, m3);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) {
+  assert((r1->encoding() & 1) == 0, "dst addr/src len must be an even/odd register pair");
+  assert((m3 & 0xe) == 0, "Unused mask bits must be zero");
+  // Emit TRTT inside a loop that re-issues it for as long as it reports CC==3.
+  Label L_again;
+  bind(L_again);
+  Assembler::z_trtt(r1, r2, m3);
+  Assembler::z_brc(Assembler::bcondOverflow, L_again); // CC==3 (iterate)
+}
+
+void MacroAssembler::generate_safepoint_check(Label& slow_path, Register scratch, bool may_relocate) {
+  const Register state_addr = (scratch == noreg) ? Z_R1 : scratch; // Z_R1 is used if no scratch register is given.
+  address sp_state = SafepointSynchronize::address_of_state();
+  BLOCK_COMMENT("safepoint check:");
+
+  // Get the address of the safepoint state into state_addr.
+  if (!may_relocate) {
+    load_absolute_address(state_addr, sp_state);
+  } else if (RelAddr::is_in_range_of_RelAddr32(sp_state - this->pc())) {
+    // Within RelAddr32 range of the current pc: record a relocation for the address load.
+    RelocationHolder rspec = external_word_Relocation::spec(sp_state);
+    this->relocate(rspec, relocInfo::pcrel_addr_format);
+    load_absolute_address(state_addr, sp_state);
+  } else {
+    load_const_optimized(state_addr, sp_state);
+  }
+  // Take the slow path unless the byte at offset 4-1 of the state equals _not_synchronized.
+  z_cli(/*SafepointSynchronize::sz_state()*/4-1, state_addr, SafepointSynchronize::_not_synchronized);
+  z_brne(slow_path);
+}
+
+
+void MacroAssembler::generate_type_profiling(const Register Rdata, // Base address; receiver rows and the total counter are addressed relative to it.
+ const Register Rreceiver_klass, // Klass to look up in the rows / to record in a free row.
+ const Register Rwanted_receiver_klass, // Overwritten: receives the klass stored in the row being inspected.
+ const Register Rmatching_row, // Overwritten: address of the row being inspected / selected.
+ bool is_virtual_call) { // If true, the total counter is incremented when no row can be used.
+ const int row_size = in_bytes(ReceiverTypeData::receiver_offset(1)) -
+ in_bytes(ReceiverTypeData::receiver_offset(0));
+ const int num_rows = ReceiverTypeData::row_limit();
+ NearLabel found_free_row;
+ NearLabel do_increment;
+ NearLabel found_no_slot;
+
+ BLOCK_COMMENT("type profiling {");
+
+ // search for:
+ // a) A row which already holds Rreceiver_klass.
+ // b) The *first* empty row.
+
+ // First search for a) only, just running over b) with no regard.
+ // This is possible because
+ // row_receiver == receiver_klass && row_receiver == 0
+ // is never true (receiver_klass can't be zero).
+ for (int row_num = 0; row_num < num_rows; row_num++) {
+ // Row_offset should be a well-behaved positive number. The generated code relies
+ // on that wrt constant code size. Add2reg can handle all row_offset values, but
+ // will have to vary generated code size.
+ int row_offset = in_bytes(ReceiverTypeData::receiver_offset(row_num));
+ assert(Displacement::is_shortDisp(row_offset), "Limitation of generated code");
+
+ // Does this row hold Rreceiver_klass?
+ if (VM_Version::has_CompareBranch()) {
+ z_lg(Rwanted_receiver_klass, row_offset, Z_R0, Rdata);
+ // Rmatching_row = Rdata + row_offset;
+ add2reg(Rmatching_row, row_offset, Rdata);
+ // if (*row_recv == (intptr_t) receiver_klass) goto do_increment;
+ compare64_and_branch(Rwanted_receiver_klass, Rreceiver_klass, Assembler::bcondEqual, do_increment);
+ } else {
+ add2reg(Rmatching_row, row_offset, Rdata);
+ z_cg(Rreceiver_klass, row_offset, Z_R0, Rdata); // Compare directly against memory; Rwanted_receiver_klass stays untouched here.
+ z_bre(do_increment);
+ }
+ }
+
+ // Now that we did not find a match, let's search for b).
+
+ // We could save the first calculation of Rmatching_row if we would search for a) in reverse order.
+ // We would then end up here with Rmatching_row containing the value for row_num == 0.
+ // We would not see much benefit, if any at all, because the CPU can schedule
+ // two instructions together with a branch anyway.
+ for (int row_num = 0; row_num < num_rows; row_num++) {
+ int row_offset = in_bytes(ReceiverTypeData::receiver_offset(row_num));
+
+ // Has this row a zero receiver_klass, i.e. is it empty?
+ if (VM_Version::has_CompareBranch()) {
+ z_lg(Rwanted_receiver_klass, row_offset, Z_R0, Rdata);
+ // Rmatching_row = Rdata + row_offset
+ add2reg(Rmatching_row, row_offset, Rdata);
+ // if (*row_recv == (intptr_t) 0) goto found_free_row
+ compare64_and_branch(Rwanted_receiver_klass, (intptr_t)0, Assembler::bcondEqual, found_free_row);
+ } else {
+ add2reg(Rmatching_row, row_offset, Rdata);
+ load_and_test_long(Rwanted_receiver_klass, Address(Rdata, row_offset));
+ z_bre(found_free_row); // zero -> Found a free row.
+ }
+ }
+
+ // No match, no empty row found.
+ // Increment total counter to indicate polymorphic case.
+ if (is_virtual_call) {
+ add2mem_64(Address(Rdata, CounterData::count_offset()), 1, Rmatching_row); // Rmatching_row is presumably just a scratch register here - TODO confirm against add2mem_64.
+ }
+ z_bru(found_no_slot);
+
+ // Here we found an empty row, but we have not found a row holding Rreceiver_klass.
+ // Rmatching_row holds the address to the first empty row.
+ bind(found_free_row);
+ // Store receiver_klass into empty slot.
+ z_stg(Rreceiver_klass, 0, Z_R0, Rmatching_row);
+
+ // Increment the counter of Rmatching_row.
+ bind(do_increment);
+ ByteSize counter_offset = ReceiverTypeData::receiver_count_offset(0) - ReceiverTypeData::receiver_offset(0); // Distance from a row's receiver slot to its counter.
+ add2mem_64(Address(Rmatching_row, counter_offset), 1, Rdata); // Rdata is presumably used as scratch here - TODO confirm it may be overwritten.
+
+ bind(found_no_slot);
+
+ BLOCK_COMMENT("} type profiling");
+}
+
+//---------------------------------------
+// Helpers for Intrinsic Emitters
+//---------------------------------------
+
+/**
+ * Emits code to fold one byte into a CRC-32 value using a lookup table:
+ * uint32_t crc;
+ * crc = timesXtoThe32[val & 0xFF] ^ (crc >> 8);
+ *
+ * @param [in,out]crc Register containing the crc; receives the folded value.
+ * @param [in]val Register whose low-order byte selects the table entry. May be the same register as crc.
+ * @param [in]table Register containing the table address.
+ * @param tmp Work register receiving the table index (low byte of val, shifted left by 2).
+ * Must differ from crc and table; update_byte_crc32() passes val here.
+ */
+void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
+ assert_different_registers(crc, table, tmp);
+ assert_different_registers(val, table);
+ // Both branches emit the same two instructions. Only their order differs: if crc and val
+ // share a register, the index must be extracted before that register is shifted.
+ if (crc == val) { // Must rotate first to use the unmodified value.
+ rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
+ z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits.
+ } else {
+ z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits.
+ rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
+ }
+ // XOR the table entry addressed by table + tmp into crc.
+ z_x(crc, Address(table, tmp, 0));
+}
+
+/**
+ * Emits code to fold the low-order byte of crc into crc itself:
+ * uint32_t crc;
+ * crc = timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
+ *
+ * Delegates to fold_byte_crc32() with crc serving as both crc and val.
+ *
+ * @param [in,out]crc Register containing the crc.
+ * @param [in]table Register containing the table address.
+ * @param tmp Work register, passed through to fold_byte_crc32().
+ */
+void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
+ fold_byte_crc32(crc, crc, table, tmp);
+}
+
+/**
+ * Emits code to update CRC-32 with a byte value according to constants in table.
+ *
+ * @param [in,out]crc Register containing the crc.
+ * @param [in]val Register containing the byte to fold into the CRC.
+ * Clobbered: it is overwritten with (val ^ crc) and then also
+ * serves as the work register of fold_byte_crc32().
+ * @param [in]table Register containing the table of crc constants.
+ *
+ * uint32_t crc;
+ * val = crc_table[(val ^ crc) & 0xFF];
+ * crc = val ^ (crc >> 8);
+ */
+void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
+ z_xr(val, crc); // val ^= crc; only the low-order byte is used as table index.
+ fold_byte_crc32(crc, val, table, val);
+}
+
+
+/**
+ * Emits a loop which updates the CRC one input byte at a time.
+ *
+ * @param crc register containing existing CRC (32-bit)
+ * @param buf register pointing to input byte buffer (byte*); advanced by one per processed byte
+ * @param len register containing number of bytes; used as loop counter and thus modified
+ * @param table register pointing to CRC table
+ * @param data work register receiving the current input byte (clobbered)
+ * @param invertCRC if true, code is emitted to complement crc before and after the loop
+ *
+ * If len is not positive, the emitted code branches around everything,
+ * including the complement operations, and leaves crc untouched.
+ */
+void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
+ Register data, bool invertCRC) {
+ assert_different_registers(crc, buf, len, table, data);
+
+ Label L_mainLoop, L_done;
+ const int mainLoop_stepping = 1;
+
+ // Process all bytes in a single-byte loop.
+ z_ltr(len, len); // Test len (32-bit).
+ z_brnh(L_done); // Nothing to do if len <= 0.
+
+ if (invertCRC) {
+ not_(crc, noreg, false); // ~c
+ }
+
+ bind(L_mainLoop);
+ z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
+ add2reg(buf, mainLoop_stepping); // Advance buffer position.
+ update_byte_crc32(crc, data, table);
+ z_brct(len, L_mainLoop); // Iterate.
+
+ if (invertCRC) {
+ not_(crc, noreg, false); // ~c
+ }
+
+ bind(L_done);
+}
+
+/**
+ * Emits code to update CRC-32 with a 4-byte value according to constants in table.
+ * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c.
+ *
+ * @param [in,out]crc Register containing the crc.
+ * @param buf Register pointing to the input buffer.
+ * @param table Register pointing to the CRC table; columns 4..7 are used.
+ * @param bufDisp Displacement, relative to buf, of the 4 bytes to process.
+ * @param bufInc Amount added to buf after the 4 bytes were read. No code is emitted if zero.
+ * @param t0..t3 Work registers (clobbered). t0 may be the same register as crc;
+ * the register copies are then omitted (lgr_if_needed).
+ */
+void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
+ Register t0, Register t1, Register t2, Register t3) {
+ // This is what we implement (the DOBIG4 part):
+ //
+ // #define DOBIG4 c ^= *++buf4; \
+ // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+ // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+ // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+ // Byte offsets of table columns 4..7 (4 bytes per table entry).
+ const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
+ const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
+ const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
+ const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
+
+ // XOR crc with next four bytes of buffer.
+ lgr_if_needed(t0, crc);
+ z_x(t0, Address(buf, bufDisp));
+ if (bufInc != 0) {
+ add2reg(buf, bufInc);
+ }
+
+ // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices.
+ // t0 is the source of all four extractions and must therefore be overwritten last.
+ rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2
+ rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2
+ rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2
+ rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2
+
+ // Load pre-calculated table values.
+ // Use columns 4..7 for big-endian.
+ z_ly(t3, Address(table, t3, (intptr_t)ix0));
+ z_ly(t2, Address(table, t2, (intptr_t)ix1));
+ z_ly(t1, Address(table, t1, (intptr_t)ix2));
+ z_ly(t0, Address(table, t0, (intptr_t)ix3));
+
+ // Calculate new crc from table values.
+ z_xr(t2, t3);
+ z_xr(t0, t1);
+ z_xr(t0, t2); // Now t0 contains the new crc value.
+ lgr_if_needed(crc, t0); // Copy it to crc (no code emitted if t0 is crc).
+}
+
+/**
+ * Emits the CRC-32 kernel processing 8 bytes (two words) per main loop iteration.
+ * Remaining bytes (fewer than 8) are processed by a byte loop.
+ * crc is complemented on entry and on exit.
+ *
+ * @param crc register containing existing CRC (32-bit)
+ * @param buf register pointing to input byte buffer (byte*)
+ * @param len register containing number of bytes
+ * @param table register pointing to CRC table
+ * @param t0..t3 work registers; t0 holds the current byte in the tail loop
+ *
+ * Z_R0 is used as main loop counter.
+ * Original note: uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
+ * (Presumably these are the registers callers pass as t0..t3 - verify against the caller.)
+ */
+void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
+ Register t0, Register t1, Register t2, Register t3) {
+ assert_different_registers(crc, buf, len, table);
+
+ Label L_mainLoop, L_tail;
+ Register data = t0;
+ Register ctr = Z_R0;
+ const int mainLoop_stepping = 8;
+ const int tailLoop_stepping = 1;
+ const int log_stepping = exact_log2(mainLoop_stepping);
+
+ // Don't test for len <= 0 here. This pathological case should not occur anyway.
+ // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
+ // The situation itself is detected and handled correctly by the conditional branch
+ // following z_srag(ctr, len, log_stepping) and by the len test in update_byteLoop_crc32().
+
+ not_(crc, noreg, false); // 1s complement of crc
+
+#if 0
+ {
+ // Pre-mainLoop alignment did not show any positive effect on performance.
+ // We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.
+
+ z_cghi(len, mainLoop_stepping); // Alignment is useless for short data streams.
+ z_brnh(L_tail);
+
+ // Align buf to word (4-byte) boundary.
+ z_lcr(ctr, buf);
+ rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
+ z_sgfr(len, ctr); // Remaining len after alignment.
+
+ update_byteLoop_crc32(crc, buf, ctr, table, data, false);
+ }
+#endif
+
+ // Check for short (<mainLoop_stepping bytes) buffer.
+ z_srag(ctr, len, log_stepping); // ctr = number of main loop iterations.
+ z_brnh(L_tail);
+
+ z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data.
+ rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
+
+ BIND(L_mainLoop);
+ // crc is passed as t0 as well, so the new value is calculated in place.
+ // Only the second call advances buf.
+ update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3);
+ update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3);
+ z_brct(ctr, L_mainLoop); // Iterate.
+
+ z_lrvr(crc, crc); // Revert byte order back to original.
+
+ // Process last few (<8) bytes of buffer.
+ BIND(L_tail);
+ update_byteLoop_crc32(crc, buf, len, table, data, false);
+
+ not_(crc, noreg, false); // 1s complement of crc
+}
+
+/**
+ * Emits the CRC-32 kernel processing 4 bytes (one word) per main loop iteration.
+ * Remaining bytes (fewer than 4) are processed by a byte loop.
+ * crc is complemented on entry and on exit.
+ *
+ * @param crc register containing existing CRC (32-bit)
+ * @param buf register pointing to input byte buffer (byte*)
+ * @param len register containing number of bytes
+ * @param table register pointing to CRC table
+ * @param t0..t3 work registers; t0 holds the current byte in the tail loop
+ *
+ * Z_R0 is used as main loop counter.
+ * Original note: uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
+ * (Presumably these are the registers callers pass as t0..t3 - verify against the caller.)
+ */
+void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
+ Register t0, Register t1, Register t2, Register t3) {
+ assert_different_registers(crc, buf, len, table);
+
+ Label L_mainLoop, L_tail;
+ Register data = t0;
+ Register ctr = Z_R0;
+ const int mainLoop_stepping = 4;
+ const int log_stepping = exact_log2(mainLoop_stepping);
+
+ // Don't test for len <= 0 here. This pathological case should not occur anyway.
+ // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
+ // The situation itself is detected and handled correctly by the conditional branch
+ // following z_srag(ctr, len, log_stepping) and by the len test in update_byteLoop_crc32().
+
+ not_(crc, noreg, false); // 1s complement of crc
+
+ // Check for short (<4 bytes) buffer.
+ z_srag(ctr, len, log_stepping); // ctr = number of main loop iterations.
+ z_brnh(L_tail);
+
+ z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data.
+ rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
+
+ BIND(L_mainLoop);
+ // crc is passed as t0 as well, so the new value is calculated in place.
+ update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
+ z_brct(ctr, L_mainLoop); // Iterate.
+ z_lrvr(crc, crc); // Revert byte order back to original.
+
+ // Process last few (<4) bytes of buffer.
+ BIND(L_tail);
+ update_byteLoop_crc32(crc, buf, len, table, data, false);
+
+ not_(crc, noreg, false); // 1s complement of crc
+}
+
+/**
+ * Emits the byte-at-a-time CRC-32 kernel. All work is delegated to
+ * update_byteLoop_crc32(), which is asked to complement crc before and after the loop.
+ *
+ * @param crc register containing existing CRC (32-bit)
+ * @param buf register pointing to input byte buffer (byte*)
+ * @param len register containing number of bytes
+ * @param table register pointing to CRC table
+ * @param t0 work register receiving the current input byte
+ * @param t1..t3 not used by this kernel
+ */
+void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
+                                        Register t0, Register t1, Register t2, Register t3) {
+  assert_different_registers(crc, buf, len, table);
+
+  // t0 serves as the data register; 'true' requests the crc inversion.
+  update_byteLoop_crc32(crc, buf, len, table, t0, true);
+}
+
+// Emits code to update crc with exactly one byte, read from the address in buf.
+// crc is complemented before and after the update.
+// len takes part in the register check only; no emitted code reads it.
+// tmp receives the input byte and is clobbered.
+void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) {
+ assert_different_registers(crc, buf, len, table, tmp);
+
+ not_(crc, noreg, false); // ~c
+
+ z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
+ update_byte_crc32(crc, tmp, table);
+
+ not_(crc, noreg, false); // ~c
+}
+
+//
+// Code for BigInteger::multiplyToLen() intrinsic.
+//
+
+// Emits a 128-bit accumulation of two 64-bit addends:
+// dest_lo += src1 + src2
+// dest_hi += carry1 + carry2
+// where carry1 and carry2 are the carries of the two additions into dest_lo.
+// Z_R7 is destroyed ! (It is cleared and provides the zero operand of the add-with-carry.)
+void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo,
+ Register src1, Register src2) {
+ clear_reg(Z_R7);
+ z_algr(dest_lo, src1); // dest_lo += src1, sets carry.
+ z_alcgr(dest_hi, Z_R7); // dest_hi += 0 + carry.
+ z_algr(dest_lo, src2); // dest_lo += src2, sets carry.
+ z_alcgr(dest_hi, Z_R7); // dest_hi += 0 + carry.
+}
+
+// Multiply 64 bit by 64 bit first loop.
+//
+// x, y, z Array base addresses.
+// xstart Index into x; decremented by one here to address two ints of x at once.
+// x_xstart Receives the multiplier loaded from x (64 bit, or 32 bit zero-extended if x has length 1).
+// y_idx Receives the current element(s) of y.
+// carry In/out: carry between iterations. It is NOT stored to z here
+// (the "z[xstart] = carry" of the pseudo code below is left to the caller).
+// product High half of the 128-bit product; product->successor() holds the low half.
+// idx, kdx Loop indices, both decremented by two per iteration.
+// Z_R1_scratch and Z_R7 are destroyed.
+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart,
+ Register x_xstart,
+ Register y, Register y_idx,
+ Register z,
+ Register carry,
+ Register product,
+ Register idx, Register kdx) {
+ // jlong carry, x[], y[], z[];
+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+ // huge_128 product = y[idx] * x[xstart] + carry;
+ // z[kdx] = (jlong)product;
+ // carry = (jlong)(product >>> 64);
+ // }
+ // z[xstart] = carry;
+
+ Label L_first_loop, L_first_loop_exit;
+ Label L_one_x, L_one_y, L_multiply;
+
+ z_aghi(xstart, -1);
+ z_brl(L_one_x); // Special case: length of x is 1.
+
+ // Load next two integers of x.
+ z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
+ mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));
+
+
+ bind(L_first_loop);
+
+ // Two separate decrements: the first detects "nothing left",
+ // the second detects "only one int of y left".
+ z_aghi(idx, -1);
+ z_brl(L_first_loop_exit);
+ z_aghi(idx, -1);
+ z_brl(L_one_y);
+
+ // Load next two integers of y.
+ z_sllg(Z_R1_scratch, idx, LogBytesPerInt);
+ mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0));
+
+
+ bind(L_multiply);
+
+ Register multiplicand = product->successor();
+ Register product_low = multiplicand;
+
+ lgr_if_needed(multiplicand, x_xstart);
+ z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand
+ clear_reg(Z_R7); // Zero operand for the add-with-carry below.
+ z_algr(product_low, carry); // Add carry to result.
+ z_alcgr(product, Z_R7); // Add carry of the last addition.
+ add2reg(kdx, -2);
+
+ // Store result.
+ z_sllg(Z_R7, kdx, LogBytesPerInt);
+ reg2mem_opt(product_low, Address(z, Z_R7, 0));
+ lgr_if_needed(carry, product); // High half becomes the carry of the next iteration.
+ z_bru(L_first_loop);
+
+
+ bind(L_one_y); // Load one 32 bit portion of y as (0,value).
+
+ clear_reg(y_idx);
+ mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false);
+ z_bru(L_multiply);
+
+
+ bind(L_one_x); // Load one 32 bit portion of x as (0,value).
+
+ clear_reg(x_xstart);
+ mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
+ z_bru(L_first_loop);
+
+ bind(L_first_loop_exit);
+}
+
+// Multiply 64 bit by 64 bit and add 128 bit.
+//
+// x_xstart 64-bit multiplier.
+// y, z Array base addresses. The same index (idx) and offset are used for both.
+// yz_idx Work register: first receives the element of y, then the element of z.
+// idx Index in units of ints; scaled by BytesPerInt for addressing.
+// carry Carry-in, added to the product.
+// product High half of the result; product->successor() holds the low half,
+// which is stored back to z.
+// offset Additional byte displacement applied to the y and z accesses.
+// Z_R7 is destroyed (scaled index, and zero operand within add2_with_carry).
+void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
+ Register z,
+ Register yz_idx, Register idx,
+ Register carry, Register product,
+ int offset) {
+ // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
+ // z[kdx] = (jlong)product;
+
+ Register multiplicand = product->successor();
+ Register product_low = multiplicand;
+
+ z_sllg(Z_R7, idx, LogBytesPerInt);
+ mem2reg_opt(yz_idx, Address(y, Z_R7, offset));
+
+ lgr_if_needed(multiplicand, x_xstart);
+ z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
+ mem2reg_opt(yz_idx, Address(z, Z_R7, offset));
+
+ add2_with_carry(product, product_low, carry, yz_idx);
+
+ // add2_with_carry() destroyed Z_R7, so the scaled index must be recalculated.
+ z_sllg(Z_R7, idx, LogBytesPerInt);
+ reg2mem_opt(product_low, Address(z, Z_R7, offset));
+
+}
+
+// Multiply 128 bit by 128 bit. Unrolled inner loop.
+//
+// Each iteration of the main loop consumes four ints (two 64-bit elements) of y and z.
+// Left-over parts (two ints and/or one int) are handled after the loop.
+//
+// x_xstart 64-bit multiplier.
+// y, z Array base addresses.
+// yz_idx Work register for elements of y and z.
+// idx In: number of ints to process. Modified.
+// jdx Work register: main loop iteration counter (idx / 4).
+// carry In/out: carry.
+// product High half of the 128-bit product; product->successor() holds the low half.
+// carry2 Work register: intermediate carry within one main loop iteration.
+// Z_R7 is destroyed.
+void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
+ Register y, Register z,
+ Register yz_idx, Register idx,
+ Register jdx,
+ Register carry, Register product,
+ Register carry2) {
+ // jlong carry, x[], y[], z[];
+ // int kdx = ystart+1;
+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
+ // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
+ // z[kdx+idx+1] = (jlong)product;
+ // jlong carry2 = (jlong)(product >>> 64);
+ // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
+ // z[kdx+idx] = (jlong)product;
+ // carry = (jlong)(product >>> 64);
+ // }
+ // idx += 2;
+ // if (idx > 0) {
+ // product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
+ // z[kdx+idx] = (jlong)product;
+ // carry = (jlong)(product >>> 64);
+ // }
+
+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
+
+ // scale the index
+ // jdx = idx / 4 = number of main loop iterations.
+ lgr_if_needed(jdx, idx);
+ and_imm(jdx, 0xfffffffffffffffcL);
+ rshift(jdx, 2);
+
+
+ bind(L_third_loop);
+
+ z_aghi(jdx, -1);
+ z_brl(L_third_loop_exit);
+ add2reg(idx, -4);
+
+ // Upper element first (byte offset 8), then the lower one (byte offset 0).
+ multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
+ lgr_if_needed(carry2, product);
+
+ multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
+ lgr_if_needed(carry, product);
+ z_bru(L_third_loop);
+
+
+ bind(L_third_loop_exit); // Handle any left-over operand parts.
+
+ and_imm(idx, 0x3);
+ z_brz(L_post_third_loop_done);
+
+ Label L_check_1;
+
+ // At least two ints left? Then process one more 64-bit element.
+ z_aghi(idx, -2);
+ z_brl(L_check_1);
+
+ multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
+ lgr_if_needed(carry, product);
+
+
+ bind(L_check_1);
+
+ // Undo the decrement above and test for one remaining int.
+ add2reg(idx, 0x2);
+ and_imm(idx, 0x1);
+ z_aghi(idx, -1);
+ z_brl(L_post_third_loop_done);
+
+ Register multiplicand = product->successor();
+ Register product_low = multiplicand;
+
+ // Process the last int: 32-bit (zero-extended) elements of y and z.
+ z_sllg(Z_R7, idx, LogBytesPerInt);
+ clear_reg(yz_idx);
+ mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false);
+ lgr_if_needed(multiplicand, x_xstart);
+ z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
+ clear_reg(yz_idx);
+ mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false);
+
+ add2_with_carry(product, product_low, yz_idx, carry);
+
+ // add2_with_carry() destroyed Z_R7, so the scaled index must be recalculated.
+ z_sllg(Z_R7, idx, LogBytesPerInt);
+ reg2mem_opt(product_low, Address(z, Z_R7, 0), false);
+ // Only 32 bits were stored. The new carry is the 128-bit result shifted right by 32:
+ // carry = (product_low >> 32) | (product << 32)
+ rshift(product_low, 32);
+
+ lshift(product, 32);
+ z_ogr(product_low, product);
+ lgr_if_needed(carry, product_low);
+
+ bind(L_post_third_loop_done);
+}
+
+// Emits the code for the BigInteger::multiplyToLen() intrinsic: z = x * y,
+// where x, y, z are int arrays of lengths xlen, ylen, and zlen, respectively.
+// zlen is not passed in a register; it is read as a 32-bit value from the
+// stack slot _z_abi(remaining_cargs).
+//
+// Z_R7..Z_R13 are saved on entry and restored on exit.
+// Z_R0_scratch (together with its successor register), Z_R1_scratch, Z_R7, and Z_R8
+// are used as work registers in addition to tmp1..tmp5.
+// The stack slots _z_abi(carg_1).._z_abi(carg_4) are used as spill area.
+void MacroAssembler::multiply_to_len(Register x, Register xlen,
+ Register y, Register ylen,
+ Register z,
+ Register tmp1, Register tmp2,
+ Register tmp3, Register tmp4,
+ Register tmp5) {
+ ShortBranchVerifier sbv(this);
+
+ assert_different_registers(x, xlen, y, ylen, z,
+ tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7);
+ assert_different_registers(x, xlen, y, ylen, z,
+ tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8);
+
+ z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
+
+ // In openJdk, we store the argument as 32-bit value to slot.
+ Address zlen(Z_SP, _z_abi(remaining_cargs)); // Int in long on big endian.
+
+ const Register idx = tmp1;
+ const Register kdx = tmp2;
+ const Register xstart = tmp3;
+
+ const Register y_idx = tmp4;
+ const Register carry = tmp5;
+ const Register product = Z_R0_scratch;
+ const Register x_xstart = Z_R8;
+
+ // First Loop.
+ //
+ // final static long LONG_MASK = 0xffffffffL;
+ // int xstart = xlen - 1;
+ // int ystart = ylen - 1;
+ // long carry = 0;
+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+ // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
+ // z[kdx] = (int)product;
+ // carry = product >>> 32;
+ // }
+ // z[xstart] = (int)carry;
+ //
+
+ lgr_if_needed(idx, ylen); // idx = ylen
+ z_llgf(kdx, zlen); // C2 does not respect int to long conversion for stub calls, thus load zero-extended.
+ clear_reg(carry); // carry = 0
+
+ Label L_done;
+
+ lgr_if_needed(xstart, xlen);
+ z_aghi(xstart, -1);
+ z_brl(L_done); // Nothing to do if xlen is zero.
+
+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+
+ NearLabel L_second_loop;
+ compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop);
+
+ // Store the carry left over by the first loop into z (one or two ints, depending on kdx).
+ NearLabel L_carry;
+ z_aghi(kdx, -1);
+ z_brz(L_carry);
+
+ // Store lower 32 bits of carry.
+ z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
+ reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+ rshift(carry, 32);
+ z_aghi(kdx, -1);
+
+
+ bind(L_carry);
+
+ // Store upper 32 bits of carry.
+ z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
+ reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+
+ // Second and third (nested) loops.
+ //
+ // for (int i = xstart-1; i >= 0; i--) { // Second loop
+ // carry = 0;
+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
+ // (z[k] & LONG_MASK) + carry;
+ // z[k] = (int)product;
+ // carry = product >>> 32;
+ // }
+ // z[i] = (int)carry;
+ // }
+ //
+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = x_xstart (Z_R8)
+
+ const Register jdx = tmp1;
+
+ bind(L_second_loop);
+
+ clear_reg(carry); // carry = 0;
+ lgr_if_needed(jdx, ylen); // j = ystart+1
+
+ z_aghi(xstart, -1); // i = xstart-1;
+ z_brl(L_done);
+
+ // Use free slots in the current stackframe instead of push/pop.
+ Address zsave(Z_SP, _z_abi(carg_1));
+ reg2mem_opt(z, zsave);
+
+
+ Label L_last_x;
+
+ z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
+ load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j
+ z_aghi(xstart, -1); // i = xstart-1;
+ z_brl(L_last_x); // Only one int of x left.
+
+ // Load next two integers of x.
+ z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
+ mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));
+
+
+ Label L_third_loop_prologue;
+
+ bind(L_third_loop_prologue);
+
+ Address xsave(Z_SP, _z_abi(carg_2));
+ Address xlensave(Z_SP, _z_abi(carg_3));
+ Address ylensave(Z_SP, _z_abi(carg_4));
+
+ // x and ylen are handed to the inner loop as work registers (carry2 and jdx),
+ // so they must be saved. xstart is saved to be reloaded into xlen below.
+ reg2mem_opt(x, xsave);
+ reg2mem_opt(xstart, xlensave);
+ reg2mem_opt(ylen, ylensave);
+
+
+ multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);
+
+ mem2reg_opt(z, zsave);
+ mem2reg_opt(x, xsave);
+ mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter!
+ mem2reg_opt(ylen, ylensave);
+
+ // Store the carry of the inner loop into z (32 bits at a time).
+ add2reg(tmp3, 1, xlen);
+ z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
+ reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+ z_aghi(tmp3, -1);
+ z_brl(L_done);
+
+ rshift(carry, 32);
+ z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
+ reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+ z_bru(L_second_loop);
+
+ // Next infrequent code is moved outside loops.
+ bind(L_last_x);
+
+ // Load one 32 bit portion of x as (0,value).
+ clear_reg(x_xstart);
+ mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
+ z_bru(L_third_loop_prologue);
+
+ bind(L_done);
+
+ z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
+}
+
+#ifndef PRODUCT
+// Emits a check of the current condition code.
+// check_equal==true:  execution continues if CC indicates "equal", otherwise stop(msg, id) is reached.
+// check_equal==false: execution continues if CC indicates "not equal", otherwise stop(msg, id) is reached.
+void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
+  Label L_passed;
+  if (!check_equal) {
+    z_brne(L_passed);
+  } else {
+    z_bre(L_passed);
+  }
+  // Fall through: the condition was not met.
+  stop(msg, id);
+  bind(L_passed);
+}
+
+// Emits a check of the current condition code:
+// stop(msg, id) is reached if CC indicates "low"; otherwise it is branched around.
+void MacroAssembler::asm_assert_low(const char *msg, int id) {
+  Label L_not_low;
+  z_brnl(L_not_low); // CC is not "low": skip the stop.
+  stop(msg, id);
+  bind(L_not_low);
+}
+
+// Emits a check of the current condition code:
+// stop(msg, id) is reached if CC indicates "high"; otherwise it is branched around.
+void MacroAssembler::asm_assert_high(const char *msg, int id) {
+  Label L_not_high;
+  z_brnh(L_not_high); // CC is not "high": skip the stop.
+  stop(msg, id);
+  bind(L_not_high);
+}
+
+// Same check as asm_assert(), but the failure path uses stop_static(),
+// i.e. non-relocatable code is generated.
+// check_equal==true:  stop is reached if CC indicates "not equal".
+// check_equal==false: stop is reached if CC indicates "equal".
+void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) {
+  Label L_passed;
+  if (!check_equal) {
+    z_brne(L_passed);
+  } else {
+    z_bre(L_passed);
+  }
+  // Fall through: the condition was not met.
+  stop_static(msg, id);
+  bind(L_passed);
+}
+
+// Emits a load-and-test of the memory location mem_base + mem_offset, followed by
+// an assertion on the resulting condition code.
+//   check_equal       passed on to asm_assert()/asm_assert_static(): true expects "equal"
+//                     (value is zero), false expects "not equal".
+//   allow_relocation  selects asm_assert() (true) or asm_assert_static() (false).
+//   size              operand size in bytes; only 4 and 8 are supported.
+// Z_R0 is destroyed (it receives the loaded value).
+void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
+                                          Register mem_base, const char* msg, int id) {
+  if (size == 4) {
+    load_and_test_int(Z_R0, Address(mem_base, mem_offset));
+  } else if (size == 8) {
+    load_and_test_long(Z_R0, Address(mem_base, mem_offset));
+  } else {
+    ShouldNotReachHere();
+  }
+
+  if (!allow_relocation) {
+    asm_assert_static(check_equal, msg, id);
+  } else {
+    asm_assert(check_equal, msg, id);
+  }
+}
+
+// Check the condition
+//   expected_size == FP - SP
+// where FP is the value stored at 0(Z_SP).
+// After transformation:
+//   expected_size + SP == FP
+// The check is emitted no matter whether a tmp register is passed or not.
+// Destroys Register expected_size if no tmp register is passed.
+void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) {
+  if (tmp == noreg) {
+    tmp = expected_size;        // No work register available: calculate in place.
+  } else if (tmp != expected_size) {
+    z_lgr(tmp, expected_size);
+  }
+  z_algr(tmp, Z_SP);            // tmp = expected_size + SP
+  // Compare rather than subtract-logical: a logical subtraction signals a zero
+  // result with CC2, which the "equal" (CC0) branch of asm_assert_eq() does not accept.
+  z_cg(tmp, 0, Z_R0, Z_SP);     // CC "equal" iff tmp == FP
+  asm_assert_eq(msg, id);
+}
+#endif // !PRODUCT
+
+// Thread verification is not implemented on this platform:
+// no code is emitted unless VerifyThread is set, in which case unimplemented() is invoked.
+void MacroAssembler::verify_thread() {
+  if (!VerifyThread) {
+    return;
+  }
+  unimplemented("", 117);
+}
+
+// Plausibility check for oops.
+// Emits nothing unless VerifyOops is set. Otherwise emits a call to the verify_oop
+// stub routine with msg in Z_ARG1 and the oop in Z_ARG2.
+// The return pc and Z_R0..Z_R5 are saved before and restored after the call.
+void MacroAssembler::verify_oop(Register oop, const char* msg) {
+  if (!VerifyOops) return;
+
+  BLOCK_COMMENT("verify_oop {");
+  unsigned int nbytes_save = 6 *8; // Save area for the six registers Z_R0..Z_R5.
+  address entry = StubRoutines::verify_oop_subroutine_entry_address();
+  save_return_pc();
+  push_frame_abi160(nbytes_save);
+  z_stmg(Z_R0, Z_R5, 160, Z_SP);
+
+  z_lgr(Z_ARG2, oop);
+  load_const(Z_ARG1, (address) msg);
+  load_const(Z_R1, entry);
+  z_lg(Z_R1, 0, Z_R1); // entry is the address of the location holding the stub address.
+  call_c(Z_R1);
+
+  z_lmg(Z_R0, Z_R5, 160, Z_SP);
+  pop_frame();
+
+  restore_return_pc();
+  BLOCK_COMMENT("} verify_oop ");
+}
+
+// Textual names of the stop types. stop() and stop_chain() index this table
+// with (type % stop_end) and pass the selected string to stop_on_request().
+const char* MacroAssembler::stop_types[] = {
+ "stop",
+ "untested",
+ "unimplemented",
+ "shouldnotreachhere"
+};
+
+// Runtime target of the code emitted by stop() and stop_chain().
+// Prints the stop type (tp) and the message to tty, then fails a guarantee.
+static void stop_on_request(const char* tp, const char* msg) {
+ tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
+ guarantee(false, "Z assembly code requires stop: %s", msg);
+}
+
+// Emits code which calls stop_on_request() with the name of the stop type and msg.
+// type is reduced modulo stop_end to select the entry of stop_types.
+// id is not used by the emitted code.
+// Z_ARG1, Z_ARG2, and Z_R14 are overwritten.
+void MacroAssembler::stop(int type, const char* msg, int id) {
+ BLOCK_COMMENT(err_msg("stop: %s {", msg));
+
+ // Setup arguments.
+ load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
+ load_const(Z_ARG2, (void*) msg);
+ get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address.
+ save_return_pc(); // Saves return pc Z_R14.
+ push_frame_abi160(0);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+ // The plain disassembler does not recognize illtrap. It instead displays
+ // a 32-bit value. Issuing two illtraps assures the disassembler finds
+ // the proper beginning of the next instruction.
+ z_illtrap(); // Illegal instruction.
+ z_illtrap(); // Illegal instruction.
+
+ BLOCK_COMMENT(" } stop");
+}
+
+// Special version of stop() for code size reduction.
+// Reuses the previously generated call sequence, if any.
+// Generates the call sequence on its own, if necessary.
+// Note: This code will work only in non-relocatable code!
+// The relative address of the data elements (arg1, arg2) must not change.
+// The reentry point must not move relative to its users. This prerequisite
+// should be given for "hand-written" code, if all chain calls are in the same code blob.
+// Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
+//
+// reentry Address of a previously generated call sequence, or NULL if there is none.
+// type Stop type, reduced modulo stop_end to select the entry of stop_types.
+// id Not used by the emitted code.
+// allow_relocation Selects the relocatable (load_const, call_VM_leaf) or the
+// static (load_absolute_address, call_VM_leaf_static) code variant.
+// Returns the address to pass as reentry to subsequent calls:
+// - the unchanged reentry if a branch to it was emitted,
+// - the start of the newly generated call sequence (static variant),
+// - NULL if a new call sequence was generated with allow_relocation set.
+address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) {
+ BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg));
+
+ // Setup arguments.
+ if (allow_relocation) {
+ // Relocatable version (for comparison purposes). Remove after some time.
+ load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
+ load_const(Z_ARG2, (void*) msg);
+ } else {
+ load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]);
+ load_absolute_address(Z_ARG2, (address)msg);
+ }
+ // Reuse the existing call sequence only if it is reachable with a 16-bit relative branch.
+ if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) {
+ BLOCK_COMMENT("branch to reentry point:");
+ z_brc(bcondAlways, reentry);
+ } else {
+ BLOCK_COMMENT("reentry point:");
+ reentry = pc(); // Re-entry point for subsequent stop calls.
+ save_return_pc(); // Saves return pc Z_R14.
+ push_frame_abi160(0);
+ if (allow_relocation) {
+ reentry = NULL; // Prevent reentry if code relocation is allowed.
+ call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+ } else {
+ call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+ }
+ z_illtrap(); // Illegal instruction as emergency stop, should the above call return.
+ }
+ BLOCK_COMMENT(" } stop_chain");
+
+ return reentry;
+}
+
+// Special version of stop() for code size reduction.
+// Assumes constant relative addresses for data and runtime call.
+// Always generates a fresh call sequence via stop_chain() (reentry == NULL, static variant);
+// the reentry address returned by stop_chain() is discarded.
+void MacroAssembler::stop_static(int type, const char* msg, int id) {
+ stop_chain(NULL, type, msg, id, false);
+}
+
+// Not implemented on this platform: invokes unimplemented() with id 710.
+void MacroAssembler::stop_subroutine() {
+ unimplemented("stop_subroutine", 710);
+}
+
+// Emits code which calls warning(msg) from within generated code.
+// All registers are saved before and restored after the call.
+// NOTE(review): the original comment says the message goes to stdout; the actual
+// destination depends on warning() - confirm there.
+void MacroAssembler::warn(const char* msg) {
+ RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14);
+ load_absolute_address(Z_R1, (address) warning);
+ load_absolute_address(Z_ARG1, (address) msg);
+ (void) call(Z_R1);
+ RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers);
+}
+
+#ifndef PRODUCT
+
+// Write pattern 0x0101010101010101 in region [low-before, high+after].
+// before and after are given in words (they are scaled by BytesPerWord).
+// No code is emitted unless ZapEmptyStackFields is set.
+//
+// low, high Registers holding the region boundaries. high is adjusted temporarily
+// and restored afterwards if after is non-zero.
+// val Work register receiving the pattern.
+// addr Work register holding the current store address (used by the loop variant only).
+void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) {
+ if (!ZapEmptyStackFields) return;
+ BLOCK_COMMENT("zap memory region {");
+ load_const_optimized(val, 0x0101010101010101);
+ int size = before + after;
+ if (low == high && size < 5 && size > 0) {
+ // Small region with a single base register: emit up to four individual stores, no loop.
+ int offset = -before*BytesPerWord;
+ for (int i = 0; i < size; ++i) {
+ z_stg(val, Address(low, offset));
+ offset +=(1*BytesPerWord);
+ }
+ } else {
+ // General case: emit a loop storing one word per iteration while addr <= high.
+ add2reg(addr, -before*BytesPerWord, low);
+ if (after) {
+#ifdef ASSERT
+ jlong check = after * BytesPerWord;
+ assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !");
+#endif
+ add2reg(high, after * BytesPerWord);
+ }
+ NearLabel loop;
+ bind(loop);
+ z_stg(val, Address(addr));
+ add2reg(addr, 8);
+ compare64_and_branch(addr, high, bcondNotHigh, loop);
+ if (after) {
+ add2reg(high, -after * BytesPerWord); // Restore high.
+ }
+ }
+ BLOCK_COMMENT("} zap memory region");
+}
+#endif // !PRODUCT
+
+// Emits a test of the bool stored at flag_addr and a branch to _label, which is
+// taken if the flag equals value. _label is bound by the destructor, so the code
+// emitted during the lifetime of this object is skipped in that case.
+// _rscratch is destroyed (flag address, then flag value).
+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) {
+  _masm = masm;
+  _masm->load_absolute_address(_rscratch, (address)flag_addr);
+  // The flag is a bool: load that single byte only (zero extended) and test it.
+  // A 4-byte load would test whatever is stored next to the flag as well.
+  _masm->z_llgc(_rscratch, Address(_rscratch));
+  _masm->z_ltr(_rscratch, _rscratch);
+  if (value) {
+    _masm->z_brne(_label); // Skip if true, i.e. != 0.
+  } else {
+    _masm->z_bre(_label);  // Skip if false, i.e. == 0.
+  }
+}
+
+// Binds _label, the target of the branch emitted by the constructor.
+SkipIfEqual::~SkipIfEqual() {
+ _masm->bind(_label);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1073 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_MACROASSEMBLER_S390_HPP
+#define CPU_S390_VM_MACROASSEMBLER_S390_HPP
+
+#include "asm/assembler.hpp"
+
+#define MODERN_IFUN(name) ((void (MacroAssembler::*)(Register, int64_t, Register, Register))&MacroAssembler::name)
+#define CLASSIC_IFUN(name) ((void (MacroAssembler::*)(Register, int64_t, Register, Register))&MacroAssembler::name)
+#define MODERN_FFUN(name) ((void (MacroAssembler::*)(FloatRegister, int64_t, Register, Register))&MacroAssembler::name)
+#define CLASSIC_FFUN(name) ((void (MacroAssembler::*)(FloatRegister, int64_t, Register, Register))&MacroAssembler::name)
+
+class MacroAssembler: public Assembler {
+ public:
+ MacroAssembler(CodeBuffer* code) : Assembler(code) {}
+
+ //
+ // Optimized instruction emitters
+ //
+
+ // Move register if destination register and source register are different.
+ void lr_if_needed(Register rd, Register rs);
+ void lgr_if_needed(Register rd, Register rs);
+ void llgfr_if_needed(Register rd, Register rs);
+ void ldr_if_needed(FloatRegister rd, FloatRegister rs);
+
+ void move_reg_if_needed(Register dest, BasicType dest_type, Register src, BasicType src_type);
+ void move_freg_if_needed(FloatRegister dest, BasicType dest_type, FloatRegister src, BasicType src_type);
+
+ void freg2mem_opt(FloatRegister reg,
+ int64_t disp,
+ Register index,
+ Register base,
+ void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+ void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+ Register scratch = Z_R0);
+ void freg2mem_opt(FloatRegister reg,
+ const Address &a, bool is_double = true);
+
+ void mem2freg_opt(FloatRegister reg,
+ int64_t disp,
+ Register index,
+ Register base,
+ void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+ void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+ Register scratch = Z_R0);
+ void mem2freg_opt(FloatRegister reg,
+ const Address &a, bool is_double = true);
+
+ void reg2mem_opt(Register reg,
+ int64_t disp,
+ Register index,
+ Register base,
+ void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+ void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
+ Register scratch = Z_R0);
+ // returns offset of the store instruction
+ int reg2mem_opt(Register reg, const Address &a, bool is_double = true);
+
+ void mem2reg_opt(Register reg,
+ int64_t disp,
+ Register index,
+ Register base,
+ void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+ void (MacroAssembler::*classic)(Register, int64_t, Register, Register));
+ void mem2reg_opt(Register reg, const Address &a, bool is_double = true);
+ void mem2reg_signed_opt(Register reg, const Address &a);
+
+ // AND immediate and set condition code, works for 64 bit immediates/operation as well.
+ void and_imm(Register r, long mask, Register tmp = Z_R0, bool wide = false);
+
+ // 1's complement, 32bit or 64bit. Optimized to exploit distinct operands facility.
+ // Note: The condition code is neither preserved nor correctly set by this code!!!
+ // Note: (wide == false) does not protect the high order half of the target register
+ // from alteration. It only serves as an optimization hint for 32-bit results.
+ void not_(Register r1, Register r2 = noreg, bool wide = false); // r1 = ~r2
+
+ // Expanded support of all "rotate_then_<logicalOP>" instructions.
+ //
+ // Generalize and centralize rotate_then_<logicalOP> emitter.
+ // Functional description. For details, see Principles of Operation, Chapter 7, "Rotate Then Insert..."
+ // - Bits in a register are numbered left (most significant) to right (least significant), i.e. [0..63].
+ // - Bytes in a register are numbered left (most significant) to right (least significant), i.e. [0..7].
+ // - Register src is rotated to the left by (nRotate&0x3f) positions.
+ // - Negative values for nRotate result in a rotation to the right by abs(nRotate) positions.
+ // - The bits in positions [lBitPos..rBitPos] of the _ROTATED_ src operand take part in the
+ // logical operation performed on the contents (in those positions) of the dst operand.
+ // - The logical operation that is performed on the dst operand is one of
+ // o insert the selected bits (replacing the original contents of those bit positions)
+ // o and the selected bits with the corresponding bits of the dst operand
+ // o or the selected bits with the corresponding bits of the dst operand
+ // o xor the selected bits with the corresponding bits of the dst operand
+ // - For clear_dst == true, the destination register is cleared before the bits are inserted.
+ // For clear_dst == false, only the bit positions that get data inserted from src
+ // are changed. All other bit positions remain unchanged.
+ // - For test_only == true, the result of the logicalOP is only used to set the condition code, dst remains unchanged.
+ // For test_only == false, the result of the logicalOP replaces the selected bits of dst.
+ // - src32bit and dst32bit indicate the respective register is used as 32bit value only.
+ // Knowledge can simplify code generation.
+ //
+ // Here is an important performance note, valid for all <logicalOP>s except "insert":
+ // Due to the too complex nature of the operation, it cannot be done in a single cycle.
+ // Timing constraints require the instructions to be cracked into two micro-ops, taking
+ // one or two cycles each to execute. In some cases, an additional pipeline bubble might get added.
+ // Macroscopically, that makes up for a three- or four-cycle instruction where you would
+ // expect just a single cycle.
+ // It is thus not beneficial from a performance point of view to exploit those instructions.
+ // Other reasons (code compactness, register pressure, ...) might outweigh this penalty.
+ //
+ unsigned long create_mask(int lBitPos, int rBitPos);
+ void rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
+ int nRotate, bool src32bit, bool dst32bit, bool oneBits);
+ void rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos, int nRotate,
+ bool clear_dst);
+ void rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos, int nRotate,
+ bool test_only);
+ void rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos, int nRotate,
+ bool test_only); // test_only == true: only the condition code is set, dst remains unchanged.
+ void rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos, int nRotate,
+ bool test_only);
+
+ void add64(Register r1, RegisterOrConstant inc);
+
+ // Helper function to multiply the 64bit contents of a register by a 16bit constant.
+ // The optimization tries to avoid the mghi instruction, since it uses the FPU for
+ // calculation and is thus rather slow.
+ //
+ // There is no handling for special cases, e.g. cval==0 or cval==1.
+ //
+ // Returns len of generated code block.
+ unsigned int mul_reg64_const16(Register rval, Register work, int cval);
+
+ // Generic operation r1 := r2 + imm.
+ void add2reg(Register r1, int64_t imm, Register r2 = noreg);
+ // Generic operation r := b + x + d.
+ void add2reg_with_index(Register r, int64_t d, Register x, Register b = noreg);
+
+ // Add2mem* methods for direct memory increment.
+ void add2mem_32(const Address &a, int64_t imm, Register tmp);
+ void add2mem_64(const Address &a, int64_t imm, Register tmp);
+
+ // *((int8_t*)(dst)) |= imm8
+ inline void or2mem_8(Address& dst, int64_t imm8);
+
+ // Load values by size and signedness.
+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
+ void store_sized_value(Register src, Address dst, size_t size_in_bytes);
+
+ // Load values with large offsets to base address.
+ private:
+ int split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate);
+ public:
+ void load_long_largeoffset(Register t, int64_t si20, Register a, Register tmp);
+ void load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp);
+ void load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp);
+
+ private:
+ long toc_distance();
+ public:
+ void load_toc(Register Rtoc);
+ void load_long_pcrelative(Register Rdst, address dataLocation);
+ static int load_long_pcrelative_size() { return 6; }
+ void load_addr_pcrelative(Register Rdst, address dataLocation);
+ static int load_addr_pcrel_size() { return 6; } // Just a LARL.
+
+ // Load a value from memory and test (set CC).
+ void load_and_test_byte (Register dst, const Address &a);
+ void load_and_test_short (Register dst, const Address &a);
+ void load_and_test_int (Register dst, const Address &a);
+ void load_and_test_int2long(Register dst, const Address &a);
+ void load_and_test_long (Register dst, const Address &a);
+
+ // Test a bit in memory. Result is reflected in CC.
+ void testbit(const Address &a, unsigned int bit);
+ // Test a bit in a register. Result is reflected in CC.
+ void testbit(Register r, unsigned int bitPos);
+
+ // Clear a register, i.e. load const zero into reg. Return len (in bytes) of
+ // generated instruction(s).
+ // whole_reg: Clear 64 bits if true, 32 bits otherwise.
+ // set_cc: Use instruction that sets the condition code, if true.
+ int clear_reg(Register r, bool whole_reg = true, bool set_cc = true);
+
+#ifdef ASSERT
+ int preset_reg(Register r, unsigned long pattern, int pattern_len);
+#endif
+
+ // Clear (store zeros) a small piece of memory.
+ // CAUTION: Do not use this for atomic memory clearing. Use store_const() instead.
+ // addr: Address descriptor of memory to clear.
+ // Index register will not be used!
+ // size: Number of bytes to clear.
+ void clear_mem(const Address& addr, unsigned size);
+
+ // Move immediate values to memory. Currently supports 32 and 64 bit stores,
+ // but may be extended to 16 bit store operation, if needed.
+ // For details, see implementation in *.cpp file.
+ int store_const(const Address &dest, long imm,
+ unsigned int lm, unsigned int lc,
+ Register scratch = Z_R0);
+ inline int store_const(const Address &dest, long imm,
+ Register scratch = Z_R0, bool is_long = true);
+
+ // Move/initialize arbitrarily large memory area. No check for destructive overlap.
+ // Being interruptible, these instructions need a retry-loop.
+ void move_long_ext(Register dst, Register src, unsigned int pad);
+
+ void compare_long_ext(Register left, Register right, unsigned int pad);
+ void compare_long_uni(Register left, Register right, unsigned int pad);
+
+ void search_string(Register end, Register start);
+ void search_string_uni(Register end, Register start);
+
+ // Translate instructions
+ // Being interruptible, these instructions need a retry-loop.
+ void translate_oo(Register dst, Register src, uint mask);
+ void translate_ot(Register dst, Register src, uint mask);
+ void translate_to(Register dst, Register src, uint mask);
+ void translate_tt(Register dst, Register src, uint mask);
+
+ // Crypto instructions.
+ // Being interruptible, these instructions need a retry-loop.
+ void cksm(Register crcBuff, Register srcBuff);
+ void km( Register dstBuff, Register srcBuff);
+ void kmc(Register dstBuff, Register srcBuff);
+ void kimd(Register srcBuff);
+ void klmd(Register srcBuff);
+ void kmac(Register srcBuff);
+
+ // nop padding
+ void align(int modulus);
+ void align_address(int modulus);
+
+ //
+ // Constants, loading constants, TOC support
+ //
+ // Safepoint check factored out.
+ void generate_safepoint_check(Label& slow_path, Register scratch = noreg, bool may_relocate = true);
+
+ // Load generic address: d <- base(a) + index(a) + disp(a).
+ inline void load_address(Register d, const Address &a);
+ // Load absolute address (and try to optimize).
+ void load_absolute_address(Register d, address addr);
+
+ // Address of Z_ARG1 and argument_offset.
+ // If temp_reg == arg_slot, arg_slot will be overwritten.
+ Address argument_address(RegisterOrConstant arg_slot,
+ Register temp_reg = noreg,
+ int64_t extra_slot_offset = 0);
+
+ // Load a narrow ptr constant (oop or klass ptr).
+ void load_narrow_oop( Register t, narrowOop a);
+ void load_narrow_klass(Register t, Klass* k);
+
+ static bool is_load_const_32to64(address pos);
+ static bool is_load_narrow_oop(address pos) { return is_load_const_32to64(pos); }
+ static bool is_load_narrow_klass(address pos) { return is_load_const_32to64(pos); }
+
+ static int load_const_32to64_size() { return 6; }
+ static int load_narrow_oop_size() { return load_const_32to64_size(); } // Size in bytes. Must be int: a bool return would collapse the size to 1.
+ static int load_narrow_klass_size() { return load_const_32to64_size(); } // Size in bytes. Must be int: a bool return would collapse the size to 1.
+
+ static int patch_load_const_32to64(address pos, int64_t a);
+ static int patch_load_narrow_oop(address pos, oop o);
+ static int patch_load_narrow_klass(address pos, Klass* k);
+
+ // cOops. CLFI exploit.
+ void compare_immediate_narrow_oop(Register oop1, narrowOop oop2);
+ void compare_immediate_narrow_klass(Register op1, Klass* op2);
+ static bool is_compare_immediate32(address pos);
+ static bool is_compare_immediate_narrow_oop(address pos);
+ static bool is_compare_immediate_narrow_klass(address pos);
+ static int compare_immediate_narrow_size() { return 6; }
+ static int compare_immediate_narrow_oop_size() { return compare_immediate_narrow_size(); }
+ static int compare_immediate_narrow_klass_size() { return compare_immediate_narrow_size(); }
+ static int patch_compare_immediate_32(address pos, int64_t a);
+ static int patch_compare_immediate_narrow_oop(address pos, oop o);
+ static int patch_compare_immediate_narrow_klass(address pos, Klass* k);
+
+ // Load a 32bit constant into a 64bit register.
+ void load_const_32to64(Register t, int64_t x, bool sign_extend=true);
+ // Load a 64 bit constant.
+ void load_const(Register t, long a);
+ inline void load_const(Register t, void* a);
+ inline void load_const(Register t, Label& L);
+ inline void load_const(Register t, const AddressLiteral& a);
+ // Get the 64 bit constant from a `load_const' sequence.
+ static long get_const(address load_const);
+ // Patch the 64 bit constant of a `load_const' sequence. This is a low level
+ // procedure. It neither flushes the instruction cache nor is it atomic.
+ static void patch_const(address load_const, long x);
+ static int load_const_size() { return 12; }
+
+ // Turn a char into boolean. NOTE: destroys r.
+ void c2bool(Register r, Register t = Z_R0);
+
+ // Optimized version of load_const for constants that do not need to be
+ // loaded by a sequence of instructions of fixed length and that do not
+ // need to be patched.
+ int load_const_optimized_rtn_len(Register t, long x, bool emit);
+ inline void load_const_optimized(Register t, long x);
+ inline void load_const_optimized(Register t, void* a);
+ inline void load_const_optimized(Register t, Label& L);
+ inline void load_const_optimized(Register t, const AddressLiteral& a);
+
+ public:
+
+ //----------------------------------------------------------
+ // oops in code -------------
+ // including compressed oops support -------------
+ //----------------------------------------------------------
+
+ // Metadata in code that we have to keep track of.
+ AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
+ AddressLiteral constant_metadata_address(Metadata* obj); // find_index
+
+ // allocate_index
+ AddressLiteral allocate_oop_address(jobject obj);
+ // find_index
+ AddressLiteral constant_oop_address(jobject obj);
+ // Uses allocate_oop_address.
+ inline void set_oop (jobject obj, Register d);
+ // Uses constant_oop_address.
+ inline void set_oop_constant(jobject obj, Register d);
+ // Uses constant_metadata_address.
+ inline bool set_metadata_constant(Metadata* md, Register d);
+
+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+ Register tmp,
+ int offset);
+ //
+ // branch, jump
+ //
+
+ // Use one generic function for all branch patches.
+ static unsigned long patched_branch(address dest_pos, unsigned long inst, address inst_pos);
+
+ void pd_patch_instruction(address branch, address target);
+
+ // Extract relative address from "relative" instructions.
+ static long get_pcrel_offset(unsigned long inst);
+ static long get_pcrel_offset(address pc);
+ static address get_target_addr_pcrel(address pc);
+
+ static inline bool is_call_pcrelative_short(unsigned long inst);
+ static inline bool is_call_pcrelative_long(unsigned long inst);
+ static inline bool is_branch_pcrelative_short(unsigned long inst);
+ static inline bool is_branch_pcrelative_long(unsigned long inst);
+ static inline bool is_compareandbranch_pcrelative_short(unsigned long inst);
+ static inline bool is_branchoncount_pcrelative_short(unsigned long inst);
+ static inline bool is_branchonindex32_pcrelative_short(unsigned long inst);
+ static inline bool is_branchonindex64_pcrelative_short(unsigned long inst);
+ static inline bool is_branchonindex_pcrelative_short(unsigned long inst);
+ static inline bool is_branch_pcrelative16(unsigned long inst);
+ static inline bool is_branch_pcrelative32(unsigned long inst);
+ static inline bool is_branch_pcrelative(unsigned long inst);
+ static inline bool is_load_pcrelative_long(unsigned long inst);
+ static inline bool is_misc_pcrelative_long(unsigned long inst);
+ static inline bool is_pcrelative_short(unsigned long inst);
+ static inline bool is_pcrelative_long(unsigned long inst);
+ // PCrelative TOC access. Variants with address argument.
+ static inline bool is_load_pcrelative_long(address iLoc);
+ static inline bool is_pcrelative_short(address iLoc);
+ static inline bool is_pcrelative_long(address iLoc);
+
+ static inline bool is_pcrelative_instruction(address iloc);
+ static inline bool is_load_addr_pcrel(address a);
+
+ static void patch_target_addr_pcrel(address pc, address con);
+ static void patch_addr_pcrel(address pc, address con) {
+ patch_target_addr_pcrel(pc, con); // Just delegate. This is only for nativeInst_s390.cpp.
+ }
+
+ //---------------------------------------------------------
+ // Some macros for more comfortable assembler programming.
+ //---------------------------------------------------------
+
+ // NOTE: pass NearLabel T to signal that the branch target T will be bound to a near address.
+
+ void compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target);
+ void compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target);
+ void compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target);
+ void compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target);
+
+ void branch_optimized(Assembler::branch_condition cond, address branch_target);
+ void branch_optimized(Assembler::branch_condition cond, Label& branch_target);
+ void compare_and_branch_optimized(Register r1,
+ Register r2,
+ Assembler::branch_condition cond,
+ address branch_addr,
+ bool len64,
+ bool has_sign);
+ void compare_and_branch_optimized(Register r1,
+ jlong x2,
+ Assembler::branch_condition cond,
+ Label& branch_target,
+ bool len64,
+ bool has_sign);
+ void compare_and_branch_optimized(Register r1,
+ Register r2,
+ Assembler::branch_condition cond,
+ Label& branch_target,
+ bool len64,
+ bool has_sign);
+
+ //
+ // Support for frame handling
+ //
+ // Specify the register that should be stored as the return pc in the
+ // current frame (default is R14).
+ inline void save_return_pc(Register pc = Z_R14);
+ inline void restore_return_pc();
+
+ // Get current PC.
+ address get_PC(Register result);
+
+ // Get current PC + offset. Offset given in bytes, must be even!
+ address get_PC(Register result, int64_t offset);
+
+ // Resize current frame, either relative to the current SP or to an absolute address.
+ void resize_frame_sub(Register offset, Register fp, bool load_fp=true);
+ void resize_frame_absolute(Register addr, Register fp, bool load_fp=true);
+ void resize_frame(RegisterOrConstant offset, Register fp, bool load_fp=true);
+
+ // Push a frame of size bytes, if copy_sp is false, old_sp must already
+ // contain a copy of Z_SP.
+ void push_frame(Register bytes, Register old_sp, bool copy_sp = true, bool bytes_with_inverted_sign = false);
+
+ // Push a frame of size `bytes'. No ABI space is provided.
+ // Don't rely on register locking, instead pass a scratch register
+ // (Z_R0 by default).
+ // CAUTION! passing registers >= Z_R2 may produce bad results on
+ // old CPUs!
+ unsigned int push_frame(unsigned int bytes, Register scratch = Z_R0);
+
+ // Push a frame of size `bytes' with abi160 on top.
+ unsigned int push_frame_abi160(unsigned int bytes);
+
+ // Pop current C frame.
+ void pop_frame();
+
+ //
+ // Calls
+ //
+
+ private:
+ address _last_calls_return_pc;
+
+ public:
+ // Support for VM calls. This is the base routine called by the
+ // different versions of call_VM_leaf. The interpreter may customize
+ // this version by overriding it for its purposes (e.g., to
+ // save/restore additional registers when doing a VM call).
+ void call_VM_leaf_base(address entry_point);
+ void call_VM_leaf_base(address entry_point, bool allow_relocation);
+
+ // It is imperative that all calls into the VM are handled via the
+ // call_VM macros. They make sure that the stack linkage is setup
+ // correctly. Call_VM's correspond to ENTRY/ENTRY_X entry points
+ // while call_VM_leaf's correspond to LEAF entry points.
+ //
+ // This is the base routine called by the different versions of
+ // call_VM. The interpreter may customize this version by overriding
+ // it for its purposes (e.g., to save/restore additional registers
+ // when doing a VM call).
+
+ // If no last_java_sp is specified (noreg) then SP will be used instead.
+
+ virtual void call_VM_base(
+ Register oop_result, // Where an oop-result ends up if any; use noreg otherwise.
+ Register last_java_sp, // To set up last_Java_frame in stubs; use noreg otherwise.
+ address entry_point, // The entry point.
+ bool check_exception); // Flag which indicates if exception should be checked.
+ virtual void call_VM_base(
+ Register oop_result, // Where an oop-result ends up if any; use noreg otherwise.
+ Register last_java_sp, // To set up last_Java_frame in stubs; use noreg otherwise.
+ address entry_point, // The entry point.
+ bool allow_relocation, // Flag to request generation of relocatable code.
+ bool check_exception); // Flag which indicates if exception should be checked.
+
+ // Call into the VM.
+ // Passes the thread pointer (in Z_ARG1) as a prepended argument.
+ // Makes sure oop return values are visible to the GC.
+ void call_VM(Register oop_result, address entry_point, bool check_exceptions = true);
+ void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
+ void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+ void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
+ Register arg_3, bool check_exceptions = true);
+
+ void call_VM_static(Register oop_result, address entry_point, bool check_exceptions = true);
+ void call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2,
+ Register arg_3, bool check_exceptions = true);
+
+ // Overloaded with last_java_sp.
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions = true);
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point,
+ Register arg_1, bool check_exceptions = true);
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point,
+ Register arg_1, Register arg_2, bool check_exceptions = true);
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point,
+ Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+
+ void call_VM_leaf(address entry_point);
+ void call_VM_leaf(address entry_point, Register arg_1);
+ void call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+ void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+
+ // Really static VM leaf call (never patched).
+ void call_VM_leaf_static(address entry_point);
+ void call_VM_leaf_static(address entry_point, Register arg_1);
+ void call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2);
+ void call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+
+ // Call a C function via its function entry. Updates and returns _last_calls_return_pc.
+ inline address call(Register function_entry);
+ inline address call_c(Register function_entry);
+ address call_c(address function_entry);
+ // Variant for really static (non-relocatable) calls which are never patched.
+ address call_c_static(address function_entry);
+ // TOC or pc-relative call + emits a runtime_call relocation.
+ address call_c_opt(address function_entry);
+
+ inline address call_stub(Register function_entry);
+ inline address call_stub(address function_entry);
+
+ // Get the pc where the last call will return to. Returns _last_calls_return_pc.
+ inline address last_calls_return_pc();
+
+ private:
+ static bool is_call_far_patchable_variant0_at(address instruction_addr); // Dynamic TOC: load target addr from CP and call.
+ static bool is_call_far_patchable_variant2_at(address instruction_addr); // PC-relative call, prefixed with NOPs.
+
+
+ public:
+ bool call_far_patchable(address target, int64_t toc_offset);
+ static bool is_call_far_patchable_at(address inst_start); // All supported forms of patchable calls.
+ static bool is_call_far_patchable_pcrelative_at(address inst_start); // Pc-relative call with leading nops.
+ static bool is_call_far_pcrelative(address instruction_addr); // Pure far pc-relative call, with one leading size adjustment nop.
+ static void set_dest_of_call_far_patchable_at(address inst_start, address target, int64_t toc_offset);
+ static address get_dest_of_call_far_patchable_at(address inst_start, address toc_start);
+
+ void align_call_far_patchable(address pc);
+
+ // PCrelative TOC access.
+
+ // This value is independent of code position - constant for the lifetime of the VM.
+ static int call_far_patchable_size() {
+ return load_const_from_toc_size() + call_byregister_size();
+ }
+
+ static int call_far_patchable_ret_addr_offset() { return call_far_patchable_size(); }
+
+  static bool call_far_patchable_requires_alignment_nop(address pc) {
+    const intptr_t end_of_call = (intptr_t)(pc + call_far_patchable_size());  // Address right behind a call sequence starting at pc.
+    return (end_of_call & 0x03L) != 0;  // A nop is required iff that address is not a multiple of 4.
+  }
+
+ // END OF PCrelative TOC access.
+
+ static int jump_byregister_size() { return 2; }
+ static int jump_pcrelative_size() { return 4; }
+ static int jump_far_pcrelative_size() { return 6; }
+ static int call_byregister_size() { return 2; }
+ static int call_pcrelative_size() { return 4; }
+ static int call_far_pcrelative_size() { return 2 + 6; } // Prepend each BRASL with a nop.
+ static int call_far_pcrelative_size_raw() { return 6; } // Size of the BRASL alone, i.e. without the prepended nop.
+
+ //
+ // Java utilities
+ //
+
+ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+ // The implementation is only non-empty for the InterpreterMacroAssembler,
+ // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+ virtual void check_and_handle_popframe(Register java_thread);
+ virtual void check_and_handle_earlyret(Register java_thread);
+
+ // Polling page support.
+ enum poll_mask {
+ mask_stackbang = 0xde, // 222 (dec)
+ mask_safepoint = 0x6f, // 111 (dec)
+ mask_profiling = 0xba // 186 (dec)
+ };
+
+ // Read from the polling page.
+ void load_from_polling_page(Register polling_page_address, int64_t offset = 0);
+
+ // Check if given instruction is a read from the polling page
+ // as emitted by load_from_polling_page.
+ static bool is_load_from_polling_page(address instr_loc);
+ // Extract poll address from instruction and ucontext.
+ static address get_poll_address(address instr_loc, void* ucontext);
+ // Extract poll register from instruction.
+ static uint get_poll_register(address instr_loc);
+
+ // Check if instruction is a write access to the memory serialization page.
+ // NOTE(review): this comment used to list "stw, stwu, stwx, or stwux", which are PPC mnemonics, not s390 ones - confirm the actual store.
+ static bool is_memory_serialization(int instruction, JavaThread* thread, void* ucontext);
+
+ // Support for serializing memory accesses between threads.
+ void serialize_memory(Register thread, Register tmp1, Register tmp2);
+
+ // Stack overflow checking
+ void bang_stack_with_offset(int offset);
+
+ // Atomics
+ // -- none?
+
+ void tlab_allocate(Register obj, // Result: pointer to object after successful allocation
+ Register var_size_in_bytes, // Object size in bytes if unknown at compile time; invalid otherwise.
+ int con_size_in_bytes, // Object size in bytes if known at compile time.
+ Register t1, // temp register
+ Label& slow_case); // Continuation point if fast allocation fails.
+
+ // Emitter for interface method lookup.
+ // input: recv_klass, intf_klass, itable_index
+ // output: method_result
+ // kills: itable_index, temp1_reg, Z_R0, Z_R1
+ void lookup_interface_method(Register recv_klass,
+ Register intf_klass,
+ RegisterOrConstant itable_index,
+ Register method_result,
+ Register temp1_reg,
+ Register temp2_reg,
+ Label& no_such_interface);
+
+ // virtual method calling
+ void lookup_virtual_method(Register recv_klass,
+ RegisterOrConstant vtable_index,
+ Register method_result);
+
+ // Factor out code to call ic_miss_handler.
+ unsigned int call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch);
+ void nmethod_UEP(Label& ic_miss);
+
+ // Emitters for "partial subtype" checks.
+
+ // Test sub_klass against super_klass, with fast and slow paths.
+
+ // The fast path produces a tri-state answer: yes / no / maybe-slow.
+ // One of the three labels can be NULL, meaning take the fall-through.
+ // If super_check_offset is -1, the value is loaded up from super_klass.
+ // No registers are killed, except temp1_reg. NOTE(review): this line used to name temp_reg and temp2_reg, which are not parameters here - confirm.
+ // If super_check_offset is not -1, temp1_reg is not used and can be noreg.
+ void check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register temp1_reg,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path,
+ RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
+
+ // The rest of the type check; must be wired to a corresponding fast path.
+ // It does not repeat the fast path logic, so don't use it standalone.
+ // The temp_reg can be noreg, if no temps are available.
+ // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
+ // Updates the sub's secondary super cache as necessary.
+ void check_klass_subtype_slow_path(Register Rsubklass, // klass to be tested
+ Register Rsuperklass, // klass to test against
+ Register Rarray_ptr, // tmp
+ Register Rlength, // tmp
+ Label* L_success,
+ Label* L_failure);
+
+ // Simplified, combined version, good for typical uses.
+ // Falls through on failure.
+ void check_klass_subtype(Register sub_klass,
+ Register super_klass,
+ Register temp1_reg,
+ Register temp2_reg,
+ Label& L_success); // Branched to on success; failure falls through (see above).
+
+ // Increment a counter at counter_address when the eq condition code is set.
+ // Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
+ void increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg);
+ // Biased locking support
+ // Upon entry, obj_reg must contain the target object, and mark_reg
+ // must contain the target object's header.
+ // Destroys mark_reg if an attempt is made to bias an anonymously
+ // biased lock. In this case a failure will go either to the slow
+ // case or fall through with the notEqual condition code set with
+ // the expectation that the slow case in the runtime will be called.
+ // In the fall-through case where the CAS-based lock is done,
+ // mark_reg is not destroyed.
+ void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg,
+ Register temp2_reg, Label& done, Label* slow_case = NULL);
+ // Upon entry, the base register of mark_addr must contain the oop.
+ // Destroys temp_reg.
+ // NOTE(review): the remark below mentions allow_delay_slot_filling, which is not a
+ // parameter of this function - confirm whether it still applies: "the next instruction
+ // emitted after this one will go in an annulled delay slot if the biased locking exit case failed."
+ void biased_locking_exit(Register mark_addr, Register temp_reg, Label& done);
+
+ void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking); // try_bias defaults to the UseBiasedLocking flag.
+ void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking); // Same default as for locking.
+
+ // Write to card table for modification at store_addr - the register (presumably store_addr itself - TODO confirm) is destroyed afterwards.
+ void card_write_barrier_post(Register store_addr, Register tmp);
+
+#if INCLUDE_ALL_GCS
+ // General G1 pre-barrier generator.
+ // Purpose: record the previous value if it is not null.
+ // All non-tmps are preserved.
+ void g1_write_barrier_pre(Register Robj,
+ RegisterOrConstant offset, // Presumably relative to Robj - TODO confirm.
+ Register Rpre_val, // Ideally, this is a non-volatile register.
+ Register Rval, // Will be preserved.
+ Register Rtmp1, // If Rpre_val is volatile, either Rtmp1
+ Register Rtmp2, // or Rtmp2 has to be non-volatile.
+ bool pre_val_needed); // Save Rpre_val across runtime call, caller uses it.
+
+ // General G1 post-barrier generator.
+ // Purpose: Store cross-region card.
+ void g1_write_barrier_post(Register Rstore_addr,
+ Register Rnew_val,
+ Register Rtmp1, // Rtmp1..Rtmp3: temporaries, going by their names - TODO confirm that all of them are killed.
+ Register Rtmp2,
+ Register Rtmp3);
+#endif // INCLUDE_ALL_GCS
+
+ // Support for last Java frame (but use call_VM instead where possible).
+ private: // Workers that take the allow_relocation flag explicitly.
+ void set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation);
+ void reset_last_Java_frame(bool allow_relocation);
+ void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation);
+ public: // Wrappers without the flag; their inline definitions forward to the workers above with a fixed value.
+ inline void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
+ inline void set_last_Java_frame_static(Register last_java_sp, Register last_Java_pc);
+ inline void reset_last_Java_frame(void);
+ inline void reset_last_Java_frame_static(void);
+ inline void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1);
+ inline void set_top_ijava_frame_at_SP_as_last_Java_frame_static(Register sp, Register tmp1);
+
+ void set_thread_state(JavaThreadState new_state);
+
+ // Read vm result from thread.
+ void get_vm_result (Register oop_result);
+ void get_vm_result_2(Register result);
+
+ // Vm result is currently getting hijacked for oop preservation.
+ void set_vm_result(Register oop_result);
+
+ // Support for NULL-checks
+ //
+ // Generates code that causes a NULL OS exception if the content of reg is NULL.
+ // If the accessed location is M[reg + offset] and the offset is known, provide the
+ // offset. No explicit code generation is needed if the offset is within a certain
+ // range (0 <= offset <= page_size).
+ //
+ // %%%%%% Currently not done for z/Architecture
+
+ void null_check(Register reg, Register tmp = Z_R0, int64_t offset = -1); // offset = -1 presumably means "offset not known" - TODO confirm.
+ static bool needs_explicit_null_check(intptr_t offset); // Implemented in shared file ?!
+
+ // Klass oop manipulations if compressed.
+ void encode_klass_not_null(Register dst, Register src = noreg);
+ void decode_klass_not_null(Register dst, Register src);
+ void decode_klass_not_null(Register dst);
+ void load_klass(Register klass, Address mem);
+ void load_klass(Register klass, Register src_oop);
+ void load_prototype_header(Register Rheader, Register Rsrc_oop);
+ void store_klass(Register klass, Register dst_oop, Register ck = noreg); // Klass will get compressed if ck not provided.
+ void store_klass_gap(Register s, Register dst_oop);
+
+ // This function calculates the size of the code generated by
+ // decode_klass_not_null(Register dst)
+ // when (Universe::heap() != NULL). Hence, if the instructions
+ // it generates change, then this method needs to be updated.
+ static int instr_size_for_decode_klass_not_null();
+
+ void encode_heap_oop(Register oop);
+ void encode_heap_oop_not_null(Register oop);
+
+ static int get_oop_base_pow2_offset(uint64_t oop_base);
+ int get_oop_base(Register Rbase, uint64_t oop_base);
+ int get_oop_base_complement(Register Rbase, uint64_t oop_base);
+ void compare_heap_oop(Register Rop1, Address mem, bool maybeNULL);
+ void compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL);
+ void load_heap_oop(Register dest, const Address &a);
+ void load_heap_oop(Register d, int64_t si16, Register s1); // NOTE(review): the displacement is an int64_t although it is named si16 - confirm the valid range.
+ void load_heap_oop_not_null(Register d, int64_t si16, Register s1);
+ void store_heap_oop(Register Roop, RegisterOrConstant offset, Register base);
+ void store_heap_oop_not_null(Register Roop, RegisterOrConstant offset, Register base);
+ void store_heap_oop_null(Register zero, RegisterOrConstant offset, Register base);
+ void oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL,
+ Register Rbase = Z_R1, int pow2_offset = -1, bool only32bitValid = false); // Rbase defaults to Z_R1.
+ void oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL,
+ Register Rbase = Z_R1, int pow2_offset = -1); // Same defaults as oop_encoder.
+
+ void load_mirror(Register mirror, Register method); // Presumably loads the class mirror belonging to method into mirror - TODO confirm.
+
+ //--------------------------
+ //--- Operations on arrays.
+ //--------------------------
+ unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len);
+ unsigned int Clear_Array_Const(long cnt, Register base);
+ unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len);
+ unsigned int CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
+ Register cnt_reg,
+ Register tmp1_reg, Register tmp2_reg);
+
+ //-------------------------------------------
+ // Special String Intrinsics Implementation.
+ //-------------------------------------------
+ // Intrinsics for CompactStrings
+ // Compress char[] to byte[]. odd_reg contains cnt. tmp3 is only needed for precise behavior in failure case (NOTE(review): no tmp3 here; probably the optional tmp2 is meant). Kills dst.
+ unsigned int string_compress(Register result, Register src, Register dst, Register odd_reg,
+ Register even_reg, Register tmp, Register tmp2 = noreg);
+
+ // Kills src.
+ unsigned int has_negatives(Register result, Register src, Register cnt,
+ Register odd_reg, Register even_reg, Register tmp);
+
+ // Inflate byte[] to char[].
+ unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);
+ // Odd_reg contains cnt. Kills src.
+ unsigned int string_inflate(Register src, Register dst, Register odd_reg,
+ Register even_reg, Register tmp);
+
+ unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
+ Register odd_reg, Register even_reg, Register result, int ae);
+
+ unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
+ Register odd_reg, Register even_reg, Register result, bool is_byte);
+
+ unsigned int string_indexof(Register result, Register haystack, Register haycnt,
+ Register needle, Register needlecnt, int needlecntval,
+ Register odd_reg, Register even_reg, int ae);
+
+ unsigned int string_indexof_char(Register result, Register haystack, Register haycnt,
+ Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte);
+
+ // Emit an oop const to the constant pool and set a relocation info
+ // with address current_pc. Return the TOC offset of the constant.
+ int store_const_in_toc(AddressLiteral& val);
+ int store_oop_in_toc(AddressLiteral& oop);
+ // Emit an oop const to the constant pool via store_oop_in_toc, or
+ // emit a scalar const to the constant pool via store_const_in_toc,
+ // and load the constant into register dst.
+ bool load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc = noreg);
+ // Get CPU version dependent size of load_const sequence.
+ // The returned value is valid only for code sequences
+ // generated by load_const, not load_const_optimized. (NOTE(review): text says load_const, function is load_const_from_toc - confirm.)
+ static int load_const_from_toc_size() {
+ return load_long_pcrelative_size(); // Same size as a pc-relative long load.
+ }
+ bool load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc = noreg);
+ static intptr_t get_const_from_toc(address pc);
+ static void set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb);
+
+ // Dynamic TOC.
+ static bool is_load_const(address a);
+ static bool is_load_const_from_toc_pcrelative(address a);
+ static bool is_load_const_from_toc(address a) { return is_load_const_from_toc_pcrelative(a); } // Only the pc-relative form is checked.
+
+ // PCrelative TOC access.
+ static bool is_call_byregister(address a) { return is_z_basr(*(short*)a); } // Tests the short read at a for BASR.
+ static bool is_load_const_from_toc_call(address a);
+ static bool is_load_const_call(address a);
+ static int load_const_call_size() { return load_const_size() + call_byregister_size(); } // Sum of both parts.
+ static int load_const_from_toc_call_size() { return load_const_from_toc_size() + call_byregister_size(); } // Sum of both parts.
+ // Offset is +/- 2**32 -> use long.
+ static long get_load_const_from_toc_offset(address a);
+
+
+ void generate_type_profiling(const Register Rdata, // NOTE(review): register contract is not documented here - see the implementation.
+ const Register Rreceiver_klass,
+ const Register Rwanted_receiver_klass,
+ const Register Rmatching_row,
+ bool is_virtual_call);
+
+ // Bit operations for single register operands, shifting r in place by a constant number of places.
+ inline void lshift(Register r, int places, bool doubl = true); // << (doubl selects z_sllg over z_sll)
+ inline void rshift(Register r, int places, bool doubl = true); // >> (doubl selects z_srlg over z_srl)
+
+ //
+ // Debugging
+ //
+
+ // Assert on CC (condition code in CPU state).
+ void asm_assert(bool check_equal, const char* msg, int id) PRODUCT_RETURN;
+ void asm_assert_low(const char *msg, int id) PRODUCT_RETURN;
+ void asm_assert_high(const char *msg, int id) PRODUCT_RETURN;
+ void asm_assert_eq(const char* msg, int id) { asm_assert(true, msg, id); } // check_equal = true
+ void asm_assert_ne(const char* msg, int id) { asm_assert(false, msg, id); } // check_equal = false
+
+ void asm_assert_static(bool check_equal, const char* msg, int id) PRODUCT_RETURN;
+
+ private:
+ // Emit assertions. Common worker behind the asm_assert_mem* wrappers below.
+ void asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
+ Register mem_base, const char* msg, int id) PRODUCT_RETURN;
+
+ public:
+ inline void asm_assert_mem4_is_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+ asm_assert_mems_zero(true, true, 4, mem_offset, mem_base, msg, id); // check_equal, allow_relocation, size 4
+ }
+ inline void asm_assert_mem8_is_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+ asm_assert_mems_zero(true, true, 8, mem_offset, mem_base, msg, id); // check_equal, allow_relocation, size 8
+ }
+ inline void asm_assert_mem4_isnot_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+ asm_assert_mems_zero(false, true, 4, mem_offset, mem_base, msg, id); // !check_equal, allow_relocation, size 4
+ }
+ inline void asm_assert_mem8_isnot_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+ asm_assert_mems_zero(false, true, 8, mem_offset, mem_base, msg, id); // !check_equal, allow_relocation, size 8
+ }
+
+ inline void asm_assert_mem4_is_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+ asm_assert_mems_zero(true, false, 4, mem_offset, mem_base, msg, id); // check_equal, !allow_relocation, size 4
+ }
+ inline void asm_assert_mem8_is_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+ asm_assert_mems_zero(true, false, 8, mem_offset, mem_base, msg, id); // check_equal, !allow_relocation, size 8
+ }
+ inline void asm_assert_mem4_isnot_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+ asm_assert_mems_zero(false, false, 4, mem_offset, mem_base, msg, id); // !check_equal, !allow_relocation, size 4
+ }
+ inline void asm_assert_mem8_isnot_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) {
+ asm_assert_mems_zero(false, false, 8, mem_offset, mem_base, msg, id); // !check_equal, !allow_relocation, size 8
+ }
+ void asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) PRODUCT_RETURN;
+
+ // Verify Z_thread contents.
+ void verify_thread();
+
+ // Only if +VerifyOops.
+ void verify_oop(Register reg, const char* s = "broken oop");
+
+ // TODO: verify_method and klass metadata (compare against vptr?).
+ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} // Currently a no-op.
+ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {} // Currently a no-op.
+
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+ private:
+ // Generate printout in stop().
+ static const char* stop_types[];
+ enum {
+ stop_stop = 0,
+ stop_untested = 1,
+ stop_unimplemented = 2,
+ stop_shouldnotreachhere = 3,
+ stop_end = 4
+ };
+ // Prints msg and stops execution.
+ void stop(int type, const char* msg, int id = 0);
+ address stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation); // Non-relocatable code only!!
+ void stop_static(int type, const char* msg, int id); // Non-relocatable code only!!
+
+ public:
+
+ // Prints msg and stops. Each wrapper passes its own stop type; the *_static flavors avoid relocation.
+ address stop_chain( address reentry, const char* msg = "", int id = 0) { return stop_chain(reentry, stop_stop, msg, id, true); }
+ address stop_chain_static(address reentry, const char* msg = "", int id = 0) { return stop_chain(reentry, stop_stop, msg, id, false); }
+ void stop_static (const char* msg = "", int id = 0) { stop_static(stop_stop, msg, id); }
+ void stop (const char* msg = "", int id = 0) { stop(stop_stop, msg, id); }
+ void untested (const char* msg = "", int id = 0) { stop(stop_untested, msg, id); }
+ void unimplemented(const char* msg = "", int id = 0) { stop(stop_unimplemented, msg, id); }
+ void should_not_reach_here(const char* msg = "", int id = -1) { stop(stop_shouldnotreachhere, msg, id); } // NOTE: id defaults to -1 here, 0 elsewhere.
+
+ // Factor out part of stop into subroutine to save space.
+ void stop_subroutine();
+
+ // Prints msg, but doesn't stop.
+ void warn(const char* msg);
+
+ //-----------------------------
+ //--- basic block tracing code
+ //-----------------------------
+ void trace_basic_block(uint i);
+ void init_basic_block_trace();
+ // Number of bytes a basic block gets larger due to the tracing code macro (worst case).
+ // Currently, worst case is 48 bytes. 64 puts us securely on the safe side.
+ static int basic_blck_trace_blk_size_incr() { return 64; } // Deliberately above the 48-byte worst case.
+
+ // Write pattern 0x0101010101010101 in region [low-before, high+after].
+ // Low and high may be the same registers. Before and after are
+ // the numbers of 8-byte words.
+ void zap_from_to(Register low, Register high, Register tmp1 = Z_R0, Register tmp2 = Z_R1,
+ int before = 0, int after = 0) PRODUCT_RETURN; // By default: no extra words before or after.
+
+ // Emitters for CRC32 calculation.
+ private: // Building blocks for the public emitters below.
+ void fold_byte_crc32(Register crc, Register table, Register val, Register tmp);
+ void fold_8bit_crc32(Register crc, Register table, Register tmp);
+ void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
+ Register data, bool invertCRC);
+ void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
+ Register t0, Register t1, Register t2, Register t3);
+ public: // The kernel_* flavors share the crc/buf/len/table parameters; t0..t3 are presumably temporaries - TODO confirm.
+ void update_byte_crc32( Register crc, Register val, Register table);
+ void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp);
+ void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
+ Register t0, Register t1, Register t2, Register t3);
+ void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
+ Register t0, Register t1, Register t2, Register t3);
+ void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
+ Register t0, Register t1, Register t2, Register t3);
+
+ // Emitters for BigInteger.multiplyToLen intrinsic.
+ // Note: length of result array (zlen) is passed on the stack.
+ private: // Helpers for multiply_to_len.
+ void add2_with_carry(Register dest_hi, Register dest_lo,
+ Register src1, Register src2);
+ void multiply_64_x_64_loop(Register x, Register xstart,
+ Register x_xstart,
+ Register y, Register y_idx, Register z,
+ Register carry, Register product,
+ Register idx, Register kdx);
+ void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
+ Register yz_idx, Register idx,
+ Register carry, Register product, int offset);
+ void multiply_128_x_128_loop(Register x_xstart,
+ Register y, Register z,
+ Register yz_idx, Register idx,
+ Register jdx,
+ Register carry, Register product,
+ Register carry2);
+ public: // Entry point; z is presumably the result array (see the zlen note above) - TODO confirm.
+ void multiply_to_len(Register x, Register xlen,
+ Register y, Register ylen,
+ Register z,
+ Register tmp1, Register tmp2,
+ Register tmp3, Register tmp4, Register tmp5);
+};
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of
+ * the flag passed to the constructor, which will be checked at run-time.
+ */
+class SkipIfEqual {
+ private:
+ MacroAssembler* _masm; // Presumably the assembler handed to the constructor - TODO confirm.
+ Label _label; // Presumably the jump target behind the skipped code - TODO confirm.
+
+ public:
+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value, Register _rscratch);
+ ~SkipIfEqual();
+};
+
+#ifdef ASSERT
+// Debug (ASSERT) builds only: always return false (e.g. important for our impl. of virtual calls).
+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
+#endif // ASSERT
+
+#endif // CPU_S390_VM_MACROASSEMBLER_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.inline.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
+#define CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+#include "runtime/thread.hpp"
+
+// Simplified left shift for a single register operand, constant shift amount.
+inline void MacroAssembler::lshift(Register r, int places, bool is_DW) {
+  if (!is_DW) {
+    z_sll(r, places);
+    return;
+  }
+  z_sllg(r, r, places);  // Double word form: source and target are both r.
+}
+
+inline void MacroAssembler::rshift(Register r, int places, bool is_DW) {
+  if (!is_DW) {
+    z_srl(r, places);      // Plain form.
+    return;
+  }
+  z_srlg(r, r, places);    // Double word form: source and target are both r.
+}
+
+// OR the immediate into the byte at dst: *((int8_t*)(dst)) |= imm8
+inline void MacroAssembler::or2mem_8(Address& dst, int64_t imm8) {
+  if (!Displacement::is_shortDisp(dst.disp())) {
+    z_oiy(dst, imm8);  // Not a short displacement: use z_oiy.
+    return;
+  }
+  z_oi(dst, imm8);
+}
+
+inline int MacroAssembler::store_const(const Address &dest, long imm, Register scratch, bool is_long) {
+  // Both length arguments are the same here: 8 for a long, 4 otherwise.
+  unsigned int len = is_long ? 8 : 4;
+  return store_const(dest, imm, len, len, scratch);
+}
+
+// Generic address loading: d <- base(a) + index(a) + disp(a).
+// Uses LA/LAY directly instead of an add2reg* emitter: depending on CmdLine switches and
+// parameter values, add2reg* code may alter the condition code, which "load address" must not.
+inline void MacroAssembler::load_address(Register d, const Address &a) {
+  const bool short_disp = Displacement::is_shortDisp(a.disp());
+  if (!short_disp && !Displacement::is_validDisp(a.disp())) {
+    // Neither LA nor LAY can take this displacement.
+    guarantee(false, "displacement = " SIZE_FORMAT_HEX ", out of range for LA/LAY", a.disp());
+  } else if (short_disp) {
+    z_la(d, a.disp(), a.indexOrR0(), a.baseOrR0());
+  } else {
+    z_lay(d, a.disp(), a.indexOrR0(), a.baseOrR0());
+  }
+}
+
+inline void MacroAssembler::load_const(Register t, void* x) {
+ load_const(t, (long)x); // Forward the pointer value to the (Register, long) flavor.
+}
+
+// Load a 64 bit constant encoded by a `Label'.
+// Works for bound as well as unbound labels. For unbound labels, the
+// code will become patched as soon as the label gets bound.
+inline void MacroAssembler::load_const(Register t, Label& L) {
+ load_const(t, target(L)); // target(L) supplies the value to load.
+}
+
+inline void MacroAssembler::load_const(Register t, const AddressLiteral& a) {
+ assert(t != Z_R0, "R0 not allowed");
+ // First relocate (we don't change the offset in the RelocationHolder,
+ // just pass a.rspec()), then delegate to load_const(Register, long).
+ relocate(a.rspec());
+ load_const(t, (long)a.value());
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, long x) {
+ (void) load_const_optimized_rtn_len(t, x, true); // The returned length is deliberately dropped.
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, void* a) {
+ load_const_optimized(t, (long)a); // Forward the pointer value to the (Register, long) flavor.
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, Label& L) {
+ load_const_optimized(t, target(L)); // target(L) supplies the value to load.
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, const AddressLiteral& a) {
+ assert(t != Z_R0, "R0 not allowed");
+ assert((relocInfo::relocType)a.rspec().reloc()->type() == relocInfo::none,
+ "cannot relocate optimized load_consts"); // Unlike load_const, no relocation is emitted here.
+ load_const_optimized(t, a.value());
+}
+
+inline void MacroAssembler::set_oop(jobject obj, Register d) {
+ load_const(d, allocate_oop_address(obj)); // Load the address literal from allocate_oop_address() into d.
+}
+
+inline void MacroAssembler::set_oop_constant(jobject obj, Register d) {
+ load_const(d, constant_oop_address(obj)); // Same, but with the literal from constant_oop_address().
+}
+
+// Adds MetaData constant md to TOC and loads it from there.
+// md is added to the oop_recorder, but no relocation is added. Returns load_const_from_toc()'s result.
+inline bool MacroAssembler::set_metadata_constant(Metadata* md, Register d) {
+ AddressLiteral a = constant_metadata_address(md);
+ return load_const_from_toc(d, a, d); // Discards the relocation. d is passed as target and as Rtoc.
+}
+
+
+inline bool MacroAssembler::is_call_pcrelative_short(unsigned long inst) {
+ return is_equal(inst, BRAS_ZOPC); // BRAS: off 16, len 16
+}
+
+inline bool MacroAssembler::is_call_pcrelative_long(unsigned long inst) {
+ return is_equal(inst, BRASL_ZOPC); // BRASL: off 16, len 32
+}
+
+inline bool MacroAssembler::is_branch_pcrelative_short(unsigned long inst) {
+ // Branch relative, 16-bit offset.
+ return is_equal(inst, BRC_ZOPC); // BRC: off 16, len 16
+}
+
+inline bool MacroAssembler::is_branch_pcrelative_long(unsigned long inst) {
+ // Branch relative, 32-bit offset.
+ return is_equal(inst, BRCL_ZOPC); // BRCL: off 16, len 32
+}
+
+inline bool MacroAssembler::is_compareandbranch_pcrelative_short(unsigned long inst) {
+  // Compare and branch relative, 16-bit offset. All opcodes are matched under CMPBRANCH_MASK.
+  if (is_equal(inst, CRJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CGRJ_ZOPC, CMPBRANCH_MASK)) { return true; }
+  if (is_equal(inst, CIJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CGIJ_ZOPC, CMPBRANCH_MASK)) { return true; }
+  if (is_equal(inst, CLRJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CLGRJ_ZOPC, CMPBRANCH_MASK)) { return true; }
+  return is_equal(inst, CLIJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CLGIJ_ZOPC, CMPBRANCH_MASK);
+}
+
+inline bool MacroAssembler::is_branchoncount_pcrelative_short(unsigned long inst) {
+ // Branch relative on count, 16-bit offset.
+ return is_equal(inst, BRCT_ZOPC) || is_equal(inst, BRCTG_ZOPC); // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex32_pcrelative_short(unsigned long inst) {
+ // Branch relative on index (32bit), 16-bit offset.
+ return is_equal(inst, BRXH_ZOPC) || is_equal(inst, BRXLE_ZOPC); // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex64_pcrelative_short(unsigned long inst) {
+ // Branch relative on index (64bit), 16-bit offset.
+ return is_equal(inst, BRXHG_ZOPC) || is_equal(inst, BRXLG_ZOPC); // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex_pcrelative_short(unsigned long inst) {
+ return is_branchonindex32_pcrelative_short(inst) || // Either index width qualifies.
+ is_branchonindex64_pcrelative_short(inst);
+}
+
+inline bool MacroAssembler::is_branch_pcrelative16(unsigned long inst) {
+  if (is_branch_pcrelative_short(inst))           { return true; }
+  if (is_compareandbranch_pcrelative_short(inst)) { return true; }
+  if (is_branchoncount_pcrelative_short(inst))    { return true; }
+  return is_branchonindex_pcrelative_short(inst);  // Last of the "short" branch predicates.
+}
+
+inline bool MacroAssembler::is_branch_pcrelative32(unsigned long inst) {
+ return is_branch_pcrelative_long(inst); // The "long" branch is the only one tested here.
+}
+
+inline bool MacroAssembler::is_branch_pcrelative(unsigned long inst) {
+ return is_branch_pcrelative16(inst) || // Any branch, whichever offset width.
+ is_branch_pcrelative32(inst);
+}
+
+inline bool MacroAssembler::is_load_pcrelative_long(unsigned long inst) {
+ // Load relative, 32-bit offset. Opcodes are matched under REL_LONG_MASK.
+ return is_equal(inst, LRL_ZOPC, REL_LONG_MASK) || is_equal(inst, LGRL_ZOPC, REL_LONG_MASK); // off 16, len 32
+}
+
+inline bool MacroAssembler::is_misc_pcrelative_long(unsigned long inst) {
+ // Load address, execute relative, 32-bit offset. Opcodes are matched under REL_LONG_MASK.
+ return is_equal(inst, LARL_ZOPC, REL_LONG_MASK) || is_equal(inst, EXRL_ZOPC, REL_LONG_MASK); // off 16, len 32
+}
+
+inline bool MacroAssembler::is_pcrelative_short(unsigned long inst) {
+  if (is_branch_pcrelative16(inst)) { return true; }  // Branches first,
+  return is_call_pcrelative_short(inst);               // then the short call.
+}
+
+inline bool MacroAssembler::is_pcrelative_long(unsigned long inst) {
+  if (is_branch_pcrelative32(inst))  { return true; }
+  if (is_call_pcrelative_long(inst)) { return true; }
+  if (is_load_pcrelative_long(inst)) { return true; }
+  return is_misc_pcrelative_long(inst);  // Last of the "long" pc-relative predicates.
+}
+
+inline bool MacroAssembler::is_load_pcrelative_long(address iLoc) {
+  unsigned long instr_bits;
+  const unsigned int instr_len = get_instruction(iLoc, &instr_bits);
+  return (instr_len == 6) ? is_load_pcrelative_long(instr_bits) : false;  // Only length 6 qualifies.
+}
+
+inline bool MacroAssembler::is_pcrelative_short(address iLoc) {
+  unsigned long instr_bits;
+  const unsigned int instr_len = get_instruction(iLoc, &instr_bits);
+  return (instr_len == 4 || instr_len == 6) ? is_pcrelative_short(instr_bits) : false;  // Lengths 4 and 6 qualify.
+}
+
+inline bool MacroAssembler::is_pcrelative_long(address iLoc) {
+  unsigned long instr_bits;
+  const unsigned int instr_len = get_instruction(iLoc, &instr_bits);
+  return (instr_len == 6) ? is_pcrelative_long(instr_bits) : false;  // Only length 6 qualifies.
+}
+
+// Dynamic TOC. True for any pc-relative instruction at iloc, short or long form alike.
+inline bool MacroAssembler::is_pcrelative_instruction(address iloc) {
+  unsigned long instr_bits;
+  (void) get_instruction(iloc, &instr_bits);  // Only the instruction bits are used; the length is ignored.
+  if (is_pcrelative_short(instr_bits)) { return true; }
+  return is_pcrelative_long(instr_bits);
+}
+
+inline bool MacroAssembler::is_load_addr_pcrel(address a) {
+ return is_equal(a, LARL_ZOPC, LARL_MASK); // Matches LARL at a, compared under LARL_MASK.
+}
+
+// Store the return pc, held in register pc, into the return_pc slot of the
+// current frame (addressed via Z_SP). The original comment names R14 as the default register.
+inline void MacroAssembler::save_return_pc(Register pc) {
+ z_stg(pc, _z_abi16(return_pc), Z_SP);
+}
+
+inline void MacroAssembler::restore_return_pc() {
+ z_lg(Z_R14, _z_abi16(return_pc), Z_SP); // Reload Z_R14 from the same frame slot.
+}
+
+// Call the function whose entry is in function_entry (BASR with Z_R14). The pc behind the
+// call is remembered in _last_calls_return_pc and returned.
+inline address MacroAssembler::call(Register function_entry) {
+  assert(function_entry != Z_R0, "function_entry cannot be Z_R0");
+  Assembler::z_basr(Z_R14, function_entry);
+  const address return_pc = pc();  // Current pc, i.e. right behind the emitted call.
+  _last_calls_return_pc = return_pc;
+  return return_pc;
+}
+
+// Call a C function via a function entry.
+inline address MacroAssembler::call_c(Register function_entry) {
+ return call(function_entry); // Nothing extra is emitted: same code as call(Register).
+}
+
+// Call a stub function via a function descriptor, but don't save TOC before
+// call, don't setup TOC and ENV for call, and don't restore TOC after call
+inline address MacroAssembler::call_stub(Register function_entry) {
+ return call_c(function_entry); // NOTE(review): despite the comment above, this simply forwards to call_c.
+}
+
+inline address MacroAssembler::call_stub(address function_entry) {
+ return call_c(function_entry); // Address flavor: forwards to call_c(address).
+}
+
+// Get the pc where the last emitted call will return to.
+inline address MacroAssembler::last_calls_return_pc() {
+ return _last_calls_return_pc;
+}
+
+inline void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) {
+ set_last_Java_frame(last_Java_sp, last_Java_pc, true); // allow_relocation = true
+}
+
+inline void MacroAssembler::set_last_Java_frame_static(Register last_Java_sp, Register last_Java_pc) {
+ set_last_Java_frame(last_Java_sp, last_Java_pc, false); // allow_relocation = false
+}
+
+inline void MacroAssembler::reset_last_Java_frame(void) {
+ reset_last_Java_frame(true); // allow_relocation = true
+}
+
+inline void MacroAssembler::reset_last_Java_frame_static(void) {
+ reset_last_Java_frame(false); // allow_relocation = false
+}
+
+inline void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1) {
+ set_top_ijava_frame_at_SP_as_last_Java_frame(sp, tmp1, true); // allow_relocation = true
+}
+
+inline void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame_static(Register sp, Register tmp1) {
+  set_top_ijava_frame_at_SP_as_last_Java_frame(sp, tmp1, false);  // allow_relocation = false, as in the other *_static wrappers.
+}
+
+#endif // CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/metaspaceShared_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/codeBuffer.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "memory/metaspaceShared.hpp"
+
+// Generate the self-patching vtable method:
+//
+// This method will be called (as any other Klass virtual method) with
+// the Klass itself as the first argument. Example:
+//
+// oop obj;
+// int size = obj->klass()->klass_part()->oop_size(this);
+//
+// for which the virtual method call is Klass::oop_size();.
+//
+// The dummy method is called with the Klass object as the first
+// operand, and an object as the second argument.
+//
+
+//=====================================================================
+
+// All of the dummy methods in the vtable are essentially identical,
+// differing only by an ordinal constant, and they bear no relationship
+// to the original method which the caller intended. Also, there needs
+// to be 'vtbl_list_size' instances of the vtable in order to
+// differentiate between the 'vtbl_list_size' original Klass objects.
+
+#undef __
+#define __ masm->
+
+// Reserves the dummy vtable area in the md region, then sets up code generation in the
+// mc region. Instead of dummy methods, only an unimplemented() stop is emitted so far.
+void MetaspaceShared::generate_vtable_methods(void** vtbl_list, void** vtable,
+                                              char** md_top, char* md_end,
+                                              char** mc_top, char* mc_end) {
+  // Layout at *md_top: [ byte count (intptr_t) | dummy vtable (byte count bytes) ].
+  const intptr_t table_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*);
+  intptr_t* const count_slot = (intptr_t*)(*md_top);
+  *count_slot = table_bytes;
+  *md_top += sizeof(intptr_t);
+
+  // The table starts right behind the count; hand it out and skip over it.
+  *vtable = (void**)*md_top;
+  *md_top += table_bytes;
+
+  // Get ready to generate dummy methods.
+  CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+
+  __ unimplemented();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/methodHandles_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "classfile/javaClasses.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+
+#ifdef PRODUCT
+#define __ _masm->
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant:
+// wraps an int in a RegisterOrConstant explicitly so that call sites can pass
+// literal constants (including 0) without relying on implicit conversion.
+static RegisterOrConstant constant(int value) {
+  RegisterOrConstant wrapped(value);
+  return wrapped;
+}
+
+// Emit code that replaces the reference in klass_reg by the 64-bit value
+// found at java_lang_Class::klass_offset_in_bytes() within that object.
+// With VerifyMethodHandles, the object is first checked against
+// java_lang_Class via verify_klass(), using temp_reg and temp2_reg.
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg,
+                                          Register temp_reg, Register temp2_reg) {
+  if (VerifyMethodHandles) {
+    verify_klass(_masm,
+                 klass_reg,
+                 SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class),
+                 temp_reg,
+                 temp2_reg,
+                 "MH argument is a Class");
+  }
+
+  // Source and destination are the same register: the object reference is
+  // overwritten by the loaded value.
+  Address klass_field(klass_reg, java_lang_Class::klass_offset_in_bytes());
+  __ z_lg(klass_reg, klass_field);
+}
+
+
+#ifdef ASSERT
+// Debug helper behind the NONZERO() macro: asserts that 'x' is not zero and
+// returns it unchanged. 'xname' is the text of the checked expression and is
+// only used in the assertion message.
+static int check_nonzero(const char* xname, int x) {
+ assert(x != 0, "%s should be nonzero", xname);
+ return x;
+}
+#define NONZERO(x) check_nonzero(#x, x)
+#else
+#define NONZERO(x) (x)
+#endif
+
+#ifdef ASSERT
+// Debug-only check (compiled under ASSERT): emit code that stops with
+// 'error_message' unless the object in obj_reg is non-null and its klass
+// matches the well-known klass selected by klass_id, either directly or via
+// the word found at that klass's super_check_offset.
+//
+// temp_reg - receives the object's klass; used as a base register, so it
+// must not be Z_R0.
+// temp2_reg - receives the expected klass.
+// Both temp registers are overwritten.
+void MethodHandles::verify_klass(MacroAssembler* _masm,
+ Register obj_reg, SystemDictionary::WKID klass_id,
+ Register temp_reg, Register temp2_reg,
+ const char* error_message) {
+
+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id);
+ KlassHandle klass = SystemDictionary::well_known_klass(klass_id);
+
+ assert(temp_reg != Z_R0 && // Is used as base register!
+ temp_reg != noreg && temp2_reg != noreg, "need valid registers!");
+
+ NearLabel L_ok, L_bad;
+
+ BLOCK_COMMENT("verify_klass {");
+
+ __ verify_oop(obj_reg);
+ // A null reference fails the check right away.
+ __ compareU64_and_branch(obj_reg, (intptr_t)0L, Assembler::bcondEqual, L_bad);
+ __ load_klass(temp_reg, obj_reg);
+ // klass_addr is a klass in allstatic SystemDictionaryHandles. Can't get GCed.
+ // The expected klass is loaded indirectly, through the address of its slot.
+ __ load_const_optimized(temp2_reg, (address)klass_addr);
+ __ z_lg(temp2_reg, Address(temp2_reg));
+ // Exact match?
+ __ compareU64_and_branch(temp_reg, temp2_reg, Assembler::bcondEqual, L_ok);
+
+ // No exact match: compare the word at the expected klass's
+ // super_check_offset inside the object's klass against the expected klass.
+ intptr_t super_check_offset = klass->super_check_offset();
+ __ z_lg(temp_reg, Address(temp_reg, super_check_offset));
+ __ compareU64_and_branch(temp_reg, temp2_reg, Assembler::bcondEqual, L_ok);
+ __ BIND(L_bad);
+ __ stop(error_message);
+ __ BIND(L_ok);
+
+ BLOCK_COMMENT("} verify_klass");
+}
+
+// Debug-only check (compiled under ASSERT): emit code that extracts the
+// reference kind from the flags field of the MemberName in member_reg and
+// stops if it differs from ref_kind. 'temp' is overwritten.
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind,
+ Register member_reg, Register temp ) {
+ NearLabel L;
+ BLOCK_COMMENT("verify_ref_kind {");
+
+ // temp = (flags >> MN_REFERENCE_KIND_SHIFT) & MN_REFERENCE_KIND_MASK
+ __ z_llgf(temp,
+ Address(member_reg,
+ NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes())));
+ __ z_srl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT);
+ __ z_nilf(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
+ __ compare32_and_branch(temp, constant(ref_kind), Assembler::bcondEqual, L);
+
+ {
+ // The message buffer is allocated on the C heap and is not freed in this
+ // function; its address is handed to stop().
+ // NOTE(review): presumably the text must stay valid for as long as the
+ // generated code can run - confirm.
+ char *buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
+
+ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
+ if (ref_kind == JVM_REF_invokeVirtual || ref_kind == JVM_REF_invokeSpecial) {
+ // Could do this for all ref_kinds, but would explode assembly code size.
+ trace_method_handle(_masm, buf);
+ }
+ __ stop(buf);
+ }
+
+ BLOCK_COMMENT("} verify_ref_kind");
+
+ // Target of the "reference kind matches" branch above.
+ __ bind(L);
+}
+#endif // ASSERT
+
+// Emit the final jump of a method handle dispatch.
+//
+// method - the Method* to invoke; must be Z_method.
+// target - overwritten with the address that is jumped to; must differ
+// from 'method'.
+// temp - scratch register, overwritten.
+// for_compiler_entry - selects Method::from_compiled_offset() instead of
+// Method::from_interpreted_offset() as the entry to load.
+//
+// A null 'method' branches to StubRoutines::throw_AbstractMethodError_entry().
+// For the interpreter entry, if JVMTI can post interpreter events, a runtime
+// test of the thread's interp_only_mode field diverts to the entry at
+// Method::interpreter_entry_offset().
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target,
+ Register temp, bool for_compiler_entry) {
+ assert(method == Z_method, "interpreter calling convention");
+ __ verify_method_ptr(method);
+
+ assert(target != method, "don 't you kill the method reg!");
+
+ Label L_no_such_method;
+
+ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
+ // JVMTI events, such as single-stepping, are implemented partly
+ // by avoiding running compiled code in threads for which the
+ // event is enabled. Check here for interp_only_mode if these
+ // events CAN be enabled.
+ __ verify_thread();
+
+ Label run_compiled_code;
+
+ // interp_only_mode == 0: take the regular path below.
+ __ load_and_test_int(temp, Address(Z_thread, JavaThread::interp_only_mode_offset()));
+ __ z_bre(run_compiled_code);
+
+ // Null method test is replicated below in compiled case,
+ // it might be able to address across the verify_thread().
+ __ z_ltgr(temp, method);
+ __ z_bre(L_no_such_method);
+
+ __ z_lg(target, Address(method, Method::interpreter_entry_offset()));
+ __ z_br(target);
+
+ __ bind(run_compiled_code);
+ }
+
+ // Compiled case, either static or fall-through from runtime conditional.
+ __ z_ltgr(temp, method);
+ __ z_bre(L_no_such_method);
+
+ ByteSize offset = for_compiler_entry ?
+ Method::from_compiled_offset() : Method::from_interpreted_offset();
+ Address method_from(method, offset);
+
+ __ z_lg(target, method_from);
+ __ z_br(target);
+
+ // Null method: continue in the AbstractMethodError stub.
+ __ bind(L_no_such_method);
+ assert(StubRoutines::throw_AbstractMethodError_entry() != NULL, "not yet generated!");
+ __ load_const_optimized(target, StubRoutines::throw_AbstractMethodError_entry());
+ __ z_br(target);
+}
+
+// Emit code that follows MH.form -> LF.vmentry -> MemberName.vmtarget,
+// starting at the method handle in 'recv', and then jumps to the resulting
+// method via jump_from_method_handle().
+//
+// recv - the method handle; not modified by the code emitted here.
+// method_temp - must be Z_method; receives the target method.
+// temp2 - scratch; also used as the 'target' register of the final jump.
+// temp3 - only takes part in the register-distinctness assertion.
+// Z_R0 is passed on as scratch register to jump_from_method_handle().
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+ Register recv, Register method_temp,
+ Register temp2, Register temp3,
+ bool for_compiler_entry) {
+
+ // This is the initial entry point of a lazy method handle.
+ // After type checking, it picks up the invoker from the LambdaForm.
+ assert_different_registers(recv, method_temp, temp2, temp3);
+ assert(method_temp == Z_method, "required register for loading method");
+
+ BLOCK_COMMENT("jump_to_lambda_form {");
+
+ // Load the invoker, as MH -> MH.form -> LF.vmentry
+ __ verify_oop(recv);
+ __ load_heap_oop(method_temp,
+ Address(recv,
+ NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())));
+ __ verify_oop(method_temp);
+ __ load_heap_oop(method_temp,
+ Address(method_temp,
+ NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())));
+ __ verify_oop(method_temp);
+ // The following assumes that a method is normally compressed in the vmtarget field.
+ // NOTE(review): the load below is a plain 64-bit z_lg with no decoding step,
+ // so the remark above may be stale - confirm.
+ __ z_lg(method_temp,
+ Address(method_temp,
+ NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())));
+
+ if (VerifyMethodHandles && !for_compiler_entry) {
+ // Make sure recv is already on stack.
+ NearLabel L;
+ Address paramSize(temp2, ConstMethod::size_of_parameters_offset());
+
+ // temp2 = method->const()->size_of_parameters (unsigned 16 bit)
+ __ z_lg(temp2, Address(method_temp, Method::const_offset()));
+ __ load_sized_value(temp2, paramSize, sizeof(u2), /*is_signed*/ false);
+ // if (temp2 != recv) stop
+ __ z_lg(temp2, __ argument_address(temp2, temp2, 0));
+ __ compare64_and_branch(temp2, recv, Assembler::bcondEqual, L);
+ __ stop("receiver not on stack");
+ __ BIND(L);
+ }
+
+ jump_from_method_handle(_masm, method_temp, temp2, Z_R0, for_compiler_entry);
+
+ BLOCK_COMMENT("} jump_to_lambda_form");
+}
+
+// code generation
+// Generate the interpreter entry for the signature-polymorphic intrinsic 'iid'.
+//
+// Returns the CodeEntryAlignment-aligned entry point. For _invokeGeneric and
+// _compiledLambdaForm only a should_not_reach_here() is emitted and NULL is
+// returned.
+//
+// The emitted code locates the first argument on the interpreter stack (the
+// method handle for _invokeBasic, the receiver for linkTo* kinds that have
+// one), pops the trailing MemberName for the linkTo* kinds and then continues
+// in generate_method_handle_dispatch().
+// code generation
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+ vmIntrinsics::ID iid) {
+ const bool not_for_compiler_entry = false; // This is the interpreter entry.
+ assert(is_signature_polymorphic(iid), "expected invoke iid");
+
+ if (iid == vmIntrinsics::_invokeGeneric || iid == vmIntrinsics::_compiledLambdaForm) {
+ // Perhaps surprisingly, the symbolic references visible to Java
+ // are not directly used. They are linked to Java-generated
+ // adapters via MethodHandleNatives.linkMethod. They all allow an
+ // appendix argument.
+ __ should_not_reach_here(); // Empty stubs make SG sick.
+ return NULL;
+ }
+
+ // Z_R10: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+ // Z_method: method
+ // Z_ARG1 (Gargs): incoming argument list (must preserve)
+ Register Z_R4_param_size = Z_R4; // size of parameters
+ // NOTE(review): code_start is not read again within this function.
+ address code_start = __ pc();
+
+ // Here is where control starts out:
+ __ align(CodeEntryAlignment);
+
+ address entry_point = __ pc();
+
+ if (VerifyMethodHandles) {
+ Label L;
+ BLOCK_COMMENT("verify_intrinsic_id {");
+
+ // Supplement to 8139891: _intrinsic_id exceeded 1-byte size limit.
+ // Compare the method's intrinsic id against iid with an instruction
+ // matching the field width (1 or 2 bytes).
+ if (Method::intrinsic_id_size_in_bytes() == 1) {
+ __ z_cli(Address(Z_method, Method::intrinsic_id_offset_in_bytes()), (int)iid);
+ } else {
+ assert(Method::intrinsic_id_size_in_bytes() == 2, "size error: check Method::_intrinsic_id");
+ __ z_lh(Z_R0_scratch, Address(Z_method, Method::intrinsic_id_offset_in_bytes()));
+ __ z_chi(Z_R0_scratch, (int)iid);
+ }
+ __ z_bre(L);
+
+ if (iid == vmIntrinsics::_linkToVirtual || iid == vmIntrinsics::_linkToSpecial) {
+ // Could do this for all kinds, but would explode assembly code size.
+ trace_method_handle(_masm, "bad Method::intrinsic_id");
+ }
+
+ __ stop("bad Method::intrinsic_id");
+ __ bind(L);
+
+ BLOCK_COMMENT("} verify_intrinsic_id");
+ }
+
+ // First task: Find out how big the argument list is.
+ Address Z_R4_first_arg_addr;
+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+
+ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic,
+ "must be _invokeBasic or a linkTo intrinsic");
+
+ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+ Address paramSize(Z_R1_scratch, ConstMethod::size_of_parameters_offset());
+
+ // Z_R4 = method->const()->size_of_parameters (unsigned 16 bit), which is
+ // then used to form the address of the first argument.
+ __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+ __ load_sized_value(Z_R4_param_size, paramSize, sizeof(u2), /*is_signed*/ false);
+ Z_R4_first_arg_addr = __ argument_address(Z_R4_param_size, Z_R4_param_size, 0);
+ } else {
+ // Debug builds only: mark the register name as no longer meaningful.
+ DEBUG_ONLY(Z_R4_param_size = noreg);
+ }
+
+ Register Z_mh = noreg;
+ if (!is_signature_polymorphic_static(iid)) {
+ Z_mh = Z_ARG4;
+ __ z_lg(Z_mh, Z_R4_first_arg_addr);
+ DEBUG_ONLY(Z_R4_param_size = noreg);
+ }
+
+ // Z_R4_first_arg_addr is live!
+
+ trace_method_handle_interpreter_entry(_masm, iid);
+
+ if (iid == vmIntrinsics::_invokeBasic) {
+ __ pc(); // just for the block comment
+ generate_method_handle_dispatch(_masm, iid, Z_mh, noreg, not_for_compiler_entry);
+ } else {
+ // Adjust argument list by popping the trailing MemberName argument.
+ Register Z_recv = noreg;
+
+ if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+ // Load the receiver (not the MH; the actual MemberName's receiver)
+ // up from the interpreter stack.
+ __ z_lg(Z_recv = Z_R5, Z_R4_first_arg_addr);
+ DEBUG_ONLY(Z_R4_param_size = noreg);
+ }
+
+ Register Z_member = Z_method; // MemberName ptr; incoming method ptr is dead now
+
+ // Load the MemberName, then pop it by bumping Z_esp by one stack element.
+ __ z_lg(Z_member, __ argument_address(constant(1)));
+ __ add2reg(Z_esp, Interpreter::stackElementSize);
+ generate_method_handle_dispatch(_masm, iid, Z_recv, Z_member, not_for_compiler_entry);
+ }
+
+ return entry_point;
+}
+
+// Emit the dispatch code for the signature-polymorphic intrinsic 'iid'.
+//
+// receiver_reg - method handle (_invokeBasic) or receiver (linkTo* kinds);
+// noreg if there is none. For the compiler entry it must be
+// Z_ARG1, or noreg for _linkToStatic.
+// member_reg - the trailing MemberName argument (linkTo* kinds), else noreg.
+// for_compiler_entry - passed through to the final jump; also selects temp1.
+//
+// _invokeBasic goes through jump_to_lambda_form(). The linkTo* kinds load
+// the target method from the MemberName (vmtarget), from the receiver klass's
+// vtable, or via interface lookup, and then jump with
+// jump_from_method_handle().
+// Scratch registers used: Z_R6 or Z_R10 (temp1), Z_R12, Z_R11, Z_R13, plus
+// Z_R0/Z_R1 in some paths.
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+ vmIntrinsics::ID iid,
+ Register receiver_reg,
+ Register member_reg,
+ bool for_compiler_entry) {
+ assert(is_signature_polymorphic(iid), "expected invoke iid");
+
+ Register temp1 = for_compiler_entry ? Z_R10 : Z_R6;
+ Register temp2 = Z_R12;
+ Register temp3 = Z_R11;
+ Register temp4 = Z_R13;
+
+ if (for_compiler_entry) {
+ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : Z_ARG1),
+ "only valid assignment");
+ }
+ if (receiver_reg != noreg) {
+ assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg);
+ }
+ if (member_reg != noreg) {
+ assert_different_registers(temp1, temp2, temp3, temp4, member_reg);
+ }
+ if (!for_compiler_entry) { // Don't trash last SP.
+ assert_different_registers(temp1, temp2, temp3, temp4, Z_R10);
+ }
+
+ if (iid == vmIntrinsics::_invokeBasic) {
+ __ pc(); // Just for the block comment.
+ // Indirect through MH.form.vmentry.vmtarget.
+ jump_to_lambda_form(_masm, receiver_reg, Z_method, Z_R1, temp3, for_compiler_entry);
+ return;
+ }
+
+ // The method is a member invoker used by direct method handles.
+ if (VerifyMethodHandles) {
+ // Make sure the trailing argument really is a MemberName (caller responsibility).
+ verify_klass(_masm, member_reg,
+ SystemDictionary::WK_KLASS_ENUM_NAME(MemberName_klass),
+ temp1, temp2,
+ "MemberName required for invokeVirtual etc.");
+ }
+
+ // Addresses of the MemberName fields read below.
+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
+ Address member_vmtarget(member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
+ Register temp1_recv_klass = temp1;
+
+ if (iid != vmIntrinsics::_linkToStatic) {
+ __ verify_oop(receiver_reg);
+ if (iid == vmIntrinsics::_linkToSpecial) {
+ // Don't actually load the klass; just null-check the receiver.
+ __ null_check(receiver_reg);
+ } else {
+ // Load receiver klass itself.
+ __ null_check(receiver_reg, Z_R0, oopDesc::klass_offset_in_bytes());
+ __ load_klass(temp1_recv_klass, receiver_reg);
+ __ verify_klass_ptr(temp1_recv_klass);
+ }
+ BLOCK_COMMENT("check_receiver {");
+ // The receiver for the MemberName must be in receiver_reg.
+ // Check the receiver against the MemberName.clazz.
+ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+ // Did not load it above...
+ __ load_klass(temp1_recv_klass, receiver_reg);
+ __ verify_klass_ptr(temp1_recv_klass);
+ }
+
+ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+ NearLabel L_ok;
+ Register temp2_defc = temp2;
+
+ __ load_heap_oop(temp2_defc, member_clazz);
+ load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
+ __ verify_klass_ptr(temp2_defc);
+ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok);
+ // If we get here, the type check failed!
+ __ stop("receiver class disagrees with MemberName.clazz");
+ __ bind(L_ok);
+ }
+ BLOCK_COMMENT("} check_receiver");
+ }
+ if (iid == vmIntrinsics::_linkToSpecial || iid == vmIntrinsics::_linkToStatic) {
+ DEBUG_ONLY(temp1_recv_klass = noreg); // These guys didn't load the recv_klass.
+ }
+
+ // Live registers at this point:
+ // member_reg - MemberName that was the trailing argument.
+ // temp1_recv_klass - Klass of stacked receiver, if needed.
+ // Z_R10 - Interpreter linkage if interpreted.
+
+ // Set by the cases that leave the target in Z_method and rely on the
+ // common jump after the switch.
+ bool method_is_live = false;
+
+ switch (iid) {
+ case vmIntrinsics::_linkToSpecial:
+ if (VerifyMethodHandles) {
+ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+ }
+ __ z_lg(Z_method, member_vmtarget);
+ method_is_live = true;
+ break;
+
+ case vmIntrinsics::_linkToStatic:
+ if (VerifyMethodHandles) {
+ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+ }
+ __ z_lg(Z_method, member_vmtarget);
+ method_is_live = true;
+ break;
+
+ case vmIntrinsics::_linkToVirtual: {
+ // Same as TemplateTable::invokevirtual, minus the CP setup and profiling.
+ if (VerifyMethodHandles) {
+ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+ }
+
+ // Pick out the vtable index from the MemberName, and then we can discard it.
+ Register temp2_index = temp2;
+ __ z_lg(temp2_index, member_vmindex);
+
+ if (VerifyMethodHandles) {
+ // if (member_vmindex < 0) stop
+ NearLabel L_index_ok;
+ __ compare32_and_branch(temp2_index, constant(0), Assembler::bcondNotLow, L_index_ok);
+ __ stop("no virtual index");
+ __ BIND(L_index_ok);
+ }
+
+ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+ // at this point. And VerifyMethodHandles has already checked clazz, if needed.
+
+ // Get target method and entry point.
+ __ lookup_virtual_method(temp1_recv_klass, temp2_index, Z_method);
+ method_is_live = true;
+ break;
+ }
+
+ case vmIntrinsics::_linkToInterface: {
+ // Same as TemplateTable::invokeinterface, minus the CP setup
+ // and profiling, with different argument motion.
+ if (VerifyMethodHandles) {
+ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+ }
+
+ Register temp3_intf = temp3;
+
+ __ load_heap_oop(temp3_intf, member_clazz);
+ load_klass_from_Class(_masm, temp3_intf, temp2, temp4);
+
+ // Z_method doubles as the index register.
+ Register Z_index = Z_method;
+
+ __ z_lg(Z_index, member_vmindex);
+
+ if (VerifyMethodHandles) {
+ NearLabel L;
+ // if (member_vmindex < 0) stop
+ __ compare32_and_branch(Z_index, constant(0), Assembler::bcondNotLow, L);
+ __ stop("invalid vtable index for MH.invokeInterface");
+ __ bind(L);
+ }
+
+ // Given interface, index, and recv klass, dispatch to the implementation method.
+ Label L_no_such_interface;
+ __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+ // Note: next two args must be the same:
+ Z_index, Z_method, temp2, noreg,
+ L_no_such_interface);
+ // This case jumps by itself; method_is_live stays false.
+ jump_from_method_handle(_masm, Z_method, temp2, Z_R0, for_compiler_entry);
+
+ __ bind(L_no_such_interface);
+
+ // Throw exception.
+ __ load_const_optimized(Z_R1, StubRoutines::throw_IncompatibleClassChangeError_entry());
+ __ z_br(Z_R1);
+ break;
+ }
+
+ default:
+ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
+ break;
+ }
+
+ if (method_is_live) {
+ // Live at this point: Z_method, O5_savedSP (if interpreted).
+ // NOTE(review): 'O5_savedSP' above and 'rcx_recv' below are not s390
+ // register names; these remarks look inherited from other ports. The
+ // register list further up names Z_R10 as the interpreter linkage.
+
+ // After figuring out which concrete method to call, jump into it.
+ // Note that this works in the interpreter with no data motion.
+ // But the compiled version will require that rcx_recv be shifted out.
+ jump_from_method_handle(_masm, Z_method, temp1, Z_R0, for_compiler_entry);
+ }
+}
+
+#ifndef PRODUCT
+// Runtime helper called from the code emitted by
+// MethodHandles::trace_method_handle() (non-PRODUCT builds only).
+//
+// Prints one line with the adapter name, mh, sender_sp and args. With Verbose
+// it additionally walks from the current C frame to the frame whose fp equals
+// tracing_fp, prints a description of the stack layout found there and, if a
+// method handle is expected and 'mh' is an oop, prints it (and its form).
+//
+// adaptername - name of the traced adapter; names containing "/static" or
+// "linkTo" are treated as having no method handle.
+// mh - method handle candidate.
+// sender_sp - printed only.
+// args - printed; marked in the layout if it lies inside the
+// dumped frame.
+// tracing_fp - fp identifying the frame set up by the tracing code.
+void trace_method_handle_stub(const char* adaptername,
+ oopDesc* mh,
+ intptr_t* sender_sp,
+ intptr_t* args,
+ intptr_t* tracing_fp) {
+ bool has_mh = (strstr(adaptername, "/static") == NULL &&
+ strstr(adaptername, "linkTo") == NULL); // Static linkers don't have MH.
+ const char* mh_reg_name = has_mh ? "Z_R4_mh" : "Z_R4";
+ tty->print_cr("MH %s %s=" INTPTR_FORMAT " sender_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT,
+ adaptername, mh_reg_name,
+ p2i(mh), p2i(sender_sp), p2i(args));
+
+ if (Verbose) {
+ // Dumping last frame with frame::describe.
+
+ JavaThread* p = JavaThread::active();
+
+ ResourceMark rm;
+ PRESERVE_EXCEPTION_MARK; // May not be needed but safer and inexpensive here.
+ FrameValues values;
+
+ // Note: We want to allow trace_method_handle from any call site.
+ // While trace_method_handle creates a frame, it may be entered
+ // without a valid return PC in Z_R14 (e.g. not just after a call).
+ // Walking that frame could lead to failures due to that invalid PC.
+ // => carefully detect that frame when doing the stack walking.
+
+ // Walk up to the right frame using the "tracing_fp" argument.
+ frame cur_frame = os::current_frame(); // Current C frame.
+
+ while (cur_frame.fp() != tracing_fp) {
+ cur_frame = os::get_sender_for_C_frame(&cur_frame);
+ }
+
+ // Safely create a frame and call frame::describe.
+ intptr_t *dump_sp = cur_frame.sender_sp();
+ intptr_t *dump_fp = cur_frame.link();
+
+ bool walkable = has_mh; // Whether the traced frame should be walkable.
+
+ // The sender for cur_frame is the caller of trace_method_handle.
+ if (walkable) {
+ // The previous definition of walkable may have to be refined
+ // if new call sites cause the next frame constructor to start
+ // failing. Alternatively, frame constructors could be
+ // modified to support the current or future non walkable
+ // frames (but this is more intrusive and is not considered as
+ // part of this RFE, which will instead use a simpler output).
+ frame dump_frame = frame(dump_sp);
+ dump_frame.describe(values, 1);
+ } else {
+ // Robust dump for frames which cannot be constructed from sp/younger_sp
+ // Add descriptions without building a Java frame to avoid issues.
+ values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+ values.describe(-1, dump_sp, "sp");
+ }
+
+ bool has_args = has_mh; // Whether Z_esp is meaningful.
+
+ // Mark args, if seems valid (may not be valid for some adapters).
+ if (has_args) {
+ if ((args >= dump_sp) && (args < dump_fp)) {
+ values.describe(-1, args, "*Z_esp");
+ }
+ }
+
+ // Note: the unextended_sp may not be correct.
+ tty->print_cr(" stack layout:");
+ values.print(p);
+ if (has_mh && mh->is_oop()) {
+ mh->print();
+ if (java_lang_invoke_MethodHandle::is_instance(mh)) {
+ // Only print the form if its field offset is nonzero.
+ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) {
+ java_lang_invoke_MethodHandle::form(mh)->print();
+ }
+ }
+ }
+ }
+}
+
+// Emit a call to trace_method_handle_stub() for the adapter 'adaptername'
+// (non-PRODUCT builds only). Nothing is emitted unless TraceMethodHandles
+// is set.
+//
+// Z_ARG1..Z_ARG5 are spilled to the five 8-byte slots at Z_SP+16 .. Z_SP+48
+// before the call and reloaded from the very same slots afterwards, so the
+// argument registers are unchanged for the code that follows.
+// Z_R14 is saved and restored around the call.
+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
+  if (!TraceMethodHandles) { return; }
+
+  BLOCK_COMMENT("trace_method_handle {");
+
+  // Save argument registers (they are used in raise exception stub).
+  __ z_stg(Z_ARG1, Address(Z_SP, 16));
+  __ z_stg(Z_ARG2, Address(Z_SP, 24));
+  __ z_stg(Z_ARG3, Address(Z_SP, 32));
+  __ z_stg(Z_ARG4, Address(Z_SP, 40));
+  __ z_stg(Z_ARG5, Address(Z_SP, 48));
+
+  // Setup arguments.
+  // Z_ARG2 must be set from Z_ARG4 before Z_ARG4 itself is overwritten.
+  __ z_lgr(Z_ARG2, Z_ARG4); // mh, see generate_method_handle_interpreter_entry()
+  __ z_lgr(Z_ARG3, Z_R10);  // sender_sp
+  __ z_lgr(Z_ARG4, Z_esp);
+  __ load_const_optimized(Z_ARG1, (void *)adaptername);
+  __ z_lgr(Z_ARG5, Z_SP);   // tracing_fp
+  __ save_return_pc();      // saves Z_R14
+  __ push_frame_abi160(0);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub));
+  __ pop_frame();
+  __ restore_return_pc();   // restores to Z_R14
+
+  // Reload the argument registers from the slots written above. Each load
+  // offset has to match the corresponding store offset; Z_ARG5 lives at 48.
+  __ z_lg(Z_ARG1, Address(Z_SP, 16));
+  __ z_lg(Z_ARG2, Address(Z_SP, 24));
+  __ z_lg(Z_ARG3, Address(Z_SP, 32));
+  __ z_lg(Z_ARG4, Address(Z_SP, 40));
+  __ z_lg(Z_ARG5, Address(Z_SP, 48));
+  __ zap_from_to(Z_SP, Z_SP, Z_R0, Z_R1, 50, -1);
+  __ zap_from_to(Z_SP, Z_SP, Z_R0, Z_R1, -1, 5);
+
+  BLOCK_COMMENT("} trace_method_handle");
+}
+#endif // !PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/methodHandles_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Platform-specific definitions for method handles.
+// These definitions are inlined into class MethodHandles.
+
+ // Adapters
+ // adapter_code_size is 23000 on non-LP64 and 35000 on LP64 builds, with an
+ // extra 40000 resp. 50000 added by DEBUG_ONLY.
+ // NOTE(review): presumably a size in bytes for the adapter code - confirm at
+ // the use site.
+ enum /* platform_dependent_constants */ {
+ adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000))
+ };
+
+ // Additional helper methods for MethodHandles code generation:
+ public:
+ // Emits code that replaces the java.lang.Class reference in klass_reg by the
+ // value loaded from its klass field; the temp registers are only used for
+ // the optional VerifyMethodHandles check.
+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg);
+
+ // Emits a check that the object in obj_reg is of the well-known klass
+ // klass_id; on failure the generated code stops with error_message.
+ // Declared with NOT_DEBUG_RETURN; the definition is compiled under ASSERT.
+ static void verify_klass(MacroAssembler* _masm,
+ Register obj_reg, SystemDictionary::WKID klass_id,
+ Register temp_reg, Register temp2_reg,
+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
+
+  // Convenience wrapper around verify_klass(): checks that mh_reg refers to a
+  // java.lang.invoke.MethodHandle, using the two temp registers as scratch.
+  static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
+                                   Register temp_reg, Register temp2_reg) {
+    const char* const failure_text = "reference is a MH";
+    verify_klass(_masm,
+                 mh_reg,
+                 SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle),
+                 temp_reg,
+                 temp2_reg,
+                 failure_text);
+  }
+
+ // Emits a check that the reference kind stored in the flags of the
+ // MemberName in member_reg equals ref_kind; 'temp' is scratch.
+ // Declared with NOT_DEBUG_RETURN; the definition is compiled under ASSERT.
+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
+ // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+ // Takes care of special dispatch from single stepping too.
+ // Note: the definition in methodHandles_s390.cpp names the two registers
+ // after 'method' as 'target' (receives the jump address) and 'temp'
+ // (scratch), in that order.
+ static void jump_from_method_handle(MacroAssembler* _masm, Register method,
+ Register temp, Register temp2,
+ bool for_compiler_entry);
+
+ // Emits the jump through MH.form.vmentry.vmtarget for the method handle in
+ // 'recv'. method_temp must be Z_method (asserted in the definition).
+ static void jump_to_lambda_form(MacroAssembler* _masm,
+ Register recv, Register method_temp,
+ Register temp2, Register temp3,
+ bool for_compiler_entry);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/nativeInst_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,690 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by JL, LS
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/ostream.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+#define LUCY_DBG
+
+//-------------------------------------
+// N a t i v e I n s t r u c t i o n
+//-------------------------------------
+
+// Define this switch to prevent identity updates.
+// In high-concurrency scenarios, it is beneficial to prevent
+// identity updates. It has a positive effect on cache line steals
+// and invalidations.
+// Test runs of JVM98, JVM2008, and JBB2005 show a very low frequency
+// of identity updates. Detection is therefore disabled.
+#undef SUPPRESS_IDENTITY_UPDATE
+
+void NativeInstruction::verify() {
+  // Make sure this object sits at a plausible instruction address. Rejected are:
+  //   - NULL and any other address in the first page (0x0000 .. 0x0fff)
+  //   - odd addresses (would cause a "specification exception")
+  const address pc = addr_at(0);
+  const bool in_first_page = (pc == 0) || (((unsigned long)pc & ~0x0fff) == 0);
+  const bool odd_address   = ((intptr_t)pc & 1) != 0;
+  if (in_first_page || odd_address) {
+    tty->print_cr(INTPTR_FORMAT ": bad instruction address", p2i(pc));
+    fatal("not an instruction address");
+  }
+}
+
+// Print location, length, and value (hex, one group per halfword) of current NativeInstruction; msg, if given, is appended.
+void NativeInstruction::print(const char* msg) const {
+ int len = Assembler::instr_len(addr_at(0));
+ if (msg == NULL) { // Output line without trailing blanks.
+ switch (len) {
+ case 2: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x", p2i(addr_at(0)), len, halfword_at(0)); break;
+ case 4: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2)); break;
+ case 6: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x %4.4x", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), halfword_at(4)); break;
+ default: // Never reached. instr_len() always returns one of the above values. Keep the compiler happy.
+ ShouldNotReachHere();
+ break;
+ }
+ } else { // Output line with filler blanks to have msg aligned.
+ switch (len) {
+ case 2: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %s", p2i(addr_at(0)), len, halfword_at(0), msg); break;
+ case 4: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x %s", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), msg); break;
+ case 6: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x %4.4x %s", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), halfword_at(4), msg); break;
+ default: // Never reached. instr_len() always returns one of the above values. Keep the compiler happy.
+ ShouldNotReachHere();
+ break;
+ }
+ }
+}
+void NativeInstruction::print() const { // Convenience overload: print without a message.
+ print(NULL);
+}
+
+// Dump `range` bytes of storage around the current NativeInstruction to tty (a NULL msg is passed on as "").
+void NativeInstruction::dump(const unsigned int range, const char* msg) const {
+  Assembler::dump_code_range(tty, addr_at(0), range, (msg != NULL) ? msg : "");
+}
+
+void NativeInstruction::dump(const unsigned int range) const {
+  dump(range, NULL);  // No message.
+}
+
+void NativeInstruction::dump() const {
+  dump(32, NULL);     // Default range of 32, no message.
+}
+
+void NativeInstruction::set_halfword_at(int offset, short i) {
+  short* const slot = (short*)addr_at(offset);  // Halfword to be overwritten.
+#ifdef SUPPRESS_IDENTITY_UPDATE
+  if (*slot != i) {  // Skip stores that would not change the contents.
+    *slot = i;
+  }
+#else
+  *slot = i;
+#endif
+  ICache::invalidate_word((address)slot);
+}
+
+void NativeInstruction::set_word_at(int offset, int i) {
+  int* const slot = (int*)addr_at(offset);  // Word to be overwritten.
+#ifdef SUPPRESS_IDENTITY_UPDATE
+  if (*slot != i) {  // Skip stores that would not change the contents.
+    *slot = i;
+  }
+#else
+  *slot = i;
+#endif
+  ICache::invalidate_word((address)slot);
+}
+
+void NativeInstruction::set_jlong_at(int offset, jlong i) {
+  jlong* const slot = (jlong*)addr_at(offset);  // Doubleword to be overwritten.
+#ifdef SUPPRESS_IDENTITY_UPDATE
+  if (*slot != i) {  // Skip stores that would not change the contents.
+    *slot = i;
+  }
+#else
+  *slot = i;
+#endif
+  // A single invalidate_word() suffices, there is no need to invalidate
+  // 2 words: the flush instruction operates on doublewords.
+  ICache::invalidate_word((address)slot);
+}
+
+#undef SUPPRESS_IDENTITY_UPDATE
+
+//------------------------------------------------------------
+
+int NativeInstruction::illegal_instruction() {
+ return 0; // Pattern that is_illegal() compares against and NativeIllegalInstruction::insert() stores as a halfword.
+}
+
+bool NativeInstruction::is_illegal() {
+  // An instruction whose main opcode (leftmost byte) is 0x00 is not, and never will be,
+  // a valid instruction. It causes a SIGILL where the pc points to the next instruction.
+  // The caller of this method wants to know if such a situation exists at the current pc.
+  //
+  // The result of this method is unsharp with respect to the following facts:
+  //   - Stepping backwards in the instruction stream is not possible on z/Architecture.
+  //   - z/Architecture instructions are 2, 4, or 6 bytes in length.
+  //   - The instruction length is coded in the leftmost two bits of the main opcode.
+  //   - The result is exact if the caller knows by some other means that the
+  //     instruction is of length 2.
+  //
+  // Returns true if, and only if, the halfword at *-2 equals illegal_instruction().
+  const int trap_pattern = illegal_instruction();
+  return halfword_at(-2) == trap_pattern;
+}
+
+// We use an illtrap for marking a method as not_entrant or zombie.
+bool NativeInstruction::is_sigill_zombie_not_entrant() {
+  // Quick exit. The one-sided error of is_illegal() is tolerable here
+  // (see implementation of is_illegal() for details).
+  if (!is_illegal()) {
+    return false;
+  }
+
+  CodeBlob* blob = CodeCache::find_blob_unsafe(addr_at(0));
+  const bool in_nmethod = (blob != NULL) && blob->is_nmethod();
+  if (!in_nmethod) {
+    return false;
+  }
+
+  // The method is not_entrant or zombie if the illtrap instruction is located
+  // at the verified entry point. BE AWARE: the current pc (this) points to the
+  // instruction after the "illtrap" location, hence the step back by 2.
+  address trap_addr = ((address) this) - 2;
+  return ((nmethod *)blob)->verified_entry_point() == trap_addr;
+}
+
+bool NativeInstruction::is_jump() {
+  unsigned long insn;  // Filled in by get_instruction() from this address.
+  Assembler::get_instruction((address)this, &insn);
+  return MacroAssembler::is_branch_pcrelative_long(insn);
+}
+
+//---------------------------------------------------
+// N a t i v e I l l e g a l I n s t r u c t i o n
+//---------------------------------------------------
+
+void NativeIllegalInstruction::insert(address code_pos) {
+  // Overwrite the halfword at code_pos with the illegal_instruction() pattern.
+  ((NativeIllegalInstruction*) nativeInstruction_at(code_pos))->set_halfword_at(0, illegal_instruction());
+}
+
+//-----------------------
+// N a t i v e C a l l
+//-----------------------
+
+void NativeCall::verify() {
+  if (!NativeCall::is_call_at(addr_at(0))) {  // Pattern not recognized as a call.
+    fatal("this is not a `NativeCall' site");
+  }
+}
+
+address NativeCall::destination() const {
+  if (!MacroAssembler::is_call_far_pcrelative(instruction_address())) {
+    // Not a pc-relative call: the target is the constant found at this site.
+    return (address)((NativeMovConstReg *)this)->data();
+  }
+  // Pc-relative call: decode the target from the instruction behind the leading nop.
+  return MacroAssembler::get_target_addr_pcrel(addr_at(MacroAssembler::nop_size()));
+}
+
+// Similar to replace_mt_safe, but just changes the destination. The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times. Thus, the displacement field must be
+// 4-byte-aligned. We enforce this on z/Architecture by inserting a nop
+// instruction in front of 'brasl' when needed.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+void NativeCall::set_destination_mt_safe(address dest) {
+ if (MacroAssembler::is_call_far_pcrelative(instruction_address())) {
+ address iaddr = addr_at(MacroAssembler::nop_size()); // Address behind the leading nop; the new offset is computed relative to it.
+ // Ensure that patching is atomic hence mt safe.
+ assert(((long)addr_at(MacroAssembler::call_far_pcrelative_size()) & (call_far_pcrelative_displacement_alignment-1)) == 0,
+ "constant must be 4-byte aligned");
+ set_word_at(MacroAssembler::call_far_pcrelative_size() - 4, Assembler::z_pcrel_off(dest, iaddr)); // One word store over the last 4 bytes of the call sequence.
+ } else {
+ assert(MacroAssembler::is_load_const_from_toc(instruction_address()), "unsupported instruction"); // Only other supported form: constant loaded from TOC.
+ nativeMovConstReg_at(instruction_address())->set_data(((intptr_t)dest)); // Patch the loaded constant instead of the code.
+ }
+}
+
+//-----------------------------
+// N a t i v e F a r C a l l
+//-----------------------------
+
+void NativeFarCall::verify() {
+  NativeInstruction::verify();  // Generic address sanity check first.
+  const bool recognized = NativeFarCall::is_far_call_at(addr_at(0));
+  if (!recognized) fatal("not a NativeFarCall");
+}
+
+address NativeFarCall::destination() {
+  address call_start = (address)this;
+  assert(MacroAssembler::is_call_far_patchable_at(call_start), "unexpected call type");
+  address ctable = NULL;
+  if (MacroAssembler::call_far_patchable_requires_alignment_nop(call_start)) {  // Sequence starts with an alignment nop:
+    call_start += MacroAssembler::nop_size();                                   // step over it.
+  }
+  return MacroAssembler::get_dest_of_call_far_patchable_at(call_start, ctable);
+}
+
+
+// Handles both patterns of patchable far calls: via a constant table entry, or pc-relative (brasl).
+void NativeFarCall::set_destination(address dest, int toc_offset) {
+ address inst_addr = (address)this;
+
+ // Set new destination (implementation of call may change here).
+ assert(MacroAssembler::is_call_far_patchable_at(inst_addr), "unexpected call type");
+
+ if (!MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) {
+ address ctable = CodeCache::find_blob(inst_addr)->ctable_begin();
+ // Need distance of TOC entry from current instruction.
+ toc_offset = (ctable + toc_offset) - inst_addr;
+ // Call is via constant table entry.
+ MacroAssembler::set_dest_of_call_far_patchable_at(inst_addr, dest, toc_offset);
+ } else {
+ // Here, we have a pc-relative call (brasl).
+ // Be aware: dest may have moved in this case, so really patch the displacement,
+ // when necessary!
+ // This while loop will also consume the nop which always precedes a call_far_pcrelative.
+ // We need to revert this after the loop. Pc-relative calls are always assumed to have a leading nop.
+ unsigned int nop_sz = MacroAssembler::nop_size();
+ unsigned int nop_bytes = 0;
+ while(MacroAssembler::is_z_nop(inst_addr+nop_bytes)) {
+ nop_bytes += nop_sz;
+ }
+ if (nop_bytes > 0) {
+ inst_addr += nop_bytes - nop_sz; // Step back by one nop: inst_addr now points to the last nop found.
+ }
+
+ assert(MacroAssembler::is_call_far_pcrelative(inst_addr), "not a pc-relative call");
+ address target = MacroAssembler::get_target_addr_pcrel(inst_addr + nop_sz); // Current target, decoded behind that nop.
+ if (target != dest) { // Patch only if the destination actually changes.
+ NativeCall *call = nativeCall_at(inst_addr);
+ call->set_destination_mt_safe(dest);
+ }
+ }
+}
+
+//-------------------------------------
+// N a t i v e M o v C o n s t R e g
+//-------------------------------------
+
+// Do not use an assertion here. Let clients decide whether they only
+// want this when assertions are enabled.
+void NativeMovConstReg::verify() {
+ address loc = addr_at(0);
+
+ // This while loop will also consume the nop which always precedes a
+ // call_far_pcrelative. We need to revert this after the
+ // loop. Pc-relative calls are always assumed to have a leading nop.
+ unsigned int nop_sz = MacroAssembler::nop_size();
+ unsigned int nop_bytes = 0;
+ while(MacroAssembler::is_z_nop(loc+nop_bytes)) {
+ nop_bytes += nop_sz;
+ }
+
+ if (nop_bytes > 0) {
+ if (MacroAssembler::is_call_far_pcrelative(loc+nop_bytes-nop_sz)) return; // A pc-relative call starting at the last nop is accepted as is.
+ loc += nop_bytes; // Otherwise check the first non-nop instruction.
+ }
+
+ if (!MacroAssembler::is_load_const_from_toc(loc) && // Load const from TOC.
+ !MacroAssembler::is_load_const(loc) && // Load const inline.
+ !MacroAssembler::is_load_narrow_oop(loc) && // Load narrow oop.
+ !MacroAssembler::is_load_narrow_klass(loc) && // Load narrow Klass ptr.
+ !MacroAssembler::is_compare_immediate_narrow_oop(loc) && // Compare immediate narrow.
+ !MacroAssembler::is_compare_immediate_narrow_klass(loc) && // Compare immediate narrow.
+ !MacroAssembler::is_pcrelative_instruction(loc)) { // Just to make it run.
+ tty->cr();
+ tty->print_cr("NativeMovConstReg::verify(): verifying addr %p(0x%x), %d leading nops", loc, *(uint*)loc, nop_bytes/nop_sz);
+ tty->cr();
+ ((NativeMovConstReg*)loc)->dump(64, "NativeMovConstReg::verify()");
+#ifdef LUCY_DBG
+ VM_Version::z_SIGSEGV(); // LUCY_DBG builds only; presumably forces a SIGSEGV for debugging - confirm in VM_Version.
+#endif
+ fatal("this is not a `NativeMovConstReg' site");
+ }
+}
+
+address NativeMovConstReg::next_instruction_address(int offset) const { // Returns the address behind the pattern recognized at addr_at(offset).
+ address inst_addr = addr_at(offset);
+
+ // Load address (which is a constant) pc-relative.
+ if (MacroAssembler::is_load_addr_pcrel(inst_addr)) { return addr_at(offset+MacroAssembler::load_addr_pcrel_size()); }
+
+ // Load constant from TOC.
+ if (MacroAssembler::is_load_const_from_toc(inst_addr)) { return addr_at(offset+MacroAssembler::load_const_from_toc_size()); }
+
+ // Load constant inline.
+ if (MacroAssembler::is_load_const(inst_addr)) { return addr_at(offset+MacroAssembler::load_const_size()); }
+
+ // Load constant narrow inline.
+ if (MacroAssembler::is_load_narrow_oop(inst_addr)) { return addr_at(offset+MacroAssembler::load_narrow_oop_size()); }
+ if (MacroAssembler::is_load_narrow_klass(inst_addr)) { return addr_at(offset+MacroAssembler::load_narrow_klass_size()); }
+
+ // Compare constant narrow inline.
+ if (MacroAssembler::is_compare_immediate_narrow_oop(inst_addr)) { return addr_at(offset+MacroAssembler::compare_immediate_narrow_oop_size()); }
+ if (MacroAssembler::is_compare_immediate_narrow_klass(inst_addr)) { return addr_at(offset+MacroAssembler::compare_immediate_narrow_klass_size()); }
+
+ if (MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) { return addr_at(offset+MacroAssembler::call_far_patchable_size()); } // Patchable pc-relative far call.
+
+ if (MacroAssembler::is_pcrelative_instruction(inst_addr)) { return addr_at(offset+Assembler::instr_len(inst_addr)); } // Any other pc-relative instruction: use its own length.
+
+ ((NativeMovConstReg*)inst_addr)->dump(64, "NativeMovConstReg site is not recognized as such"); // No pattern matched: report and fail.
+#ifdef LUCY_DBG
+ VM_Version::z_SIGSEGV();
+#else
+ guarantee(false, "Not a NativeMovConstReg site");
+#endif
+ return NULL; // Only reached if the failure handling above returns.
+}
+
+intptr_t NativeMovConstReg::data() const { // Returns the constant this site loads (inline constant or TOC entry).
+ address loc = addr_at(0);
+ if (MacroAssembler::is_load_const(loc)) {
+ return MacroAssembler::get_const(loc); // Constant is encoded inline.
+ } else if (MacroAssembler::is_load_narrow_oop(loc) ||
+ MacroAssembler::is_compare_immediate_narrow_oop(loc) ||
+ MacroAssembler::is_load_narrow_klass(loc) ||
+ MacroAssembler::is_compare_immediate_narrow_klass(loc)) {
+ ((NativeMovConstReg*)loc)->dump(32, "NativeMovConstReg::data(): cannot extract data from narrow ptr (oop or klass)"); // Narrow sites are treated as an error here.
+#ifdef LUCY_DBG
+ VM_Version::z_SIGSEGV();
+#else
+ ShouldNotReachHere();
+#endif
+ return *(intptr_t *)NULL; // NOTE(review): dereferences NULL if ever reached; presumably unreachable after the failure call above - confirm.
+ } else {
+ // Otherwise, assume data resides in TOC. Is asserted in called method.
+ return MacroAssembler::get_const_from_toc(loc);
+ }
+}
+
+
+// Patch in a new constant. Returns the address of the instruction following the patched site.
+//
+// There are situations where we have multiple (hopefully two at most)
+// relocations connected to one instruction. Loading an oop from CP
+// using pcrelative addressing would be one such example. Here we have an
+// oop relocation, modifying the oop itself, and an internal word relocation,
+// modifying the relative address.
+//
+// NativeMovConstReg::set_data is then called once for each relocation. To be
+// able to distinguish between the relocations, we use a rather dirty hack:
+//
+// All calls that deal with an internal word relocation to fix their relative
+// address are on a faked, odd instruction address. The instruction can be
+// found on the next lower, even address.
+//
+// All other calls are "normal", i.e. on even addresses.
+address NativeMovConstReg::set_data_plain(intptr_t src, CodeBlob *cb) {
+ unsigned long x = (unsigned long)src;
+ address loc = instruction_address();
+ address next_address; // Assigned in every branch below.
+
+ if (MacroAssembler::is_load_addr_pcrel(loc)) {
+ MacroAssembler::patch_target_addr_pcrel(loc, (address)src);
+ ICache::invalidate_range(loc, MacroAssembler::load_addr_pcrel_size());
+ next_address = next_instruction_address();
+ } else if (MacroAssembler::is_load_const_from_toc(loc)) { // Load constant from TOC.
+ MacroAssembler::set_const_in_toc(loc, src, cb);
+ next_address = next_instruction_address();
+ } else if (MacroAssembler::is_load_const(loc)) {
+ // Not mt safe, ok in methods like CodeBuffer::copy_code().
+ MacroAssembler::patch_const(loc, x);
+ ICache::invalidate_range(loc, MacroAssembler::load_const_size());
+ next_address = next_instruction_address();
+ }
+ // cOops
+ else if (MacroAssembler::is_load_narrow_oop(loc)) {
+ MacroAssembler::patch_load_narrow_oop(loc, (oop) (void*) x);
+ ICache::invalidate_range(loc, MacroAssembler::load_narrow_oop_size());
+ next_address = next_instruction_address();
+ }
+ // compressed klass ptrs
+ else if (MacroAssembler::is_load_narrow_klass(loc)) {
+ MacroAssembler::patch_load_narrow_klass(loc, (Klass*)x);
+ ICache::invalidate_range(loc, MacroAssembler::load_narrow_klass_size());
+ next_address = next_instruction_address();
+ }
+ // cOops
+ else if (MacroAssembler::is_compare_immediate_narrow_oop(loc)) {
+ MacroAssembler::patch_compare_immediate_narrow_oop(loc, (oop) (void*) x);
+ ICache::invalidate_range(loc, MacroAssembler::compare_immediate_narrow_oop_size());
+ next_address = next_instruction_address();
+ }
+ // compressed klass ptrs
+ else if (MacroAssembler::is_compare_immediate_narrow_klass(loc)) {
+ MacroAssembler::patch_compare_immediate_narrow_klass(loc, (Klass*)x);
+ ICache::invalidate_range(loc, MacroAssembler::compare_immediate_narrow_klass_size());
+ next_address = next_instruction_address();
+ }
+ else if (MacroAssembler::is_call_far_patchable_pcrelative_at(loc)) {
+ assert(ShortenBranches, "Wait a minute! A pc-relative call w/o ShortenBranches?");
+ // This NativeMovConstReg site does not need to be patched. It was
+ // patched when it was converted to a call_pcrelative site
+ // before. The value of the src argument is not related to the
+ // branch target.
+ next_address = next_instruction_address();
+ }
+
+ else {
+ tty->print_cr("WARNING: detected an unrecognized code pattern at loc = %p -> 0x%8.8x %8.8x",
+ loc, *((unsigned int*)loc), *((unsigned int*)(loc+4)));
+ next_address = next_instruction_address(); // Failure should be handled in next_instruction_address().
+#ifdef LUCY_DBG
+ VM_Version::z_SIGSEGV();
+#endif
+ }
+
+ return next_address;
+}
+
+// Divided up in set_data_plain() which patches the instruction in the
+// code stream and set_data() which additionally patches the oop pool
+// if necessary.
+void NativeMovConstReg::set_data(intptr_t src) {
+  const address site = instruction_address();
+  CodeBlob* blob = CodeCache::find_blob(site);
+  // Patch the code stream first ...
+  address site_end = set_data_plain(src, blob);
+  relocInfo::update_oop_pool(site, site_end, (address)src, blob);  // ... then also store the value into an oop_Relocation cell, if any.
+}
+
+void NativeMovConstReg::set_narrow_oop(intptr_t data) {
+  const address site = addr_at(0);
+  int patched_len = 0;  // Length reported by the patch routine, handed to invalidate_range() below.
+  if (MacroAssembler::is_load_narrow_oop(site)) {
+    patched_len = MacroAssembler::patch_load_narrow_oop(site, cast_to_oop <intptr_t> (data));
+  } else if (MacroAssembler::is_compare_immediate_narrow_oop(site)) {
+    patched_len = MacroAssembler::patch_compare_immediate_narrow_oop(site, cast_to_oop <intptr_t>(data));
+  } else {
+    fatal("this is not a `NativeMovConstReg::narrow_oop' site");
+  }
+  ICache::invalidate_range(site, patched_len);
+}
+
+// Compressed klass ptrs: patch the narrow klass constant of a load or compare-immediate site.
+void NativeMovConstReg::set_narrow_klass(intptr_t data) {
+  const address site = addr_at(0);
+  int patched_len = 0;  // Length reported by the patch routine, handed to invalidate_range() below.
+  if (MacroAssembler::is_load_narrow_klass(site)) {
+    patched_len = MacroAssembler::patch_load_narrow_klass(site, (Klass*)data);
+  } else if (MacroAssembler::is_compare_immediate_narrow_klass(site)) {
+    patched_len = MacroAssembler::patch_compare_immediate_narrow_klass(site, (Klass*)data);
+  } else {
+    fatal("this is not a `NativeMovConstReg::narrow_klass' site");
+  }
+  ICache::invalidate_range(site, patched_len);
+}
+
+void NativeMovConstReg::set_pcrel_addr(intptr_t newTarget, CompiledMethod *passed_nm /* = NULL */, bool copy_back_to_oop_pool) { // Re-targets a pc-relative site; passed_nm is not used in this body.
+ address next_address;
+ address loc = addr_at(0);
+
+ if (MacroAssembler::is_load_addr_pcrel(loc)) {
+ address oldTarget = MacroAssembler::get_target_addr_pcrel(loc); // Not used afterwards.
+ MacroAssembler::patch_target_addr_pcrel(loc, (address)newTarget);
+
+ ICache::invalidate_range(loc, MacroAssembler::load_addr_pcrel_size());
+ next_address = loc + MacroAssembler::load_addr_pcrel_size();
+ } else if (MacroAssembler::is_load_const_from_toc_pcrelative(loc) ) { // Load constant from TOC.
+ address oldTarget = MacroAssembler::get_target_addr_pcrel(loc); // Not used afterwards.
+ MacroAssembler::patch_target_addr_pcrel(loc, (address)newTarget);
+
+ ICache::invalidate_range(loc, MacroAssembler::load_const_from_toc_size());
+ next_address = loc + MacroAssembler::load_const_from_toc_size();
+ } else if (MacroAssembler::is_call_far_patchable_pcrelative_at(loc)) {
+ assert(ShortenBranches, "Wait a minute! A pc-relative call w/o ShortenBranches?");
+ next_address = next_instruction_address(); // Nothing is patched in this case.
+ } else {
+ assert(false, "Not a NativeMovConstReg site for set_pcrel_addr");
+ next_address = next_instruction_address(); // Failure should be handled in next_instruction_address().
+ }
+
+ if (copy_back_to_oop_pool) {
+ if (relocInfo::update_oop_pool(instruction_address(), next_address, (address)newTarget, NULL)) { // A true result is treated as an error: no oop reloc is expected here.
+ ((NativeMovConstReg*)instruction_address())->dump(64, "NativeMovConstReg::set_pcrel_addr(): found oop reloc for pcrel_addr");
+#ifdef LUCY_DBG
+ VM_Version::z_SIGSEGV();
+#else
+ assert(false, "Ooooops: found oop reloc for pcrel_addr");
+#endif
+ }
+ }
+}
+
+void NativeMovConstReg::set_pcrel_data(intptr_t newData, CompiledMethod *passed_nm /* = NULL */, bool copy_back_to_oop_pool) { // Stores newData into the location a TOC load refers to; passed_nm is not used in this body.
+ address next_address;
+ address loc = addr_at(0);
+
+ if (MacroAssembler::is_load_const_from_toc(loc) ) { // Load constant from TOC.
+ // Offset is +/- 2**32 -> use long.
+ long offset = MacroAssembler::get_load_const_from_toc_offset(loc); // NOTE(review): offset is not used below.
+ address target = MacroAssembler::get_target_addr_pcrel(loc);
+ intptr_t oldData = *(intptr_t*)target;
+ if (oldData != newData) { // Update only if data changes. Prevents cache invalidation.
+ *(intptr_t *)(target) = newData;
+ }
+
+ // ICache::invalidate_range(target, sizeof(unsigned long)); // No ICache invalidate for CP data.
+ next_address = loc + MacroAssembler::load_const_from_toc_size();
+ } else if (MacroAssembler::is_call_far_pcrelative(loc)) {
+ ((NativeMovConstReg*)loc)->dump(64, "NativeMovConstReg::set_pcrel_data() has a problem: setting data for a pc-relative call?"); // Treated as an error: a call has no data to set.
+#ifdef LUCY_DBG
+ VM_Version::z_SIGSEGV();
+#else
+ assert(false, "Ooooops: setting data for a pc-relative call");
+#endif
+ next_address = next_instruction_address();
+ } else {
+ assert(false, "Not a NativeMovConstReg site for set_pcrel_data");
+ next_address = next_instruction_address(); // Failure should be handled in next_instruction_address().
+ }
+
+ if (copy_back_to_oop_pool) {
+ if (relocInfo::update_oop_pool(instruction_address(), next_address, (address)newData, NULL)) { // A true result is treated as an error: no oop reloc is expected here.
+ ((NativeMovConstReg*)instruction_address())->dump(64, "NativeMovConstReg::set_pcrel_data(): found oop reloc for pcrel_data");
+#ifdef LUCY_DBG
+ VM_Version::z_SIGSEGV();
+#else
+ assert(false, "Ooooops: found oop reloc for pcrel_data");
+#endif
+ }
+ }
+}
+
+#ifdef COMPILER1
+//--------------------------------
+// N a t i v e M o v R e g M e m
+//--------------------------------
+
+void NativeMovRegMem::verify() {
+  // Expected pattern: a load_const sequence (l1), directly followed by a load or store (l2). Anything else is fatal.
+  address l1 = addr_at(0);
+  address l2 = addr_at(MacroAssembler::load_const_size());
+
+  if (!MacroAssembler::is_load_const(l1)) {
+    tty->cr();
+    tty->print_cr("NativeMovRegMem::verify(): verifying addr " PTR_FORMAT, p2i(l1));
+    tty->cr();
+    ((NativeMovRegMem*)l1)->dump(64, "NativeMovRegMem::verify()");  // Label names this verifier, not NativeMovConstReg.
+    fatal("this is not a `NativeMovRegMem' site");
+  }
+
+  unsigned long inst1;
+  Assembler::get_instruction(l2, &inst1);
+
+  if (!Assembler::is_z_lb(inst1)                         &&
+      !Assembler::is_z_llgh(inst1)                       &&
+      !Assembler::is_z_lh(inst1)                         &&
+      !Assembler::is_z_l(inst1)                          &&
+      !Assembler::is_z_llgf(inst1)                       &&
+      !Assembler::is_z_lg(inst1)                         &&
+      !Assembler::is_z_le(inst1)                         &&
+      !Assembler::is_z_ld(inst1)                         &&
+      !Assembler::is_z_stc(inst1)                        &&
+      !Assembler::is_z_sth(inst1)                        &&
+      !Assembler::is_z_st(inst1)                         &&
+      !(Assembler::is_z_lgr(inst1) && UseCompressedOops) &&  // lgr is accepted only with UseCompressedOops.
+      !Assembler::is_z_stg(inst1)                        &&
+      !Assembler::is_z_ste(inst1)                        &&
+      !Assembler::is_z_std(inst1)) {
+    tty->cr();
+    tty->print_cr("NativeMovRegMem::verify(): verifying addr " PTR_FORMAT ": wrong or missing load or store at " PTR_FORMAT, p2i(l1), p2i(l2));
+    tty->cr();
+    ((NativeMovRegMem*)l1)->dump(64, "NativeMovRegMem::verify()");  // Label names this verifier, not NativeMovConstReg.
+    fatal("this is not a `NativeMovRegMem' site");
+  }
+}
+#endif // COMPILER1
+
+//-----------------------
+// N a t i v e J u m p
+//-----------------------
+
+void NativeJump::verify() {
+  const bool recognized = NativeJump::is_jump_at(addr_at(0));
+  if (!recognized) fatal("this is not a `NativeJump' site");
+}
+
+// Patch atomically with an illtrap. entry is not used in this body; dest only feeds the COMPILER2 assert.
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
+ ResourceMark rm;
+ int code_size = 2; // Number of bytes invalidated below; the CodeBuffer is sized one byte larger.
+ CodeBuffer cb(verified_entry, code_size + 1); // Constructed over verified_entry; presumably emission goes directly to that address - confirm in CodeBuffer.
+ MacroAssembler* a = new MacroAssembler(&cb);
+#ifdef COMPILER2
+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
+#endif
+ a->z_illtrap();
+ ICache::invalidate_range(verified_entry, code_size);
+}
+
+#undef LUCY_DBG
+
+//-------------------------------------
+// N a t i v e G e n e r a l J u m p
+//-------------------------------------
+
+#ifndef PRODUCT
+void NativeGeneralJump::verify() {
+  unsigned long insn;  // Filled in by get_instruction() from this address.
+  Assembler::get_instruction((address)this, &insn);
+  assert(MacroAssembler::is_branch_pcrelative_long(insn), "not a general jump instruction");
+}
+#endif
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { // Writes an always-taken brcl to entry at code_pos.
+ uint64_t instr = BRCL_ZOPC |
+ Assembler::uimm4(Assembler::bcondAlways, 8, 48) |
+ Assembler::simm32(RelAddr::pcrel_off32(entry, code_pos), 16, 48);
+ *(uint64_t*) code_pos = (instr << 16); // Must shift into big end, then the brcl will be written to code_pos. NOTE(review): 8-byte store whose low 16 bits are zero, so the 2 bytes behind the brcl are overwritten too - confirm callers tolerate this.
+ ICache::invalidate_range(code_pos, instruction_size);
+}
+
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { // Replaces the leading 6 bytes at instr_addr with those from code_buffer in one doubleword store.
+ assert(((intptr_t)instr_addr & (BytesPerWord-1)) == 0, "requirement for mt safe patching");
+ // Bytes_after_jump cannot change, because we own the Patching_lock.
+ assert(Patching_lock->owned_by_self(), "must hold lock to patch instruction");
+ intptr_t bytes_after_jump = (*(intptr_t*)instr_addr) & 0x000000000000ffffL; // 2 bytes after jump.
+ intptr_t load_const_bytes = (*(intptr_t*)code_buffer) & 0xffffffffffff0000L; // Upper 48 bits of the doubleword read from code_buffer.
+ *(intptr_t*)instr_addr = load_const_bytes | bytes_after_jump; // Single store: new leading bytes combined with the preserved trailing 2 bytes.
+ ICache::invalidate_range(instr_addr, 6);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/nativeInst_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,673 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Major contributions by AHa, JL, LS
+
+#ifndef CPU_S390_VM_NATIVEINST_S390_HPP
+#define CPU_S390_VM_NATIVEINST_S390_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+
+class NativeCall;
+class NativeFarCall;
+class NativeMovConstReg;
+class NativeJump;
+#ifndef COMPILER2 // NOTE(review): the definitions below use other guards (NativeMovRegMem: COMPILER1, NativeGeneralJump: none) - confirm intent.
+class NativeGeneralJump;
+class NativeMovRegMem;
+#endif // !COMPILER2
+class NativeInstruction;
+
+NativeCall* nativeCall_before(address return_address); // Finds the call whose return address is given.
+NativeCall* nativeCall_at(address instr); // Views the code starting at instr as a call.
+NativeFarCall* nativeFarCall_before(address return_address); // NOTE(review): no definition visible in this header - confirm.
+NativeFarCall* nativeFarCall_at(address instr);
+NativeMovConstReg* nativeMovConstReg_at(address address);
+NativeMovConstReg* nativeMovConstReg_before(address address); // NOTE(review): no definition visible in this header - confirm.
+NativeJump* nativeJump_at(address address);
+#ifndef COMPILER2 // Same guard as the matching forward declarations above.
+NativeMovRegMem* nativeMovRegMem_at (address address);
+NativeGeneralJump* nativeGeneralJump_at(address address);
+#endif // !COMPILER2
+NativeInstruction* nativeInstruction_at(address address);
+
+// We have interfaces for the following instructions:
+// - NativeInstruction
+// - NativeCall
+// - NativeFarCall
+// - NativeMovConstReg
+// - NativeMovRegMem
+// - NativeJump
+// - NativeGeneralJump
+// - NativeIllegalInstruction
+// The base class for different kinds of native instruction abstractions.
+// Provides the primitive operations to manipulate code relative to this.
+
+//-------------------------------------
+// N a t i v e I n s t r u c t i o n
+//-------------------------------------
+
+class NativeInstruction VALUE_OBJ_CLASS_SPEC { // Overlay type: 'this' is the address of the instruction bytes (see addr_at()).
+ friend class Relocation;
+
+ public:
+
+ enum z_specific_constants {
+ nop_instruction_size = 2 // NOTE(review): presumably bytes (one halfword) - confirm.
+ };
+
+ bool is_illegal();
+
+ // NOTE(review): "Bcrl" is presumably BRCL, currently the only accepted instruction here - confirm.
+ bool is_jump();
+
+ // We use an illtrap for marking a method as not_entrant or zombie.
+ bool is_sigill_zombie_not_entrant();
+
+ bool is_safepoint_poll() {
+ // Is the current instruction a POTENTIAL read access to the polling page?
+ // The instruction's current arguments are not checked!
+ return MacroAssembler::is_load_from_polling_page(addr_at(0));
+ }
+
+ address get_poll_address(void *ucontext) {
+ // Extract poll address from instruction and ucontext.
+ return MacroAssembler::get_poll_address(addr_at(0), ucontext);
+ }
+
+ uint get_poll_register() {
+ // Extract poll register from instruction.
+ return MacroAssembler::get_poll_register(addr_at(0));
+ }
+
+ bool is_memory_serialization(JavaThread *thread, void *ucontext) {
+ // Is the current instruction a write access of thread to the
+ // memory serialization page? The check is handed the doubleword read at offset 0.
+ return MacroAssembler::is_memory_serialization(long_at(0), thread, ucontext);
+ }
+
+ public:
+
+ // The output of __ breakpoint_trap().
+ static int illegal_instruction();
+
+ // The address of the currently processed instruction.
+ address instruction_address() const { return addr_at(0); }
+
+ protected:
+ address addr_at(int offset) const { return address(this) + offset; } // offset is added to the byte address of 'this'.
+
+ // z/Architecture terminology, used by the accessors below:
+ // halfword = 2 bytes
+ // word = 4 bytes
+ // doubleword = 8 bytes
+ unsigned short halfword_at(int offset) const { return *(unsigned short*)addr_at(offset); }
+ int word_at(int offset) const { return *(jint*)addr_at(offset); }
+ long long_at(int offset) const { return *(jlong*)addr_at(offset); }
+ void set_halfword_at(int offset, short i); // Deals with I-cache.
+ void set_word_at(int offset, int i); // Deals with I-cache.
+ void set_jlong_at(int offset, jlong i); // Deals with I-cache.
+ void set_addr_at(int offset, address x); // Deals with I-cache.
+
+ void print() const;
+ void print(const char* msg) const;
+ void dump() const;
+ void dump(const unsigned int range) const;
+ void dump(const unsigned int range, const char* msg) const;
+
+ public:
+
+ void verify();
+
+ // unit test stuff
+ static void test() {} // Override for testing.
+
+ friend NativeInstruction* nativeInstruction_at(address address) { // Creation; verify() runs in ASSERT builds only.
+ NativeInstruction* inst = (NativeInstruction*)address;
+ #ifdef ASSERT
+ inst->verify();
+ #endif
+ return inst;
+ }
+};
+
+//---------------------------------------------------
+// N a t i v e I l l e g a l I n s t r u c t i o n
+//---------------------------------------------------
+
+class NativeIllegalInstruction: public NativeInstruction { // Adds only a size constant and an insertion helper.
+ public:
+ enum z_specific_constants {
+ instruction_size = 2 // NOTE(review): presumably bytes (one halfword) - confirm.
+ };
+
+ // Insert illegal opcode at the address code_pos (implemented outside this header).
+ static void insert(address code_pos);
+};
+
+//-----------------------
+// N a t i v e C a l l
+//-----------------------
+
+// The NativeCall is an abstraction for accessing/manipulating call
+// instructions. It is used to manipulate inline caches, primitive &
+// dll calls, etc.
+
+// A native call, as defined by this abstraction layer, consists of
+// all instructions required to set up for and actually make the call.
+//
+// On z/Architecture, there exist three different forms of native calls:
+// 1) Call with pc-relative address, 1 instruction
+// The location of the target function is encoded as relative address
+// in the call instruction. The short form (BRAS) allows for a
+// 16-bit signed relative address (in 2-byte units). The long form
+// (BRASL) allows for a 32-bit signed relative address (in 2-byte units).
+// 2) Call with immediate address, 3 or 5 instructions.
+// The location of the target function is given by an immediate
+// constant which is loaded into a (scratch) register. Depending on
+// the hardware capabilities, this takes 2 or 4 instructions.
+// The call itself is then a "call by register"(BASR) instruction.
+// 3) Call with address from constant pool, 2(3) instructions (with dynamic TOC)
+// The location of the target function is stored in the constant pool
+// during compilation. From there it is loaded into a (scratch) register.
+// The call itself is then a "call by register"(BASR) instruction.
+//
+// When initially generating a call, the compiler uses form 2) (not
+// patchable, target address constant, e.g. runtime calls) or 3) (patchable,
+// target address might eventually get relocated). Later in the process,
+// a call could be transformed into form 1) (also patchable) during ShortenBranches.
+//
+// If a call is/has to be patchable, the instruction sequence generated for it
+// has to be constant in length. Excessive space, created e.g. by ShortenBranches,
+// is allocated to lower addresses and filled with nops. That is necessary to
+// keep the return address constant, no matter what form the call has.
+// Methods dealing with such calls have "patchable" as part of their name.
+
+class NativeCall: public NativeInstruction { // Recognizes call forms 2), 3) and 1), always checked in that order.
+ public:
+
+ static int get_IC_pos_in_java_to_interp_stub() {
+ return 0;
+ }
+
+ enum z_specific_constants {
+ instruction_size = 18, // Used in shared code for calls with reloc_info:
+ // value correct if !has_long_displacement_fast().
+ call_far_pcrelative_displacement_offset = 4, // Includes 2 bytes for the nop.
+ call_far_pcrelative_displacement_alignment = 4
+ };
+
+
+ // Maximum size (in bytes) of a call to an absolute address.
+ // Used when emitting call to deopt handler blob, which is a
+ // "load_const_call". The code pattern is:
+ // tmpReg := load_const(address); (* depends on CPU ArchLvl, but is otherwise constant *)
+ // call(tmpReg); (* basr, 2 bytes *)
+ static unsigned int max_instruction_size() {
+ return MacroAssembler::load_const_size() + MacroAssembler::call_byregister_size();
+ }
+
+ // instruction_address() is inherited from NativeInstruction.
+
+ // For the ordering of the checks see note at nativeCall_before.
+ address next_instruction_address() const {
+ address iaddr = instruction_address();
+
+ if (MacroAssembler::is_load_const_call(iaddr)) {
+ // Form 2): load_const, BASR
+ return addr_at(MacroAssembler::load_const_call_size());
+ }
+
+ if (MacroAssembler::is_load_const_from_toc_call(iaddr)) {
+ // Form 3): load_const_from_toc (LARL+LG/LGRL), BASR.
+ return addr_at(MacroAssembler::load_const_from_toc_call_size());
+ }
+
+ if (MacroAssembler::is_call_far_pcrelative(iaddr)) {
+ // Form 1): NOP, BRASL
+ // The BRASL (Branch Relative And Save Long) is patched into the space created
+ // by the load_const_from_toc_call sequence (typically (LARL-LG)/LGRL - BASR).
+ // The BRASL must be positioned such that its end is FW (4-byte) aligned (for atomic patching).
+ // It is achieved by aligning the end of the entire sequence on a 4byte boundary, by inserting
+ // a nop, if required, at the very beginning of the instruction sequence. The nop needs to
+ // be accounted for when calculating the next instruction address. The alignment takes place
+ // already when generating the original instruction sequence. The alignment requirement
+ // makes the size depend on location.
+ // The return address of the call must always be at the end of the instruction sequence.
+ // Inserting the extra alignment nop (or anything else) at the end is not an option.
+ // The patched-in brasl instruction is prepended with a nop to make it easier to
+ // distinguish from a load_const_from_toc_call sequence.
+ return addr_at(MacroAssembler::call_far_pcrelative_size());
+ }
+
+ ((NativeCall*)iaddr)->print(); // None of the three forms matched: print the site, then stop the VM.
+ guarantee(false, "Not a NativeCall site");
+ return NULL;
+ }
+
+ address return_address() const {
+ return next_instruction_address();
+ }
+
+ address destination() const;
+
+ void set_destination_mt_safe(address dest);
+
+ void verify_alignment() {} // Intentionally empty: nothing is checked here.
+ void verify();
+
+ // unit test stuff
+ static void test();
+
+ // Creation.
+ friend NativeCall* nativeCall_at(address instr) {
+ NativeCall* call;
+
+ // Make sure not to return garbage.
+ if (NativeCall::is_call_at(instr)) {
+ call = (NativeCall*)instr;
+ } else {
+ call = (NativeCall*)instr;
+ call->print();
+ guarantee(false, "Not a NativeCall site");
+ }
+
+#ifdef ASSERT
+ call->verify();
+#endif
+ return call;
+ }
+
+ // This is a very tricky function to implement. It involves stepping
+ // backwards in the instruction stream. On architectures with variable
+ // instruction length, this is a risky endeavor. From the return address,
+ // you do not know how far to step back to be at a location (your starting
+ // point) that will eventually bring you back to the return address.
+ // Furthermore, it may happen that there are multiple starting points.
+ //
+ // With only a few possible (allowed) code patterns, the risk is lower but
+ // does not diminish completely. Experience shows that there are code patterns
+ // which look like a load_const_from_toc_call @(return address-8), but in
+ // fact are a call_far_pcrelative @(return address-6). The other way around
+ // is possible as well, but was not knowingly observed so far.
+ //
+ // The unpredictability is caused by the pc-relative address field in both
+ // the call_far_pcrelative (BRASL) and the load_const_from_toc (LGRL)
+ // instructions. This field can contain an arbitrary bit pattern.
+ //
+ // Here is a real-world example:
+ // Mnemonics: <not a valid sequence> LGRL r10,<addr> BASR r14,r10
+ // Hex code: eb01 9008 007a c498 ffff c4a8 c0e5 ffc1 0dea
+ // Mnemonics: AGSI <mem>,I8 LGRL r9,<addr> BRASL r14,<addr> correct
+ //
+ // If you first check for a load_const_from_toc_call @(-8), you will find
+ // a false positive. In this example, it is obviously false, because the
+ // preceding bytes do not form a valid instruction pattern. If you first
+ // check for call_far_pcrelative @(-6), you get a true positive - in this
+ // case.
+ //
+ // The following remedy has been implemented/enforced:
+ // 1) Everywhere, the permissible code patterns are checked in the same
+ // sequence: Form 2) - Form 3) - Form 1).
+ // 2) The call_far_pcrelative, which would ideally be just one BRASL
+ // instruction, is always prepended with a NOP. This measure avoids
+ // ambiguities with load_const_from_toc_call.
+ friend NativeCall* nativeCall_before(address return_address) {
+ NativeCall *call = NULL;
+
+ // Make sure not to return garbage
+ address instp = return_address - MacroAssembler::load_const_call_size();
+ if (MacroAssembler::is_load_const_call(instp)) { // Form 2)
+ call = (NativeCall*)(instp); // load_const + basr
+ } else {
+ instp = return_address - MacroAssembler::load_const_from_toc_call_size();
+ if (MacroAssembler::is_load_const_from_toc_call(instp)) { // Form 3)
+ call = (NativeCall*)(instp); // load_const_from_toc + basr
+ } else {
+ instp = return_address - MacroAssembler::call_far_pcrelative_size();
+ if (MacroAssembler::is_call_far_pcrelative(instp)) { // Form 1)
+ call = (NativeCall*)(instp); // brasl (or nop + brasl)
+ } else {
+ call = (NativeCall*)(instp); // No form matched: print what is there, then stop the VM.
+ call->print();
+ guarantee(false, "Not a NativeCall site");
+ }
+ }
+ }
+
+#ifdef ASSERT
+ call->verify();
+#endif
+ return call;
+ }
+
+ // Ordering of checks 2) 3) 1) is relevant!
+ static bool is_call_at(address a) {
+ // Check plain instruction sequence. Do not care about filler or alignment nops.
+ bool b = MacroAssembler::is_load_const_call(a) || // load_const + basr
+ MacroAssembler::is_load_const_from_toc_call(a) || // load_const_from_toc + basr
+ MacroAssembler::is_call_far_pcrelative(a); // nop + brasl
+ return b;
+ }
+
+ // Ordering of checks 2) 3) 1) is relevant!
+ static bool is_call_before(address a) {
+ // Check plain instruction sequence. Do not care about filler or alignment nops.
+ bool b = MacroAssembler::is_load_const_call( a - MacroAssembler::load_const_call_size()) || // load_const + basr
+ MacroAssembler::is_load_const_from_toc_call(a - MacroAssembler::load_const_from_toc_call_size()) || // load_const_from_toc + basr
+ MacroAssembler::is_call_far_pcrelative( a - MacroAssembler::call_far_pcrelative_size()); // nop+brasl
+ return b;
+ }
+
+ static bool is_call_to(address instr, address target) {
+ // Check whether there is a `NativeCall' at the address `instr'
+ // calling to the address `target'.
+ return is_call_at(instr) && target == ((NativeCall *)instr)->destination();
+ }
+
+ bool is_pcrelative() { // True for form 1) only.
+ return MacroAssembler::is_call_far_pcrelative((address)this);
+ }
+};
+
+//-----------------------------
+// N a t i v e F a r C a l l
+//-----------------------------
+
+// The NativeFarCall is an abstraction for accessing/manipulating native
+// call-anywhere instructions.
+// Used to call native methods which may be loaded anywhere in the address
+// space, possibly out of reach of a call instruction.
+
+// Refer to NativeCall for a description of the supported call forms.
+
+class NativeFarCall: public NativeInstruction {
+
+ public:
+ // We use MacroAssembler::call_far_patchable() for implementing a
+ // call-anywhere instruction. Size and return address offset are taken from there.
+
+ static int instruction_size() { return MacroAssembler::call_far_patchable_size(); }
+ static int return_address_offset() { return MacroAssembler::call_far_patchable_ret_addr_offset(); }
+
+ // instruction_address() is inherited from NativeInstruction.
+
+ address next_instruction_address() const {
+ return addr_at(instruction_size());
+ }
+
+ address return_address() const {
+ return addr_at(return_address_offset());
+ }
+
+ // Returns the NativeFarCall's destination.
+ address destination();
+
+ // Sets the NativeFarCall's destination, not necessarily mt-safe.
+ // Used when relocating code.
+ void set_destination(address dest, int toc_offset);
+
+ // Checks whether instr points at a NativeFarCall instruction.
+ static bool is_far_call_at(address instr) {
+ // Use compound inspection function which, in addition to instruction sequence,
+ // also checks for expected nops and for instruction alignment.
+ return MacroAssembler::is_call_far_patchable_at(instr);
+ }
+
+ // Does the NativeFarCall implementation use a pc-relative encoding
+ // of the call destination?
+ // Used when relocating code.
+ bool is_pcrelative() {
+ address iaddr = (address)this;
+ assert(is_far_call_at(iaddr), "unexpected call type");
+ return MacroAssembler::is_call_far_patchable_pcrelative_at(iaddr);
+ }
+
+ void verify();
+
+ // Unit tests
+ static void test();
+
+ // Instantiates a NativeFarCall object starting at the given instruction
+ // address and returns the NativeFarCall object. verify() runs in ASSERT builds only.
+ inline friend NativeFarCall* nativeFarCall_at(address instr) {
+ NativeFarCall* call = (NativeFarCall*)instr;
+#ifdef ASSERT
+ call->verify();
+#endif
+ return call;
+ }
+};
+
+
+//-------------------------------------
+// N a t i v e M o v C o n s t R e g
+//-------------------------------------
+
+// An interface for accessing/manipulating native set_oop imm, reg instructions.
+// (Used to manipulate inlined data references, etc.)
+
+// A native move of a constant into a register, as defined by this abstraction layer,
+// deals with instruction sequences that load "quasi constant" oops into registers
+// for addressing. Those "quasi constant" oops eventually need
+// to be changed (i.e. patched) for a simple reason: objects might get moved
+// around in storage. Pc-relative oop addresses have to be patched also if the
+// reference location is moved. That happens when executable code is relocated.
+
+class NativeMovConstReg: public NativeInstruction {
+ public:
+
+ enum z_specific_constants {
+ instruction_size = 10 // Used in shared code for calls with reloc_info.
+ };
+
+ // instruction_address() is inherited from NativeInstruction.
+
+ // The current instruction might be located at an offset.
+ address next_instruction_address(int offset = 0) const;
+
+ // (The [set_]data accessor respects oop_type relocs also.)
+ intptr_t data() const;
+
+ // Patch data in code stream.
+ address set_data_plain(intptr_t x, CodeBlob *code);
+ // Patch data in code stream and oop pool if necessary.
+ void set_data(intptr_t x);
+
+ // Patch narrow oop / narrow klass constant in code stream.
+ void set_narrow_oop(intptr_t data);
+ void set_narrow_klass(intptr_t data);
+ void set_pcrel_addr(intptr_t addr, CompiledMethod *nm = NULL, bool copy_back_to_oop_pool=false);
+ void set_pcrel_data(intptr_t data, CompiledMethod *nm = NULL, bool copy_back_to_oop_pool=false);
+
+ void verify();
+
+ // unit test stuff
+ static void test();
+
+ // Creation. verify() runs in ASSERT builds only.
+ friend NativeMovConstReg* nativeMovConstReg_at(address address) {
+ NativeMovConstReg* test = (NativeMovConstReg*)address;
+ #ifdef ASSERT
+ test->verify();
+ #endif
+ return test;
+ }
+};
+
+
+#ifdef COMPILER1
+//---------------------------------
+// N a t i v e M o v R e g M e m
+//---------------------------------
+
+// Interface to manipulate a code sequence that performs a memory access (load/store).
+// The code is the patchable version of memory accesses generated by
+// LIR_Assembler::reg2mem() and LIR_Assembler::mem2reg().
+//
+// Loading the offset for the mem access is target of the manipulation.
+//
+// The instruction sequence looks like this:
+// iihf %r1,$bits1 ; load offset for mem access
+// iilf %r1,$bits2
+// [compress oop] ; optional, load only
+// load/store %r2,0(%r1,%r2) ; memory access
+
+class NativeMovRegMem;
+inline NativeMovRegMem* nativeMovRegMem_at (address address);
+class NativeMovRegMem: public NativeInstruction {
+ public:
+  // The access offset is the constant of the NativeMovConstReg sequence that starts this code.
+  intptr_t offset() const { return nativeMovConstReg_at(instruction_address())->data(); }
+  void set_offset(intptr_t x) { nativeMovConstReg_at(instruction_address())->set_data(x); }
+  // Moves the patched offset by radd_offset.
+  void add_offset_in_bytes(intptr_t radd_offset) {
+    const intptr_t adjusted = offset() + radd_offset;
+    set_offset(adjusted);
+  }
+  void verify();
+
+ private:
+  // Creation. Debug (ASSERT) builds verify the sequence before it is handed out.
+  friend inline NativeMovRegMem* nativeMovRegMem_at(address address) {
+    NativeMovRegMem* const mem_access = (NativeMovRegMem*)address;
+    #ifdef ASSERT
+    mem_access->verify();
+    #endif
+    return mem_access;
+  }
+};
+#endif // COMPILER1
+
+
+//-----------------------
+// N a t i v e J u m p
+//-----------------------
+
+
+// An interface for accessing/manipulating native jumps
+class NativeJump: public NativeInstruction {
+ public:
+  enum z_constants {
+    instruction_size = 2 // Size of z_illtrap().
+  };
+
+  // Upper bound (in bytes) for a jump to an absolute address.
+  // Such a jump is emitted as a "load_const_optimized_branch", e.g. for the branch
+  // to an exception handler. Sizing it with the plain load_const is pessimistic.
+  // Code pattern:
+  //   tmpReg := load_const(address);  (* varying size *)
+  //   jumpTo(tmpReg);                 (* bcr, 2 bytes *)
+  //
+  static unsigned int max_instruction_size() {
+    return MacroAssembler::load_const_size() + MacroAssembler::jump_byregister_size();
+  }
+
+  // The jump target is the constant of the NativeMovConstReg sequence at this address.
+  address jump_destination() const {
+    const intptr_t target = nativeMovConstReg_at(instruction_address())->data();
+    return (address)target;
+  }
+
+  void set_jump_destination(address dest) {
+    const intptr_t target = (intptr_t)dest;
+    nativeMovConstReg_at(instruction_address())->set_data(target);
+  }
+
+  // Creation. Debug (ASSERT) builds verify the instruction before it is handed out.
+  friend NativeJump* nativeJump_at(address address) {
+    NativeJump* const jump_site = (NativeJump*)address;
+    #ifdef ASSERT
+    jump_site->verify();
+    #endif
+    return jump_site;
+  }
+
+  // Accepts load_const_from_toc + br, then load_const + br (checked in that order).
+  static bool is_jump_at(address a) {
+    if (MacroAssembler::is_load_const_from_toc(a) &&
+        Assembler::is_z_br(*(short*)(a + MacroAssembler::load_const_from_toc_size()))) {
+      return true;
+    }
+    return MacroAssembler::is_load_const(a) &&
+           Assembler::is_z_br(*(short*)(a + MacroAssembler::load_const_size()));
+  }
+
+  void verify();
+
+  // Unit testing stuff
+  static void test();
+
+  // Insertion of native jump instruction.
+  static void insert(address code_pos, address entry);
+
+  // MT-safe insertion of native jump at verified method entry. The alignment check is empty.
+  static void check_verified_entry_alignment(address entry, address verified_entry) { }
+  static void patch_verified_entry(address entry, address verified_entry, address dest);
+};
+
+//-------------------------------------
+// N a t i v e G e n e r a l J u m p
+//-------------------------------------
+
+// Despite the name, handles only simple branches.
+// On ZARCH_64 BRCL only.
+class NativeGeneralJump;
+inline NativeGeneralJump* nativeGeneralJump_at(address address);
+class NativeGeneralJump: public NativeInstruction {
+ public:
+  enum ZARCH_specific_constants {
+    instruction_size = 6  // Bytes occupied by the branch.
+  };
+
+  address instruction_address() const { return addr_at(0); }
+
+  // Branch target: the instruction's own address plus its pc-relative offset.
+  address jump_destination() const {
+    const address self = addr_at(0);
+    return self + MacroAssembler::get_pcrel_offset(self);
+  }
+
+  // Creation. Debug (ASSERT) builds verify the instruction before it is handed out.
+  friend inline NativeGeneralJump* nativeGeneralJump_at(address addr) {
+    NativeGeneralJump* const branch = (NativeGeneralJump*)addr;
+#ifdef ASSERT
+    branch->verify();
+#endif
+    return branch;
+  }
+
+  // Insertion of native general jump instruction.
+  static void insert_unconditional(address code_pos, address entry);
+
+  void set_jump_destination(address dest) { Unimplemented(); }  // Retargeting is not implemented.
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+  void verify() PRODUCT_RETURN;
+};
+
+#endif // CPU_S390_VM_NATIVEINST_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/registerMap_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_REGISTERMAP_S390_HPP
+#define CPU_S390_VM_REGISTERMAP_S390_HPP
+
+// Machine-dependent implementation for register maps.
+
+ friend class frame;
+
+ private:
+ // This is the hook for finding a register in a "well-known" location,
+ // such as a register block of a predetermined format.
+ // Since there is none on this platform, we just return NULL for every reg.
+ address pd_location(VMReg reg) const {return NULL;}
+
+ // No platform-dependent (PD) state to clear or copy, so these hooks are empty.
+ void pd_clear() {}
+ void pd_initialize() {}
+ void pd_initialize_from(const RegisterMap* map) {}
+
+#endif // CPU_S390_VM_REGISTERMAP_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/registerSaver_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_REGISTERSAVER_S390_HPP
+#define CPU_S390_VM_REGISTERSAVER_S390_HPP
+
+class RegisterSaver {
+ // Used for saving volatile registers.
+
+ // Class declaration moved to separate file to make it available elsewhere.
+ // Implementation remains in sharedRuntime_s390.cpp
+
+ public:
+
+ // Set of registers to be saved.
+ typedef enum {
+ all_registers,
+ all_registers_except_r2,
+ all_integer_registers,
+ all_volatile_registers, // According to ABI calling convention.
+ arg_registers
+ } RegisterSet;
+
+ // The registers to handle are selected by reg_set in all functions below.
+ static int live_reg_save_size(RegisterSet reg_set);
+ static int live_reg_frame_size(RegisterSet reg_set);
+ // Specify the register that should be stored as the return pc in the current frame.
+ static OopMap* save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc = Z_R14);
+ static void restore_live_registers(MacroAssembler* masm, RegisterSet reg_set);
+
+ // Generate the OopMap (again, regs were saved before).
+ static OopMap* generate_oop_map(MacroAssembler* masm, RegisterSet reg_set);
+
+ // During deoptimization only the result registers need to be restored,
+ // all the other values have already been extracted.
+ static void restore_result_registers(MacroAssembler* masm);
+
+ // Constants and data structures:
+
+ typedef enum {
+ int_reg = 0,
+ float_reg = 1,
+ excluded_reg = 2, // Not saved/restored.
+ } RegisterType;
+
+ typedef enum {
+ reg_size = 8, // NOTE(review): presumably bytes per saved register - confirm.
+ half_reg_size = reg_size / 2,
+ } RegisterConstants;
+
+ // Remember type, number, and VMReg.
+ typedef struct {
+ RegisterType reg_type;
+ int reg_num;
+ VMReg vmreg;
+ } LiveRegType;
+
+};
+
+#endif // CPU_S390_VM_REGISTERSAVER_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/register_definitions_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Make sure the defines don't screw up the declarations later on in this file.
+#define DONT_USE_REGISTER_DEFINES
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/register.hpp"
+#include "register_s390.hpp"
+#include "interp_masm_s390.hpp"
+
+REGISTER_DEFINITION(Register, noreg); // NOTE(review): presumably emits the definition of 'noreg' - confirm against the macro.
+
+REGISTER_DEFINITION(FloatRegister, fnoreg); // Same macro, applied to the FloatRegister constant 'fnoreg'.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/register_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "register_s390.hpp"
+
+
+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * 2; // NOTE(review): factor 2 is presumably two slots per 64-bit register - confirm.
+const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
+ FloatRegisterImpl::number_of_registers * 2; // Continues counting from max_gpr, with the same factor 2.
+
+const char* RegisterImpl::name() const {  // Returns "Z_R<n>" for a valid register, "noreg" otherwise.
+  static const char* const names[number_of_registers] = {  // Static and const: no longer rebuilt on every call.
+    "Z_R0",  "Z_R1",  "Z_R2",  "Z_R3",  "Z_R4",  "Z_R5",  "Z_R6",  "Z_R7",
+    "Z_R8",  "Z_R9",  "Z_R10", "Z_R11", "Z_R12", "Z_R13", "Z_R14", "Z_R15"
+  };
+  return is_valid() ? names[encoding()] : "noreg";  // The table is indexed by the register's encoding.
+}
+
+const char* FloatRegisterImpl::name() const {  // Returns "Z_F<n>" for a valid register, "fnoreg" otherwise.
+  static const char* const names[number_of_registers] = {  // Static and const: no longer rebuilt on every call.
+    "Z_F0",  "Z_F1",  "Z_F2",  "Z_F3",  "Z_F4",  "Z_F5",  "Z_F6",  "Z_F7",  "Z_F8",  "Z_F9",
+    "Z_F10", "Z_F11", "Z_F12", "Z_F13", "Z_F14", "Z_F15"
+  };
+  return is_valid() ? names[encoding()] : "fnoreg";  // The table is indexed by the register's encoding.
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/register_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_REGISTER_S390_HPP
+#define CPU_S390_VM_REGISTER_S390_HPP
+
+#include "asm/register.hpp"
+#include "vm_version_s390.hpp"
+
+class Address;
+class VMRegImpl;
+
+typedef VMRegImpl* VMReg;
+
+// Use Register as shortcut.
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+// The implementation of integer registers for z/Architecture.
+
+// z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
+//
+// r0-r1 General purpose (volatile)
+// r2 Parameter and return value (volatile)
+// r3 TOC pointer (volatile)
+// r3-r5 Parameters (volatile)
+// r6 Parameter (nonvolatile)
+// r7-r11 Locals (nonvolatile)
+// r12 Local, often used as GOT pointer (nonvolatile)
+// r13 Local, often used as toc (nonvolatile)
+// r14 return address (volatile)
+// r15 stack pointer (nonvolatile)
+//
+// f0,f2,f4,f6 Parameters (volatile)
+// f1,f3,f5,f7 General purpose (volatile)
+// f8-f15 General purpose (nonvolatile)
+
+// Turns a register number into a Register. The number is carried in the
+// pointer value itself; no range check is done here.
+inline Register as_Register(int encoding) {
+  const long raw_value = (long)encoding;
+  return (Register)raw_value;
+}
+
+class RegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers     = 16,
+    number_of_arg_registers = 5
+  };
+
+  // Construction from a register number.
+  inline friend Register as_Register(int encoding);
+
+  inline VMReg as_VMReg();
+
+  // Accessors.
+  // Register number; asserts that this is a valid register.
+  int encoding() const {
+    assert(is_valid(), "invalid register");
+    return value();
+  }
+  const char* name() const;
+
+  // Testers.
+  // is_valid() and is_volatile() look only at the low 7 bits of value().
+  // A value masked that way is never negative, so a lower-bound check
+  // would be redundant.
+  bool is_valid() const {
+    return (value() & 0x7F) < number_of_registers;
+  }
+  bool is_even() const {
+    return !(encoding() & 1);
+  }
+  // Volatile registers are 0..5 and 14.
+  bool is_volatile() const {
+    const int regnum = value() & 0x7F;
+    return regnum <= 5 || regnum == 14;
+  }
+  bool is_nonvolatile() const {
+    if (!is_valid()) {
+      return false;
+    }
+    return !is_volatile();
+  }
+
+ public:
+  // Derived registers. Neighbours are computed modulo number_of_registers,
+  // i.e. the predecessor of register 0 is register 15 and vice versa.
+  Register predecessor() const {
+    const int wrap_mask = number_of_registers - 1;
+    return as_Register((encoding() - 1) & wrap_mask);
+  }
+  Register successor() const {
+    const int wrap_mask = number_of_registers - 1;
+    return as_Register((encoding() + 1) & wrap_mask);
+  }
+};
+
+// The integer registers of the z/Architecture.
+
+// noreg has encoding -1, which RegisterImpl::is_valid() rejects;
+// RegisterImpl::name() reports such a register as "noreg".
+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+
+// Z_R0 .. Z_R15 carry the encodings 0 .. 15.
+CONSTANT_REGISTER_DECLARATION(Register, Z_R0, (0));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R1, (1));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R2, (2));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R3, (3));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R4, (4));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R5, (5));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R6, (6));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R7, (7));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R8, (8));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R9, (9));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R10, (10));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R11, (11));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R12, (12));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
+CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));
+
+// Use ConditionRegister as shortcut
+class ConditionRegisterImpl;
+typedef ConditionRegisterImpl* ConditionRegister;
+
+// The implementation of condition register(s) for the z/Architecture.
+
+class ConditionRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers = 1
+  };
+
+  // Construction.
+  inline friend ConditionRegister as_ConditionRegister(int encoding);
+
+  inline VMReg as_VMReg();
+
+  // Testers.
+  bool is_valid() const {
+    return value() >= 0 && value() < number_of_registers;
+  }
+  // The condition register is always reported as volatile.
+  bool is_volatile()    const { return true; }
+  bool is_nonvolatile() const { return false; }
+
+  // Accessors.
+  // Register number; asserts that this is a valid register.
+  int encoding() const {
+    assert(is_valid(), "invalid register");
+    return value();
+  }
+};
+
+// Turns a condition register number into a ConditionRegister.
+// Asserts that the number is within [0, number_of_registers).
+inline ConditionRegister as_ConditionRegister(int encoding) {
+  assert(encoding >= 0 && encoding < ConditionRegisterImpl::number_of_registers, "bad condition register encoding");
+  const long raw_value = (long)encoding;
+  return (ConditionRegister)raw_value;
+}
+
+// The condition register of the z/Architecture.
+
+CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
+
+// Because z/Architecture has so many registers, #define'ing values for them is
+// beneficial in code size and is worth the cost of some of the
+// dangers of defines.
+// If a particular file has a problem with these defines then it's possible
+// to turn them off in that file by defining
+// DONT_USE_REGISTER_DEFINES. Register_definition_s390.cpp does that
+// so that it's able to provide real definitions of these registers
+// for use in debuggers and such.
+
+#ifndef DONT_USE_REGISTER_DEFINES
+// Each register name expands to a cast of the matching
+// <name>_RegisterEnumValue constant (presumably provided by
+// CONSTANT_REGISTER_DECLARATION, which is defined outside this file).
+#define noreg ((Register)(noreg_RegisterEnumValue))
+
+#define Z_R0 ((Register)(Z_R0_RegisterEnumValue))
+#define Z_R1 ((Register)(Z_R1_RegisterEnumValue))
+#define Z_R2 ((Register)(Z_R2_RegisterEnumValue))
+#define Z_R3 ((Register)(Z_R3_RegisterEnumValue))
+#define Z_R4 ((Register)(Z_R4_RegisterEnumValue))
+#define Z_R5 ((Register)(Z_R5_RegisterEnumValue))
+#define Z_R6 ((Register)(Z_R6_RegisterEnumValue))
+#define Z_R7 ((Register)(Z_R7_RegisterEnumValue))
+#define Z_R8 ((Register)(Z_R8_RegisterEnumValue))
+#define Z_R9 ((Register)(Z_R9_RegisterEnumValue))
+#define Z_R10 ((Register)(Z_R10_RegisterEnumValue))
+#define Z_R11 ((Register)(Z_R11_RegisterEnumValue))
+#define Z_R12 ((Register)(Z_R12_RegisterEnumValue))
+#define Z_R13 ((Register)(Z_R13_RegisterEnumValue))
+#define Z_R14 ((Register)(Z_R14_RegisterEnumValue))
+#define Z_R15 ((Register)(Z_R15_RegisterEnumValue))
+
+// The single condition register.
+#define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
+#endif // DONT_USE_REGISTER_DEFINES
+
+// Use FloatRegister as shortcut
+class FloatRegisterImpl;
+typedef FloatRegisterImpl* FloatRegister;
+
+// The implementation of float registers for the z/Architecture.
+
+// Turns a float register number into a FloatRegister. The number is carried
+// in the pointer value itself; no range check is done here.
+inline FloatRegister as_FloatRegister(int encoding) {
+  const long raw_value = (long)encoding;
+  return (FloatRegister)raw_value;
+}
+
+class FloatRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers     = 16,
+    number_of_arg_registers = 4
+  };
+
+  // Construction from a register number.
+  inline friend FloatRegister as_FloatRegister(int encoding);
+
+  inline VMReg as_VMReg();
+
+  // Accessors.
+  // Register number; asserts that this is a valid register.
+  int encoding() const {
+    assert(is_valid(), "invalid register");
+    return value();
+  }
+
+  // Testers.
+  // is_valid() checks the full value(); the volatility testers below
+  // look only at its low 7 bits (which are never negative).
+  bool is_valid() const {
+    return value() >= 0 && value() < number_of_registers;
+  }
+  // Registers 0..7 are volatile.
+  bool is_volatile() const {
+    const int regnum = value() & 0x7F;
+    return regnum <= 7;
+  }
+  // Registers 8..15 are nonvolatile.
+  bool is_nonvolatile() const {
+    const int regnum = value() & 0x7F;
+    return regnum >= 8 && regnum <= 15;
+  }
+
+  const char* name() const;
+
+  // The register with the next higher number; no wrap-around is applied.
+  FloatRegister successor() const {
+    return as_FloatRegister(encoding() + 1);
+  }
+};
+
+// The float registers of z/Architecture.
+
+// fnoreg has encoding -1, which FloatRegisterImpl::is_valid() rejects;
+// FloatRegisterImpl::name() reports such a register as "fnoreg".
+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1));
+
+// Z_F0 .. Z_F15 carry the encodings 0 .. 15.
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F0, (0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F1, (1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F2, (2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F3, (3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F4, (4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F5, (5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F6, (6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F7, (7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F8, (8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F9, (9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F10, (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F11, (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F12, (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F13, (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F14, (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15));
+
+#ifndef DONT_USE_REGISTER_DEFINES
+// Each float register name expands to a cast of the matching
+// <name>_FloatRegisterEnumValue constant. Defining
+// DONT_USE_REGISTER_DEFINES suppresses these macros.
+#define fnoreg ((FloatRegister)(fnoreg_FloatRegisterEnumValue))
+#define Z_F0 ((FloatRegister)( Z_F0_FloatRegisterEnumValue))
+#define Z_F1 ((FloatRegister)( Z_F1_FloatRegisterEnumValue))
+#define Z_F2 ((FloatRegister)( Z_F2_FloatRegisterEnumValue))
+#define Z_F3 ((FloatRegister)( Z_F3_FloatRegisterEnumValue))
+#define Z_F4 ((FloatRegister)( Z_F4_FloatRegisterEnumValue))
+#define Z_F5 ((FloatRegister)( Z_F5_FloatRegisterEnumValue))
+#define Z_F6 ((FloatRegister)( Z_F6_FloatRegisterEnumValue))
+#define Z_F7 ((FloatRegister)( Z_F7_FloatRegisterEnumValue))
+#define Z_F8 ((FloatRegister)( Z_F8_FloatRegisterEnumValue))
+#define Z_F9 ((FloatRegister)( Z_F9_FloatRegisterEnumValue))
+#define Z_F10 ((FloatRegister)( Z_F10_FloatRegisterEnumValue))
+#define Z_F11 ((FloatRegister)( Z_F11_FloatRegisterEnumValue))
+#define Z_F12 ((FloatRegister)( Z_F12_FloatRegisterEnumValue))
+#define Z_F13 ((FloatRegister)( Z_F13_FloatRegisterEnumValue))
+#define Z_F14 ((FloatRegister)( Z_F14_FloatRegisterEnumValue))
+#define Z_F15 ((FloatRegister)( Z_F15_FloatRegisterEnumValue))
+#endif // DONT_USE_REGISTER_DEFINES
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+    // Two halves for every general purpose and every float register,
+    // plus one entry for the condition code register.
+    number_of_registers = 2 * (RegisterImpl::number_of_registers +
+                               FloatRegisterImpl::number_of_registers) + 1
+  };
+  // Defined in register_s390.cpp.
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+// Single, Double and Quad fp reg classes. These exist to map the ADLC
+// encoding for a floating point register, to the FloatRegister number
+// desired by the macroassembler. A FloatRegister is a number between
+// 0 and 31 passed around as a pointer. For ADLC, an fp register encoding
+// is the actual bit encoding used by the z/Architecture hardware. When ADLC used
+// the macroassembler to generate an instruction that references, e.g., a
+// double fp reg, it passed the bit encoding to the macroassembler via
+// as_FloatRegister, which, for double regs > 30, returns an illegal
+// register number.
+//
+// Therefore we provide the following classes for use by ADLC. Their
+// sole purpose is to convert from z/Architecture register encodings to FloatRegisters.
+// At some future time, we might replace FloatRegister with these classes,
+// hence the definitions of as_xxxFloatRegister as class methods rather
+// than as external inline routines.
+
+class SingleFloatRegisterImpl;
+typedef SingleFloatRegisterImpl *SingleFloatRegister;
+
+class SingleFloatRegisterImpl {
+ public:
+  // Converts a single float register encoding into a FloatRegister.
+  // The encoding is passed through unchanged.
+  friend FloatRegister as_SingleFloatRegister(int encoding) {
+    assert(encoding < 32, "bad single float register encoding");
+    FloatRegister freg = as_FloatRegister(encoding);
+    return freg;
+  }
+};
+
+class DoubleFloatRegisterImpl;
+typedef DoubleFloatRegisterImpl *DoubleFloatRegister;
+
+class DoubleFloatRegisterImpl {
+ public:
+  // Converts a double float register encoding into a FloatRegister:
+  // bit 0 of the encoding is moved to bit 5, bits 1..4 stay in place.
+  friend FloatRegister as_DoubleFloatRegister(int encoding) {
+    assert(encoding < 32, "bad double float register encoding");
+    const int moved_low_bit = (encoding & 1) << 5;
+    const int kept_bits     = encoding & 0x1e;
+    return as_FloatRegister(moved_low_bit | kept_bits);
+  }
+};
+
+class QuadFloatRegisterImpl;
+typedef QuadFloatRegisterImpl *QuadFloatRegister;
+
+class QuadFloatRegisterImpl {
+ public:
+  // Converts a quad float register encoding into a FloatRegister:
+  // bit 0 of the encoding is moved to bit 5, bits 2..4 stay in place.
+  // Bit 1 of the encoding must be clear.
+  friend FloatRegister as_QuadFloatRegister(int encoding) {
+    assert(encoding < 32 && ((encoding & 2) == 0), "bad quad float register encoding");
+    const int moved_low_bit = (encoding & 1) << 5;
+    const int kept_bits     = encoding & 0x1c;
+    return as_FloatRegister(moved_low_bit | kept_bits);
+  }
+};
+
+
+// Common register declarations used in assembler code.
+// Exception oop and exception pc.
+REGISTER_DECLARATION(Register, Z_EXC_OOP, Z_R2);
+REGISTER_DECLARATION(Register, Z_EXC_PC, Z_R3);
+// Return value. Z_RET, Z_ARG1 and Z_EXC_OOP all name Z_R2.
+REGISTER_DECLARATION(Register, Z_RET, Z_R2);
+// Integer argument registers Z_R2 .. Z_R6.
+REGISTER_DECLARATION(Register, Z_ARG1, Z_R2);
+REGISTER_DECLARATION(Register, Z_ARG2, Z_R3);
+REGISTER_DECLARATION(Register, Z_ARG3, Z_R4);
+REGISTER_DECLARATION(Register, Z_ARG4, Z_R5);
+REGISTER_DECLARATION(Register, Z_ARG5, Z_R6);
+// Stack pointer.
+REGISTER_DECLARATION(Register, Z_SP, Z_R15);
+// Float return value and float argument registers (Z_F0, Z_F2, Z_F4, Z_F6).
+REGISTER_DECLARATION(FloatRegister, Z_FRET, Z_F0);
+REGISTER_DECLARATION(FloatRegister, Z_FARG1, Z_F0);
+REGISTER_DECLARATION(FloatRegister, Z_FARG2, Z_F2);
+REGISTER_DECLARATION(FloatRegister, Z_FARG3, Z_F4);
+REGISTER_DECLARATION(FloatRegister, Z_FARG4, Z_F6);
+
+#ifndef DONT_USE_REGISTER_DEFINES
+// Macro forms of the aliases declared above; each one maps to the same
+// register as its REGISTER_DECLARATION counterpart.
+#define Z_EXC_OOP AS_REGISTER(Register, Z_R2)
+#define Z_EXC_PC AS_REGISTER(Register, Z_R3)
+#define Z_RET AS_REGISTER(Register, Z_R2)
+#define Z_ARG1 AS_REGISTER(Register, Z_R2)
+#define Z_ARG2 AS_REGISTER(Register, Z_R3)
+#define Z_ARG3 AS_REGISTER(Register, Z_R4)
+#define Z_ARG4 AS_REGISTER(Register, Z_R5)
+#define Z_ARG5 AS_REGISTER(Register, Z_R6)
+#define Z_SP AS_REGISTER(Register, Z_R15)
+#define Z_FRET AS_REGISTER(FloatRegister, Z_F0)
+#define Z_FARG1 AS_REGISTER(FloatRegister, Z_F0)
+#define Z_FARG2 AS_REGISTER(FloatRegister, Z_F2)
+#define Z_FARG3 AS_REGISTER(FloatRegister, Z_F4)
+#define Z_FARG4 AS_REGISTER(FloatRegister, Z_F6)
+#endif
+
+// Register declarations to be used in frame manager assembly code.
+// Use only non-volatile registers in order to keep values across C-calls.
+// NOTE(review): Z_tos (Z_R2), Z_ftos (Z_F0) and Z_bytecode (Z_R14) are
+// volatile according to RegisterImpl/FloatRegisterImpl::is_volatile(), so
+// the rule above evidently does not cover them.
+
+// Register to cache the integer value on top of the operand stack.
+REGISTER_DECLARATION(Register, Z_tos, Z_R2);
+// Register to cache the fp value on top of the operand stack.
+REGISTER_DECLARATION(FloatRegister, Z_ftos, Z_F0);
+// Expression stack pointer in interpreted java frame.
+REGISTER_DECLARATION(Register, Z_esp, Z_R7);
+// Address of current thread.
+REGISTER_DECLARATION(Register, Z_thread, Z_R8);
+// Address of current method. only valid in interpreter_entry.
+// (Z_method, Z_inline_cache and Z_fp all name Z_R9.)
+REGISTER_DECLARATION(Register, Z_method, Z_R9);
+// Inline cache register. used by c1 and c2.
+REGISTER_DECLARATION(Register, Z_inline_cache,Z_R9);
+// Frame pointer of current interpreter frame. only valid while
+// executing bytecodes.
+REGISTER_DECLARATION(Register, Z_fp, Z_R9);
+// Address of the locals array in an interpreted java frame.
+REGISTER_DECLARATION(Register, Z_locals, Z_R12);
+// Bytecode pointer.
+REGISTER_DECLARATION(Register, Z_bcp, Z_R13);
+// Bytecode which is dispatched (short lived!).
+REGISTER_DECLARATION(Register, Z_bytecode, Z_R14);
+#ifndef DONT_USE_REGISTER_DEFINES
+// Macro forms of the aliases declared above.
+#define Z_tos AS_REGISTER(Register, Z_R2)
+#define Z_ftos AS_REGISTER(FloatRegister, Z_F0)
+#define Z_esp AS_REGISTER(Register, Z_R7)
+#define Z_thread AS_REGISTER(Register, Z_R8)
+#define Z_method AS_REGISTER(Register, Z_R9)
+#define Z_inline_cache AS_REGISTER(Register, Z_R9)
+#define Z_fp AS_REGISTER(Register, Z_R9)
+#define Z_locals AS_REGISTER(Register, Z_R12)
+#define Z_bcp AS_REGISTER(Register, Z_R13)
+#define Z_bytecode AS_REGISTER(Register, Z_R14)
+#endif
+
+// Temporary registers to be used within frame manager. We can use
+// the nonvolatiles because the call stub has saved them.
+// Use only non-volatile registers in order to keep values across C-calls.
+// Note: Z_tmp_3 and Z_tmp_4 name the same registers (Z_R12, Z_R13) as
+// Z_locals and Z_bcp.
+REGISTER_DECLARATION(Register, Z_tmp_1, Z_R10);
+REGISTER_DECLARATION(Register, Z_tmp_2, Z_R11);
+REGISTER_DECLARATION(Register, Z_tmp_3, Z_R12);
+REGISTER_DECLARATION(Register, Z_tmp_4, Z_R13);
+#ifndef DONT_USE_REGISTER_DEFINES
+// Macro forms of the aliases declared above.
+#define Z_tmp_1 AS_REGISTER(Register, Z_R10)
+#define Z_tmp_2 AS_REGISTER(Register, Z_R11)
+#define Z_tmp_3 AS_REGISTER(Register, Z_R12)
+#define Z_tmp_4 AS_REGISTER(Register, Z_R13)
+#endif
+
+// Scratch registers are volatile.
+// (Z_R0, Z_R1 and Z_F1 all fall into the ranges the is_volatile() testers accept.)
+REGISTER_DECLARATION(Register, Z_R0_scratch, Z_R0);
+REGISTER_DECLARATION(Register, Z_R1_scratch, Z_R1);
+REGISTER_DECLARATION(FloatRegister, Z_fscratch_1, Z_F1);
+#ifndef DONT_USE_REGISTER_DEFINES
+// Macro forms of the aliases declared above.
+#define Z_R0_scratch AS_REGISTER(Register, Z_R0)
+#define Z_R1_scratch AS_REGISTER(Register, Z_R1)
+#define Z_fscratch_1 AS_REGISTER(FloatRegister, Z_F1)
+#endif
+
+
+#endif // CPU_S390_VM_REGISTER_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/relocInfo_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "code/relocInfo.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/safepoint.hpp"
+
+// Patches the value x into the instruction (sequence) this relocation refers
+// to. How the value is embedded is selected by the relocation format.
+//
+//   x           - the new value.
+//   o           - offset added to x; must be 0 because splitting of
+//                 relocations is not supported.
+//   verify_only - if true, nothing is patched and only the format is
+//                 sanity-checked (reading the value back is not implemented).
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
+  // we don't support splitting of relocations, so o must be zero:
+  assert(o == 0, "tried to split relocations");
+  if (!verify_only) {
+    switch (format()) {
+      case relocInfo::uncompressed_format:
+        nativeMovConstReg_at(addr())->set_data_plain(((intptr_t)x) + o, code());
+        break;
+      case relocInfo::compressed_format:
+        // Compressed values are either narrow klasses or narrow oops.
+        if (type() == relocInfo::metadata_type) {
+          nativeMovConstReg_at(addr())->set_narrow_klass(((intptr_t)x) + o);
+        } else if (type() == relocInfo::oop_type) {
+          nativeMovConstReg_at(addr())->set_narrow_oop(((intptr_t)x) + o);
+        } else {
+          // The message names the format actually handled by this case.
+          guarantee(false, "bad relocInfo type for relocInfo::compressed_format");
+        }
+        break;
+      case relocInfo::pcrel_addr_format:  // patch target location
+        nativeMovConstReg_at(addr())->set_pcrel_addr(((intptr_t)x) + o, code());
+        break;
+      case relocInfo::pcrel_data_format:  // patch data at target location
+        nativeMovConstReg_at(addr())->set_pcrel_data(((intptr_t)x) + o, code());
+        break;
+      default:
+        assert(false, "not a valid relocInfo format");
+        break;
+    }
+  } else {
+    // TODO: Reading of narrow oops out of code stream is not implemented
+    // (see nativeMovConstReg::data()). Implement this if you want to verify.
+    // assert(x == (address) nativeMovConstReg_at(addr())->data(), "Instructions must match");
+    switch (format()) {
+      case relocInfo::uncompressed_format:
+      case relocInfo::compressed_format:
+      case relocInfo::pcrel_addr_format:
+      case relocInfo::pcrel_data_format:
+        // Known format; nothing is verified yet.
+        break;
+      default:
+        assert(false, "not a valid relocInfo format");
+        break;
+    }
+  }
+}
+
+// Determines the destination of the call this relocation refers to.
+//
+//   orig_addr - location of the call in the unmoved code, or NULL.
+//
+// Returns the call target. For a far call that is not pc-relative while
+// ShortenBranches is off, (address)(-1) is returned instead.
+address Relocation::pd_call_destination(address orig_addr) {
+  address inst_addr = addr();
+
+  if (NativeFarCall::is_far_call_at(inst_addr)) {
+    if (!ShortenBranches) {
+      if (MacroAssembler::is_call_far_pcrelative(inst_addr)) {
+        address a1 = MacroAssembler::get_target_addr_pcrel(orig_addr+MacroAssembler::nop_size());
+#ifdef ASSERT
+        // Cross-check both ways of decoding the target, at both locations.
+        address a2 = MacroAssembler::get_target_addr_pcrel(inst_addr+MacroAssembler::nop_size());
+        address a3 = nativeFarCall_at(orig_addr)->destination();
+        address a4 = nativeFarCall_at(inst_addr)->destination();
+        if ((a1 != a3) || (a2 != a4)) {
+          unsigned int range = 128;
+          Assembler::dump_code_range(tty, inst_addr, range, "pc-relative call w/o ShortenBranches?");
+          Assembler::dump_code_range(tty, orig_addr, range, "pc-relative call w/o ShortenBranches?");
+          assert(false, "pc-relative call w/o ShortenBranches?");
+        }
+#endif
+        return a1;
+      }
+      return (address)(-1);
+    }
+    // If the original location is known, the destination is always read
+    // there: must access location (in CP) where destination is stored in
+    // unmoved code, because load from CP is pc-relative. The former
+    // distinction between pc-relative patchable calls and all other far
+    // calls selected the very same location in both branches and has
+    // been folded.
+    NativeFarCall* call = nativeFarCall_at((orig_addr == NULL) ? inst_addr : orig_addr);
+    return call->destination();
+  }
+
+  if (NativeCall::is_call_at(inst_addr)) {
+    NativeCall* call = nativeCall_at(inst_addr);
+    if (call->is_pcrelative()) {
+      // Compensate for the distance between the two code locations.
+      intptr_t off = inst_addr - orig_addr;
+      return (address) (call->destination()-off);
+    }
+  }
+
+  // Otherwise the destination is read via nativeMovConstReg.
+  return (address) nativeMovConstReg_at(inst_addr)->data();
+}
+
+// Redirects the call this relocation refers to (at addr()) to destination x.
+// The instruction kind found at addr() decides how the target is patched.
+void Relocation::pd_set_call_destination(address x) {
+ address inst_addr = addr();
+
+ if (NativeFarCall::is_far_call_at(inst_addr)) {
+ if (!ShortenBranches) {
+ if (MacroAssembler::is_call_far_pcrelative(inst_addr)) {
+ address a1 = MacroAssembler::get_target_addr_pcrel(inst_addr+MacroAssembler::nop_size());
+#ifdef ASSERT
+ // Both ways of decoding the current target must agree.
+ address a3 = nativeFarCall_at(inst_addr)->destination();
+ if (a1 != a3) {
+ unsigned int range = 128;
+ Assembler::dump_code_range(tty, inst_addr, range, "pc-relative call w/o ShortenBranches?");
+ assert(false, "pc-relative call w/o ShortenBranches?");
+ }
+#endif
+ nativeFarCall_at(inst_addr)->set_destination(x, 0);
+ return;
+ }
+ // Counterpart of pd_call_destination(), which returns (address)(-1)
+ // for far calls that are not pc-relative when ShortenBranches is off.
+ // Nothing is patched in this case.
+ assert(x == (address)-1, "consistency check");
+ return;
+ }
+ // Only runtime_call_w_cp relocations supply a constant pool offset.
+ int toc_offset = -1;
+ if (type() == relocInfo::runtime_call_w_cp_type) {
+ toc_offset = ((runtime_call_w_cp_Relocation *)this)->get_constant_pool_offset();
+ }
+ if (toc_offset>=0) {
+ NativeFarCall* call = nativeFarCall_at(inst_addr);
+ call->set_destination(x, toc_offset);
+ return;
+ }
+ // No valid constant pool offset: fall through to the checks below.
+ }
+
+ if (NativeCall::is_call_at(inst_addr)) {
+ NativeCall* call = nativeCall_at(inst_addr);
+ if (call->is_pcrelative()) {
+ call->set_destination_mt_safe(x);
+ return;
+ }
+ }
+
+ // constant is absolute, must use x
+ nativeMovConstReg_at(inst_addr)->set_data(((intptr_t)x));
+}
+
+
+// Writes newTarget into the oop cell and/or metadata cell referenced by the
+// oop/metadata relocations which the RelocIterator yields for begin..end.
+// Returns true if a cell was updated, false otherwise (also if the code
+// does not belong to an nmethod).
+bool relocInfo::update_oop_pool(address begin, address end, address newTarget, CodeBlob* cb) {
+
+  // Look up the CodeBlob ourselves unless the caller supplied it.
+  if (cb == NULL) {
+    cb = CodeCache::find_blob(begin);
+  } else {
+    assert(cb == CodeCache::find_blob(begin), "consistency");
+  }
+
+  // 'RelocIterator' requires an nmethod
+  nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL;
+  if (nm == NULL) {
+    return false;
+  }
+
+  RelocIterator iter(nm, begin, end);
+  oop*       patched_oop_cell      = NULL;
+  Metadata** patched_metadata_cell = NULL;
+  while (iter.next()) {
+    if (iter.type() == relocInfo::oop_type) {
+      oop_Relocation *r = iter.oop_reloc();
+      if (patched_oop_cell != NULL) {
+        // Every further oop relocation has to refer to the cell already patched.
+        assert(patched_oop_cell == r->oop_addr(), "must be only one set-oop here");
+      } else {
+        patched_oop_cell  = r->oop_addr();
+        *patched_oop_cell = (oop)newTarget;
+      }
+    } else if (iter.type() == relocInfo::metadata_type) {
+      metadata_Relocation *r = iter.metadata_reloc();
+      if (patched_metadata_cell != NULL) {
+        // Every further metadata relocation has to refer to the cell already patched.
+        assert(patched_metadata_cell == r->metadata_addr(), "must be only one set-metadata here");
+      } else {
+        patched_metadata_cell  = r->metadata_addr();
+        *patched_metadata_cell = (Metadata*)newTarget;
+      }
+    }
+  }
+  return (patched_oop_cell != NULL) || (patched_metadata_cell != NULL);
+}
+
+
+// Must never be called here: ShouldNotReachHere() is invoked
+// unconditionally. The return statement only satisfies the signature.
+address* Relocation::pd_address_in_code() {
+  ShouldNotReachHere();
+  return NULL;
+}
+
+// Reads the value embedded in the code at addr() via nativeMovConstReg
+// and returns it as an address.
+address Relocation::pd_get_address_from_code() {
+  address instr_addr = addr();
+  return (address) nativeMovConstReg_at(instr_addr)->data();
+}
+
+// No fix-up is performed for poll relocations after code has been moved;
+// the body is empty.
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+}
+
+// No platform-specific value fix-up is performed for metadata relocations;
+// the body is empty and x is ignored.
+void metadata_Relocation::pd_fix_value(address x) {
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/relocInfo_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_RELOCINFO_S390_HPP
+#define CPU_S390_VM_RELOCINFO_S390_HPP
+
+//----------------------------
+// relocInfo layout
+//----------------------------
+
+// This description should be contained in code/relocInfo.hpp
+// but was put here to minimize shared code diffs.
+
+// Relocation information for a nmethod is stored in compressed
+// form in an array of element type short int (16 bits).
+// Each array element constitutes one relocInfo record.
+// The layout of one such record is described here.
+
+// +------------+---+---+------------------------------+
+// | type | fmt | offset/offset_unit |
+// +------------+---+---+------------------------------+
+//
+// |<-- value_width (16) ----------------------------->|
+// |<type_width>|<-- nontype_width (12) -------------->|
+// (4)
+// | |<--+-->|<-- offset_width (10) ------->|
+// / \
+// / (2) \
+// /<--format->\
+// | width |
+
+
+// only for type == data_prefix_tag:
+// +------------+---+---+------------------------------+
+// | type | | data |
+// +------------+---+---+------------------------------+
+// | 15 |<->|<-- datalen_width (11) ---------->|
+// |
+// +--datalen_tag (1)
+
+// relocType
+// The type field holds a value of relocType (which is
+// an enum of all possible relocation types). Currently,
+// there are 16 distinct relocation types, requiring
+// type_width to be (at least) 4.
+// relocFormat
+// The format field holds a value of relocFormat (which is
+// an enum of all possible relocation formats). Currently,
+// there are 4 distinct relocation formats, requiring
+// format_width to be (at least) 2.
+// offset
+// Each relocInfo is related to one specific address in the CodeBlob.
+// The address always points to the first byte of the target instruction.
+// It does NOT refer directly to the relocation subfield or embedded constant.
+// offset contains the distance of this relocInfo from the previous one.
+// offset is scaled by offset_unit (the platform-specific instruction
+// alignment requirement) to maximize the encodable distance.
+// To obtain the absolute address in the CodeBlob the relocInfo is
+// related to, you have to iterate over all relocInfos from the
+// beginning, and then use RelocIterator::addr() to get the address.
+
+// relocType == data_prefix_tag
+// These are relocInfo records containing inline data that belongs to
+// the next non-data relocInfo record. Usage of that inline data is
+// specific and private to that relocInfo record.
+// For details refer to code/relocInfo.hpp
+
+
+ // machine-dependent parts of class relocInfo
+ private:
+ enum {
+ // Instructions are HW (2-byte) aligned on z/Architecture.
+ offset_unit = 2,
+
+ // Encodes Assembler::disp32_operand vs. Assembler::imm64_operand.
+ // (Assembler::call32_operand is used on call instructions only.)
+ // NOTE(review): the operand kinds named above are not among the
+ // relocFormat values declared below; the remark may have been carried
+ // over from another platform - confirm.
+ // Two bits are enough for the four relocFormat values (0..3) below.
+ format_width = 2
+ };
+
+ public:
+
+ enum relocFormat {
+ // no_format and uncompressed_format share the value 0.
+ no_format = 0,
+ uncompressed_format = 0, // Relocation is for a regular oop.
+ compressed_format = 1, // Relocation is for a narrow (compressed) oop or klass.
+ // Similar to relocInfo::narrow_oop_in_const.
+ pcrel_addr_format = 2, // Relocation is for the target LOCATION of a pc-relative instruction.
+ pcrel_data_format = 3 // Relocation is for the target data of a pc-relative instruction.
+ };
+
+ // Store the new target address into an oop_Relocation cell, if any.
+ // Return indication if update happened.
+ // If cb is NULL, the CodeBlob is looked up from 'begin'.
+ static bool update_oop_pool(address begin, address end, address newTarget, CodeBlob* cb);
+
+#endif // CPU_S390_VM_RELOCINFO_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/runtime_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#ifdef COMPILER2
+#include "asm/macroAssembler.inline.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "code/vmreg.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_s390.hpp"
+#include "opto/runtime.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "vmreg_s390.inline.hpp"
+#endif
+
+#define __ masm->
+
+
+//------------------------------generate_exception_blob---------------------------
+// creates exception blob at the end
+// Using exception blob, this code is jumped from a compiled method.
+// (see emit_exception_handler in s390.ad file)
+//
+// Given an exception pc at a call we call into the runtime for the
+// handler in this method. This handler might merely restore state
+// (i.e. callee save registers), unwind the frame, and jump to the
+// exception handler for the nmethod if there is no Java level handler
+// for the nmethod.
+//
+// This code is entered with a branch.
+//
+// Arguments:
+// Z_R2(=Z_ARG1): exception oop
+// Z_R3(=Z_ARG2): exception pc
+//
+// Results:
+// Z_R2: exception oop
+// Z_R3: exception pc in caller
+// destination: exception handler of caller
+//
+// Note: the exception pc MUST be at a call (precise debug information)
+
+// Emits the code of the exception blob into a local CodeBuffer and
+// installs the resulting ExceptionBlob in _exception_blob.
+void OptoRuntime::generate_exception_blob() {
+
+ // Allocate space for the code
+ ResourceMark rm;
+ // Setup code generation tools
+ CodeBuffer buffer("exception_blob", 2048, 1024);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+
+ // Receives the handler address returned by the C call below and is the
+ // target of the final branch. Z_ARG5 is Z_R6.
+ Register handle_exception = Z_ARG5;
+
+ __ verify_thread();
+ // Stash exception oop and issuing pc in the thread; they are reloaded
+ // into Z_ARG1/Z_ARG2 after the C call.
+ __ z_stg(Z_ARG1/*exception oop*/, Address(Z_thread, JavaThread::exception_oop_offset()));
+ __ z_stg(Z_ARG2/*issuing pc*/, Address(Z_thread, JavaThread::exception_pc_offset()));
+
+ // Store issuing pc as return pc into
+ // caller's frame. stack-walking needs it. R14 is not valid here,
+ // because this code gets entered with a jump.
+ __ z_stg(Z_ARG2/*issuing pc*/, _z_abi(return_pc), Z_SP);
+
+ // The following call to function OptoRuntime::handle_exception_C
+ // does all the hard work. It checks if an
+ // exception catch exists in the method. If so, it returns the
+ // handler address. If the nmethod has been deoptimized and it had
+ // a handler the handler address is the deopt blob's
+ // unpack_with_exception entry.
+
+ // push a C frame for the exception blob. it is needed for the
+ // C call later on.
+
+ // Remembers the incoming SP so the blob's frame can be popped again
+ // after the C call.
+ Register saved_sp = Z_R11;
+
+ __ z_lgr(saved_sp, Z_SP);
+
+ // push frame for blob.
+ int frame_size = __ push_frame_abi160(0);
+
+ __ get_PC(Z_R1/*scratch*/);
+ __ set_last_Java_frame(/*sp=*/Z_SP, /*pc=*/Z_R1);
+
+ // This call can lead to deoptimization of the nmethod holding the handler.
+ __ z_lgr(Z_ARG1, Z_thread); // argument of C function
+ __ call_c(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C));
+
+ // Move the returned handler address out of Z_RET (Z_R2), which is
+ // reloaded with the exception oop (as Z_ARG1) further down.
+ __ z_lgr(handle_exception, Z_RET);
+ __ reset_last_Java_frame();
+
+ // Pop the exception blob's C frame that has been pushed before.
+ __ z_lgr(Z_SP, saved_sp);
+
+ // [Z_RET]!=NULL was possible in hotspot5 but not in sapjvm6.
+ // C2I adapter extensions are now removed by a resize in the frame manager
+ // (unwind_initial_activation_pending_exception).
+#ifdef ASSERT
+ __ z_ltgr(handle_exception, handle_exception);
+ __ asm_assert_ne("handler must not be NULL", 0x852);
+#endif
+
+ // Handle_exception contains the handler address. If the associated frame
+ // has been deoptimized then the handler has been patched to jump to
+ // the deoptimization blob.
+
+ // If the exception handler jumps to the deoptimization blob, the
+ // exception pc will be read from there.
+ __ z_lg(Z_ARG2, Address(Z_thread, JavaThread::exception_pc_offset()));
+
+ __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
+
+ // Clear the exception oop so GC no longer processes it as a root.
+ __ clear_mem(Address(Z_thread, JavaThread::exception_oop_offset()),sizeof(intptr_t));
+#ifdef ASSERT
+ // Debug builds additionally clear the handler pc and exception pc fields.
+ __ clear_mem(Address(Z_thread, JavaThread::exception_handler_pc_offset()), sizeof(intptr_t));
+ __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), sizeof(intptr_t));
+#endif
+
+ // Continue at the handler; Z_ARG1/Z_ARG2 hold exception oop and pc.
+ __ z_br(handle_exception);
+
+ // Make sure all code is generated.
+ masm->flush();
+
+ // Set exception blob.
+ // No oop maps are supplied; the frame size is passed in words.
+ OopMapSet *oop_maps = NULL;
+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, frame_size/wordSize);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/s390.ad Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,10802 @@
+//
+// Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2016 SAP SE. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+// z/Architecture Architecture Description File
+
+// Major contributions by AS, JL, LS.
+
+//
+// Following information is derived from private mail communication
+// (Oct. 2011).
+//
+// General branch target alignment considerations
+//
+// z/Architecture does not imply a general branch target alignment requirement.
+// There are side effects and side considerations, though, which may
+// provide some performance benefit. These are:
+// - Align branch target on octoword (32-byte) boundary
+// On more recent models (from z9 on), I-fetch is done on a Octoword
+// (32 bytes at a time) basis. To avoid I-fetching unnecessary
+// instructions, branch targets should be 32-byte aligned. If this
+// exact alignment cannot be achieved, having the branch target in
+// the first doubleword still provides some benefit.
+// - Avoid branch targets at the end of cache lines (> 64 bytes distance).
+// Sequential instruction prefetching after the branch target starts
+// immediately after having fetched the octoword containing the
+// branch target. When I-fetching crosses a cache line, there may be
+// a small stall. The worst case: the branch target (at the end of
+// a cache line) is a L1 I-cache miss and the next line as well.
+// Then, the entire target line must be filled first (to continue at the
+// branch target). Only then can the next sequential line be filled.
+// - Avoid multiple poorly predicted branches in a row.
+//
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+register %{
+
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def" name (register save type, C convention save type,
+// ideal register type, encoding);
+//
+// Register Save Types:
+//
+// NS = No-Save: The register allocator assumes that these registers
+// can be used without saving upon entry to the method, &
+// that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call: The register allocator assumes that these registers
+// can be used without saving upon entry to the method,
+// but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, but they do not need to be saved at call sites.
+//
+// AS = Always-Save: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// z/Architecture register definitions, based on the z/Architecture Principles
+// of Operation, 5th Edition, September 2005, and z/Linux Elf ABI Supplement,
+// 5th Edition, March 2001.
+//
+// For each 64-bit register we must define two registers: the register
+// itself, e.g. Z_R3, and a corresponding virtual other (32-bit-)'half',
+// e.g. Z_R3_H, which is needed by the allocator, but is not used
+// for stores, loads, etc.
+
+ // Integer/Long Registers
+ // ----------------------------
+
+ // z/Architecture has 16 64-bit integer registers.
+
+ // types: v = volatile, nv = non-volatile, s = system
+ reg_def Z_R0 (SOC, SOC, Op_RegI, 0, Z_R0->as_VMReg()); // v scratch1
+ reg_def Z_R0_H (SOC, SOC, Op_RegI, 99, Z_R0->as_VMReg()->next());
+ reg_def Z_R1 (SOC, SOC, Op_RegI, 1, Z_R1->as_VMReg()); // v scratch2
+ reg_def Z_R1_H (SOC, SOC, Op_RegI, 99, Z_R1->as_VMReg()->next());
+ reg_def Z_R2 (SOC, SOC, Op_RegI, 2, Z_R2->as_VMReg()); // v iarg1 & iret
+ reg_def Z_R2_H (SOC, SOC, Op_RegI, 99, Z_R2->as_VMReg()->next());
+ reg_def Z_R3 (SOC, SOC, Op_RegI, 3, Z_R3->as_VMReg()); // v iarg2
+ reg_def Z_R3_H (SOC, SOC, Op_RegI, 99, Z_R3->as_VMReg()->next());
+ reg_def Z_R4 (SOC, SOC, Op_RegI, 4, Z_R4->as_VMReg()); // v iarg3
+ reg_def Z_R4_H (SOC, SOC, Op_RegI, 99, Z_R4->as_VMReg()->next());
+ reg_def Z_R5 (SOC, SOC, Op_RegI, 5, Z_R5->as_VMReg()); // v iarg4
+ reg_def Z_R5_H (SOC, SOC, Op_RegI, 99, Z_R5->as_VMReg()->next());
+ reg_def Z_R6 (SOC, SOE, Op_RegI, 6, Z_R6->as_VMReg()); // v iarg5
+ reg_def Z_R6_H (SOC, SOE, Op_RegI, 99, Z_R6->as_VMReg()->next());
+ reg_def Z_R7 (SOC, SOE, Op_RegI, 7, Z_R7->as_VMReg());
+ reg_def Z_R7_H (SOC, SOE, Op_RegI, 99, Z_R7->as_VMReg()->next());
+ reg_def Z_R8 (SOC, SOE, Op_RegI, 8, Z_R8->as_VMReg());
+ reg_def Z_R8_H (SOC, SOE, Op_RegI, 99, Z_R8->as_VMReg()->next());
+ reg_def Z_R9 (SOC, SOE, Op_RegI, 9, Z_R9->as_VMReg());
+ reg_def Z_R9_H (SOC, SOE, Op_RegI, 99, Z_R9->as_VMReg()->next());
+ reg_def Z_R10 (SOC, SOE, Op_RegI, 10, Z_R10->as_VMReg());
+ reg_def Z_R10_H(SOC, SOE, Op_RegI, 99, Z_R10->as_VMReg()->next());
+ reg_def Z_R11 (SOC, SOE, Op_RegI, 11, Z_R11->as_VMReg());
+ reg_def Z_R11_H(SOC, SOE, Op_RegI, 99, Z_R11->as_VMReg()->next());
+ reg_def Z_R12 (SOC, SOE, Op_RegI, 12, Z_R12->as_VMReg());
+ reg_def Z_R12_H(SOC, SOE, Op_RegI, 99, Z_R12->as_VMReg()->next());
+ reg_def Z_R13 (SOC, SOE, Op_RegI, 13, Z_R13->as_VMReg());
+ reg_def Z_R13_H(SOC, SOE, Op_RegI, 99, Z_R13->as_VMReg()->next());
+ reg_def Z_R14 (NS, NS, Op_RegI, 14, Z_R14->as_VMReg()); // s return_pc
+ reg_def Z_R14_H(NS, NS, Op_RegI, 99, Z_R14->as_VMReg()->next());
+ reg_def Z_R15 (NS, NS, Op_RegI, 15, Z_R15->as_VMReg()); // s SP
+ reg_def Z_R15_H(NS, NS, Op_RegI, 99, Z_R15->as_VMReg()->next());
+
+ // Float/Double Registers
+
+ // The rules of ADL require that double registers be defined in pairs.
+ // Each pair must be two 32-bit values, but not necessarily a pair of
+ // single float registers. In each pair, ADLC-assigned register numbers
+ // must be adjacent, with the lower number even. Finally, when the
+ // CPU stores such a register pair to memory, the word associated with
+ // the lower ADLC-assigned number must be stored to the lower address.
+
+ // z/Architecture has 16 64-bit floating-point registers. Each can store a single
+ // or double precision floating-point value.
+
+ // types: v = volatile, nv = non-volatile, s = system
+ reg_def Z_F0 (SOC, SOC, Op_RegF, 0, Z_F0->as_VMReg()); // v farg1 & fret
+ reg_def Z_F0_H (SOC, SOC, Op_RegF, 99, Z_F0->as_VMReg()->next());
+ reg_def Z_F1 (SOC, SOC, Op_RegF, 1, Z_F1->as_VMReg());
+ reg_def Z_F1_H (SOC, SOC, Op_RegF, 99, Z_F1->as_VMReg()->next());
+ reg_def Z_F2 (SOC, SOC, Op_RegF, 2, Z_F2->as_VMReg()); // v farg2
+ reg_def Z_F2_H (SOC, SOC, Op_RegF, 99, Z_F2->as_VMReg()->next());
+ reg_def Z_F3 (SOC, SOC, Op_RegF, 3, Z_F3->as_VMReg());
+ reg_def Z_F3_H (SOC, SOC, Op_RegF, 99, Z_F3->as_VMReg()->next());
+ reg_def Z_F4 (SOC, SOC, Op_RegF, 4, Z_F4->as_VMReg()); // v farg3
+ reg_def Z_F4_H (SOC, SOC, Op_RegF, 99, Z_F4->as_VMReg()->next());
+ reg_def Z_F5 (SOC, SOC, Op_RegF, 5, Z_F5->as_VMReg());
+ reg_def Z_F5_H (SOC, SOC, Op_RegF, 99, Z_F5->as_VMReg()->next());
+ reg_def Z_F6 (SOC, SOC, Op_RegF, 6, Z_F6->as_VMReg());
+ reg_def Z_F6_H (SOC, SOC, Op_RegF, 99, Z_F6->as_VMReg()->next());
+ reg_def Z_F7 (SOC, SOC, Op_RegF, 7, Z_F7->as_VMReg());
+ reg_def Z_F7_H (SOC, SOC, Op_RegF, 99, Z_F7->as_VMReg()->next());
+ reg_def Z_F8 (SOC, SOE, Op_RegF, 8, Z_F8->as_VMReg());
+ reg_def Z_F8_H (SOC, SOE, Op_RegF, 99, Z_F8->as_VMReg()->next());
+ reg_def Z_F9 (SOC, SOE, Op_RegF, 9, Z_F9->as_VMReg());
+ reg_def Z_F9_H (SOC, SOE, Op_RegF, 99, Z_F9->as_VMReg()->next());
+ reg_def Z_F10 (SOC, SOE, Op_RegF, 10, Z_F10->as_VMReg());
+ reg_def Z_F10_H(SOC, SOE, Op_RegF, 99, Z_F10->as_VMReg()->next());
+ reg_def Z_F11 (SOC, SOE, Op_RegF, 11, Z_F11->as_VMReg());
+ reg_def Z_F11_H(SOC, SOE, Op_RegF, 99, Z_F11->as_VMReg()->next());
+ reg_def Z_F12 (SOC, SOE, Op_RegF, 12, Z_F12->as_VMReg());
+ reg_def Z_F12_H(SOC, SOE, Op_RegF, 99, Z_F12->as_VMReg()->next());
+ reg_def Z_F13 (SOC, SOE, Op_RegF, 13, Z_F13->as_VMReg());
+ reg_def Z_F13_H(SOC, SOE, Op_RegF, 99, Z_F13->as_VMReg()->next());
+ reg_def Z_F14 (SOC, SOE, Op_RegF, 14, Z_F14->as_VMReg());
+ reg_def Z_F14_H(SOC, SOE, Op_RegF, 99, Z_F14->as_VMReg()->next());
+ reg_def Z_F15 (SOC, SOE, Op_RegF, 15, Z_F15->as_VMReg());
+ reg_def Z_F15_H(SOC, SOE, Op_RegF, 99, Z_F15->as_VMReg()->next());
+
+
+ // Special Registers
+
+ // Condition Codes Flag Registers
+
+ // z/Architecture has the PSW (program status word) that contains
+ // (among other information) the condition code. We treat this
+ // part of the PSW as a condition register CR. It consists of 4
+ // bits. Floating point instructions influence the same condition register CR.
+
+ reg_def Z_CR(SOC, SOC, Op_RegFlags, 0, Z_CR->as_VMReg()); // volatile
+
+
+// Specify priority of register selection within phases of register
+// allocation. Highest priority is first. A useful heuristic is to
+// give registers a low priority when they are required by machine
+// instructions, and choose no-save registers before save-on-call, and
+// save-on-call before save-on-entry. Registers which participate in
+// fixed calling sequences should come last. Registers which are used
+// as pairs must fall on an even boundary.
+
+// It's worth about 1% on SPEC geomean to get this right.
+
+// Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
+// in adGlobals_s390.hpp which defines the <register>_num values, e.g.
+// Z_R3_num. Therefore, Z_R3_num may not be (and in reality is not)
+// the same as Z_R3->encoding()! Furthermore, we cannot make any
+// assumptions on ordering, e.g. Z_R3_num may be less than Z_R2_num.
+// Additionally, the function
+// static enum RC rc_class(OptoReg::Name reg)
+// maps a given <register>_num value to its chunk type (except for flags)
+// and its current implementation relies on chunk0 and chunk1 having a
+// size of 32 each (32 integer and 32 floating-point register halves).
+
+alloc_class chunk0(
+ // Chunk0 contains *all* 32 integer register halves (16 registers, each with its virtual _H half).
+
+ // potential SOE regs
+ Z_R13,Z_R13_H,
+ Z_R12,Z_R12_H,
+ Z_R11,Z_R11_H,
+ Z_R10,Z_R10_H,
+
+ Z_R9,Z_R9_H,
+ Z_R8,Z_R8_H,
+ Z_R7,Z_R7_H,
+
+ Z_R1,Z_R1_H,
+ Z_R0,Z_R0_H,
+
+ // argument registers
+ Z_R6,Z_R6_H,
+ Z_R5,Z_R5_H,
+ Z_R4,Z_R4_H,
+ Z_R3,Z_R3_H,
+ Z_R2,Z_R2_H,
+
+ // special registers
+ Z_R14,Z_R14_H,
+ Z_R15,Z_R15_H
+);
+
+alloc_class chunk1(
+ // Chunk1 contains *all* 32 floating-point register halves (16 registers, each with its virtual _H half).
+
+ Z_F15,Z_F15_H,
+ Z_F14,Z_F14_H,
+ Z_F13,Z_F13_H,
+ Z_F12,Z_F12_H,
+ Z_F11,Z_F11_H,
+ Z_F10,Z_F10_H,
+ Z_F9,Z_F9_H,
+ Z_F8,Z_F8_H,
+ // scratch register
+ Z_F7,Z_F7_H,
+ Z_F5,Z_F5_H,
+ Z_F3,Z_F3_H,
+ Z_F1,Z_F1_H,
+ // argument registers
+ Z_F6,Z_F6_H,
+ Z_F4,Z_F4_H,
+ Z_F2,Z_F2_H,
+ Z_F0,Z_F0_H
+);
+
+alloc_class chunk2(
+ Z_CR // The condition code part of the PSW, treated as a condition register.
+);
+
+
+//-------Architecture Description Register Classes-----------------------
+
+// Several register classes are automatically defined based upon
+// information in this architecture description.
+
+// 1) reg_class inline_cache_reg (as defined in frame section)
+// 2) reg_class compiler_method_oop_reg (as defined in frame section)
+// 3) reg_class interpreter_method_oop_reg (as defined in frame section)
+// 4) reg_class stack_slots(/* one chunk of stack-based "registers" */)
+
+// Integer Register Classes
+reg_class z_int_reg(
+/*Z_R0*/ // R0
+/*Z_R1*/
+ Z_R2,
+ Z_R3,
+ Z_R4,
+ Z_R5,
+ Z_R6,
+ Z_R7,
+/*Z_R8,*/ // Z_thread
+ Z_R9,
+ Z_R10,
+ Z_R11,
+ Z_R12,
+ Z_R13
+/*Z_R14*/ // return_pc
+/*Z_R15*/ // SP
+);
+
+reg_class z_no_odd_int_reg(
+/*Z_R0*/ // R0
+/*Z_R1*/
+ Z_R2,
+ Z_R3,
+ Z_R4,
+/*Z_R5,*/ // odd part of fixed register pair
+ Z_R6,
+ Z_R7,
+/*Z_R8,*/ // Z_thread
+ Z_R9,
+ Z_R10,
+ Z_R11,
+ Z_R12,
+ Z_R13
+/*Z_R14*/ // return_pc
+/*Z_R15*/ // SP
+);
+
+reg_class z_no_arg_int_reg(
+/*Z_R0*/ // R0
+/*Z_R1*/ // scratch
+/*Z_R2*/
+/*Z_R3*/
+/*Z_R4*/
+/*Z_R5*/
+/*Z_R6*/
+ Z_R7,
+/*Z_R8*/ // Z_thread
+ Z_R9,
+ Z_R10,
+ Z_R11,
+ Z_R12,
+ Z_R13
+/*Z_R14*/ // return_pc
+/*Z_R15*/ // SP
+);
+
+reg_class z_rarg1_int_reg(Z_R2);
+reg_class z_rarg2_int_reg(Z_R3);
+reg_class z_rarg3_int_reg(Z_R4);
+reg_class z_rarg4_int_reg(Z_R5);
+reg_class z_rarg5_int_reg(Z_R6);
+
+// Pointer Register Classes
+
+// 64-bit build means 64-bit pointers means hi/lo pairs.
+
+reg_class z_rarg5_ptrN_reg(Z_R6);
+
+reg_class z_rarg1_ptr_reg(Z_R2_H,Z_R2);
+reg_class z_rarg2_ptr_reg(Z_R3_H,Z_R3);
+reg_class z_rarg3_ptr_reg(Z_R4_H,Z_R4);
+reg_class z_rarg4_ptr_reg(Z_R5_H,Z_R5);
+reg_class z_rarg5_ptr_reg(Z_R6_H,Z_R6);
+reg_class z_thread_ptr_reg(Z_R8_H,Z_R8);
+
+reg_class z_ptr_reg(
+/*Z_R0_H,Z_R0*/ // R0
+/*Z_R1_H,Z_R1*/
+ Z_R2_H,Z_R2,
+ Z_R3_H,Z_R3,
+ Z_R4_H,Z_R4,
+ Z_R5_H,Z_R5,
+ Z_R6_H,Z_R6,
+ Z_R7_H,Z_R7,
+/*Z_R8_H,Z_R8,*/ // Z_thread
+ Z_R9_H,Z_R9,
+ Z_R10_H,Z_R10,
+ Z_R11_H,Z_R11,
+ Z_R12_H,Z_R12,
+ Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14*/ // return_pc
+/*Z_R15_H,Z_R15*/ // SP
+);
+
+reg_class z_lock_ptr_reg(
+/*Z_R0_H,Z_R0*/ // R0
+/*Z_R1_H,Z_R1*/
+ Z_R2_H,Z_R2,
+ Z_R3_H,Z_R3,
+ Z_R4_H,Z_R4,
+/*Z_R5_H,Z_R5,*/
+/*Z_R6_H,Z_R6,*/
+ Z_R7_H,Z_R7,
+/*Z_R8_H,Z_R8,*/ // Z_thread
+ Z_R9_H,Z_R9,
+ Z_R10_H,Z_R10,
+ Z_R11_H,Z_R11,
+ Z_R12_H,Z_R12,
+ Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14*/ // return_pc
+/*Z_R15_H,Z_R15*/ // SP
+);
+
+reg_class z_no_arg_ptr_reg(
+/*Z_R0_H,Z_R0*/ // R0
+/*Z_R1_H,Z_R1*/ // scratch
+/*Z_R2_H,Z_R2*/
+/*Z_R3_H,Z_R3*/
+/*Z_R4_H,Z_R4*/
+/*Z_R5_H,Z_R5*/
+/*Z_R6_H,Z_R6*/
+ Z_R7_H, Z_R7,
+/*Z_R8_H,Z_R8*/ // Z_thread
+ Z_R9_H,Z_R9,
+ Z_R10_H,Z_R10,
+ Z_R11_H,Z_R11,
+ Z_R12_H,Z_R12,
+ Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14*/ // return_pc
+/*Z_R15_H,Z_R15*/ // SP
+);
+
+// Special class for storeP instructions, which can store SP or RPC to
+// TLS. (Note: Do not generalize this to "any_reg". If you add
+// another register, such as FP, to this mask, the allocator may try
+// to put a temp in it.)
+// Register class for memory access base registers,
+// This class is a superset of z_ptr_reg including Z_thread.
+reg_class z_memory_ptr_reg(
+/*Z_R0_H,Z_R0*/ // R0
+/*Z_R1_H,Z_R1*/
+ Z_R2_H,Z_R2,
+ Z_R3_H,Z_R3,
+ Z_R4_H,Z_R4,
+ Z_R5_H,Z_R5,
+ Z_R6_H,Z_R6,
+ Z_R7_H,Z_R7,
+ Z_R8_H,Z_R8, // Z_thread
+ Z_R9_H,Z_R9,
+ Z_R10_H,Z_R10,
+ Z_R11_H,Z_R11,
+ Z_R12_H,Z_R12,
+ Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14*/ // return_pc
+/*Z_R15_H,Z_R15*/ // SP
+);
+
+// Other special pointer regs.
+reg_class z_r1_regP(Z_R1_H,Z_R1);
+reg_class z_r9_regP(Z_R9_H,Z_R9);
+
+
+// Long Register Classes
+
+reg_class z_rarg1_long_reg(Z_R2_H,Z_R2);
+reg_class z_rarg2_long_reg(Z_R3_H,Z_R3);
+reg_class z_rarg3_long_reg(Z_R4_H,Z_R4);
+reg_class z_rarg4_long_reg(Z_R5_H,Z_R5);
+reg_class z_rarg5_long_reg(Z_R6_H,Z_R6);
+
+// Longs in 1 register. Aligned adjacent hi/lo pairs.
+reg_class z_long_reg(
+/*Z_R0_H,Z_R0*/ // R0
+/*Z_R1_H,Z_R1*/
+ Z_R2_H,Z_R2,
+ Z_R3_H,Z_R3,
+ Z_R4_H,Z_R4,
+ Z_R5_H,Z_R5,
+ Z_R6_H,Z_R6,
+ Z_R7_H,Z_R7,
+/*Z_R8_H,Z_R8,*/ // Z_thread
+ Z_R9_H,Z_R9,
+ Z_R10_H,Z_R10,
+ Z_R11_H,Z_R11,
+ Z_R12_H,Z_R12,
+ Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14,*/ // return_pc
+/*Z_R15_H,Z_R15*/ // SP
+);
+
+
+// Special Class for Condition Code Flags Register
+
+reg_class z_condition_reg(
+ Z_CR
+);
+
+// Scratch register for late profiling. Callee saved.
+reg_class z_rscratch2_bits64_reg(Z_R2_H, Z_R2);
+
+
+// Float Register Classes
+
+reg_class z_flt_reg(
+ Z_F0,
+/*Z_F1,*/ // scratch
+ Z_F2,
+ Z_F3,
+ Z_F4,
+ Z_F5,
+ Z_F6,
+ Z_F7,
+ Z_F8,
+ Z_F9,
+ Z_F10,
+ Z_F11,
+ Z_F12,
+ Z_F13,
+ Z_F14,
+ Z_F15
+);
+reg_class z_rscratch1_flt_reg(Z_F1);
+
+// Double precision float registers have virtual `high halves' that
+// are needed by the allocator.
+reg_class z_dbl_reg(
+ Z_F0,Z_F0_H,
+/*Z_F1,Z_F1_H,*/ // scratch
+ Z_F2,Z_F2_H,
+ Z_F3,Z_F3_H,
+ Z_F4,Z_F4_H,
+ Z_F5,Z_F5_H,
+ Z_F6,Z_F6_H,
+ Z_F7,Z_F7_H,
+ Z_F8,Z_F8_H,
+ Z_F9,Z_F9_H,
+ Z_F10,Z_F10_H,
+ Z_F11,Z_F11_H,
+ Z_F12,Z_F12_H,
+ Z_F13,Z_F13_H,
+ Z_F14,Z_F14_H,
+ Z_F15,Z_F15_H
+);
+reg_class z_rscratch1_dbl_reg(Z_F1,Z_F1_H);
+
+%}
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define 'name --> value' mappings to inform the ADLC of an integer valued name.
+// Current support includes integer values in the range [0, 0x7FFFFFFF].
+// Format:
+// int_def <name> (<int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+// #define <name> (<expression>)
+// // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+// assert(<name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+definitions %{
+ // The default cost (of an ALU instruction), with a slightly cheaper and a slightly dearer variant.
+ int_def DEFAULT_COST ( 100, 100);
+ int_def DEFAULT_COST_LOW ( 80, 80);
+ int_def DEFAULT_COST_HIGH ( 120, 120);
+ int_def HUGE_COST (1000000, 1000000);
+
+ // Put an advantage on REG_MEM vs. MEM+REG_REG operations.
+ int_def ALU_REG_COST ( 100, DEFAULT_COST);
+ int_def ALU_MEMORY_COST ( 150, 150);
+
+ // Memory refs are twice as expensive as the default; the _HI/_LO variants add/subtract 20.
+ int_def MEMORY_REF_COST_HI ( 220, 2 * DEFAULT_COST+20);
+ int_def MEMORY_REF_COST ( 200, 2 * DEFAULT_COST);
+ int_def MEMORY_REF_COST_LO ( 180, 2 * DEFAULT_COST-20);
+
+ // Branches and calls are even more expensive: three times the default cost.
+ int_def BRANCH_COST ( 300, DEFAULT_COST * 3);
+ int_def CALL_COST ( 300, DEFAULT_COST * 3);
+%}
+
+source %{
+
+// Assembler convenience macros.
+// In PRODUCT builds BLOCK_COMMENT expands to nothing and BIND only binds the
+// label; otherwise BIND additionally emits the label name as a block comment.
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str)
+#define BIND(label) __ bind(label)
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":")
+#endif
+
+// Shorthand for a call on the local assembler object, which must be named _masm.
+#define __ _masm.
+
+// Z_DISP_SIZE evaluates to 4 if the displacement of operand 1 fits into an
+// unsigned 12-bit immediate and to 6 otherwise. The expansion is parenthesized
+// so that the conditional expression stays intact when the macro is used as an
+// operand of another operator (e.g. 'size <= Z_DISP_SIZE').
+#define Z_DISP_SIZE (Immediate::is_uimm12((long)opnd_array(1)->disp(ra_,this,2)) ? 4 : 6)
+#define Z_DISP3_SIZE 6
+
+// Tertiary op of a LoadP or StoreP encoding.
+#define REGP_OP true
+
+// Given a register encoding, produce an Integer Register object.
+static Register reg_to_register_object(int register_encoding);
+
+// ****************************************************************************
+
+// REQUIRED FUNCTIONALITY
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+// from the start of the call to the point where the return address
+// will point.
+
+// Offset from the start of the call to the point the return address refers to.
+// Calls with a non-NULL _method use a fixed offset of 8; all other calls use
+// the return address offset of the far patchable call sequence.
+int MachCallStaticJavaNode::ret_addr_offset() {
+  return _method ? 8 : MacroAssembler::call_far_patchable_ret_addr_offset();
+}
+
+// Offset from the start of the call to the point the return address refers to.
+// A vtable index of -4 selects the shorter sequence (14); every other index
+// selects the longer one (36), which is only expected without inline caches.
+int MachCallDynamicJavaNode::ret_addr_offset() {
+  // Size of receiver type profiling (C2 tiers); currently nothing is added.
+  const int profiling_size = 0;
+
+  if (this->_vtable_index != -4) {
+    assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
+    return 36 + profiling_size;
+  }
+  return 14 + profiling_size;
+}
+
+// Offset from the start of the call to the point the return address refers to:
+// the far patchable call's return address offset plus a fixed 12.
+int MachCallRuntimeNode::ret_addr_offset() {
+  return MacroAssembler::call_far_patchable_ret_addr_offset() + 12;
+}
+
+// Compute padding required for nodes which need alignment
+//
+// The addresses of the call instructions needs to be 4-byte aligned to
+// ensure that they don't span a cache line so that they are atomically patchable.
+// The actual calls get emitted at different offsets within the node emitters.
+// ins_alignment needs to be set to 2 which means that up to 1 nop may get inserted.
+
+// Returns the padding (0 or 2) needed in front of this node. For even offsets
+// the result is 2 exactly when current_offset + 0 is not a multiple of 4.
+int CallStaticJavaDirect_dynTOCNode::compute_padding(int current_offset) const {
+  const int call_offset = 0; // presumably the offset of the call within the node
+  return (call_offset - current_offset) & 2;
+}
+
+// Returns the padding (0 or 2) needed in front of this node. For even offsets
+// the result is 2 exactly when current_offset + 6 is not a multiple of 4.
+int CallDynamicJavaDirect_dynTOCNode::compute_padding(int current_offset) const {
+  const int call_offset = 6; // presumably the offset of the call within the node
+  return (call_offset - current_offset) & 2;
+}
+
+// Returns the padding (0 or 2) needed in front of this node. For even offsets
+// the result is 2 exactly when current_offset + 12 is not a multiple of 4.
+int CallRuntimeDirectNode::compute_padding(int current_offset) const {
+  const int call_offset = 12; // presumably the offset of the call within the node
+  return (call_offset - current_offset) & 2;
+}
+
+// Returns the padding (0 or 2) needed in front of this node. For even offsets
+// the result is 2 exactly when current_offset + 12 is not a multiple of 4.
+int CallLeafDirectNode::compute_padding(int current_offset) const {
+  const int call_offset = 12; // presumably the offset of the call within the node
+  return (call_offset - current_offset) & 2;
+}
+
+// Returns the padding (0 or 2) needed in front of this node. For even offsets
+// the result is 2 exactly when current_offset + 12 is not a multiple of 4.
+int CallLeafNoFPDirectNode::compute_padding(int current_offset) const {
+  const int call_offset = 12; // presumably the offset of the call within the node
+  return (call_offset - current_offset) & 2;
+}
+
+// Indicate if the safepoint node needs the polling page as an input.
+// Since z/Architecture does not have absolute addressing, it does.
+bool SafePointNode::needs_polling_address_input() {
+  // No absolute addressing on z/Architecture: the polling page address
+  // has to be supplied to the safepoint node as an input.
+  const bool polling_page_is_input = true;
+  return polling_page_is_input;
+}
+
+// Append a single nop (as produced by z_nop()) to the given code buffer.
+void emit_nop(CodeBuffer &cbuf) {
+  MacroAssembler masm(&cbuf);
+  masm.z_nop();
+}
+
+// Emit an interrupt that is caught by the debugger (for debugging compiler).
+// Append an illegal-instruction trap (z_illtrap()) to the given code buffer.
+void emit_break(CodeBuffer &cbuf) {
+  MacroAssembler masm(&cbuf);
+  masm.z_illtrap();
+}
+
+#ifndef PRODUCT
+// Print the textual form of the breakpoint node (non-product builds only).
+void MachBreakpointNode::format(PhaseRegAlloc *, outputStream *os) const {
+  os->print("TA");
+}
+#endif // !PRODUCT
+
+// Emit the breakpoint: an illegal-instruction trap, exactly what emit_break() produces.
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler masm(&cbuf);
+  masm.z_illtrap();
+}
+
+// The size is not hard-coded; it is delegated to the generic MachNode::size().
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+  const uint node_size = MachNode::size(ra_);
+  return node_size;
+}
+
+// Store the low 16 bits of 'value' at the end of the instruction section and
+// advance the section end by two bytes.
+static inline void z_emit16(CodeBuffer &cbuf, long value) {
+  // 32bit instructions may become sign extended.
+  assert(value >= 0, "unintended sign extension (int->long)");
+  assert(value < (1L << 16), "instruction too large");
+  address pos = cbuf.insts_end();
+  *((unsigned short*)pos) = (unsigned short)value;
+  cbuf.set_insts_end(pos + sizeof(unsigned short));
+}
+
+// Store the low 32 bits of 'value' at the end of the instruction section and
+// advance the section end by four bytes.
+static inline void z_emit32(CodeBuffer &cbuf, long value) {
+  // 32bit instructions may become sign extended.
+  assert(value < (1L << 32), "instruction too large");
+  address pos = cbuf.insts_end();
+  *((unsigned int*)pos) = (unsigned int)value;
+  cbuf.set_insts_end(pos + sizeof(unsigned int));
+}
+
+// Store a 48-bit instruction at the end of the instruction section and advance
+// the section end by six bytes. The value is shifted left by 16 bits and the
+// first six bytes of the resulting long (in memory order) are copied.
+static inline void z_emit48(CodeBuffer &cbuf, long value) {
+  // 32bit instructions may become sign extended.
+  assert(value >= 0, "unintended sign extension (int->long)");
+  assert(value < (1L << 48), "instruction too large");
+  const long shifted = value << 16;
+  address pos = cbuf.insts_end();
+  memcpy(pos, (unsigned char*)&shifted, 6);
+  cbuf.set_insts_end(pos + 6);
+}
+
+// Emit 'value' as a 2-, 4- or 6-byte instruction, chosen by its magnitude,
+// and return the number of bytes emitted.
+static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) {
+  long insn = value;
+  if (insn < 0) {
+    // There obviously has been an unintended sign extension (int->long). Revert it.
+    insn = (long)((unsigned long)((unsigned int)insn));
+  }
+
+  unsigned int len;
+  if (insn >= (1L << 32)) {
+    // 6-byte instruction, probably unaligned store.
+    z_emit48(cbuf, insn);
+    len = 6;
+  } else if (insn >= (1L << 16)) {
+    // 4-byte instruction, might be unaligned store.
+    z_emit32(cbuf, insn);
+    len = 4;
+  } else {
+    // 2-byte instruction.
+    z_emit16(cbuf, insn);
+    len = 2;
+  }
+  return len;
+}
+
+// Check effective address (at runtime) for required alignment.
+// Emits a runtime alignment check of the effective address disp(index,base):
+// the address is formed in Z_R0, masked with (alignment-1) and followed by a
+// conditional branch (on equal) and an illegal-instruction trap. Clobbers Z_R0.
+static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index, Register base, int alignment) {
+  MacroAssembler masm(&cbuf);
+
+  masm.z_lay(Z_R0, disp, index, base);
+  masm.z_nill(Z_R0, alignment-1);
+  // Presumably skips the trap below when the masked bits are all zero.
+  masm.z_brc(Assembler::bcondEqual, +3);
+  masm.z_illtrap();
+}
+
+// Emit a call to entry_point with the relocation type rtype.
+//  - runtime_call_w_cp_type: emitted via call_c_opt(); if that returns NULL, an
+//    out-of-memory failure is recorded on the current compilation and -1 is
+//    returned.
+//  - none, opt_virtual_call_type, static_call_type: emitted as NOP + BRASL with
+//    a relocation of the given type.
+// Native calls are not supported (ShouldNotReachHere). ra_ is not used.
+// Returns the number of bytes emitted, or -1 on failure.
+int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
+                    PhaseRegAlloc* ra_, bool is_native_call = false) {
+  __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
+  const unsigned int start_off = __ offset();
+
+  if (is_native_call) {
+    ShouldNotReachHere();
+  }
+
+  if (rtype == relocInfo::runtime_call_w_cp_type) {
+    assert((__ offset() & 2) == 0, "misaligned emit_call_reloc");
+    address call_addr = __ call_c_opt((address)entry_point);
+    if (call_addr == NULL) {
+      Compile::current()->env()->record_out_of_memory_failure();
+      return -1;
+    }
+  } else {
+    assert(rtype == relocInfo::none || rtype == relocInfo::opt_virtual_call_type ||
+           rtype == relocInfo::static_call_type, "unexpected rtype");
+    __ relocate(rtype);
+    // BRASL must be prepended with a nop to identify it in the instruction stream.
+    __ z_nop();
+    __ z_brasl(Z_R14, (address)entry_point);
+  }
+
+  const unsigned int ret_off = __ offset();
+
+  return (ret_off - start_off);
+}
+
+// Emit a NOP + BRASL call to entry_point, relocated with the given relocation
+// holder (opt_virtual_call_type or static_call_type only).
+// Returns the number of bytes emitted.
+static int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
+  __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
+  const unsigned int start_off = __ offset();
+
+  relocInfo::relocType rtype = rspec.type();
+  assert(rtype == relocInfo::opt_virtual_call_type || rtype == relocInfo::static_call_type,
+         "unexpected rtype");
+
+  __ relocate(rspec);
+  __ z_nop();
+  __ z_brasl(Z_R14, (address)entry_point);
+
+  const unsigned int ret_off = __ offset();
+
+  return (ret_off - start_off);
+}
+
+//=============================================================================
+
+const RegMask& MachConstantBaseNode::_out_RegMask = _Z_PTR_REG_mask; // Output mask; presumably the ADLC-generated mask of reg_class z_ptr_reg - TODO confirm.
+// The constant table is addressed absolutely, so the table base needs no offset.
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  const int no_offset = 0;
+  return no_offset;
+}
+
+// This node is never expanded after register allocation.
+bool MachConstantBaseNode::requires_postalloc_expand() const {
+  return false;
+}
+// Must not be called: requires_postalloc_expand() returns false for this node.
+void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
+  ShouldNotReachHere();
+}
+
+// Even with PC-relative TOC addressing, we still need this node.
+// Float loads/stores do not support PC-relative addresses.
+// Load the TOC address into the register allocated to this node.
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  MacroAssembler masm(&cbuf);
+  masm.load_toc(as_Register(ra_->get_encode(this)));
+}
+
+// Fixed size: the PC-relative TOC access is a single LARL.
+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
+  const uint larl_size = 6; // sizeof(LARL)
+  return larl_size;
+}
+
+#ifndef PRODUCT
+// Print the LARL that loads the constant pool address into this node's register.
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+  st->print("LARL %s,&constant_pool # MachConstantBaseNode",
+            as_Register(ra_->get_encode(this))->name());
+}
+#endif // !PRODUCT
+
+//=============================================================================
+
+#ifndef PRODUCT
+// Print a textual description of the method prolog: optional NOPs, the
+// optional thread verification, the optional stack bang and the frame push.
+// Every printed line is followed by a tab for the next entry.
+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  Compile* const compile = ra_->C;
+  st->print_cr("--- MachPrologNode ---");
+  st->print("\t");
+  for (int nop = 0; nop < OptoPrologueNops; ++nop) {
+    st->print_cr("NOP");
+    st->print("\t");
+  }
+
+  if (VerifyThread) {
+    st->print_cr("Verify_Thread");
+    st->print("\t");
+  }
+
+  const long frame_bytes = compile->frame_size_in_bytes();
+  const int bang_bytes = compile->bang_size_in_bytes();
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them. But be
+  // careful, because some VM calls (such as call site linkage) can
+  // use several kilobytes of stack. But the stack safety zone should
+  // account for that. See bugs 4446381, 4468289, 4497237.
+  if (compile->need_stack_bang(bang_bytes) && UseStackBanging) {
+    st->print_cr("# stack bang");
+    st->print("\t");
+  }
+  st->print_cr("push_frame %d", (int)-frame_bytes);
+  st->print("\t");
+}
+#endif // !PRODUCT
+
+// Emit the method prolog: thread verification, optional stack bang, saving of
+// the return pc and the frame push. Also fixes the constant table base offset
+// if the compilation has a MachConstantBaseNode.
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* const compile = ra_->C;
+  MacroAssembler _masm(&cbuf);
+
+  __ verify_thread();
+
+  const size_t frame_bytes = compile->frame_size_in_bytes();
+  const size_t bang_bytes = compile->bang_size_in_bytes();
+
+  assert(frame_bytes % wordSize == 0, "must preserve wordSize alignment");
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them. But be
+  // careful, because some VM calls (such as call site linkage) can
+  // use several kilobytes of stack. But the stack safety zone should
+  // account for that. See bugs 4446381, 4468289, 4497237.
+  if (compile->need_stack_bang(bang_bytes) && UseStackBanging) {
+    __ generate_stack_overflow_check(bang_bytes);
+  }
+
+  assert(Immediate::is_uimm32((long)frame_bytes), "to do: choose suitable types!");
+  __ save_return_pc();
+
+  // The z/Architecture abi is already accounted for in the frame size via the
+  // 'out_preserve_stack_slots' declaration.
+  __ push_frame((unsigned int)frame_bytes/*includes JIT ABI*/);
+
+  if (!compile->has_mach_constant_base_node()) {
+    return;
+  }
+  // NOTE: We set the table base offset here because users might be
+  // emitted before MachConstantBaseNode.
+  Compile::ConstantTable& table = compile->constant_table();
+  table.set_table_base_offset(table.calculate_table_base_offset());
+}
+
+// The prolog has no fixed size; it is determined by MachNode::size().
+uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
+  const uint prolog_size = MachNode::size(ra_);
+  return prolog_size;
+}
+
+// Number of relocatable values contained in this instruction.
+int MachPrologNode::reloc() const {
+  const int toc_load_relocs = 1; // One reloc entry for load_const(toc).
+  return toc_load_relocs;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+// Print a textual description of the method epilog; the polling page load is
+// listed only for polling epilogs of method compilations.
+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+  os->print_cr("epilog");
+  os->print("\t");
+  const bool polls = do_polling() && ra_->C->is_method_compilation();
+  if (!polls) {
+    return;
+  }
+  os->print_cr("load_from_polling_page Z_R1_scratch");
+  os->print("\t");
+}
+#endif // !PRODUCT
+
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ MacroAssembler _masm(&cbuf);
+ Compile* C = ra_->C;
+ __ verify_thread();
+
+ // Poll for a safepoint only if this epilog polls and a method is being compiled.
+ bool need_polling = do_polling() && C->is_method_compilation();
+
+ // Touch the polling page.
+ // Part 1: get the page's address into Z_R1_scratch (before the frame is popped).
+ if (need_polling) {
+ AddressLiteral pp(os::get_polling_page());
+ __ load_const_optimized(Z_R1_scratch, pp);
+ }
+
+ // Pop frame, restore return_pc, and all stuff needed by interpreter.
+ // Pop frame by add instead of load (a penny saved is a penny got :-).
+ int frame_size_in_bytes = Assembler::align((C->frame_slots() << LogBytesPerInt), frame::alignment_in_bytes);
+ int retPC_offset = frame_size_in_bytes + _z_abi16(return_pc);
+ if (Displacement::is_validDisp(retPC_offset)) {
+ __ z_lg(Z_R14, retPC_offset, Z_SP); // Load return pc relative to the not yet popped SP.
+ __ add2reg(Z_SP, frame_size_in_bytes);
+ } else {
+ __ add2reg(Z_SP, frame_size_in_bytes); // Offset not encodable: pop first, then restore.
+ __ restore_return_pc();
+ }
+
+ // Touch the polling page,
+ // part 2: touch the page now.
+ if (need_polling) {
+ // We need to mark the code position where the load from the safepoint
+ // polling page was emitted as relocInfo::poll_return_type here.
+ __ relocate(relocInfo::poll_return_type);
+ __ load_from_polling_page(Z_R1_scratch);
+ }
+}
+
+// The epilog has no fixed size; it is determined by MachNode::size().
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+  const uint epilog_size = MachNode::size(ra_);
+  return epilog_size;
+}
+
+// Number of relocatable values contained in this instruction.
+int MachEpilogNode::reloc() const {
+  const int polling_relocs = 1; // One for load_from_polling_page.
+  return polling_relocs;
+}
+
+// The epilog uses the generic MachNode pipeline class.
+const Pipeline * MachEpilogNode::pipeline() const {
+  const Pipeline* const generic_pipeline = MachNode::pipeline_class();
+  return generic_pipeline;
+}
+
+// Always reports offset 0; only valid for epilogs that poll.
+int MachEpilogNode::safepoint_offset() const {
+  assert(do_polling(), "no return for this epilog node");
+  const int poll_offset = 0;
+  return poll_offset;
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float, rc_stack.
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+
+// Map a <register>_num value (an index into the MachRegisterNumbers
+// enumeration in adGlobals_s390.hpp) to its register class.
+// Indices [0, 32) are integer register halves, [32, 64) are floating-point
+// register halves; everything else must be a stack slot.
+static enum RC rc_class(OptoReg::Name reg) {
+  const int int_halves_end   = 32;       // 32 integer register halves, starting at index 0.
+  const int float_halves_end = 32 + 32;  // 32 floating-point register halves, starting at index 32.
+
+  if (reg == OptoReg::Bad) {
+    return rc_bad;
+  }
+  if (reg < int_halves_end) {
+    return rc_int;
+  }
+  if (reg < float_halves_end) {
+    return rc_float;
+  }
+
+  // Between float regs & stack are the flags regs.
+  assert(reg >= OptoReg::stack0(), "blow up if spilling flags");
+  return rc_stack;
+}
+
+// Emit (or just size/print) one load or store of register 'reg' relative
+// to Z_SP. Opcodes above 1L<<32 are handled as 6-byte instructions with a
+// 20-bit signed displacement, all others as 4-byte instructions with a
+// 12-bit unsigned displacement.
+// When cbuf is non-NULL the instruction is emitted and the size reported by
+// z_emit_inst is returned. Otherwise nothing is emitted, the instruction is
+// optionally printed (non-PRODUCT only), and the size is derived from the
+// opcode.
+static unsigned int z_ld_st_helper(CodeBuffer *cbuf, const char *op_str, unsigned long opcode,
+                                   int reg, int offset, bool do_print, outputStream *os) {
+  const bool long_form = opcode > (1L<<32);
+
+  if (cbuf != NULL) {
+    const unsigned long insn = long_form
+      ? (opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 48) |
+         Assembler::simm20(offset) | Assembler::reg(Z_R0, 12, 48) | Assembler::regz(Z_SP, 16, 48))
+      : (opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 32) |
+         Assembler::uimm12(offset, 20, 32) | Assembler::reg(Z_R0, 12, 32) | Assembler::regz(Z_SP, 16, 32));
+    return z_emit_inst(*cbuf, insn);
+  }
+
+#if !defined(PRODUCT)
+  if (do_print) {
+    os->print("%s %s,#%d[,SP]\t # MachCopy spill code",op_str, Matcher::regName[reg], offset);
+  }
+#endif
+
+  if (long_form) {
+    return 6;
+  }
+  return 4;
+}
+
+// Emit (or just size/print) an MVC that copies 'len' bytes from
+// src_off(Z_SP) to dst_off(Z_SP). The reported size is always 6.
+// Printing happens only in non-PRODUCT builds and only when no code
+// buffer is supplied.
+static unsigned int z_mvc_helper(CodeBuffer *cbuf, int len, int dst_off, int src_off, bool do_print, outputStream *os) {
+  const unsigned int mvc_size = 6;
+
+  if (cbuf != NULL) {
+    MacroAssembler _masm(cbuf);
+    // The instruction takes the length minus one.
+    __ z_mvc(dst_off, len-1, Z_SP, src_off, Z_SP);
+    return mvc_size;
+  }
+
+#if !defined(PRODUCT)
+  if (do_print) {
+    os->print("MVC %d(%d,SP),%d(SP)\t # MachCopy spill code",dst_off, len, src_off);
+  }
+#endif
+
+  return mvc_size;
+}
+
+// Common worker for emit(), size() and format() of a spill copy.
+//
+// cbuf    - code buffer to emit into; NULL means "do not emit".
+// ra_     - register allocator, queried for the source/destination
+//           registers and their stack offsets.
+// do_size - true when only the size is wanted; suppresses printing.
+// os      - stream for the textual form (only used in non-PRODUCT builds
+//           and only when nothing is emitted).
+//
+// Returns the size of the generated move as reported by the helpers
+// (0 for a self copy). The source/destination register classes select
+// one of: stack->stack, int reg->reg, int store, int load, float reg->reg,
+// float store, float load. Float->int register copies and the trailing
+// "hi bits" case are Unimplemented().
+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *os) const {
+ // Get registers to move.
+ OptoReg::Name src_hi = ra_->get_reg_second(in(1));
+ OptoReg::Name src_lo = ra_->get_reg_first(in(1));
+ OptoReg::Name dst_hi = ra_->get_reg_second(this);
+ OptoReg::Name dst_lo = ra_->get_reg_first(this);
+
+ enum RC src_hi_rc = rc_class(src_hi);
+ enum RC src_lo_rc = rc_class(src_lo);
+ enum RC dst_hi_rc = rc_class(dst_hi);
+ enum RC dst_lo_rc = rc_class(dst_lo);
+
+ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
+ // A valid high half on the source side marks a 64-bit move.
+ bool is64 = (src_hi_rc != rc_bad);
+ assert(!is64 ||
+ ((src_lo&1) == 0 && src_lo+1 == src_hi && (dst_lo&1) == 0 && dst_lo+1 == dst_hi),
+ "expected aligned-adjacent pairs");
+
+ // Generate spill code!
+
+ if (src_lo == dst_lo && src_hi == dst_hi) {
+ return 0; // Self copy, no move.
+ }
+
+ int src_offset = ra_->reg2offset(src_lo);
+ int dst_offset = ra_->reg2offset(dst_lo);
+ bool print = !do_size;
+ // Whether the offsets fit the unsigned 12-bit displacement of the short
+ // instruction forms.
+ bool src12 = Immediate::is_uimm12(src_offset);
+ bool dst12 = Immediate::is_uimm12(dst_offset);
+
+ const char *mnemo = NULL;
+ unsigned long opc = 0;
+
+ // Memory->Memory Spill. Use Z_R0 to hold the value.
+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+
+ assert(!is64 || (src_hi_rc==rc_stack && dst_hi_rc==rc_stack),
+ "expected same type of move for high parts");
+
+ // Both offsets short: a single MVC does the copy without a register.
+ if (src12 && dst12) {
+ return z_mvc_helper(cbuf, is64 ? 8 : 4, dst_offset, src_offset, print, os);
+ }
+
+ // Otherwise go through Z_R0 with a load/store pair.
+ int r0 = Z_R0_num;
+ if (is64) {
+ return z_ld_st_helper(cbuf, "LG ", LG_ZOPC, r0, src_offset, print, os) +
+ z_ld_st_helper(cbuf, "STG ", STG_ZOPC, r0, dst_offset, print, os);
+ }
+
+ return z_ld_st_helper(cbuf, "LY ", LY_ZOPC, r0, src_offset, print, os) +
+ z_ld_st_helper(cbuf, "STY ", STY_ZOPC, r0, dst_offset, print, os);
+ }
+
+ // Check for float->int copy. Requires a trip through memory.
+ if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
+ Unimplemented(); // Unsafe, do not remove!
+ }
+
+ // Check for integer reg-reg copy.
+ if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
+ if (cbuf) {
+ MacroAssembler _masm(cbuf);
+ Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
+ Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
+ __ z_lgr(Rdst, Rsrc);
+ return 4;
+ }
+#if !defined(PRODUCT)
+ // else
+ if (print) {
+ os->print("LGR %s,%s\t # MachCopy spill code", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+ }
+#endif
+ return 4;
+ }
+
+ // Check for integer store.
+ if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
+ assert(!is64 || (src_hi_rc==rc_int && dst_hi_rc==rc_stack),
+ "expected same type of move for high parts");
+
+ if (is64) {
+ return z_ld_st_helper(cbuf, "STG ", STG_ZOPC, src_lo, dst_offset, print, os);
+ }
+
+ // else
+ // 32-bit store: pick the short form when the offset allows it.
+ mnemo = dst12 ? "ST " : "STY ";
+ opc = dst12 ? ST_ZOPC : STY_ZOPC;
+
+ return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+ }
+
+ // Check for integer load
+ // Always load cOops zero-extended. That doesn't hurt int loads.
+ if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
+
+ assert(!is64 || (dst_hi_rc==rc_int && src_hi_rc==rc_stack),
+ "expected same type of move for high parts");
+
+ mnemo = is64 ? "LG " : "LLGF";
+ opc = is64 ? LG_ZOPC : LLGF_ZOPC;
+
+ return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+ }
+
+ // Check for float reg-reg copy.
+ if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
+ if (cbuf) {
+ MacroAssembler _masm(cbuf);
+ FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
+ FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
+ __ z_ldr(Rdst, Rsrc);
+ return 2;
+ }
+#if !defined(PRODUCT)
+ // else
+ if (print) {
+ os->print("LDR %s,%s\t # MachCopy spill code", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+ }
+#endif
+ return 2;
+ }
+
+ // Check for float store.
+ if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
+ assert(!is64 || (src_hi_rc==rc_float && dst_hi_rc==rc_stack),
+ "expected same type of move for high parts");
+
+ if (is64) {
+ mnemo = dst12 ? "STD " : "STDY ";
+ opc = dst12 ? STD_ZOPC : STDY_ZOPC;
+ return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+ }
+ // else
+
+ mnemo = dst12 ? "STE " : "STEY ";
+ opc = dst12 ? STE_ZOPC : STEY_ZOPC;
+ return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+ }
+
+ // Check for float load.
+ if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
+ assert(!is64 || (dst_hi_rc==rc_float && src_hi_rc==rc_stack),
+ "expected same type of move for high parts");
+
+ if (is64) {
+ mnemo = src12 ? "LD " : "LDY ";
+ opc = src12 ? LD_ZOPC : LDY_ZOPC;
+ return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+ }
+ // else
+
+ mnemo = src12 ? "LE " : "LEY ";
+ opc = src12 ? LE_ZOPC : LEY_ZOPC;
+ return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+ }
+
+ // --------------------------------------------------------------------
+ // Check for hi bits still needing moving. Only happens for misaligned
+ // arguments to native calls.
+ if (src_hi == dst_hi) {
+ return 0; // Self copy, no move.
+ }
+
+ assert(is64 && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
+ Unimplemented(); // Unsafe, do not remove!
+
+ return 0; // never reached, but make the compiler shut up!
+}
+
+#if !defined(PRODUCT)
+// Print a textual form of the spill copy. Once register allocation
+// results are available the actual instruction is printed through
+// implementation(); before that only the node indices are shown.
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+  if (ra_ != NULL && ra_->node_regs_max_index() > 0) {
+    implementation(NULL, ra_, false, os);
+    return;
+  }
+
+  // Simple copy with exactly one input.
+  if (req() == 2 && in(1) != NULL) {
+    os->print("N%d = N%d\n", _idx, in(1)->_idx);
+    return;
+  }
+
+  // General case: list all inputs, "(" before the first, ", " in between.
+  os->print("N%d = ", _idx);
+  const char *sep = "(";
+  for (uint k = 1; k < req(); k++) {
+    os->print("%sN%d", sep, in(k)->_idx);
+    sep = ", ";
+  }
+  os->print(")");
+}
+#endif
+
+// Emit the spill copy into cbuf. Delegates to implementation() with a
+// code buffer and without an output stream.
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ implementation(&cbuf, ra_, false, NULL);
+}
+
+// Size of the spill copy. Runs implementation() in size-only mode: no code
+// buffer, no output stream, do_size == true.
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+ return implementation(NULL, ra_, true, NULL);
+}
+
+//=============================================================================
+
+#if !defined(PRODUCT)
+// Print the padding node: number of nops and the byte count computed as
+// _count * MacroAssembler::nop_size().
+void MachNopNode::format(PhaseRegAlloc *, outputStream *os) const {
+ os->print("NOP # pad for alignment (%d nops, %d bytes)", _count, _count*MacroAssembler::nop_size());
+}
+#endif
+
+// Emit _count nops. Outside of scratch (size-measuring) emission, a
+// diagnostic line with the remaining instruction-section space before and
+// after the nops is written to tty when that space is at most
+// _count*2 + 8.
+void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  bool report_space = false;
+  if (!(ra_->C->in_scratch_emit_size())) {
+    const int space_before = cbuf.insts()->remaining();
+    report_space = (space_before <= _count*2 + 8);
+    if (report_space) {
+      tty->print("NopNode: _count = %3.3d, remaining space before = %d", _count, space_before);
+    }
+  }
+
+  for (int emitted = 0; emitted < _count; emitted++) {
+    __ z_nop();
+  }
+
+  // Complete the diagnostic line started above.
+  if (report_space) {
+    const int space_after = cbuf.insts()->remaining();
+    tty->print_cr(", after = %d", space_after);
+  }
+}
+
+// Size of the padding: two bytes for each of the _count nops emitted by
+// emit().
+uint MachNopNode::size(PhaseRegAlloc *ra_) const {
+ return 2 * _count;
+}
+
+#if !defined(PRODUCT)
+// Print the box lock node as "ADDHI <reg>, SP, <offset>". The register
+// name is shown once register allocation results exist, otherwise the node
+// index is used.
+// NOTE(review): ra_ is dereferenced to compute 'offset' before the
+// 'ra_ &&' test below, so that test cannot protect against a NULL ra_.
+// Confirm whether callers may pass NULL here.
+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+ if (ra_ && ra_->node_regs_max_index() > 0) {
+ int reg = ra_->get_reg_first(this);
+ os->print("ADDHI %s, SP, %d\t//box node", Matcher::regName[reg], offset);
+ } else {
+ os->print("ADDHI N%d = SP + %d\t// box node", _idx, offset);
+ }
+}
+#endif
+
+// Take care of the size function, if you make changes here!
+// Emits a single LAY that loads the address Z_SP + offset into the node's
+// result register, where offset is the stack offset of the first element
+// of the node's input register mask.
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ MacroAssembler _masm(&cbuf);
+
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+ int reg = ra_->get_encode(this);
+ __ z_lay(as_Register(reg), offset, Z_SP);
+}
+
+// Fixed size of the code produced by BoxLockNode::emit() (one LAY).
+// Must be kept in sync with emit().
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_)
+ return 6;
+}
+
+ %} // end source section
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source_hpp %{
+
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ }% and source_hpp %{ }% freely as needed.
+
+//--------------------------------------------------------------
+// Used for optimization in Compile::Shorten_branches
+//--------------------------------------------------------------
+
+// Size and relocation-count queries for call trampoline stubs. Both
+// report 0 because this platform does not use call trampolines.
+class CallStubImpl {
+ public:
+
+ // call trampolines
+ // Size of call trampoline stub. For add'l comments, see size_java_to_interp().
+ static uint size_call_trampoline() {
+ return 0; // no call trampolines on this platform
+ }
+
+ // call trampolines
+ // Number of relocations needed by a call trampoline stub.
+ static uint reloc_call_trampoline() {
+ return 0; // No call trampolines on this platform.
+ }
+};
+
+%} // end source_hpp section
+
+source %{
+
+#if !defined(PRODUCT)
+// Print a fixed, schematic listing for the unverified entry point.
+// NOTE(review): the text is static and is not derived from what
+// MachUEPNode::emit() generates; treat it as a rough sketch only.
+void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+ os->print_cr("---- MachUEPNode ----");
+ os->print_cr("\tTA");
+ os->print_cr("\tload_const Z_R1, SharedRuntime::get_ic_miss_stub()");
+ os->print_cr("\tBR(Z_R1)");
+ os->print_cr("\tTA # pad with illtraps");
+ os->print_cr("\t...");
+ os->print_cr("\tTA");
+ os->print_cr("\tLTGR Z_R2, Z_R2");
+ os->print_cr("\tBRU ic_miss");
+}
+#endif
+
+// Emit the unverified entry point (UEP) of a compiled method.
+//
+// The generated code compares the klass held in the inline cache register
+// against the klass of the receiver (Z_ARG1). On a match execution falls
+// through to the code following the UEP; otherwise it branches to
+// SharedRuntime::get_ic_miss_stub(). When ImplicitNullChecks is off, an
+// explicit receiver null check is emitted first, which also branches to
+// the ic miss stub for a NULL receiver.
+void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  // Inline_cache contains a klass.
+  Register ic_klass = as_Register(Matcher::inline_cache_reg_encode());
+  // ARG1 is the receiver oop.
+  Register R2_receiver = Z_ARG1;
+  int klass_offset = oopDesc::klass_offset_in_bytes();
+  AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub());
+  Register R1_ic_miss_stub_addr = Z_R1_scratch;
+
+  // Null check of receiver.
+  // This is the null check of the receiver that actually should be
+  // done in the caller. It's here because in case of implicit null
+  // checks we get it for free.
+  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()),
+         "second word in oop should not require explicit null check.");
+  if (!ImplicitNullChecks) {
+    Label valid;
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cgij(R2_receiver, 0, Assembler::bcondNotEqual, valid);
+    } else {
+      // LTGR reports "equal/zero" for a NULL receiver, so the valid path
+      // must be taken on "not equal" - the same condition the
+      // compare-and-branch variant above uses.
+      __ z_ltgr(R2_receiver, R2_receiver);
+      __ z_brne(valid);
+    }
+    // The ic_miss_stub will handle the null pointer exception.
+    __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
+    __ z_br(R1_ic_miss_stub_addr);
+    __ bind(valid);
+  }
+
+  // Check whether this method is the proper implementation for the class of
+  // the receiver (ic miss check).
+  {
+    Label valid;
+    // Compare cached class against klass from receiver.
+    // This also does an implicit null check!
+    __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
+    __ z_bre(valid);
+    // The inline cache points to the wrong method. Call the
+    // ic_miss_stub to find the proper method.
+    __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
+    __ z_br(R1_ic_miss_stub_addr);
+    __ bind(valid);
+  }
+
+}
+
+// Size of the unverified entry point code. Not a fixed value; delegated
+// to MachNode::size().
+uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
+ // Determine size dynamically.
+ return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+%} // interrupt source section
+
+source_hpp %{ // Header information of the source block.
+
+// Declares the emitters for the exception and deopt handler stubs and
+// provides the stub sizes that the emitters request from start_a_stub().
+class HandlerImpl {
+ public:
+
+ static int emit_exception_handler(CodeBuffer &cbuf);
+ static int emit_deopt_handler(CodeBuffer& cbuf);
+
+ // Upper bound for the exception handler (the emitter asserts "<=").
+ static uint size_exception_handler() {
+ return NativeJump::max_instruction_size();
+ }
+
+ // Exact size of the deopt handler (the emitter asserts "==").
+ static uint size_deopt_handler() {
+ return NativeCall::max_instruction_size();
+ }
+};
+
+%} // end source_hpp section
+
+source %{
+
+// This exception handler code snippet is placed after the method's
+// code. It is the return point if an exception occurred. It jumps to
+// the exception blob.
+//
+// If the method gets deoptimized, the method and this code snippet
+// get patched.
+//
+// 1) Trampoline code gets patched into the end of this exception
+// handler. The trampoline code jumps to the deoptimization blob.
+//
+// 2) The return address in the method's code will get patched such
+// that it jumps to the trampoline.
+//
+// 3) The handler will get patched such that it does not jump to the
+// exception blob, but to an entry in the deoptimization blob being
+// aware of the exception.
+//
+// Returns the code offset at which the handler starts, or 0 if no stub
+// space could be obtained.
+int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
+ Register temp_reg = Z_R1;
+ MacroAssembler _masm(&cbuf);
+
+ address base = __ start_a_stub(size_exception_handler());
+ if (base == NULL) {
+ return 0; // CodeBuffer::expand failed
+ }
+
+ int offset = __ offset();
+ // Load the entry address of the exception blob into temp_reg and branch
+ // to it through the register.
+ __ load_const_optimized(temp_reg, (address)OptoRuntime::exception_blob()->content_begin());
+ __ z_br(temp_reg);
+
+ // The handler may be shorter than the reserved size, but never longer.
+ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+
+ __ end_a_stub();
+
+ return offset;
+}
+
+// Emit deopt handler code.
+// The handler loads the address of the deopt blob's unpack entry into Z_R1
+// and calls it. Returns the code offset at which the handler starts, or 0
+// if no stub space could be obtained.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+ MacroAssembler _masm(&cbuf);
+ address base = __ start_a_stub(size_deopt_handler());
+
+ if (base == NULL) {
+ return 0; // CodeBuffer::expand failed
+ }
+
+ int offset = __ offset();
+
+ // Size_deopt_handler() must be exact on zarch, so for simplicity
+ // we do not use load_const_opt here.
+ __ load_const(Z_R1, SharedRuntime::deopt_blob()->unpack());
+ __ call(Z_R1);
+ assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
+
+ __ end_a_stub();
+ return offset;
+}
+
+//=============================================================================
+
+
+// Given a register encoding, produce an Integer Register object.
+// The assert is a spot check (on Z_R12) that Register encodings and the
+// *_enc constants agree.
+static Register reg_to_register_object(int register_encoding) {
+ assert(Z_R12->encoding() == Z_R12_enc, "wrong coding");
+ return as_Register(register_encoding);
+}
+
+// Tell whether the ideal opcode can be matched on this platform.
+// An opcode without any match rule is never supported. For a few opcodes
+// the answer additionally depends on VM flags and CPU features; all
+// remaining opcodes with a match rule are reported as supported.
+// BUT: make sure match rule is not disabled by a false predicate!
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode)) {
+    return false;
+  }
+
+  bool supported = true; // Per default match rules are supported.
+
+  switch (opcode) {
+    case Op_CountLeadingZerosI:
+    case Op_CountLeadingZerosL:
+    case Op_CountTrailingZerosI:
+    case Op_CountTrailingZerosL:
+      // Implementation requires FLOGR instruction.
+      supported = UseCountLeadingZerosInstruction;
+      break;
+
+    case Op_ReverseBytesI:
+    case Op_ReverseBytesL:
+      supported = UseByteReverseInstruction;
+      break;
+
+    // PopCount supported by H/W from z/Architecture G5 (z196) on.
+    case Op_PopCountI:
+    case Op_PopCountL:
+      supported = UsePopCountInstruction && VM_Version::has_PopCount();
+      break;
+
+    case Op_StrComp:
+      supported = SpecialStringCompareTo;
+      break;
+    case Op_StrEquals:
+      supported = SpecialStringEquals;
+      break;
+    case Op_StrIndexOf:
+    case Op_StrIndexOfChar:
+      supported = SpecialStringIndexOf;
+      break;
+
+    case Op_GetAndAddI:
+    case Op_GetAndAddL:
+      // Unconditionally supported; not tied to
+      // VM_Version::has_AtomicMemWithImmALUOps().
+    case Op_GetAndSetI:
+    case Op_GetAndSetL:
+    case Op_GetAndSetP:
+    case Op_GetAndSetN:
+      // General CAS implementation, always available.
+    default:
+      break;
+  }
+
+  return supported;
+}
+
+// Vector variant of match_rule_supported(). The vector length is not
+// taken into account yet; the scalar answer is returned unchanged.
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // TODO
+  // Identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
+  // Add rules here.
+  return match_rule_supported(opcode); // Per default match rules are supported.
+}
+
+// Not expected to be called on this platform (ShouldNotReachHere). The
+// return statement only documents the intended mapping.
+int Matcher::regnum_to_fpu_offset(int regnum) {
+ ShouldNotReachHere();
+ return regnum - 32; // The FP registers are in the second chunk.
+}
+
+// Predicated vectors are reported as not available.
+const bool Matcher::has_predicated_vectors(void) {
+ return false;
+}
+
+// No platform-specific adjustment: the default threshold is passed through.
+const int Matcher::float_pressure(int default_pressure_threshold) {
+ return default_pressure_threshold;
+}
+
+// Long-to-float conversion is reported as supported, i.e. it is not done
+// by a runtime call.
+const bool Matcher::convL2FSupported(void) {
+ return true; // False means that conversion is done by runtime call.
+}
+
+//----------SUPERWORD HELPERS----------------------------------------
+
+// Vector width in bytes.
+// Always 8, independent of the element type; MaxVectorSize is asserted to
+// be 8 as well.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+ assert(MaxVectorSize == 8, "");
+ return 8;
+}
+
+// Vector ideal reg.
+// Only 8-byte vectors are expected (asserted); they use Op_RegL.
+const int Matcher::vector_ideal_reg(int size) {
+ assert(MaxVectorSize == 8 && size == 8, "");
+ return Op_RegL;
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+// Computed as the vector width in bytes divided by the element size of bt.
+const int Matcher::max_vector_size(const BasicType bt) {
+ assert(is_java_primitive(bt), "only primitive type vectors");
+ return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+
+// Minimum number of elements in a vector; identical to the maximum, so
+// only one vector size per element type is used.
+const int Matcher::min_vector_size(const BasicType bt) {
+ return max_vector_size(bt); // Same as max.
+}
+
+// Vector shifts are not supported: calling this is a fatal error. The
+// return statement is only there to satisfy the signature.
+const int Matcher::vector_shift_count_ideal_reg(int size) {
+ fatal("vector shift is not supported");
+ return Node::NotAMachineReg;
+}
+
+// z/Architecture does support misaligned store/load at minimal extra cost,
+// so misaligned vector accesses are reported as acceptable.
+const bool Matcher::misaligned_vectors_ok() {
+ return true;
+}
+
+// Not yet ported to z/Architecture; always answers false.
+const bool Matcher::pass_original_key_for_aes() {
+ return false;
+}
+
+// RETURNS: whether this branch offset is short enough that a short
+// branch can be used.
+//
+// If the platform does not provide any short branch variants, then
+// this method should return `false' for offset 0.
+//
+// `Compile::Fill_buffer' will decide on basis of this information
+// whether to do the pass `Compile::Shorten_branches' at all.
+//
+// And `Compile::Shorten_branches' will decide on basis of this
+// information whether to replace particular branch sites by short
+// ones.
+//
+// The 'rule' and 'br_size' arguments are not used here; only 'offset'
+// is checked.
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+ // On zarch short branches use a 16 bit signed immediate that
+ // is the pc-relative offset in halfword (= 2 bytes) units.
+ // The offset is checked as the distance between address 'offset' and
+ // address 0.
+ return Assembler::is_within_range_of_RelAddr16((address)((long)offset), (address)0);
+}
+
+// Every 64-bit constant is reported as "simple"; 'value' is not inspected.
+const bool Matcher::isSimpleConstant64(jlong value) {
+ // Probably always true, even if a temp register is required.
+ return true;
+}
+
+// Should correspond to setting above
+// NOTE(review): the setting this refers to is not identifiable from the
+// surrounding code - confirm which one is meant.
+const bool Matcher::init_array_count_is_in_bytes = false;
+
+// Suppress CMOVL. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
+// The cost is set to ConditionalMoveLimit.
+const int Matcher::long_cmove_cost() { return ConditionalMoveLimit; }
+
+// Suppress CMOVF. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
+// The cost is set to ConditionalMoveLimit.
+const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
+
+// Does the CPU require postalloc expand (see block.cpp for description of postalloc expand)?
+// Not on this platform.
+const bool Matcher::require_postalloc_expand = false;
+
+// Do we need to mask the count passed to shift instructions or does
+// the cpu only look at the lower 5/6 bits anyway?
+// 32bit shifts mask in emitter, 64bit shifts need no mask.
+// Constant shift counts are handled in Ideal phase.
+// Hence no masking is requested from the matcher.
+const bool Matcher::need_masked_shift_count = false;
+
+// Set this as clone_shift_expressions.
+// Answers true only when narrow oops need neither a base nor a shift,
+// i.e. narrow_oop_base() is NULL and narrow_oop_shift() is 0.
+bool Matcher::narrow_oop_use_complex_address() {
+  const bool no_base  = (Universe::narrow_oop_base() == NULL);
+  return no_base && (Universe::narrow_oop_shift() == 0);
+}
+
+// Complex addressing for narrow klass pointers is never used. Only valid
+// with UseCompressedClassPointers (asserted) and not callable in
+// non-LP64 builds.
+bool Matcher::narrow_klass_use_complex_address() {
+ NOT_LP64(ShouldNotCallThis());
+ assert(UseCompressedClassPointers, "only for compressed klass code");
+ // TODO HS25: z port if (MatchDecodeNodes) return true;
+ return false;
+}
+
+// True when the narrow oop base is NULL.
+bool Matcher::const_oop_prefer_decode() {
+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
+ return Universe::narrow_oop_base() == NULL;
+}
+
+// True when the narrow klass base is NULL.
+bool Matcher::const_klass_prefer_decode() {
+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
+ return Universe::narrow_klass_base() == NULL;
+}
+
+// Is it better to copy float constants, or load them directly from memory?
+// Most RISCs will have to materialize an address into a
+// register first, so they would do better to copy the constant from stack.
+// Float constants are not rematerialized on this platform.
+const bool Matcher::rematerialize_float_constants = false;
+
+// If CPU can load and store mis-aligned doubles directly then no fixup is
+// needed. Else we split the double into 2 integer pieces and move it
+// piece-by-piece. Only happens when passing doubles into C code as the
+// Java calling convention forces doubles to be aligned.
+// No fixup is requested here.
+const bool Matcher::misaligned_doubles_ok = true;
+
+// Advertise here if the CPU requires explicit rounding operations
+// to implement the UseStrictFP mode. Not required on this platform.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+// Do floats take an entire double register or just half?
+//
+// A float resides in a zarch double register. When storing it by
+// z_std, it cannot be restored in C-code by reloading it as a double
+// and casting it into a float afterwards.
+// Therefore the answer given here is "just half" (false).
+bool Matcher::float_in_double() { return false; }
+
+// Do ints take an entire long register or just half?
+// The relevant question is how the int is callee-saved:
+// the whole long is written but de-opt'ing will have to extract
+// the relevant 32 bits.
+// Hence ints are declared to occupy the entire long register.
+const bool Matcher::int_in_long = true;
+
+// Constants for c2c and c calling conventions.
+
+// Integer argument registers, in argument order.
+const MachRegisterNumbers z_iarg_reg[5] = {
+ Z_R2_num, Z_R3_num, Z_R4_num, Z_R5_num, Z_R6_num
+};
+
+// Floating-point argument registers, in argument order.
+const MachRegisterNumbers z_farg_reg[4] = {
+ Z_F0_num, Z_F2_num, Z_F4_num, Z_F6_num
+};
+
+// Element counts of the two tables above, derived from the array sizes.
+const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
+
+const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
+
+// Return whether or not this register is ever used as an argument. This
+// function is used on startup to build the trampoline stubs in generateOptoStub.
+// Registers not mentioned will be killed by the VM call in the trampoline, and
+// arguments in those registers not be available to the callee.
+bool Matcher::can_be_java_arg(int reg) {
+  // All registers contained in z_iarg_reg[] and z_farg_reg[] qualify,
+  // together with their virtual halves. The virtual halves must be
+  // included so that the trampoline stubs save and restore the full
+  // 64-bit registers rather than just their 32-bit halves.
+  switch (reg) {
+    // Integer argument registers.
+    case Z_R2_num: case Z_R2_H_num:
+    case Z_R3_num: case Z_R3_H_num:
+    case Z_R4_num: case Z_R4_H_num:
+    case Z_R5_num: case Z_R5_H_num:
+    case Z_R6_num: case Z_R6_H_num:
+    // Floating-point argument registers.
+    case Z_F0_num: case Z_F0_H_num:
+    case Z_F2_num: case Z_F2_H_num:
+    case Z_F4_num: case Z_F4_H_num:
+    case Z_F6_num: case Z_F6_H_num:
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+// A register is a spillable argument register exactly when it can be a
+// Java argument register.
+bool Matcher::is_spillable_arg(int reg) {
+ return can_be_java_arg(reg);
+}
+
+// Always answers false; 'divisor' is not inspected.
+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
+ return false;
+}
+
+// Register for DIVI projection of divmodI
+// (the Z_RARG4 int register mask).
+RegMask Matcher::divI_proj_mask() {
+ return _Z_RARG4_INT_REG_mask;
+}
+
+// Register for MODI projection of divmodI
+// (the Z_RARG3 int register mask).
+RegMask Matcher::modI_proj_mask() {
+ return _Z_RARG3_INT_REG_mask;
+}
+
+// Register for DIVL projection of divmodL
+// (the Z_RARG4 long register mask).
+RegMask Matcher::divL_proj_mask() {
+ return _Z_RARG4_LONG_REG_mask;
+}
+
+// Register for MODL projection of divmodL
+// (the Z_RARG3 long register mask).
+RegMask Matcher::modL_proj_mask() {
+ return _Z_RARG3_LONG_REG_mask;
+}
+
+// Copied from sparc.
+// Returns an empty (default-constructed) register mask.
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+ return RegMask();
+}
+
+// Platform setting for Matcher::convi2l_type_required: enabled here.
+const bool Matcher::convi2l_type_required = true;
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+// This platform only delegates to clone_base_plus_offset_address().
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+ return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+// Intentionally empty: no address reshaping is done on this platform.
+void Compile::reshape_address(AddPNode* addp) {
+}
+
+%} // source
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to output
+// byte streams. Encoding classes are parameterized macros used by
+// Machine Instruction Nodes in order to generate the bit encoding of the
+// instruction. Operands specify their base encoding interface with the
+// interface keyword. Four interfaces are currently supported:
+// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
+// operand to generate a function which returns its register number when
+// queried. CONST_INTER causes an operand to generate a function which
+// returns the value of the constant when queried. MEMORY_INTER causes an
+// operand to generate four functions which return the Base Register, the
+// Index Register, the Scale Value, and the Offset Value of the operand when
+// queried. COND_INTER causes an operand to generate six functions which
+// return the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional instruction.
+//
+// Instructions specify two basic values for encoding. Again, a function
+// is available to check if the constant displacement is an oop. They use the
+// ins_encode keyword to specify their encoding classes (which must be
+// a sequence of enc_class names, and their parameters, specified in
+// the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode. Only the opcode sections which a particular instruction
+// needs for encoding need to be specified.
+encode %{
+ // Placeholder encoding: emits a call to MacroAssembler::unimplemented()
+ // with a fixed message.
+ enc_class enc_unimplemented %{
+ MacroAssembler _masm(&cbuf);
+ __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
+ %}
+
+ // Marker encoding for untested nodes: emits MacroAssembler::untested()
+ // in ASSERT builds and nothing otherwise.
+ enc_class enc_untested %{
+#ifdef ASSERT
+ MacroAssembler _masm(&cbuf);
+ __ untested("Untested mach node encoding in AD file.");
+#endif
+ %}
+
+ // 16-bit register/register encoding: $primary combined with the dst and
+ // src register fields. The asserts check that $primary fits into 16 bits
+ // and that its two leftmost bits are 00.
+ enc_class z_rrform(iRegI dst, iRegI src) %{
+ assert((($primary >> 14) & 0x03) == 0, "Instruction format error");
+ assert( ($primary >> 16) == 0, "Instruction format error");
+ z_emit16(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,16) |
+ Assembler::reg($src$$reg,12,16));
+ %}
+
+ // 32-bit register/register encoding: $primary combined with two register
+ // fields. The assert checks that the two leftmost bits of the 32-bit
+ // opcode are 10.
+ enc_class z_rreform(iRegI dst1, iRegI src2) %{
+ assert((($primary >> 30) & 0x03) == 2, "Instruction format error");
+ z_emit32(cbuf, $primary |
+ Assembler::reg($dst1$$reg,24,32) |
+ Assembler::reg($src2$$reg,28,32));
+ %}
+
+ // 32-bit three-register encoding: like z_rreform with an additional
+ // register field for src3.
+ enc_class z_rrfform(iRegI dst1, iRegI src2, iRegI src3) %{
+ assert((($primary >> 30) & 0x03) == 2, "Instruction format error");
+ z_emit32(cbuf, $primary |
+ Assembler::reg($dst1$$reg,24,32) |
+ Assembler::reg($src2$$reg,28,32) |
+ Assembler::reg($src3$$reg,16,32));
+ %}
+
+ // 32-bit register/immediate encoding with a signed 16-bit immediate.
+ enc_class z_riform_signed(iRegI dst, immI16 src) %{
+ assert((($primary>>30) & 0x03) == 2, "Instruction format error");
+ z_emit32(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,32) |
+ Assembler::simm16($src$$constant,16,32));
+ %}
+
+ // 32-bit register/immediate encoding with an unsigned 16-bit immediate.
+ enc_class z_riform_unsigned(iRegI dst, uimmI16 src) %{
+ assert((($primary>>30) & 0x03) == 2, "Instruction format error");
+ z_emit32(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,32) |
+ Assembler::uimm16($src$$constant,16,32));
+ %}
+
+ // 48-bit encoding with two registers and a signed 16-bit immediate.
+ // The assert checks that the two leftmost bits of the 48-bit opcode
+ // are 11.
+ enc_class z_rieform_d(iRegI dst1, iRegI src3, immI src2) %{
+ assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+ z_emit48(cbuf, $primary |
+ Assembler::reg($dst1$$reg,8,48) |
+ Assembler::reg($src3$$reg,12,48) |
+ Assembler::simm16($src2$$constant,16,48));
+ %}
+
+ // 48-bit register/immediate encoding with a signed 32-bit immediate.
+ enc_class z_rilform_signed(iRegI dst, immL32 src) %{
+ assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+ z_emit48(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,48) |
+ Assembler::simm32($src$$constant,16,48));
+ %}
+
+ // 48-bit register/immediate encoding with an unsigned 32-bit immediate.
+ enc_class z_rilform_unsigned(iRegI dst, uimmL32 src) %{
+ assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+ z_emit48(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,48) |
+ Assembler::uimm32($src$$constant,16,48));
+ %}
+
+ // 48-bit encoding with two registers and a constant that is placed in
+ // the signed 20-bit field (Assembler::simm20).
+ enc_class z_rsyform_const(iRegI dst, iRegI src1, immI src2) %{
+ z_emit48(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,48) |
+ Assembler::reg($src1$$reg,12,48) |
+ Assembler::simm20($src2$$constant));
+ %}
+
+ // Same layout as z_rsyform_const, but with a third register (shft) and a
+ // zero value in the signed 20-bit field.
+ enc_class z_rsyform_reg_reg(iRegI dst, iRegI src, iRegI shft) %{
+ z_emit48(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,48) |
+ Assembler::reg($src$$reg,12,48) |
+ Assembler::reg($shft$$reg,16,48) |
+ Assembler::simm20(0));
+ %}
+
+ // 32-bit encoding with a result register, two further registers and an
+ // unsigned 12-bit constant. The assert checks that the two leftmost bits
+ // of the 32-bit opcode are 01.
+ enc_class z_rxform_imm_reg_reg(iRegL dst, immL con, iRegL src1, iRegL src2) %{
+ assert((($primary>>30) & 0x03) == 1, "Instruction format error");
+ z_emit32(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,32) |
+ Assembler::reg($src1$$reg,12,32) |
+ Assembler::reg($src2$$reg,16,32) |
+ Assembler::uimm12($con$$constant,20,32));
+ %}
+
+ // Variant of z_rxform_imm_reg_reg with a single source register; the
+ // register field at position 12 is left as given by $primary.
+ enc_class z_rxform_imm_reg(iRegL dst, immL con, iRegL src) %{
+ assert((($primary>>30) & 0x03) == 1, "Instruction format error");
+ z_emit32(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,32) |
+ Assembler::reg($src$$reg,16,32) |
+ Assembler::uimm12($con$$constant,20,32));
+ %}
+
+ // 48-bit counterpart of z_rxform_imm_reg_reg: the constant goes into the
+ // signed 20-bit field instead of an unsigned 12-bit one.
+ enc_class z_rxyform_imm_reg_reg(iRegL dst, immL con, iRegL src1, iRegL src2) %{
+ z_emit48(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,48) |
+ Assembler::reg($src1$$reg,12,48) |
+ Assembler::reg($src2$$reg,16,48) |
+ Assembler::simm20($con$$constant));
+ %}
+
+ // 48-bit counterpart of z_rxform_imm_reg: single source register plus a
+ // constant in the signed 20-bit field.
+ enc_class z_rxyform_imm_reg(iRegL dst, immL con, iRegL src) %{
+ z_emit48(cbuf, $primary |
+ Assembler::reg($dst$$reg,8,48) |
+ Assembler::reg($src$$reg,16,48) |
+ Assembler::simm20($con$$constant));
+ %}
+
+ // Direct memory arithmetic.
+ // Encodes an operation on memory at disp(base) with a signed 8-bit
+ // immediate. Requires a CPU with VM_Version::has_MemWithImmALUOps()
+ // (asserted).
+ enc_class z_siyform(memoryRSY mem, immI8 src) %{
+ int disp = $mem$$disp;
+ Register base = reg_to_register_object($mem$$base);
+ int con = $src$$constant;
+
+ assert(VM_Version::has_MemWithImmALUOps(), "unsupported CPU");
+ z_emit_inst(cbuf, $primary |
+ Assembler::regz(base,16,48) |
+ Assembler::simm20(disp) |
+ Assembler::simm8(con,8,48));
+ %}
+
+ // Encodes an operation on memory at disp(base) with a signed 16-bit
+ // immediate; the displacement is limited to an unsigned 12-bit value.
+ enc_class z_silform(memoryRS mem, immI16 src) %{
+ z_emit_inst(cbuf, $primary |
+ Assembler::regz(reg_to_register_object($mem$$base),16,48) |
+ Assembler::uimm12($mem$$disp,20,48) |
+ Assembler::simm16($src$$constant,32,48));
+ %}
+
+ // Encoder for FP ALU reg/mem instructions (support only short displacements).
+ // Opcodes above 1L<<32 are encoded as 48-bit instructions, all others as
+ // 32-bit instructions; both use an unsigned 12-bit displacement.
+ enc_class z_form_rt_memFP(RegF dst, memoryRX mem) %{
+ Register Ridx = $mem$$index$$Register;
+ if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
+ if ($primary > (1L << 32)) {
+ z_emit_inst(cbuf, $primary |
+ Assembler::reg($dst$$reg, 8, 48) |
+ Assembler::uimm12($mem$$disp, 20, 48) |
+ Assembler::reg(Ridx, 12, 48) |
+ Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
+ } else {
+ z_emit_inst(cbuf, $primary |
+ Assembler::reg($dst$$reg, 8, 32) |
+ Assembler::uimm12($mem$$disp, 20, 32) |
+ Assembler::reg(Ridx, 12, 32) |
+ Assembler::regz(reg_to_register_object($mem$$base), 16, 32));
+ }
+ %}
+
+ enc_class z_form_rt_mem(iRegI dst, memory mem) %{ // Register/memory encoder: 48-bit form with simm20 displacement, or 32-bit form with uimm12 displacement, selected by the opcode value.
+ Register Ridx = $mem$$index$$Register;
+ if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
+ if ($primary > (1L<<32)) { // Opcode value does not fit in 32 bits: 48-bit layout, displacement via simm20.
+ z_emit_inst(cbuf, $primary |
+ Assembler::reg($dst$$reg, 8, 48) |
+ Assembler::simm20($mem$$disp) |
+ Assembler::reg(Ridx, 12, 48) |
+ Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
+ } else { // 32-bit layout, displacement via uimm12.
+ z_emit_inst(cbuf, $primary |
+ Assembler::reg($dst$$reg, 8, 32) |
+ Assembler::uimm12($mem$$disp, 20, 32) |
+ Assembler::reg(Ridx, 12, 32) |
+ Assembler::regz(reg_to_register_object($mem$$base), 16, 32));
+ }
+ %}
+
+ enc_class z_form_rt_mem_opt(iRegI dst, memory mem) %{ // Picks the encoding by displacement size: secondary opcode for short displacements, primary opcode for other valid ones, scratch-register fallback otherwise.
+ int isize = $secondary > 1L << 32 ? 48 : 32; // Instruction length in bits of the secondary opcode's form.
+ Register Ridx = $mem$$index$$Register;
+ if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
+
+ if (Displacement::is_shortDisp((long)$mem$$disp)) { // Case 1: short displacement, secondary opcode with uimm12 field.
+ z_emit_inst(cbuf, $secondary |
+ Assembler::reg($dst$$reg, 8, isize) |
+ Assembler::uimm12($mem$$disp, 20, isize) |
+ Assembler::reg(Ridx, 12, isize) |
+ Assembler::regz(reg_to_register_object($mem$$base), 16, isize));
+ } else if (Displacement::is_validDisp((long)$mem$$disp)) { // Case 2: valid but not short, primary opcode (48-bit) with simm20 field.
+ z_emit_inst(cbuf, $primary |
+ Assembler::reg($dst$$reg, 8, 48) |
+ Assembler::simm20($mem$$disp) |
+ Assembler::reg(Ridx, 12, 48) |
+ Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
+ } else { // Case 3: displacement not encodable, so materialize it in Z_R1_scratch.
+ MacroAssembler _masm(&cbuf);
+ __ load_const_optimized(Z_R1_scratch, $mem$$disp);
+ if (Ridx != Z_R0) { __ z_agr(Z_R1_scratch, Ridx); } // Fold the original index into the scratch register.
+ z_emit_inst(cbuf, $secondary |
+ Assembler::reg($dst$$reg, 8, isize) |
+ Assembler::uimm12(0, 20, isize) | // Displacement field is zero; the offset lives in the scratch register.
+ Assembler::reg(Z_R1_scratch, 12, isize) | // Scratch register takes the index position.
+ Assembler::regz(reg_to_register_object($mem$$base), 16, isize));
+ }
+ %}
+
+ enc_class z_enc_brul(Label lbl) %{
+   // Unconditional branch emitted through z_brul.
+   MacroAssembler _masm(&cbuf);
+   Label* target = $lbl$$label;
+
+   // A NULL label means this encoding is only being used to determine
+   // the instruction size. Branch to a dummy label bound right here
+   // in that case.
+   Label size_only;
+   __ bind(size_only);
+   if (target == NULL) {
+     target = &size_only;
+   }
+   __ z_brul(*target);
+ %}
+
+ enc_class z_enc_bru(Label lbl) %{
+   // Unconditional branch emitted through z_bru.
+   MacroAssembler _masm(&cbuf);
+   Label* target = $lbl$$label;
+
+   // A NULL label means this encoding is only being used to determine
+   // the instruction size. Branch to a dummy label bound right here
+   // in that case.
+   Label size_only;
+   __ bind(size_only);
+   if (target == NULL) {
+     target = &size_only;
+   }
+   __ z_bru(*target);
+ %}
+
+ enc_class z_enc_branch_con_far(cmpOp cmp, Label lbl) %{
+   // Conditional branch emitted through z_brcl; the condition is the
+   // cmpcode of the cmp operand.
+   MacroAssembler _masm(&cbuf);
+   Label* target = $lbl$$label;
+
+   // A NULL label means this encoding is only being used to determine
+   // the instruction size. Branch to a dummy label bound right here
+   // in that case.
+   Label size_only;
+   __ bind(size_only);
+   if (target == NULL) {
+     target = &size_only;
+   }
+
+   const Assembler::branch_condition cond = (Assembler::branch_condition)$cmp$$cmpcode;
+   __ z_brcl(cond, *target);
+ %}
+
+ enc_class z_enc_branch_con_short(cmpOp cmp, Label lbl) %{
+   // Conditional branch emitted through z_brc; the condition is the
+   // cmpcode of the cmp operand.
+   MacroAssembler _masm(&cbuf);
+   Label* target = $lbl$$label;
+
+   // A NULL label means this encoding is only being used to determine
+   // the instruction size. Branch to a dummy label bound right here
+   // in that case.
+   Label size_only;
+   __ bind(size_only);
+   if (target == NULL) {
+     target = &size_only;
+   }
+
+   const Assembler::branch_condition cond = (Assembler::branch_condition)$cmp$$cmpcode;
+   __ z_brc(cond, *target);
+ %}
+
+ enc_class z_enc_cmpb_regreg(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
+   // Combined register/register compare-and-branch. The instruction's
+   // primary opcode selects which of the four emitters is used.
+   MacroAssembler _masm(&cbuf);
+   Label* target = $lbl$$label;
+
+   // A NULL label means this encoding is only being used to determine
+   // the instruction size. Branch to a dummy label bound right here
+   // in that case.
+   Label size_only;
+   __ bind(size_only);
+   if (target == NULL) {
+     target = &size_only;
+   }
+
+   const Assembler::branch_condition cond = (Assembler::branch_condition)$cmp$$cmpcode;
+   const unsigned long opcode = $primary;
+   if (opcode == CRJ_ZOPC) {
+     __ z_crj($src1$$Register, $src2$$Register, cond, *target);
+   } else if (opcode == CLRJ_ZOPC) {
+     __ z_clrj($src1$$Register, $src2$$Register, cond, *target);
+   } else if (opcode == CGRJ_ZOPC) {
+     __ z_cgrj($src1$$Register, $src2$$Register, cond, *target);
+   } else {
+     // Anything else must be CLGRJ.
+     guarantee(opcode == CLGRJ_ZOPC, "opcode not implemented");
+     __ z_clgrj($src1$$Register, $src2$$Register, cond, *target);
+   }
+ %}
+
+ enc_class z_enc_cmpb_regregFar(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
+   // Far variant of the register/register compare-and-branch: a
+   // separate compare (selected by the primary opcode) followed by a
+   // z_brcl on the requested condition.
+   MacroAssembler _masm(&cbuf);
+   Label* target = $lbl$$label;
+
+   // A NULL label means this encoding is only being used to determine
+   // the instruction size. Branch to a dummy label bound right here
+   // in that case.
+   Label size_only;
+   __ bind(size_only);
+   if (target == NULL) {
+     target = &size_only;
+   }
+
+   // Step 1: the compare.
+   const unsigned long opcode = $primary;
+   if (opcode == CR_ZOPC) {
+     __ z_cr($src1$$Register, $src2$$Register);
+   } else if (opcode == CLR_ZOPC) {
+     __ z_clr($src1$$Register, $src2$$Register);
+   } else if (opcode == CGR_ZOPC) {
+     __ z_cgr($src1$$Register, $src2$$Register);
+   } else {
+     // Anything else must be CLGR.
+     guarantee(opcode == CLGR_ZOPC, "opcode not implemented");
+     __ z_clgr($src1$$Register, $src2$$Register);
+   }
+
+   // Step 2: the branch.
+   __ z_brcl((Assembler::branch_condition)$cmp$$cmpcode, *target);
+ %}
+
+ enc_class z_enc_cmpb_regimm(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
+   // Combined register/immediate compare-and-branch. The instruction's
+   // primary opcode selects which of the four emitters is used.
+   MacroAssembler _masm(&cbuf);
+   Label* target = $lbl$$label;
+
+   // A NULL label means this encoding is only being used to determine
+   // the instruction size. Branch to a dummy label bound right here
+   // in that case.
+   Label size_only;
+   __ bind(size_only);
+   if (target == NULL) {
+     target = &size_only;
+   }
+
+   const Assembler::branch_condition cond = (Assembler::branch_condition)$cmp$$cmpcode;
+   const unsigned long opcode = $primary;
+   if (opcode == CIJ_ZOPC) {
+     __ z_cij($src1$$Register, $src2$$constant, cond, *target);
+   } else if (opcode == CLIJ_ZOPC) {
+     __ z_clij($src1$$Register, $src2$$constant, cond, *target);
+   } else if (opcode == CGIJ_ZOPC) {
+     __ z_cgij($src1$$Register, $src2$$constant, cond, *target);
+   } else {
+     // Anything else must be CLGIJ.
+     guarantee(opcode == CLGIJ_ZOPC, "opcode not implemented");
+     __ z_clgij($src1$$Register, $src2$$constant, cond, *target);
+   }
+ %}
+
+ enc_class z_enc_cmpb_regimmFar(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
+   // Far variant of the register/immediate compare-and-branch: a
+   // separate compare (selected by the primary opcode) followed by a
+   // z_brcl on the requested condition.
+   MacroAssembler _masm(&cbuf);
+   Label* target = $lbl$$label;
+
+   // A NULL label means this encoding is only being used to determine
+   // the instruction size. Branch to a dummy label bound right here
+   // in that case.
+   Label size_only;
+   __ bind(size_only);
+   if (target == NULL) {
+     target = &size_only;
+   }
+
+   // Step 1: the compare.
+   const unsigned long opcode = $primary;
+   if (opcode == CHI_ZOPC) {
+     __ z_chi($src1$$Register, $src2$$constant);
+   } else if (opcode == CLFI_ZOPC) {
+     __ z_clfi($src1$$Register, $src2$$constant);
+   } else if (opcode == CGHI_ZOPC) {
+     __ z_cghi($src1$$Register, $src2$$constant);
+   } else {
+     // Anything else must be CLGFI.
+     guarantee(opcode == CLGFI_ZOPC, "opcode not implemented");
+     __ z_clgfi($src1$$Register, $src2$$constant);
+   }
+
+   // Step 2: the branch.
+   __ z_brcl((Assembler::branch_condition)$cmp$$cmpcode, *target);
+ %}
+
+ // Call from Java to runtime: loads the return pc into Z_R14, saves it, then emits the call via call_c_opt.
+ enc_class z_enc_java_to_runtime_call(method meth) %{
+ MacroAssembler _masm(&cbuf);
+
+ // Save return pc before call to the place where we need it, since
+ // callee doesn't.
+ unsigned int start_off = __ offset();
+ // Compute size of "larl + stg + call_c_opt".
+ const int size_of_code = 6 + 6 + MacroAssembler::call_far_patchable_size(); // Two 6-byte instructions plus the patchable call sequence.
+ __ get_PC(Z_R14, size_of_code); // Z_R14 := pc at start_off + size_of_code, i.e. the expected return address.
+ __ save_return_pc();
+ assert(__ offset() - start_off == 12, "bad prelude len: %d", __ offset() - start_off); // The two instructions above must occupy exactly 12 bytes.
+
+ assert((__ offset() & 2) == 0, "misaligned z_enc_java_to_runtime_call");
+ address call_addr = __ call_c_opt((address)$meth$$method);
+ if (call_addr == NULL) { // Emission failed: record an out-of-memory failure and stop encoding.
+ Compile::current()->env()->record_out_of_memory_failure();
+ return;
+ }
+
+#ifdef ASSERT
+ // Plausibility check for size_of_code assumptions.
+ unsigned int actual_ret_off = __ offset();
+ assert(start_off + size_of_code == actual_ret_off, "wrong return_pc");
+#endif
+ %}
+
+ enc_class z_enc_java_static_call(method meth) %{
+ // Call to fixup routine. Fixup routine uses ScopeDesc info to determine
+ // whom we intended to call.
+ MacroAssembler _masm(&cbuf);
+ int ret_offset = 0; // Receives the result of emit_call_reloc(); not read again within this encoding.
+
+ if (!_method) { // No target method recorded: emit with a runtime-call relocation.
+ ret_offset = emit_call_reloc(_masm, $meth$$method,
+ relocInfo::runtime_call_w_cp_type, ra_);
+ } else { // Java target: choose the relocation by call kind.
+ int method_index = resolved_method_index(cbuf);
+ if (_optimized_virtual) {
+ ret_offset = emit_call_reloc(_masm, $meth$$method,
+ opt_virtual_call_Relocation::spec(method_index));
+ } else {
+ ret_offset = emit_call_reloc(_masm, $meth$$method,
+ static_call_Relocation::spec(method_index));
+ }
+ }
+ assert(__ inst_mark() != NULL, "emit_call_reloc must set_inst_mark()");
+
+ if (_method) { // Emit stub for static call.
+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
+ if (stub == NULL) { // Stub could not be emitted: record the failure and stop encoding.
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+ }
+ %}
+
+ // Java dynamic call: either an inline-cache call (vtable_index == -4) or a dispatch through the receiver's vtable.
+ enc_class z_enc_java_dynamic_call(method meth) %{
+ MacroAssembler _masm(&cbuf);
+ unsigned int start_off = __ offset();
+
+ int vtable_index = this->_vtable_index;
+ if (vtable_index == -4) { // NOTE(review): -4 is a magic value; presumably the "no vtable index" marker that selects the inline-cache path - confirm and consider a named constant.
+ Register ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
+ address virtual_call_oop_addr = NULL;
+
+ AddressLiteral empty_ic((address) Universe::non_oop_word()); // Placeholder value loaded into the inline cache register.
+ virtual_call_oop_addr = __ pc(); // Address of the load below; passed to the relocation spec.
+ bool success = __ load_const_from_toc(ic_reg, empty_ic);
+ if (!success) { // Load could not be emitted: record an out-of-memory failure and stop encoding.
+ Compile::current()->env()->record_out_of_memory_failure();
+ return;
+ }
+
+ // Call to fixup routine. Fixup routine uses ScopeDesc info
+ // to determine who we intended to call.
+ int method_index = resolved_method_index(cbuf);
+ __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
+ unsigned int ret_off = __ offset();
+ assert(__ offset() - start_off == 6, "bad prelude len: %d", __ offset() - start_off); // The inline cache load must be exactly 6 bytes.
+ ret_off += emit_call_reloc(_masm, $meth$$method, relocInfo::none, ra_); // ret_off is not read again within this encoding.
+ assert(_method, "lazy_constant may be wrong when _method==null");
+ } else {
+ assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
+ // Go through the vtable. Get receiver klass. Receiver already
+ // checked for non-null. If we'll go thru a C2I adapter, the
+ // interpreter expects method in Z_method.
+ // Use Z_method to temporarily hold the klass oop. Z_R1_scratch is destroyed
+ // by load_heap_oop_not_null.
+ __ load_klass(Z_method, Z_R2); // Receiver is taken from Z_R2.
+
+ int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
+ int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); // Byte offset of the method slot within the klass.
+
+ if (Displacement::is_validDisp(v_off) ) {
+ // Can use load instruction with large offset.
+ __ z_lg(Z_method, Address(Z_method /*class oop*/, v_off /*method offset*/));
+ } else {
+ // Worse case, must load offset into register.
+ __ load_const(Z_R1_scratch, v_off);
+ __ z_lg(Z_method, Address(Z_method /*class oop*/, Z_R1_scratch /*method offset*/));
+ }
+ // NOTE: for vtable dispatches, the vtable entry will never be
+ // null. However it may very well end up in handle_wrong_method
+ // if the method is abstract for the particular class.
+ __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
+ // Call target. Either compiled code or C2I adapter.
+ __ z_basr(Z_R14, Z_R1_scratch);
+ unsigned int ret_off = __ offset(); // Not read again within this encoding.
+ }
+ %}
+
+ enc_class z_enc_cmov_reg(cmpOp cmp, iRegI dst, iRegI src) %{
+   // Conditional register move: dst := src if the condition given by
+   // the cmp operand holds.
+   MacroAssembler _masm(&cbuf);
+   Register to   = reg_to_register_object($dst$$reg);
+   Register from = reg_to_register_object($src$$reg);
+
+   // Moving a register onto itself needs no code at all.
+   if (from != to) {
+     Assembler::branch_condition cond = (Assembler::branch_condition)$cmp$$cmpcode;
+
+     if (!VM_Version::has_LoadStoreConditional()) {
+       // No conditional load available: jump around a plain move
+       // when the condition does not hold.
+       Label skip_move;
+       __ z_brc(Assembler::inverse_condition(cond), skip_move);
+       __ z_lgr(to, from); // Used for int and long+ptr.
+       __ bind(skip_move);
+     } else {
+       __ z_locgr(to, from, cond);
+     }
+   }
+ %}
+
+ enc_class z_enc_cmov_imm(cmpOp cmp, iRegI dst, immI16 src) %{
+   // Conditional move of a 16-bit immediate: dst := src if the
+   // condition given by the cmp operand holds.
+   MacroAssembler _masm(&cbuf);
+   Register to = reg_to_register_object($dst$$reg);
+   int value = $src$$constant;
+   Assembler::branch_condition cond = (Assembler::branch_condition)$cmp$$cmpcode;
+
+   // Jump around the load when the condition does not hold.
+   Label skip_load;
+   __ z_brc(Assembler::inverse_condition(cond), skip_load);
+   if (value != 0) {
+     __ z_lghi(to, value); // Use for int, long & ptr.
+   } else {
+     // Zero is produced by clearing the register. Don't set CC.
+     __ clear_reg(to, true, false); // Use for int, long & ptr.
+   }
+   __ bind(skip_load);
+ %}
+
+ enc_class z_enc_cctobool(iRegI res) %{
+   // Turn the current condition code into a boolean in res:
+   // 0 when the condition is "not equal", 1 otherwise.
+   MacroAssembler _masm(&cbuf);
+   Register result = reg_to_register_object($res$$reg);
+
+   if (!VM_Version::has_LoadStoreConditional()) {
+     // Branching version.
+     Label is_false;
+     __ load_const_optimized(result, 0L); // false (failed)
+     __ z_brne(is_false);                 // Assume true to be the common case.
+     __ load_const_optimized(result, 1L); // true (succeed)
+     __ bind(is_false);
+   } else {
+     // Branch-free version using a conditional register load.
+     __ load_const_optimized(Z_R0_scratch, 0L); // false (failed)
+     __ load_const_optimized(result, 1L);       // true (succeed)
+     __ z_locgr(result, Z_R0_scratch, Assembler::bcondNotEqual);
+   }
+ %}
+
+ enc_class z_enc_casI(iRegI compare_value, iRegI exchange_value, iRegP addr_ptr) %{
+   // Int compare-and-swap: a single z_cs at displacement 0 from the
+   // address register.
+   MacroAssembler _masm(&cbuf);
+   Register expected    = reg_to_register_object($compare_value$$reg);
+   Register replacement = reg_to_register_object($exchange_value$$reg);
+   Register location    = reg_to_register_object($addr_ptr$$reg);
+
+   __ z_cs(expected, replacement, 0, location);
+ %}
+
+ enc_class z_enc_casL(iRegL compare_value, iRegL exchange_value, iRegP addr_ptr) %{
+   // Long compare-and-swap: a single z_csg at displacement 0 from the
+   // address register.
+   MacroAssembler _masm(&cbuf);
+   Register expected    = reg_to_register_object($compare_value$$reg);
+   Register replacement = reg_to_register_object($exchange_value$$reg);
+   Register location    = reg_to_register_object($addr_ptr$$reg);
+
+   __ z_csg(expected, replacement, 0, location);
+ %}
+
+ enc_class z_enc_SwapI(memoryRSY mem, iRegI dst, iRegI tmp) %{ // Atomic int exchange: stores dst to mem via a compare-and-swap retry loop and leaves the previous memory contents in dst.
+ MacroAssembler _masm(&cbuf);
+ Register Rdst = reg_to_register_object($dst$$reg);
+ Register Rtmp = reg_to_register_object($tmp$$reg);
+ guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF"); // dst and tmp must be distinct registers.
+ Label retry;
+
+ // Iterate until swap succeeds.
+ __ z_llgf(Rtmp, $mem$$Address); // current contents
+ __ bind(retry); // Bound after the load: a retry does not reload explicitly (presumably the compare-and-swap refreshes Rtmp on failure - confirm).
+ // Nothing is computed here: the value to store is already in Rdst.
+ __ z_csy(Rtmp, Rdst, $mem$$Address); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value.
+ %}
+
+ enc_class z_enc_SwapL(memoryRSY mem, iRegL dst, iRegL tmp) %{ // Atomic long exchange: stores dst to mem via a compare-and-swap retry loop and leaves the previous memory contents in dst.
+ MacroAssembler _masm(&cbuf);
+ Register Rdst = reg_to_register_object($dst$$reg);
+ Register Rtmp = reg_to_register_object($tmp$$reg);
+ guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF"); // dst and tmp must be distinct registers.
+ Label retry;
+
+ // Iterate until swap succeeds.
+ __ z_lg(Rtmp, $mem$$Address); // current contents
+ __ bind(retry); // Bound after the load: a retry does not reload explicitly (presumably the compare-and-swap refreshes Rtmp on failure - confirm).
+ // Nothing is computed here: the value to store is already in Rdst.
+ __ z_csg(Rtmp, Rdst, $mem$$Address); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value.
+ %}
+
+%} // encode
+
+source %{
+
+ // Returns true iff every out (user) of n is a Mach node whose ideal
+ // opcode is Op_StoreN. If so, we can omit clearing the upper 32 bits
+ // after encoding.
+ static bool all_outs_are_Stores(const Node *n) {
+   for (DUIterator_Fast last, cur = n->fast_outs(last); cur < last; cur++) {
+     Node *user = n->fast_out(cur);
+     const bool is_narrow_store = user->is_Mach() && user->as_Mach()->ideal_Opcode() == Op_StoreN;
+     if (is_narrow_store) {
+       continue;
+     }
+     // Most other outs are SpillCopy, but there are various others.
+     // jvm98 has around 9% Encodes where we return false.
+     return false;
+   }
+   return true;
+ }
+
+%} // source
+
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+
+frame %{
+ // What direction does stack grow in (assumed to be same for native & Java).
+ stack_direction(TOWARDS_LOW);
+
+ // These two registers define part of the calling convention between
+ // compiled code and the interpreter.
+
+ // Inline Cache Register
+ inline_cache_reg(Z_R9); // Z_inline_cache
+
+ // Argument pointer for I2C adapters
+ //
+ // Tos is loaded in run_compiled_code to Z_ARG5=Z_R6.
+ // interpreter_arg_ptr_reg(Z_R6);
+
+ // Temporary in compiled entry-points
+ // compiler_method_oop_reg(Z_R1);//Z_R1_scratch
+
+ // Method Oop Register when calling interpreter (the same register, Z_R9, as the inline cache register above).
+ interpreter_method_oop_reg(Z_R9);//Z_method
+
+ // Optional: name the operand used by cisc-spilling to access
+ // [stack_pointer + offset].
+ cisc_spilling_operand_name(indOffset12);
+
+ // Number of stack slots consumed by a Monitor enter.
+ sync_stack_slots(frame::jit_monitor_size_in_4_byte_units);
+
+ // Compiled code's Frame Pointer
+ //
+ // z/Architecture stack pointer
+ frame_pointer(Z_R15); // Z_SP
+
+ // Interpreter stores its frame pointer in a register which is
+ // stored to the stack by I2CAdaptors. I2CAdaptors convert from
+ // interpreted java to compiled java.
+ //
+ // Z_state holds pointer to caller's cInterpreter.
+ interpreter_frame_pointer(Z_R7); // Z_state
+
+ // Use alignment_in_bytes instead of log_2_of_alignment_in_bits.
+ stack_alignment(frame::alignment_in_bytes);
+
+ in_preserve_stack_slots(frame::jit_in_preserve_size_in_4_byte_units);
+
+ // A `slot' is assumed 4 bytes here!
+ // out_preserve_stack_slots(frame::jit_out_preserve_size_in_4_byte_units);
+
+ // Number of outgoing stack slots killed above the
+ // out_preserve_stack_slots for calls to C. Supports the var-args
+ // backing area for register parms.
+ varargs_C_out_slots_killed(((frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size));
+
+ // The after-PROLOG location of the return address. Location of
+ // return address specifies a type (REG or STACK) and a number
+ // representing the register number (i.e. - use a register name) or
+ // stack slot.
+ return_addr(REG Z_R14);
+
+ // This is the body of the function
+ //
+ // void Matcher::calling_convention(OptoRegPair* sig /* array of ideal regs */,
+ // uint length /* length of array */,
+ // bool is_outgoing)
+ //
+ // The `sig' array is to be updated. Sig[j] represents the location
+ // of the j-th argument, either a register or a stack slot.
+
+ // Body of function which returns an integer array locating
+ // arguments either in registers or in stack slots. Passed an array
+ // of ideal registers called "sig" and a "length" count. Stack-slot
+ // offsets are based on outgoing arguments, i.e. a CALLER setting up
+ // arguments for a CALLEE. Incoming stack arguments are
+ // automatically biased by the preserve_stack_slots field above.
+ calling_convention %{
+ // No difference between ingoing/outgoing just pass false.
+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
+ %}
+
+ // Body of function which returns an integer array locating
+ // arguments either in registers or in stack slots. Passed an array
+ // of ideal registers called "sig" and a "length" count. Stack-slot
+ // offsets are based on outgoing arguments, i.e. a CALLER setting up
+ // arguments for a CALLEE. Incoming stack arguments are
+ // automatically biased by the preserve_stack_slots field above.
+ c_calling_convention %{
+ // This is obviously always outgoing.
+ // C argument must be in register AND stack slot.
+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); // Return value is deliberately discarded.
+ %}
+
+ // Location of native (C/C++) and interpreter return values. This
+ // is specified to be the same as Java. In the 32-bit VM, long
+ // values are actually returned from native calls in O0:O1 and
+ // returned to the interpreter in I0:I1. The copying to and from
+ // the register pairs is done by the appropriate call and epilog
+ // opcodes. This simplifies the register allocator.
+ // NOTE(review): O0:O1 / I0:I1 are not registers used in the tables below (Z_R2, Z_F0); the 32-bit remark looks inherited from another port - confirm.
+ // Use register pair for c return value.
+ c_return_value %{
+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values");
+ static int typeToRegLo[Op_RegL+1] = { 0, 0, Z_R2_num, Z_R2_num, Z_R2_num, Z_F0_num, Z_F0_num, Z_R2_num }; // Low half of the return location, indexed by ideal_reg.
+ static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, Z_R2_H_num, OptoReg::Bad, Z_F0_H_num, Z_R2_H_num }; // High half, indexed by ideal_reg; OptoReg::Bad where there is none.
+ return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
+ %}
+
+ // Use register pair for return value.
+ // Location of compiled Java return values. Same as C (the two tables are identical to those in c_return_value).
+ return_value %{
+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values");
+ static int typeToRegLo[Op_RegL+1] = { 0, 0, Z_R2_num, Z_R2_num, Z_R2_num, Z_F0_num, Z_F0_num, Z_R2_num }; // Low half of the return location, indexed by ideal_reg.
+ static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, Z_R2_H_num, OptoReg::Bad, Z_F0_H_num, Z_R2_H_num }; // High half, indexed by ideal_reg; OptoReg::Bad where there is none.
+ return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
+ %}
+%}
+
+
+//----------ATTRIBUTES---------------------------------------------------------
+
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(1); // Required cost attribute; default value 1.
+
+//----------Instruction Attributes---------------------------------------------
+
+// Cost attribute. Required. Defaults to DEFAULT_COST.
+ins_attrib ins_cost(DEFAULT_COST);
+
+// Is this instruction a non-matching short branch variant of some
+// long branch? Not required. Default: 0.
+ins_attrib ins_short_branch(0);
+
+// Indicates this is a trap based check node and final control-flow fixup
+// must generate a proper fall through. Note that the default declared here is true.
+ins_attrib ins_is_TrapBasedCheckNode(true);
+
+// Attribute of instruction to tell how many constants the instruction will generate.
+// (optional attribute). Default: 0.
+ins_attrib ins_num_consts(0);
+
+// Required alignment attribute (must be a power of 2).
+// Specifies the alignment that some part of the instruction (not
+// necessarily the start) requires. If > 1, a compute_padding()
+// function must be provided for the instruction.
+//
+// WARNING: Don't use size(FIXED_SIZE) or size(VARIABLE_SIZE) in
+// instructions which depend on the proper alignment, because the
+// desired alignment isn't guaranteed for the call to "emit()" during
+// the size computation.
+ins_attrib ins_alignment(1);
+
+// Enforce/prohibit rematerializations.
+// - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
+// then rematerialization of that instruction is prohibited and the
+// instruction's value will be spilled if necessary.
+// - If an instruction is attributed with 'ins_should_rematerialize(true)'
+// then rematerialization is enforced and the instruction's value will
+// never get spilled. A copy of the instruction will be inserted if
+// necessary.
+// Note: this may result in rematerializations in front of every use.
+// (optional attribute) Both default to false.
+ins_attrib ins_cannot_rematerialize(false);
+ins_attrib ins_should_rematerialize(false);
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct
+// parsing in the ADLC because operands constitute user defined types
+// which are used in instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+// Immediate Operands
+// Please note:
+// Formats are generated automatically for constants and base registers.
+
+//----------------------------------------------
+// SIGNED (shorter than INT) immediate operands
+//----------------------------------------------
+
+// Byte Immediate "-1": int constant whose low 8 bits are all ones (covers 'int -1' as well as e.g. 0x000000ff).
+operand immB_minus1() %{
+ // sign-ext constant zero-ext constant
+ predicate((n->get_int() == -1) || ((n->get_int()&0x000000ff) == 0x000000ff));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Byte Immediate: int constant that is neither 'int 0' nor 'int -1' and whose low 8 bits are not all ones.
+operand immB_n0m1() %{
+ // sign-ext constant zero-ext constant
+ predicate(n->get_int() != 0 && n->get_int() != -1 && (n->get_int()&0x000000ff) != 0x000000ff);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Short Immediate "-1": int constant whose low 16 bits are all ones (covers 'int -1' as well as e.g. 0x0000ffff).
+operand immS_minus1() %{
+ // sign-ext constant zero-ext constant
+ predicate((n->get_int() == -1) || ((n->get_int()&0x0000ffff) == 0x0000ffff));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Short Immediate: int constant that is neither 'int 0' nor 'int -1' and whose low 16 bits are not all ones.
+operand immS_n0m1() %{
+ // sign-ext constant zero-ext constant
+ predicate(n->get_int() != 0 && n->get_int() != -1 && (n->get_int()&0x0000ffff) != 0x0000ffff);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//-----------------------------------------
+// SIGNED INT immediate operands
+//-----------------------------------------
+
+// Integer Immediate: 32-bit. Matches every ConI (no predicate).
+operand immI() %{
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: 20-bit signed (value must satisfy Immediate::is_simm20).
+operand immI20() %{
+ predicate(Immediate::is_simm20(n->get_int()));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: 16-bit signed (value must satisfy Immediate::is_simm16).
+operand immI16() %{
+ predicate(Immediate::is_simm16(n->get_int()));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: 8-bit signed (value must satisfy Immediate::is_simm8).
+operand immI8() %{
+ predicate(Immediate::is_simm8(n->get_int()));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the constant 'int 0'.
+operand immI_0() %{
+ predicate(n->get_int() == 0);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the constant 'int -1'.
+operand immI_minus1() %{
+ predicate(n->get_int() == -1);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: any int constant except 'int 0' and 'int -1'.
+operand immI_n0m1() %{
+ predicate(n->get_int() != 0 && n->get_int() != -1);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//-------------------------------------------
+// UNSIGNED INT immediate operands
+//-------------------------------------------
+
+// Unsigned Integer Immediate: 32-bit. Matches every ConI (no predicate), same match rule as immI.
+operand uimmI() %{
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 16-bit (value must satisfy Immediate::is_uimm16).
+operand uimmI16() %{
+ predicate(Immediate::is_uimm16(n->get_int()));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 12-bit (value must satisfy Immediate::is_uimm12).
+operand uimmI12() %{
+ predicate(Immediate::is_uimm12(n->get_int()));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 8-bit (value must satisfy Immediate::is_uimm8).
+operand uimmI8() %{
+ predicate(Immediate::is_uimm8(n->get_int()));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 6-bit (value must satisfy Immediate::is_uimm(value, 6)).
+operand uimmI6() %{
+ predicate(Immediate::is_uimm(n->get_int(), 6));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 5-bit (value must satisfy Immediate::is_uimm(value, 5)).
+operand uimmI5() %{
+ predicate(Immediate::is_uimm(n->get_int(), 5));
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Length for SS instructions, given in DWs,
+// possible range [1..512], i.e. [8..4096] Bytes
+// used range [1..256], i.e. [8..2048] Bytes
+// operand type long (matches ConL)
+// Accepted iff (length - 1) is an unsigned 8-bit immediate, i.e. length is in [1..256].
+operand SSlenDW() %{
+ predicate(Immediate::is_uimm8((unsigned long)n->get_long()-1)); // Subtract in unsigned arithmetic: a signed "minimum long - 1" would be undefined behavior.
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//------------------------------------------
+// (UN)SIGNED INT specific values
+//------------------------------------------
+
+// Integer Immediate: exactly the value 1.
+operand immI_1() %{
+ predicate(n->get_int() == 1);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 16.
+operand immI_16() %{
+ predicate(n->get_int() == 16);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 24.
+operand immI_24() %{
+ predicate(n->get_int() == 24);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 255.
+operand immI_255() %{
+ predicate(n->get_int() == 255);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: any value in the inclusive range 32..63.
+operand immI_32_63() %{
+ predicate(n->get_int() >= 32 && n->get_int() <= 63);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: LL-part, extended by 1s (upper 16 bits are all ones; only the low halfword is unconstrained).
+operand uimmI_LL1() %{
+ predicate((n->get_int() & 0xFFFF0000) == 0xFFFF0000);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: LH-part, extended by 1s (low 16 bits are all ones; only bits 16..31 are unconstrained).
+operand uimmI_LH1() %{
+ predicate((n->get_int() & 0xFFFF) == 0xFFFF);
+ match(ConI);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//------------------------------------------
+// SIGNED LONG immediate operands
+//------------------------------------------
+
+operand immL() %{ // Long Immediate: 64-bit. Matches every ConL (no predicate).
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: 32-bit signed (value must satisfy Immediate::is_simm32).
+operand immL32() %{
+ predicate(Immediate::is_simm32(n->get_long()));
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: 20-bit signed (value must satisfy Immediate::is_simm20).
+operand immL20() %{
+ predicate(Immediate::is_simm20(n->get_long()));
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: 16-bit signed (value must satisfy Immediate::is_simm16).
+operand immL16() %{
+ predicate(Immediate::is_simm16(n->get_long()));
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: 8-bit signed (value must satisfy Immediate::is_simm8).
+operand immL8() %{
+ predicate(Immediate::is_simm8(n->get_long()));
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//--------------------------------------------
+// UNSIGNED LONG immediate operands
+//--------------------------------------------
+
+operand uimmL32() %{ // Unsigned Long Immediate: 32-bit (value must satisfy Immediate::is_uimm32).
+ predicate(Immediate::is_uimm32(n->get_long()));
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: 16-bit (value must satisfy Immediate::is_uimm16).
+operand uimmL16() %{
+ predicate(Immediate::is_uimm16(n->get_long()));
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: 12-bit (value must satisfy Immediate::is_uimm12).
+operand uimmL12() %{
+ predicate(Immediate::is_uimm12(n->get_long()));
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: 8-bit (value must satisfy Immediate::is_uimm8).
+operand uimmL8() %{
+ predicate(Immediate::is_uimm8(n->get_long()));
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//-------------------------------------------
+// (UN)SIGNED LONG specific values
+//-------------------------------------------
+
+// Long Immediate: exactly the value 0xFF (low 8-bit mask).
+operand immL_FF() %{
+ predicate(n->get_long() == 0xFFL);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: exactly the value 0xFFFF (low 16-bit mask).
+operand immL_FFFF() %{
+ predicate(n->get_long() == 0xFFFFL);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: exactly the value 0xFFFFFFFF (same predicate as immL_32bits).
+operand immL_FFFFFFFF() %{
+ predicate(n->get_long() == 0xFFFFFFFFL);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immL_0() %{ // Long Immediate: exactly the constant 'long 0'.
+ predicate(n->get_long() == 0L);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: LL-part, extended by 1s (bits 16..63 are all ones; only the low 16 bits are unconstrained).
+operand uimmL_LL1() %{
+ predicate((n->get_long() & 0xFFFFFFFFFFFF0000L) == 0xFFFFFFFFFFFF0000L);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: LH-part, extended by 1s (all bits except 16..31 are ones; only bits 16..31 are unconstrained).
+operand uimmL_LH1() %{
+ predicate((n->get_long() & 0xFFFFFFFF0000FFFFL) == 0xFFFFFFFF0000FFFFL);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: HL-part, extended by 1s (all bits except 32..47 are ones; only bits 32..47 are unconstrained).
+operand uimmL_HL1() %{
+ predicate((n->get_long() & 0xFFFF0000FFFFFFFFL) == 0xFFFF0000FFFFFFFFL);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: HH-part, extended by 1s (low 48 bits are all ones; only bits 48..63 are unconstrained).
+operand uimmL_HH1() %{
+ predicate((n->get_long() & 0xFFFFFFFFFFFFL) == 0xFFFFFFFFFFFFL);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask, i.e. exactly 0xFFFFFFFF (same predicate as immL_FFFFFFFF).
+operand immL_32bits() %{
+ predicate(n->get_long() == 0xFFFFFFFFL);
+ match(ConL);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//--------------------------------------
+// POINTER immediate operands
+//--------------------------------------
+
+// Pointer Immediate: 64-bit. No predicate, so any ConP node matches.
+operand immP() %{
+ match(ConP);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 32-bit. The pointer value must pass Immediate::is_uimm32().
+operand immP32() %{
+ predicate(Immediate::is_uimm32(n->get_ptr()));
+ match(ConP);
+ op_cost(1); // Same cost as the unrestricted immP.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 16-bit. The pointer value must pass Immediate::is_uimm16().
+operand immP16() %{
+ predicate(Immediate::is_uimm16(n->get_ptr()));
+ match(ConP);
+ op_cost(1); // Same cost as the unrestricted immP.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 8-bit. The pointer value must pass Immediate::is_uimm8().
+operand immP8() %{
+ predicate(Immediate::is_uimm8(n->get_ptr()));
+ match(ConP);
+ op_cost(1); // Same cost as the unrestricted immP.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//-----------------------------------
+// POINTER specific values
+//-----------------------------------
+
+// Pointer Immediate: NULL (pointer value 0).
+operand immP0() %{
+ predicate(n->get_ptr() == 0);
+ match(ConP);
+ op_cost(1); // Same cost as the unrestricted immP.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+//---------------------------------------------
+// NARROW POINTER immediate operands
+//---------------------------------------------
+
+// Narrow Pointer Immediate. No predicate, so any ConN node matches.
+operand immN() %{
+ match(ConN);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immNKlass() %{
+ match(ConNKlass);
+ op_cost(1); // Narrow klass immediate: any ConNKlass node matches (no predicate).
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Narrow Pointer Immediate: 8-bit. The narrow constant must pass Immediate::is_uimm8().
+operand immN8() %{
+ predicate(Immediate::is_uimm8(n->get_narrowcon()));
+ match(ConN);
+ op_cost(1); // Same cost as the unrestricted immN.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Narrow NULL Pointer Immediate (narrow constant 0).
+operand immN0() %{
+ predicate(n->get_narrowcon() == 0);
+ match(ConN);
+ op_cost(1); // Same cost as the unrestricted immN.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// FLOAT and DOUBLE immediate operands
+
+// Double Immediate. No predicate, so any ConD node matches.
+operand immD() %{
+ match(ConD);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Double Immediate: +-0. The value comparison is true for both +0.0 and -0.0.
+operand immDpm0() %{
+ predicate(n->getd() == 0);
+ match(ConD);
+ op_cost(1); // Same cost as the unrestricted immD.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Double Immediate: +0. Presumably jlong_cast() yields the raw bits, excluding -0.0 - TODO confirm.
+operand immDp0() %{
+ predicate(jlong_cast(n->getd()) == 0);
+ match(ConD);
+ op_cost(1); // Same cost as the unrestricted immD.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float Immediate. No predicate, so any ConF node matches.
+operand immF() %{
+ match(ConF);
+ op_cost(1);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float Immediate: +-0. The value comparison is true for both +0.0f and -0.0f.
+operand immFpm0() %{
+ predicate(n->getf() == 0);
+ match(ConF);
+ op_cost(1); // Same cost as the unrestricted immF.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float Immediate: +0. Presumably jint_cast() yields the raw bits, excluding -0.0f - TODO confirm.
+operand immFp0() %{
+ predicate(jint_cast(n->getf()) == 0);
+ match(ConF);
+ op_cost(1); // Same cost as the unrestricted immF.
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// End of Immediate Operands
+
+// Integer Register Operands
+// Integer Register: general int operand, also accepts the restricted int operands listed below.
+operand iRegI() %{
+ constraint(ALLOC_IN_RC(z_int_reg));
+ match(RegI);
+ match(noArg_iRegI); // The restricted operands are accepted wherever iRegI is required.
+ match(rarg1RegI);
+ match(rarg2RegI);
+ match(rarg3RegI);
+ match(rarg4RegI);
+ match(rarg5RegI);
+ match(noOdd_iRegI);
+ match(revenRegI);
+ match(roddRegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand noArg_iRegI() %{
+ constraint(ALLOC_IN_RC(z_no_arg_int_reg)); // Int register taken from class z_no_arg_int_reg.
+ match(RegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// revenRegI and roddRegI constitute an even-odd pair.
+operand revenRegI() %{
+ constraint(ALLOC_IN_RC(z_rarg3_int_reg)); // Same register class as rarg3RegI.
+ match(iRegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// revenRegI and roddRegI constitute an even-odd pair.
+operand roddRegI() %{
+ constraint(ALLOC_IN_RC(z_rarg4_int_reg)); // Same register class as rarg4RegI.
+ match(iRegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg1RegI() %{
+ constraint(ALLOC_IN_RC(z_rarg1_int_reg)); // Int operand pinned to register class z_rarg1_int_reg.
+ match(iRegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg2RegI() %{
+ constraint(ALLOC_IN_RC(z_rarg2_int_reg)); // Int operand pinned to register class z_rarg2_int_reg.
+ match(iRegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg3RegI() %{
+ constraint(ALLOC_IN_RC(z_rarg3_int_reg)); // Int operand pinned to register class z_rarg3_int_reg.
+ match(iRegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg4RegI() %{
+ constraint(ALLOC_IN_RC(z_rarg4_int_reg)); // Int operand pinned to register class z_rarg4_int_reg.
+ match(iRegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg5RegI() %{
+ constraint(ALLOC_IN_RC(z_rarg5_int_reg)); // Int operand pinned to register class z_rarg5_int_reg.
+ match(iRegI);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand noOdd_iRegI() %{
+ constraint(ALLOC_IN_RC(z_no_odd_int_reg)); // Int register taken from class z_no_odd_int_reg.
+ match(RegI);
+ match(revenRegI); // The even half of the even-odd pair is accepted as well.
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer Register: general pointer operand, also accepts the restricted pointer operands listed below.
+operand iRegP() %{
+ constraint(ALLOC_IN_RC(z_ptr_reg));
+ match(RegP);
+ match(noArg_iRegP); // The restricted operands are accepted wherever iRegP is required.
+ match(rarg1RegP);
+ match(rarg2RegP);
+ match(rarg3RegP);
+ match(rarg4RegP);
+ match(rarg5RegP);
+ match(revenRegP);
+ match(roddRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Thread operand: pointer register from class z_thread_ptr_reg, printed as "Z_THREAD".
+operand threadRegP() %{
+ constraint(ALLOC_IN_RC(z_thread_ptr_reg));
+ match(RegP);
+ format %{ "Z_THREAD" %}
+ interface(REG_INTER);
+%}
+
+operand noArg_iRegP() %{
+ constraint(ALLOC_IN_RC(z_no_arg_ptr_reg)); // Pointer register taken from class z_no_arg_ptr_reg.
+ match(iRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg1RegP() %{
+ constraint(ALLOC_IN_RC(z_rarg1_ptr_reg)); // Pointer operand pinned to register class z_rarg1_ptr_reg.
+ match(iRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg2RegP() %{
+ constraint(ALLOC_IN_RC(z_rarg2_ptr_reg)); // Pointer operand pinned to register class z_rarg2_ptr_reg.
+ match(iRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg3RegP() %{
+ constraint(ALLOC_IN_RC(z_rarg3_ptr_reg)); // Pointer operand pinned to register class z_rarg3_ptr_reg.
+ match(iRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg4RegP() %{
+ constraint(ALLOC_IN_RC(z_rarg4_ptr_reg)); // Pointer operand pinned to register class z_rarg4_ptr_reg.
+ match(iRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg5RegP() %{
+ constraint(ALLOC_IN_RC(z_rarg5_ptr_reg)); // Pointer operand pinned to register class z_rarg5_ptr_reg.
+ match(iRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand memoryRegP() %{
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg)); // Pointer register usable as a memory base (see the memory operands).
+ match(RegP);
+ match(iRegP);
+ match(threadRegP); // The thread register is accepted as a base register, too.
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// revenRegP and roddRegP constitute an even-odd pair.
+operand revenRegP() %{
+ constraint(ALLOC_IN_RC(z_rarg3_ptr_reg)); // Same register class as rarg3RegP.
+ match(iRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// revenRegP and roddRegP constitute an even-odd pair.
+operand roddRegP() %{
+ constraint(ALLOC_IN_RC(z_rarg4_ptr_reg)); // Same register class as rarg4RegP.
+ match(iRegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand lock_ptr_RegP() %{
+ constraint(ALLOC_IN_RC(z_lock_ptr_reg)); // Pointer register taken from class z_lock_ptr_reg.
+ match(RegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rscratch2RegP() %{
+ constraint(ALLOC_IN_RC(z_rscratch2_bits64_reg)); // Pointer operand pinned to class z_rscratch2_bits64_reg.
+ match(RegP);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand iRegN() %{
+ constraint(ALLOC_IN_RC(z_int_reg)); // Narrow pointer register; uses the same register class as iRegI.
+ match(RegN);
+ match(noArg_iRegN); // The restricted operands are accepted wherever iRegN is required.
+ match(rarg1RegN);
+ match(rarg2RegN);
+ match(rarg3RegN);
+ match(rarg4RegN);
+ match(rarg5RegN);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand noArg_iRegN() %{
+ constraint(ALLOC_IN_RC(z_no_arg_int_reg)); // Narrow pointer register taken from class z_no_arg_int_reg.
+ match(iRegN);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg1RegN() %{
+ constraint(ALLOC_IN_RC(z_rarg1_int_reg)); // Narrow pointer operand pinned to class z_rarg1_int_reg.
+ match(iRegN);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg2RegN() %{
+ constraint(ALLOC_IN_RC(z_rarg2_int_reg)); // Narrow pointer operand pinned to class z_rarg2_int_reg.
+ match(iRegN);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg3RegN() %{
+ constraint(ALLOC_IN_RC(z_rarg3_int_reg)); // Narrow pointer operand pinned to class z_rarg3_int_reg.
+ match(iRegN);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg4RegN() %{
+ constraint(ALLOC_IN_RC(z_rarg4_int_reg)); // Narrow pointer operand pinned to class z_rarg4_int_reg.
+ match(iRegN);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg5RegN() %{
+ constraint(ALLOC_IN_RC(z_rarg5_ptrN_reg)); // NOTE(review): rarg1..4RegN use z_rargN_int_reg; confirm this class name is intended.
+ match(iRegN);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Long Register: general long operand, also accepts the restricted long operands listed below.
+operand iRegL() %{
+ constraint(ALLOC_IN_RC(z_long_reg));
+ match(RegL);
+ match(revenRegL); // The restricted operands are accepted wherever iRegL is required.
+ match(roddRegL);
+ match(rarg1RegL);
+ match(rarg5RegL);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// revenRegL and roddRegL constitute an even-odd pair.
+operand revenRegL() %{
+ constraint(ALLOC_IN_RC(z_rarg3_long_reg)); // Even half: register class z_rarg3_long_reg.
+ match(iRegL);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// revenRegL and roddRegL constitute an even-odd pair.
+operand roddRegL() %{
+ constraint(ALLOC_IN_RC(z_rarg4_long_reg)); // Odd half: register class z_rarg4_long_reg.
+ match(iRegL);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg1RegL() %{
+ constraint(ALLOC_IN_RC(z_rarg1_long_reg)); // Long operand pinned to register class z_rarg1_long_reg.
+ match(iRegL);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rarg5RegL() %{
+ constraint(ALLOC_IN_RC(z_rarg5_long_reg)); // Long operand pinned to register class z_rarg5_long_reg.
+ match(iRegL);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Condition Code Flag Register (printed as "CR"), plain register interface.
+operand flagsReg() %{
+ constraint(ALLOC_IN_RC(z_condition_reg));
+ match(RegFlags);
+ format %{ "CR" %}
+ interface(REG_INTER);
+%}
+
+// Condition Code Flag Register for rules with result tuples.
+operand TD_flagsReg() %{
+ constraint(ALLOC_IN_RC(z_condition_reg));
+ match(RegFlags);
+ format %{ "CR" %}
+ interface(REG_TUPLE_DEST_INTER); // Only difference to flagsReg: the tuple-destination interface.
+%}
+
+operand regD() %{
+ constraint(ALLOC_IN_RC(z_dbl_reg)); // Double register taken from class z_dbl_reg.
+ match(RegD);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rscratchRegD() %{
+ constraint(ALLOC_IN_RC(z_rscratch1_dbl_reg)); // Double operand pinned to class z_rscratch1_dbl_reg.
+ match(RegD);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand regF() %{
+ constraint(ALLOC_IN_RC(z_flt_reg)); // Float register taken from class z_flt_reg.
+ match(RegF);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand rscratchRegF() %{
+ constraint(ALLOC_IN_RC(z_rscratch1_flt_reg)); // Float operand pinned to class z_rscratch1_flt_reg.
+ match(RegF);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Method Register: pointer operand for the inline cache, pinned to class z_r9_regP.
+operand inline_cache_regP(iRegP reg) %{
+ constraint(ALLOC_IN_RC(z_r9_regP)); // inline_cache_reg
+ match(reg);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand compiler_method_oop_regP(iRegP reg) %{
+ constraint(ALLOC_IN_RC(z_r1_RegP)); // compiler_method_oop_reg (pinned to class z_r1_RegP)
+ match(reg);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand interpreter_method_oop_regP(iRegP reg) %{
+ constraint(ALLOC_IN_RC(z_r9_regP)); // interpreter_method_oop_reg (same class as inline_cache_regP)
+ match(reg);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Operands to remove register moves in unscaled mode.
+// Match read/write registers with an EncodeP node if neither shift nor add are required.
+operand iRegP2N(iRegP reg) %{
+ predicate(Universe::narrow_oop_shift() == 0 && _leaf->as_EncodeP()->in(0) == NULL);
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(EncodeP reg); // Requires shift == 0 and an EncodeP whose in(0) is NULL (see predicate).
+ format %{ "$reg" %}
+ interface(REG_INTER); // Terminated with ';' like every other operand interface in this file.
+%}
+
+operand iRegN2P(iRegN reg) %{
+ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 &&
+ _leaf->as_DecodeN()->in(0) == NULL);
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(DecodeN reg); // Requires base == NULL, shift == 0 and a DecodeN whose in(0) is NULL (see predicate).
+ format %{ "$reg" %}
+ interface(REG_INTER); // Terminated with ';' like every other operand interface in this file.
+%}
+
+
+//----------Complex Operands---------------------------------------------------
+
+// Indirect Memory Reference: base register only, no index, zero displacement.
+operand indirect(memoryRegP base) %{
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(base);
+ op_cost(1);
+ format %{ "#0[,$base]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp(0x0);
+ %}
+%}
+
+// Indirect with Offset (long): base register plus immL20 displacement, no index.
+operand indOffset20(memoryRegP base, immL20 offset) %{
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP base offset);
+ op_cost(1);
+ format %{ "$offset[,$base]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+operand indOffset20Narrow(iRegN base, immL20 offset) %{
+ predicate(Matcher::narrow_oop_use_complex_address());
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP (DecodeN base) offset); // Like indOffset20, but subsumes the DecodeN of a narrow base.
+ op_cost(1);
+ format %{ "$offset[,$base]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with Offset (short): base register plus uimmL12 displacement, no index.
+operand indOffset12(memoryRegP base, uimmL12 offset) %{
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP base offset);
+ op_cost(1);
+ format %{ "$offset[[,$base]]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+operand indOffset12Narrow(iRegN base, uimmL12 offset) %{
+ predicate(Matcher::narrow_oop_use_complex_address());
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP (DecodeN base) offset); // Like indOffset12, but subsumes the DecodeN of a narrow base.
+ op_cost(1);
+ format %{ "$offset[[,$base]]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with Register Index: base register plus long index register, zero displacement.
+operand indIndex(memoryRegP base, iRegL index) %{
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP base index);
+ op_cost(1);
+ format %{ "#0[($index,$base)]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index($index);
+ scale(0x0); // The index is used unscaled.
+ disp(0x0);
+ %}
+%}
+
+// Indirect with Offset (long) and index: base + index register + immL20 displacement.
+operand indOffset20index(memoryRegP base, immL20 offset, iRegL index) %{
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP (AddP base index) offset);
+ op_cost(1);
+ format %{ "$offset[($index,$base)]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index($index);
+ scale(0x0); // The index is used unscaled.
+ disp($offset);
+ %}
+%}
+
+operand indOffset20indexNarrow(iRegN base, immL20 offset, iRegL index) %{
+ predicate(Matcher::narrow_oop_use_complex_address());
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP (AddP (DecodeN base) index) offset); // Like indOffset20index, but subsumes the DecodeN of a narrow base.
+ op_cost(1);
+ format %{ "$offset[($index,$base)]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index($index);
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with Offset (short) and index: base + index register + uimmL12 displacement.
+operand indOffset12index(memoryRegP base, uimmL12 offset, iRegL index) %{
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP (AddP base index) offset);
+ op_cost(1);
+ format %{ "$offset[[($index,$base)]]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index($index);
+ scale(0x0); // The index is used unscaled.
+ disp($offset);
+ %}
+%}
+
+operand indOffset12indexNarrow(iRegN base, uimmL12 offset, iRegL index) %{
+ predicate(Matcher::narrow_oop_use_complex_address());
+ constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+ match(AddP (AddP (DecodeN base) index) offset); // Like indOffset12index, but subsumes the DecodeN of a narrow base.
+ op_cost(1);
+ format %{ "$offset[[($index,$base)]]" %}
+ interface(MEMORY_INTER) %{
+ base($base);
+ index($index);
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+//----------Special Memory Operands--------------------------------------------
+
+// Stack Slot Operand (int)
+// This operand is used for loading and storing temporary values on
+// the stack where a match requires a value to flow through memory.
+operand stackSlotI(sRegI reg) %{
+ constraint(ALLOC_IN_RC(stack_slots));
+ op_cost(1);
+ format %{ "[$reg(stackSlotI)]" %}
+ interface(MEMORY_INTER) %{
+ base(0xf); // Z_SP
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($reg); // Stack offset
+ %}
+%}
+
+operand stackSlotP(sRegP reg) %{
+ constraint(ALLOC_IN_RC(stack_slots)); // Pointer stack slot, addressed relative to Z_SP.
+ op_cost(1);
+ format %{ "[$reg(stackSlotP)]" %}
+ interface(MEMORY_INTER) %{
+ base(0xf); // Z_SP
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($reg); // Stack offset
+ %}
+%}
+
+operand stackSlotF(sRegF reg) %{
+ constraint(ALLOC_IN_RC(stack_slots)); // Float stack slot, addressed relative to Z_SP.
+ op_cost(1);
+ format %{ "[$reg(stackSlotF)]" %}
+ interface(MEMORY_INTER) %{
+ base(0xf); // Z_SP
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($reg); // Stack offset
+ %}
+%}
+
+operand stackSlotD(sRegD reg) %{
+ constraint(ALLOC_IN_RC(stack_slots)); // Double stack slot, addressed relative to Z_SP.
+ op_cost(1);
+ //match(RegD);
+ format %{ "[$reg(stackSlotD)]" %}
+ interface(MEMORY_INTER) %{
+ base(0xf); // Z_SP
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($reg); // Stack offset
+ %}
+%}
+
+operand stackSlotL(sRegL reg) %{
+ constraint(ALLOC_IN_RC(stack_slots)); // Long stack slot, addressed relative to Z_SP.
+ op_cost(1); //match(RegL);
+ format %{ "[$reg(stackSlotL)]" %}
+ interface(MEMORY_INTER) %{
+ base(0xf); // Z_SP
+ index(0xffffFFFF); // noreg
+ scale(0x0);
+ disp($reg); // Stack offset
+ %}
+%}
+
+// Operands for expressing Control Flow
+// NOTE: Label is a predefined operand which should not be redefined in
+// the AD file. It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op - This is the operation of the comparison, and is limited to
+// the following set of codes:
+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below.
+
+// INT cmpOps for CompareAndBranch and CompareAndTrap instructions should not
+// have mask bit #3 (0x1) set; none of the six comparison codes below has it.
+operand cmpOpT() %{
+ match(Bool);
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x8); // Assembler::bcondEqual
+ not_equal(0x6); // Assembler::bcondNotEqual
+ less(0x4); // Assembler::bcondLow
+ greater_equal(0xa); // Assembler::bcondNotLow
+ less_equal(0xc); // Assembler::bcondNotHigh
+ greater(0x2); // Assembler::bcondHigh
+ overflow(0x1); // Assembler::bcondOverflow
+ no_overflow(0xe); // Assembler::bcondNotOverflow
+ %}
+%}
+
+// When used for floating point comparisons: unordered is treated as less.
+operand cmpOpF() %{
+ match(Bool);
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x8);
+ not_equal(0x7); // Includes 'unordered' (mask bit 0x1).
+ less(0x5); // Includes 'unordered' (mask bit 0x1).
+ greater_equal(0xa);
+ less_equal(0xd); // Includes 'unordered' (mask bit 0x1).
+ greater(0x2);
+ overflow(0x0); // Not meaningful on z/Architecture.
+ no_overflow(0x0); // leave unchanged (zero) therefore
+ %}
+%}
+
+// "Regular" cmpOp for int comparisons, includes bit #3 (overflow).
+operand cmpOp() %{
+ match(Bool);
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x8);
+ not_equal(0x7); // cmpOpT value 0x6 plus mask bit #3 (0x1, overflow).
+ less(0x5); // cmpOpT value 0x4 plus mask bit #3 (0x1, overflow).
+ greater_equal(0xa);
+ less_equal(0xd); // cmpOpT value 0xc plus mask bit #3 (0x1, overflow).
+ greater(0x2);
+ overflow(0x1); // Assembler::bcondOverflow
+ no_overflow(0xe); // Assembler::bcondNotOverflow
+ %}
+%}
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify
+// separate instructions for every form of operand when the
+// instruction accepts multiple operand types with the same basic
+// encoding and format. The classic case of this is memory operands.
+// Indirect is not included since its use is limited to Compare & Swap
+
+// Most general memory operand, allows base, index, and long (20-bit) displacement.
+opclass memory(indirect, indIndex, indOffset20, indOffset20Narrow, indOffset20index, indOffset20indexNarrow);
+opclass memoryRXY(indirect, indIndex, indOffset20, indOffset20Narrow, indOffset20index, indOffset20indexNarrow); // Same members as 'memory'.
+
+// General memory operand, allows base, index, and short (12-bit) displacement.
+opclass memoryRX(indirect, indIndex, indOffset12, indOffset12Narrow, indOffset12index, indOffset12indexNarrow);
+
+// Memory operand, allows only base and long (20-bit) displacement; no index operands.
+opclass memoryRSY(indirect, indOffset20, indOffset20Narrow);
+
+// Memory operand, allows only base and short (12-bit) displacement; no index operands.
+opclass memoryRS(indirect, indOffset12, indOffset12Narrow);
+
+// Operand classes to match encode and decode. Each currently has a single member.
+opclass iRegN_P2N(iRegN);
+opclass iRegP_N2P(iRegP); // Single member: iRegP.
+
+
+//----------PIPELINE-----------------------------------------------------------
+pipeline %{
+
+//----------ATTRIBUTES---------------------------------------------------------
+attributes %{
+ // z/Architecture instructions are of length 2, 4, or 6 bytes.
+ variable_size_instructions;
+ instruction_unit_size = 2;
+
+ // Meaningless on z/Architecture.
+ max_instructions_per_bundle = 1;
+
+ // The z/Architecture processor fetches 64 bytes...
+ instruction_fetch_unit_size = 64;
+
+ // ...in one line.
+ instruction_fetch_units = 1
+%}
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine.
+resources(
+ Z_BR, // branch unit
+ Z_CR, // condition unit
+ Z_FX1, // integer arithmetic unit 1
+ Z_FX2, // integer arithmetic unit 2
+ Z_LDST1, // load/store unit 1
+ Z_LDST2, // load/store unit 2
+ Z_FP1, // float arithmetic unit 1
+ Z_FP2, // float arithmetic unit 2
+ Z_LDST = Z_LDST1 | Z_LDST2, // either load/store unit
+ Z_FX = Z_FX1 | Z_FX2, // either integer arithmetic unit
+ Z_FP = Z_FP1 | Z_FP2 // either float arithmetic unit
+ );
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline.
+pipe_desc(
+ // TODO: adapt
+ Z_IF, // instruction fetch
+ Z_IC,
+ Z_D0, // decode
+ Z_D1, // decode
+ Z_D2, // decode
+ Z_D3, // decode
+ Z_Xfer1,
+ Z_GD, // group definition
+ Z_MP, // map
+ Z_ISS, // issue
+ Z_RF, // resource fetch
+ Z_EX1, // execute (all units)
+ Z_EX2, // execute (FP, LDST)
+ Z_EX3, // execute (FP, LDST)
+ Z_EX4, // execute (FP)
+ Z_EX5, // execute (FP)
+ Z_EX6, // execute (FP)
+ Z_WB, // write back
+ Z_Xfer2,
+ Z_CP
+ );
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
+
+// Providing the `ins_pipe' declarations in the instruction
+// specifications seems to be of little use. So we use
+// `pipe_class_dummy' for all our instructions at present.
+pipe_class pipe_class_dummy() %{
+ single_instruction;
+ fixed_latency(4);
+%}
+
+// SIGTRAP based implicit range checks in compiled code.
+// Currently, no pipe classes are used on z/Architecture.
+pipe_class pipe_class_trap() %{
+ single_instruction;
+%}
+
+pipe_class pipe_class_fx_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ single_instruction;
+ dst : Z_EX1(write); // Result is written in the first execute stage.
+ src1 : Z_RF(read); // Both sources are read in the resource fetch stage.
+ src2 : Z_RF(read);
+ Z_FX : Z_RF; // Uses one of the integer arithmetic units.
+%}
+
+pipe_class pipe_class_ldst(iRegP dst, memory mem) %{
+ single_instruction;
+ mem : Z_RF(read); // Memory operand is read in the resource fetch stage.
+ dst : Z_WB(write); // Result is written in the write back stage.
+ Z_LDST : Z_RF; // Uses one of the load/store units.
+%}
+
+define %{
+ MachNop = pipe_class_dummy;
+%}
+
+%}
+
+//----------INSTRUCTIONS-------------------------------------------------------
+
+//---------- Chain stack slots between similar types --------
+
+// Load integer from stack slot into an int register.
+instruct stkI_to_regI(iRegI dst, stackSlotI src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "L $dst,$src\t # stk reload int" %}
+ opcode(L_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src)); // Register operand first, memory operand second.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store integer from an int register to stack slot.
+instruct regI_to_stkI(stackSlotI dst, iRegI src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "ST $src,$dst\t # stk spill int" %}
+ opcode(ST_ZOPC);
+ ins_encode(z_form_rt_mem(src, dst)); // rs=rt: the source register takes the register slot.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load long from stack slot into a long register.
+instruct stkL_to_regL(iRegL dst, stackSlotL src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LG $dst,$src\t # stk reload long" %}
+ opcode(LG_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src)); // Register operand first, memory operand second.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store long from a long register to stack slot.
+instruct regL_to_stkL(stackSlotL dst, iRegL src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ size(6); // Fixed size of 6 bytes declared for STG.
+ format %{ "STG $src,$dst\t # stk spill long" %}
+ opcode(STG_ZOPC);
+ ins_encode(z_form_rt_mem(src, dst)); // rs=rt: the source register takes the register slot.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load pointer from stack slot, 64-bit encoding (same LG opcode as the long reload).
+instruct stkP_to_regP(iRegP dst, stackSlotP src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LG $dst,$src\t # stk reload ptr" %}
+ opcode(LG_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src)); // Register operand first, memory operand second.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store pointer to stack slot (same STG opcode as the long spill).
+instruct regP_to_stkP(stackSlotP dst, iRegP src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "STG $src,$dst\t # stk spill ptr" %}
+ opcode(STG_ZOPC);
+ ins_encode(z_form_rt_mem(src, dst)); // rs=rt: the source register takes the register slot.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Float types
+
+// Load float value from stack slot into a float register.
+instruct stkF_to_regF(regF dst, stackSlotF src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ size(4); // NOTE(review): format mentions the (Y) variant, but only LE_ZOPC is given - confirm size.
+ format %{ "LE(Y) $dst,$src\t # stk reload float" %}
+ opcode(LE_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store float value from a float register to stack slot.
+instruct regF_to_stkF(stackSlotF dst, regF src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ size(4); // NOTE(review): format mentions the (Y) variant, but only STE_ZOPC is given - confirm size.
+ format %{ "STE(Y) $src,$dst\t # stk spill float" %}
+ opcode(STE_ZOPC);
+ ins_encode(z_form_rt_mem(src, dst));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load double value from stack slot into a double register.
+instruct stkD_to_regD(regD dst, stackSlotD src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LD(Y) $dst,$src\t # stk reload double" %}
+ opcode(LD_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src)); // Register operand first, memory operand second.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store double value from a double register to stack slot.
+instruct regD_to_stkD(stackSlotD dst, regD src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST);
+ size(4); // NOTE(review): stkD_to_regD declares no size and the format mentions (Y) - confirm size.
+ format %{ "STD(Y) $src,$dst\t # stk spill double" %}
+ opcode(STD_ZOPC);
+ ins_encode(z_form_rt_mem(src, dst));
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Load/Store/Move Instructions---------------------------------------
+
+//----------Load Instructions--------------------------------------------------
+
+//------------------
+// MEMORY
+//------------------
+
+// BYTE
+// Load Byte (8bit signed) into an int register.
+instruct loadB(iRegI dst, memory mem) %{
+ match(Set dst (LoadB mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LB $dst, $mem\t # sign-extend byte to int" %}
+ opcode(LB_ZOPC, LB_ZOPC); // The same opcode is supplied for both opcode slots.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Byte (8bit signed) into a long register; subsumes the ConvI2L.
+instruct loadB2L(iRegL dst, memory mem) %{
+ match(Set dst (ConvI2L (LoadB mem)));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LGB $dst, $mem\t # sign-extend byte to long" %}
+ opcode(LGB_ZOPC, LGB_ZOPC); // The same opcode is supplied for both opcode slots.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into an int reg.
+instruct loadUB(iRegI dst, memory mem) %{
+ match(Set dst (LoadUB mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LLGC $dst,$mem\t # zero-extend byte to int" %}
+ opcode(LLGC_ZOPC, LLGC_ZOPC); // The same opcode is supplied for both opcode slots.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into a Long Register; subsumes the ConvI2L.
+instruct loadUB2L(iRegL dst, memory mem) %{
+ match(Set dst (ConvI2L (LoadUB mem)));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LLGC $dst,$mem\t # zero-extend byte to long" %}
+ opcode(LLGC_ZOPC, LLGC_ZOPC); // Same opcode as loadUB.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// CHAR/SHORT
+
+// Load Short (16bit signed) into an int register.
+instruct loadS(iRegI dst, memory mem) %{
+ match(Set dst (LoadS mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "LH(Y) $dst,$mem\t # sign-extend short to int" %}
+ opcode(LHY_ZOPC, LH_ZOPC); // Two opcodes; presumably chosen by displacement size in z_form_rt_mem_opt - TODO confirm.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Short (16bit signed) into a long register; subsumes the ConvI2L.
+instruct loadS2L(iRegL dst, memory mem) %{
+ match(Set dst (ConvI2L (LoadS mem)));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LGH $dst,$mem\t # sign-extend short to long" %}
+ opcode(LGH_ZOPC, LGH_ZOPC); // The same opcode is supplied for both opcode slots.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Char (16bit Unsigned) into an int register.
+instruct loadUS(iRegI dst, memory mem) %{
+ match(Set dst (LoadUS mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LLGH $dst,$mem\t # zero-extend short to int" %}
+ opcode(LLGH_ZOPC, LLGH_ZOPC); // The same opcode is supplied for both opcode slots.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) into a Long Register; subsumes the ConvI2L.
+instruct loadUS2L(iRegL dst, memory mem) %{
+ match(Set dst (ConvI2L (LoadUS mem)));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LLGH $dst,$mem\t # zero-extend short to long" %}
+ opcode(LLGH_ZOPC, LLGH_ZOPC); // Same opcode as loadUS.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// INT
+
+// Load Integer into an int register.
+instruct loadI(iRegI dst, memory mem) %{
+ match(Set dst (LoadI mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "L(Y) $dst,$mem\t #" %}
+ opcode(LY_ZOPC, L_ZOPC); // Two opcodes; presumably chosen by displacement size in z_form_rt_mem_opt - TODO confirm.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load integer and convert to long; subsumes the ConvI2L.
+instruct loadI2L(iRegL dst, memory mem) %{
+ match(Set dst (ConvI2L (LoadI mem)));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LGF $dst,$mem\t #" %}
+ opcode(LGF_ZOPC, LGF_ZOPC); // The same opcode is supplied for both opcode slots.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Integer into a Long Register; subsumes ConvI2L and the AndL with 0xFFFFFFFF.
+instruct loadUI2L(iRegL dst, memory mem, immL_FFFFFFFF mask) %{
+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LLGF $dst,$mem\t # zero-extend int to long" %}
+ opcode(LLGF_ZOPC, LLGF_ZOPC); // The same opcode is supplied for both opcode slots.
+ ins_encode(z_form_rt_mem_opt(dst, mem)); // The mask operand is not passed to the encoding.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// range = array length (=jint)
+// Load Range: same opcodes and encoding as loadI.
+instruct loadRange(iRegI dst, memory mem) %{
+ match(Set dst (LoadRange mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "L(Y) $dst,$mem\t # range" %}
+ opcode(LY_ZOPC, L_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// LONG
+
+// Load Long - aligned
+instruct loadL(iRegL dst, memory mem) %{
+ match(Set dst (LoadL mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LG $dst,$mem\t # long" %}
+ opcode(LG_ZOPC, LG_ZOPC); // The same opcode is supplied for both opcode slots.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Long - UNaligned (same opcode and encoding as the aligned loadL).
+instruct loadL_unaligned(iRegL dst, memory mem) %{
+ match(Set dst (LoadL_unaligned mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LG $dst,$mem\t # unaligned long" %}
+ opcode(LG_ZOPC, LG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+// PTR
+
+// Load Pointer (same LG opcode as loadL).
+instruct loadP(iRegP dst, memory mem) %{
+ match(Set dst (LoadP mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LG $dst,$mem\t # ptr" %}
+ opcode(LG_ZOPC, LG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// LoadP + CastP2X: the loaded pointer is delivered directly in a long register.
+instruct castP2X_loadP(iRegL dst, memory mem) %{
+ match(Set dst (CastP2X (LoadP mem)));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LG $dst,$mem\t # ptr + p2x" %}
+ opcode(LG_ZOPC, LG_ZOPC); // Same opcode as loadP; the cast emits no extra instruction here.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Klass Pointer (same LG opcode as loadP).
+instruct loadKlass(iRegP dst, memory mem) %{
+ match(Set dst (LoadKlass mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LG $dst,$mem\t # klass ptr" %}
+ opcode(LG_ZOPC, LG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadTOC(iRegL dst) %{
+ effect(DEF dst); // No match rule: the node only defines dst.
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ // TODO: check why this attribute causes many unnecessary rematerializations.
+ //
+ // The graphs I saw just had high register pressure. Further the
+ // register TOC is loaded to is overwritten by the constant short
+ // after. Here something as round robin register allocation might
+ // help. But rematerializing seems not to hurt, jack even seems to
+ // improve slightly.
+ //
+ // Without this flag we get spill-split recycle sanity check
+ // failures in
+ // spec.benchmarks._228_jack.NfaState::GenerateCode. This happens in
+ // a block with three loadConP_dynTOC nodes and a tlsLoadP. The
+ // tlsLoadP has a huge amount of outs and forces the TOC down to the
+ // stack. Later tlsLoadP is rematerialized, leaving the register
+ // allocator with TOC on the stack and a badly placed reload.
+ ins_should_rematerialize(true);
+ format %{ "LARL $dst, &constant_pool\t; load dynTOC" %}
+ ins_encode %{ __ load_toc($dst$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// FLOAT
+
+// Load Float into a float register.
+instruct loadF(regF dst, memory mem) %{
+ match(Set dst (LoadF mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "LE(Y) $dst,$mem" %}
+ opcode(LEY_ZOPC, LE_ZOPC); // Two opcodes; presumably chosen by displacement size in z_form_rt_mem_opt - TODO confirm.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// DOUBLE
+
+// Load Double: LoadD into a double register, encoded as LD or LDY.
+instruct loadD(regD dst, memory mem) %{
+ match(Set dst (LoadD mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "LD(Y) $dst,$mem" %}
+ opcode(LDY_ZOPC, LD_ZOPC); // The choice between the two is made in z_form_rt_mem_opt (defined elsewhere).
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Double - unaligned: LoadD_unaligned gets the same LD/LDY encoding as loadD.
+instruct loadD_unaligned(regD dst, memory mem) %{
+ match(Set dst (LoadD_unaligned mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "LD(Y) $dst,$mem" %}
+ opcode(LDY_ZOPC, LD_ZOPC); // Same opcode pair as loadD.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------------------
+// IMMEDIATES
+//----------------------
+
+instruct loadConI(iRegI dst, immI src) %{
+ match(Set dst src); // Any int constant (immI), emitted as LGFI.
+ ins_cost(DEFAULT_COST); // The 16-bit and zero variants below use DEFAULT_COST_LOW instead.
+ size(6);
+ format %{ "LGFI $dst,$src\t # (int)" %}
+ ins_encode %{ __ z_lgfi($dst$$Register, $src$$constant); %} // Sign-extend to 64 bit, it's at no cost.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadConI16(iRegI dst, immI16 src) %{
+ match(Set dst src); // Int constant accepted by immI16, emitted as LGHI.
+ ins_cost(DEFAULT_COST_LOW);
+ size(4); // Declared size is 4, versus 6 for the LGFI form in loadConI.
+ format %{ "LGHI $dst,$src\t # (int)" %}
+ ins_encode %{ __ z_lghi($dst$$Register, $src$$constant); %} // Sign-extend to 64 bit, it's at no cost.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadConI_0(iRegI dst, immI_0 src, flagsReg cr) %{
+ match(Set dst src); // Int constant zero (immI_0).
+ effect(KILL cr); // The flags register is declared as clobbered.
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "loadConI $dst,$src\t # (int) XGR because ZERO is loaded" %}
+ opcode(XGR_ZOPC);
+ ins_encode(z_rreform(dst, dst)); // Both operands are dst: XGR dst,dst.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadConUI16(iRegI dst, uimmI16 src) %{
+ match(Set dst src); // Int constant accepted by uimmI16, emitted as LLILL.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LLILL $dst,$src" %}
+ opcode(LLILL_ZOPC);
+ ins_encode(z_riform_unsigned(dst, src) ); // No ins_cost is given for this rule.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load long constant from TOC with pc-relative address (generic immL case).
+instruct loadConL_pcrelTOC(iRegL dst, immL src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST_LO);
+ size(6);
+ format %{ "LGRL $dst,[pcrelTOC]\t # load long $src from table" %}
+ ins_encode %{
+ address long_address = __ long_constant($src$$constant); // Address of the constant, or NULL.
+ if (long_address == NULL) {
+ Compile::current()->env()->record_out_of_memory_failure(); // NULL is reported as an out-of-memory failure.
+ return; // No load is emitted in the failure case.
+ }
+ __ load_long_pcrelative($dst$$Register, long_address);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadConL32(iRegL dst, immL32 src) %{
+ match(Set dst src); // Long constant accepted by immL32, emitted as LGFI.
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "LGFI $dst,$src\t # (long)" %}
+ ins_encode %{ __ z_lgfi($dst$$Register, $src$$constant); %} // Sign-extend to 64 bit, it's at no cost.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadConL16(iRegL dst, immL16 src) %{
+ match(Set dst src); // Long constant accepted by immL16, emitted as LGHI.
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LGHI $dst,$src\t # (long)" %}
+ ins_encode %{ __ z_lghi($dst$$Register, $src$$constant); %} // Sign-extend to 64 bit, it's at no cost.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadConL_0(iRegL dst, immL_0 src, flagsReg cr) %{
+ match(Set dst src); // Long constant zero (immL_0).
+ effect(KILL cr); // The flags register is declared as clobbered.
+ ins_cost(DEFAULT_COST_LOW); // NOTE(review): unlike loadConI_0, loadConN0 and loadConP0, no size(4) is declared here - confirm this is intended.
+ format %{ "LoadConL $dst,$src\t # (long) XGR because ZERO is loaded" %}
+ opcode(XGR_ZOPC);
+ ins_encode(z_rreform(dst, dst)); // Both operands are dst: XGR dst,dst.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load ptr constant from TOC with pc relative address.
+// Special handling for oop and metadata constants is required; see the three cases below.
+instruct loadConP_pcrelTOC(iRegP dst, immP src) %{
+ match(Set dst src);
+ ins_cost(MEMORY_REF_COST_LO);
+ size(6);
+ format %{ "LGRL $dst,[pcrelTOC]\t # load ptr $src from table" %}
+ ins_encode %{
+ relocInfo::relocType constant_reloc = $src->constant_reloc(); // Relocation kind decides how the constant is placed.
+ if (constant_reloc == relocInfo::oop_type) { // Case 1: oop constant.
+ AddressLiteral a = __ allocate_oop_address((jobject)$src$$constant);
+ bool success = __ load_oop_from_toc($dst$$Register, a);
+ if (!success) {
+ Compile::current()->env()->record_out_of_memory_failure(); // Failure is reported as out of memory.
+ return;
+ }
+ } else if (constant_reloc == relocInfo::metadata_type) { // Case 2: metadata constant.
+ AddressLiteral a = __ constant_metadata_address((Metadata *)$src$$constant);
+ address const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none); // Address of the entry, or NULL.
+ if (const_toc_addr == NULL) {
+ Compile::current()->env()->record_out_of_memory_failure();
+ return;
+ }
+ __ load_long_pcrelative($dst$$Register, const_toc_addr);
+ } else { // Non-oop pointers, e.g. card mark base, heap top.
+ address long_address = __ long_constant((jlong)$src$$constant); // Treated like a plain 64-bit constant.
+ if (long_address == NULL) {
+ Compile::current()->env()->record_out_of_memory_failure();
+ return;
+ }
+ __ load_long_pcrelative($dst$$Register, long_address);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load the NULL pointer constant (immP0). We don't use immP16 to avoid problems with oops.
+instruct loadConP0(iRegP dst, immP0 src, flagsReg cr) %{
+ match(Set dst src);
+ effect(KILL cr); // The flags register is declared as clobbered.
+ size(4);
+ format %{ "XGR $dst,$dst\t # NULL ptr" %}
+ opcode(XGR_ZOPC);
+ ins_encode(z_rreform(dst, dst)); // Both operands are dst: XGR dst,dst.
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Load Float Constant Instructions-------------------------------------------------
+
+// Load a float constant (immF) from the constant table.
+// We may not specify this instruction via an `expand' rule. If we do, code
+// selection will forget that this instruction needs a floating point constant
+// inserted into the code buffer. So `Shorten_branches' will fail.
+instruct loadConF_dynTOC(regF dst, immF src, flagsReg cr) %{
+ match(Set dst src);
+ effect(KILL cr); // The flags register is declared as clobbered.
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ // If this instruction rematerializes, it prolongs the live range
+ // of the toc node, causing illegal graphs.
+ ins_cannot_rematerialize(true);
+ format %{ "LE(Y) $dst,$constantoffset[,$constanttablebase]\t # load FLOAT $src from table" %}
+ ins_encode %{
+ __ load_float_largeoffset($dst$$FloatRegister, $constantoffset($src), $constanttablebase, Z_R1_scratch); // Z_R1_scratch is passed to the helper; presumably used for large offsets - confirm.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// We may not specify this instruction via an `expand' rule. If we do,
+// code selection will forget that this instruction needs a floating
+// point constant inserted into the code buffer. So `Shorten_branches'
+// will fail.
+instruct loadConD_dynTOC(regD dst, immD src, flagsReg cr) %{
+ match(Set dst src); // Double constant (immD) loaded from the constant table.
+ effect(KILL cr); // The flags register is declared as clobbered.
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ // If this instruction rematerializes, it prolongs the live range
+ // of the toc node, causing illegal graphs.
+ ins_cannot_rematerialize(true);
+ format %{ "LD(Y) $dst,$constantoffset[,$constanttablebase]\t # load DOUBLE $src from table" %}
+ ins_encode %{
+ __ load_double_largeoffset($dst$$FloatRegister, $constantoffset($src), $constanttablebase, Z_R1_scratch); // Z_R1_scratch is passed to the helper; presumably used for large offsets - confirm.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Special case: Load Const 0.0F
+
+// There's a special instruction (LZER) to clear a float register; used for immFp0.
+instruct loadConF0(regF dst, immFp0 src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LZER $dst,$src\t # clear to zero" %}
+ opcode(LZER_ZOPC);
+ ins_encode(z_rreform(dst, Z_F0)); // Second operand is the fixed register Z_F0; no flags effect is declared.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// There's a special instruction (LZDR) to clear a double register; used for immDp0.
+instruct loadConD0(regD dst, immDp0 src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LZDR $dst,$src\t # clear to zero" %}
+ opcode(LZDR_ZOPC);
+ ins_encode(z_rreform(dst, Z_F0)); // Second operand is the fixed register Z_F0; no flags effect is declared.
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Store Instructions-------------------------------------------------
+
+// BYTE
+
+// Store Byte: StoreB of an int register, encoded as STC or STCY.
+instruct storeB(memory mem, iRegI src) %{
+ match(Set mem (StoreB mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "STC(Y) $src,$mem\t # byte" %}
+ opcode(STCY_ZOPC, STC_ZOPC); // z_form_rt_mem_opt (defined elsewhere) picks one; presumably by displacement range - confirm.
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct storeCM(memory mem, immI_0 src) %{
+ match(Set mem (StoreCM mem src)); // Card-mark store; src is restricted to zero by immI_0.
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "STC(Y) $src,$mem\t # CMS card-mark byte (must be 0!)" %}
+ ins_encode %{
+ guarantee($mem$$index$$Register != Z_R0, "content will not be used."); // Z_R0 is not accepted as the index register.
+ if ($mem$$index$$Register != noreg) { // Address has an index register.
+ // Can't use clear_mem --> load const zero and store character.
+ __ load_const_optimized(Z_R0_scratch, (long)0);
+ if (Immediate::is_uimm12($mem$$disp)) { // Displacement passes is_uimm12: STC.
+ __ z_stc(Z_R0_scratch, $mem$$Address);
+ } else { // Otherwise: STCY.
+ __ z_stcy(Z_R0_scratch, $mem$$Address);
+ }
+ } else { // No index register: clear one byte directly.
+ __ clear_mem(Address($mem$$Address), 1);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// CHAR/SHORT
+
+// Store Char/Short: StoreC of an int register, encoded as STH or STHY.
+instruct storeC(memory mem, iRegI src) %{
+ match(Set mem (StoreC mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "STH(Y) $src,$mem\t # short" %}
+ opcode(STHY_ZOPC, STH_ZOPC); // The choice between the two is made in z_form_rt_mem_opt (defined elsewhere).
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// INT
+
+// Store Integer: StoreI of an int register, encoded as ST or STY.
+instruct storeI(memory mem, iRegI src) %{
+ match(Set mem (StoreI mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "ST(Y) $src,$mem\t # int" %}
+ opcode(STY_ZOPC, ST_ZOPC); // The choice between the two is made in z_form_rt_mem_opt (defined elsewhere).
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// LONG
+
+// Store Long: StoreL of a long register, emitted as STG.
+instruct storeL(memory mem, iRegL src) %{
+ match(Set mem (StoreL mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "STG $src,$mem\t # long" %}
+ opcode(STG_ZOPC, STG_ZOPC); // Both opcode slots are STG, so the encoder can only emit STG.
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// PTR
+
+// Store Pointer: StoreP, emitted as STG. Note the operands are named dst/src here, not mem/src.
+instruct storeP(memory dst, memoryRegP src) %{
+ match(Set dst (StoreP dst src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "STG $src,$dst\t # ptr" %}
+ opcode(STG_ZOPC, STG_ZOPC); // Both opcode slots are STG, so the encoder can only emit STG.
+ ins_encode(z_form_rt_mem_opt(src, dst));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// FLOAT
+
+// Store Float: StoreF of a float register, encoded as STE or STEY.
+instruct storeF(memory mem, regF src) %{
+ match(Set mem (StoreF mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "STE(Y) $src,$mem\t # float" %}
+ opcode(STEY_ZOPC, STE_ZOPC); // The choice between the two is made in z_form_rt_mem_opt (defined elsewhere).
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// DOUBLE
+
+// Store Double: StoreD of a double register, encoded as STD or STDY.
+instruct storeD(memory mem, regD src) %{
+ match(Set mem (StoreD mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "STD(Y) $src,$mem\t # double" %}
+ opcode(STDY_ZOPC, STD_ZOPC); // The choice between the two is made in z_form_rt_mem_opt (defined elsewhere).
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Prefetch instructions. Must be safe to execute with invalid address (cannot fault).
+
+// Match rule for PrefetchAllocation; only selected when VM_Version::has_Prefetch() holds.
+// Still needed after 8068977 for PrefetchAllocate.
+instruct prefetchAlloc(memory mem) %{
+ match(PrefetchAllocation mem);
+ predicate(VM_Version::has_Prefetch());
+ ins_cost(DEFAULT_COST);
+ format %{ "PREFETCH 2, $mem\t # Prefetch allocation, z10 only" %}
+ ins_encode %{ __ z_pfd(0x02, $mem$$Address); %} // Code 0x02 matches the "2" in the format string; presumably "prefetch for store" - confirm against the PFD description.
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Memory init instructions------------------------------------------
+
+// Move Immediate to 1-byte memory: StoreB of an immI8, emitted as MVI or MVIY.
+instruct memInitB(memoryRSY mem, immI8 src) %{
+ match(Set mem (StoreB mem src));
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MVI $mem,$src\t # direct mem init 1" %}
+ ins_encode %{
+ if (Immediate::is_uimm12((long)$mem$$disp)) { // Displacement passes is_uimm12: MVI.
+ __ z_mvi($mem$$Address, $src$$constant);
+ } else { // Otherwise: MVIY.
+ __ z_mviy($mem$$Address, $src$$constant);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Move Immediate to 2-byte memory: StoreC of an immI16, emitted as MVHHI.
+instruct memInitC(memoryRS mem, immI16 src) %{
+ match(Set mem (StoreC mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "MVHHI $mem,$src\t # direct mem init 2" %}
+ opcode(MVHHI_ZOPC);
+ ins_encode(z_silform(mem, src)); // Uses the memoryRS operand class, not the general memory operand.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Move Immediate to 4-byte memory: StoreI of an immI16, emitted as MVHI.
+instruct memInitI(memoryRS mem, immI16 src) %{
+ match(Set mem (StoreI mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "MVHI $mem,$src\t # direct mem init 4" %}
+ opcode(MVHI_ZOPC);
+ ins_encode(z_silform(mem, src)); // Uses the memoryRS operand class, not the general memory operand.
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+// Move Immediate to 8-byte memory: StoreL of an immL16, emitted as MVGHI.
+instruct memInitL(memoryRS mem, immL16 src) %{
+ match(Set mem (StoreL mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "MVGHI $mem,$src\t # direct mem init 8" %}
+ opcode(MVGHI_ZOPC);
+ ins_encode(z_silform(mem, src)); // Uses the memoryRS operand class, not the general memory operand.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Move Immediate to 8-byte memory (pointer flavor): StoreP of an immP16, emitted as MVGHI.
+instruct memInitP(memoryRS mem, immP16 src) %{
+ match(Set mem (StoreP mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "MVGHI $mem,$src\t # direct mem init 8" %}
+ opcode(MVGHI_ZOPC);
+ ins_encode(z_silform(mem, src)); // Same encoding as memInitL; only the matched node and immediate operand differ.
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Instructions for compressed pointers (cOop and NKlass)-------------
+
+// See cOop encoding classes for elaborate comment.
+
+// Long negation, matched as (SubL zero src).
+// Moved here because it is needed in expand rules for encode (see encodeP_Ex, encodeP_NN_Ex).
+instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
+ match(Set dst (SubL zero src));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ size(4);
+ format %{ "NEG $dst, $src\t # long" %}
+ ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %} // Printed as "NEG" but emitted via z_lcgr.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Compressed Pointer
+
+// Load narrow oop: LoadN into a narrow register, emitted as LLGF.
+instruct loadN(iRegN dst, memory mem) %{
+ match(Set dst (LoadN mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LoadN $dst,$mem\t# (cOop)" %}
+ opcode(LLGF_ZOPC, LLGF_ZOPC); // Both opcode slots are LLGF, so the encoder can only emit LLGF.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load narrow Klass Pointer: LoadNKlass into a narrow register, emitted as LLGF.
+instruct loadNKlass(iRegN dst, memory mem) %{
+ match(Set dst (LoadNKlass mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LoadNKlass $dst,$mem\t# (klass cOop)" %}
+ opcode(LLGF_ZOPC, LLGF_ZOPC); // Both opcode slots are LLGF, so the encoder can only emit LLGF.
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load constant Compressed Pointer
+
+instruct loadConN(iRegN dst, immN src) %{
+ match(Set dst src); // Narrow oop constant (immN).
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "loadConN $dst,$src\t # (cOop)" %}
+ ins_encode %{
+ AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant); // Supplies both the value and the relocation spec.
+ __ relocate(cOop.rspec(), 1); // Relocation is recorded before the load is emitted.
+ __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadConN0(iRegN dst, immN0 src, flagsReg cr) %{
+ match(Set dst src); // Narrow NULL constant (immN0).
+ effect(KILL cr); // The flags register is declared as clobbered.
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "loadConN $dst,$src\t # (cOop) XGR because ZERO is loaded" %}
+ opcode(XGR_ZOPC);
+ ins_encode(z_rreform(dst, dst)); // Both operands are dst: XGR dst,dst.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct loadConNKlass(iRegN dst, immNKlass src) %{
+ match(Set dst src); // Narrow klass constant (immNKlass).
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
+ ins_encode %{
+ AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant); // Supplies both the value and the relocation spec.
+ __ relocate(NKlass.rspec(), 1); // Relocation is recorded before the load is emitted.
+ __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load and Decode Compressed Pointer
+// optimized variants for Unscaled cOops
+
+instruct decodeLoadN(iRegP dst, memory mem) %{
+ match(Set dst (DecodeN (LoadN mem))); // Load and decode in one rule; intended for base == NULL and shift == 0.
+ predicate(false && (Universe::narrow_oop_base()==NULL)&&(Universe::narrow_oop_shift()==0)); // Disabled: the leading "false &&" makes this always false.
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "DecodeLoadN $dst,$mem\t# (cOop Load+Decode)" %}
+ opcode(LLGF_ZOPC, LLGF_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct decodeLoadNKlass(iRegP dst, memory mem) %{
+ match(Set dst (DecodeNKlass (LoadNKlass mem))); // Load and decode in one rule; intended for base == NULL and shift == 0.
+ predicate(false && (Universe::narrow_klass_base()==NULL)&&(Universe::narrow_klass_shift()==0)); // Disabled: the leading "false &&" makes this always false.
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "DecodeLoadNKlass $dst,$mem\t# (load/decode NKlass)" %}
+ opcode(LLGF_ZOPC, LLGF_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
+ match(Set dst (DecodeNKlass src)); // Decode of a narrow klass constant: the full Klass* is loaded directly.
+ ins_cost(3 * DEFAULT_COST);
+ size(12);
+ format %{ "DecodeLoadConNKlass $dst,$src\t # decode(cKlass)" %}
+ ins_encode %{
+ AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant); // Supplies both the value and the relocation spec.
+ __ relocate(NKlass.rspec(), 1);
+ __ load_const($dst$$Register, (Klass*)NKlass.value()); // Uses load_const rather than load_narrow_klass (compare loadConNKlass).
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Decode Compressed Pointer
+
+// General decoder: used when the heap base is NULL or ExpandLoadingBaseDecode is off.
+instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
+ match(Set dst (DecodeN src));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ predicate(Universe::narrow_oop_base() == NULL || !ExpandLoadingBaseDecode);
+ ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "decodeN $dst,$src\t# (decode cOop)" %}
+ ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, true); %} // Third argument is true here, false in decodeN_NN; presumably enables NULL handling - confirm.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// General Klass decoder: DecodeNKlass from a narrow register, no predicate.
+instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
+ match(Set dst (DecodeNKlass src));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ ins_cost(3 * DEFAULT_COST);
+ format %{ "decode_klass $dst,$src" %}
+ ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %} // NOTE(review): always the not-null helper; presumably narrow klass values are never NULL here - confirm.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// General decoder for results typed NotNull or Constant (see predicate); counterpart of decodeN.
+instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
+ match(Set dst (DecodeN src));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
+ n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
+ (Universe::narrow_oop_base()== NULL || !ExpandLoadingBaseDecode_NN));
+ ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST); // One DEFAULT_COST less than decodeN, and no BRANCH_COST.
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "decodeN $dst,$src\t# (decode cOop NN)" %}
+ ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %} // Third argument is false here, true in decodeN.
+ ins_pipe(pipe_class_dummy);
+%}
+
+ instruct loadBase(iRegL dst, immL baseImm) %{
+ effect(DEF dst, USE baseImm); // No match rule; instantiated from the expand rules (decodeN_Ex, encodeP_Ex, ...).
+ predicate(false); // Never selected by matching.
+ format %{ "llihl $dst=$baseImm \t// load heap base" %}
+ ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
+ ins_pipe(pipe_class_dummy);
+ %}
+
+ // Decoder for heap-based mode taking the base in a register; instantiated by decodeN_Ex.
+ instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
+ match(Set dst (DecodeN src base));
+ // Note: Effect TEMP dst was used with the intention to get
+ // different regs for dst and base, but this has caused ADLC to
+ // generate wrong code. Oop_decoder generates additional lgr when
+ // dst==base.
+ effect(KILL cr);
+ predicate(false); // Never selected by matching.
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "decodeN $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
+ ins_encode %{
+ __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register, // true: variant that handles NULL, per the format string above.
+ (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
+ %}
+ ins_pipe(pipe_class_dummy);
+ %}
+
+ // Not-null decoder for heap-based mode taking the base in a register; instantiated by decodeN_NN_Ex.
+ instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
+ match(Set dst (DecodeN src base));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ predicate(false); // Never selected by matching.
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "decodeN $dst = ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
+ ins_encode %{
+ __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register, // false: no NULL case, per the format string above.
+ (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
+ %}
+ ins_pipe(pipe_class_dummy);
+ %}
+
+// Decoder for heap-based mode: expands into loadBase + decodeN_base so the base load is a separate instruction.
+instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
+ match(Set dst (DecodeN src));
+ predicate(Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode); // Complement of the decodeN predicate.
+ ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST); // NOTE(review): no effect(KILL cr) here, unlike encodeP_Ex - confirm this is intended.
+ // TODO: s390 port size(VARIABLE_SIZE);
+ expand %{
+ immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
+ iRegL base;
+ loadBase(base, baseImm);
+ decodeN_base(dst, src, base, cr);
+ %}
+%}
+
+// Not-null decoder for heap-based mode: expands into loadBase + decodeN_NN_base.
+instruct decodeN_NN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
+ match(Set dst (DecodeN src));
+ predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
+ n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
+ Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode_NN);
+ ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST); // Same cost as decodeN_NN.
+ // TODO: s390 port size(VARIABLE_SIZE);
+ expand %{
+ immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
+ iRegL base;
+ loadBase(base, baseImm);
+ decodeN_NN_base(dst, src, base, cr);
+ %}
+%}
+
+// Encode Compressed Pointer
+
+// General encoder for pointers not typed NotNull; used unless the base-expanding variant (encodeP_Ex) applies.
+instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
+ match(Set dst (EncodeP src));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
+ (Universe::narrow_oop_base() == 0 ||
+ Universe::narrow_oop_base_disjoint() ||
+ !ExpandLoadingBaseEncode));
+ ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "encodeP $dst,$src\t# (encode cOop)" %}
+ ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %} // Third argument is true here, false in encodeP_NN; presumably enables NULL handling - confirm.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// General class encoder: EncodePKlass from a pointer register, no predicate and no ins_cost.
+instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
+ match(Set dst (EncodePKlass src));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ format %{ "encode_klass $dst,$src" %}
+ ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %} // NOTE(review): always the not-null helper; presumably klass pointers are never NULL here - confirm.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
+ match(Set dst (EncodeP src)); // Encoder for pointers typed NotNull (see predicate); counterpart of encodeP.
+ effect(KILL cr); // The flags register is declared as clobbered.
+ predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
+ (Universe::narrow_oop_base() == 0 ||
+ Universe::narrow_oop_base_disjoint() ||
+ !ExpandLoadingBaseEncode_NN));
+ ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "encodeP $dst,$src\t# (encode cOop)" %}
+ ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %} // Third argument is false here, true in encodeP.
+ ins_pipe(pipe_class_dummy);
+%}
+
+ // Encoder for heap-based mode taking the (negated) base in a register; instantiated by encodeP_Ex.
+ instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
+ match(Set dst (EncodeP src (Binary base dst)));
+ effect(TEMP_DEF dst);
+ predicate(false); // Never selected by matching.
+ ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "encodeP $dst = ($src>>3) +$base + pow2_offset\t# (encode cOop)" %}
+ ins_encode %{
+ jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset // Negated offset, computed from the shifted heap base.
+ (((uint64_t)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift());
+ __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
+ %}
+ ins_pipe(pipe_class_dummy);
+ %}
+
+ // Not-null encoder for heap-based mode; base and pow2_offset are supplied by encodeP_NN_Ex.
+ instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
+ match(Set dst (EncodeP src base));
+ effect(USE pow2_offset); // pow2_offset is not part of the match rule, so it is declared as a USE.
+ predicate(false); // Never selected by matching.
+ ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "encodeP $dst = ($src>>3) +$base + $pow2_offset\t# (encode cOop)" %}
+ ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
+ ins_pipe(pipe_class_dummy);
+ %}
+
+// Encoder for heap-based mode: expands into loadBase + negL_reg_reg + encodeP_base.
+instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
+ match(Set dst (EncodeP src));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
+ (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode));
+ ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST); // NOTE(review): baseImm below is shifted by narrow_oop_shift(), but not in encodeP_NN_Ex - confirm both are intended.
+ // TODO: s390 port size(VARIABLE_SIZE);
+ expand %{
+ immL baseImm %{ ((jlong)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift() %}
+ immL_0 zero %{ (0) %}
+ flagsReg ccr;
+ iRegL base;
+ iRegL negBase;
+ loadBase(base, baseImm);
+ negL_reg_reg(negBase, zero, base, ccr);
+ encodeP_base(dst, src, negBase);
+ %}
+%}
+
+// Not-null encoder for heap-based mode: expands into loadBase + negL_reg_reg + encodeP_NN_base.
+instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{
+ match(Set dst (EncodeP src));
+ effect(KILL cr); // The flags register is declared as clobbered.
+ predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
+ (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode_NN));
+ ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ expand %{
+ immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
+ immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)Universe::narrow_oop_base())) %}
+ immL_0 zero %{ 0 %}
+ flagsReg ccr;
+ iRegL base;
+ iRegL negBase;
+ loadBase(base, baseImm);
+ negL_reg_reg(negBase, zero, base, ccr);
+ encodeP_NN_base(dst, src, negBase, pow2_offset);
+ %}
+%}
+
+// Store Compressed Pointer
+
+// Store Compressed Pointer: StoreN, encoded as ST or STY.
+instruct storeN(memory mem, iRegN_P2N src) %{
+ match(Set mem (StoreN mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "ST $src,$mem\t# (cOop)" %}
+ opcode(STY_ZOPC, ST_ZOPC); // Same opcode pair as storeI, although the format string prints only "ST".
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store Compressed Klass pointer: StoreNKlass, encoded as ST or STY.
+instruct storeNKlass(memory mem, iRegN src) %{
+ match(Set mem (StoreNKlass mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP_SIZE);
+ format %{ "ST $src,$mem\t# (cKlass)" %}
+ opcode(STY_ZOPC, ST_ZOPC); // Same opcode pair as storeI, although the format string prints only "ST".
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Compare Compressed Pointers
+
+instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
+ match(Set cr (CmpN src1 src2)); // Register-register compare of two narrow oops; the result is the flags register.
+ ins_cost(DEFAULT_COST);
+ size(2);
+ format %{ "CLR $src1,$src2\t# (cOop)" %}
+ opcode(CLR_ZOPC);
+ ins_encode(z_rrform(src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
+ match(Set cr (CmpN src1 src2)); // Compare a narrow oop register against a narrow oop constant.
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "CLFI $src1,$src2\t# (cOop) compare immediate narrow" %}
+ ins_encode %{
+ AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant); // Supplies both the value and the relocation spec.
+ __ relocate(cOop.rspec(), 1); // Relocation is recorded before the compare is emitted.
+ __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
+ match(Set cr (CmpN src1 src2)); // Compare a narrow register against a narrow klass constant.
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "CLFI $src1,$src2\t# (NKlass) compare immediate narrow" %}
+ ins_encode %{
+ AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant); // Supplies both the value and the relocation spec.
+ __ relocate(NKlass.rspec(), 1); // Relocation is recorded before the compare is emitted.
+ __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
+ match(Set cr (CmpN src1 src2)); // Compare a narrow oop register against narrow NULL (immN0).
+ ins_cost(DEFAULT_COST);
+ size(2);
+ format %{ "LTR $src1,$src2\t# (cOop) LTR because comparing against zero" %}
+ opcode(LTR_ZOPC);
+ ins_encode(z_rrform(src1, src1)); // src2 is not encoded; both LTR operands are src1.
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------MemBar Instructions-----------------------------------------------
+
+// Memory barrier flavors
+
+instruct membar_acquire() %{
+ match(MemBarAcquire); // Two match rules: this instruct covers both MemBarAcquire and LoadFence.
+ match(LoadFence);
+ ins_cost(4*MEMORY_REF_COST);
+ size(0); // Declared as emitting no bytes, so z_acquire() is expected to generate no code.
+ format %{ "MEMBAR-acquire" %}
+ ins_encode %{ __ z_acquire(); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct membar_acquire_lock() %{
+ match(MemBarAcquireLock); // Zero cost, zero size, empty encoding; the format string gives the reason.
+ ins_cost(0);
+ size(0);
+ format %{ "MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %}
+ ins_encode(/*empty*/);
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct membar_release() %{
+ match(MemBarRelease); // Two match rules: this instruct covers both MemBarRelease and StoreFence.
+ match(StoreFence);
+ ins_cost(4 * MEMORY_REF_COST);
+ size(0); // Declared as emitting no bytes, so z_release() is expected to generate no code.
+ format %{ "MEMBAR-release" %}
+ ins_encode %{ __ z_release(); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct membar_release_lock() %{
+ match(MemBarReleaseLock); // Zero cost, zero size, empty encoding; the format string gives the reason.
+ ins_cost(0);
+ size(0);
+ format %{ "MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %}
+ ins_encode(/*empty*/);
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct membar_volatile() %{
+ match(MemBarVolatile); // The only barrier in this section with a non-zero declared size.
+ ins_cost(4 * MEMORY_REF_COST);
+ size(2);
+ format %{ "MEMBAR-volatile" %}
+ ins_encode %{ __ z_fence(); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct unnecessary_membar_volatile() %{
+ match(MemBarVolatile); // Same node as membar_volatile, but at cost 0 and guarded by the predicate below.
+ predicate(Matcher::post_store_load_barrier(n));
+ ins_cost(0);
+ size(0);
+ format %{ "# MEMBAR-volatile (empty)" %}
+ ins_encode(/*empty*/); // Nothing is emitted when the predicate holds.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct membar_CPUOrder() %{
+ match(MemBarCPUOrder); // Zero cost and empty encoding.
+ ins_cost(0);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "MEMBAR-CPUOrder (empty)" %}
+ ins_encode(/*empty*/);
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct membar_storestore() %{
+ match(MemBarStoreStore); // Zero cost, zero size and empty encoding.
+ ins_cost(0);
+ size(0);
+ format %{ "MEMBAR-storestore (empty)" %}
+ ins_encode(); // Empty, like the /*empty*/ encodings of the sibling barriers.
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Register Move Instructions-----------------------------------------
+instruct roundDouble_nop(regD dst) %{
+ match(Set dst (RoundDouble dst)); // dst is both input and result; no format string is given.
+ ins_cost(0);
+ // TODO: s390 port size(FIXED_SIZE);
+ // z/Architecture results are already "rounded" (i.e., normal-format IEEE), so the encoding is empty.
+ ins_encode();
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct roundFloat_nop(regF dst) %{
+ match(Set dst (RoundFloat dst)); // dst is both input and result; no format string is given.
+ ins_cost(0);
+ // TODO: s390 port size(FIXED_SIZE);
+ // z/Architecture results are already "rounded" (i.e., normal-format IEEE), so the encoding is empty.
+ ins_encode();
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Cast Long to Pointer for unsafe natives: CastX2P as a register-to-register copy.
+instruct castX2P(iRegP dst, iRegL src) %{
+ match(Set dst (CastX2P src));
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "LGR $dst,$src\t # CastX2P" %}
+ ins_encode %{ __ lgr_if_needed($dst$$Register, $src$$Register); %} // Presumably emits nothing when dst == src (helper defined elsewhere) - confirm.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Cast Pointer to Long for unsafe natives: CastP2X as a register-to-register copy.
+instruct castP2X(iRegL dst, iRegP_N2P src) %{
+ match(Set dst (CastP2X src));
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "LGR $dst,$src\t # CastP2X" %}
+ ins_encode %{ __ lgr_if_needed($dst$$Register, $src$$Register); %} // Presumably emits nothing when dst == src (helper defined elsewhere) - confirm.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct stfSSD(stackSlotD stkSlot, regD src) %{
+ // %%%% TODO: Tell the coalescer that this kind of node is a copy!
+ match(Set stkSlot src); // chain rule: double register to double stack slot
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ " STD $src,$stkSlot\t # stk" %}
+ opcode(STD_ZOPC); // Single opcode and z_form_rt_mem, not the two-opcode z_form_rt_mem_opt used by storeD.
+ ins_encode(z_form_rt_mem(src, stkSlot));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct stfSSF(stackSlotF stkSlot, regF src) %{
+ // %%%% TODO: Tell the coalescer that this kind of node is a copy!
+ match(Set stkSlot src); // chain rule: float register to float stack slot
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "STE $src,$stkSlot\t # stk" %}
+ opcode(STE_ZOPC); // Single opcode and z_form_rt_mem, not the two-opcode z_form_rt_mem_opt used by storeF.
+ ins_encode(z_form_rt_mem(src, stkSlot));
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Conditional Move---------------------------------------------------
+
+instruct cmovN_reg(cmpOp cmp, flagsReg cr, iRegN dst, iRegN_P2N src) %{
+ match(Set dst (CMoveN (Binary cmp cr) (Binary dst src))); // dst is both an input and the result of the CMoveN.
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveN,$cmp $dst,$src" %}
+ ins_encode(z_enc_cmov_reg(cmp,dst,src)); // Register-source variant; cr is read by the match rule but not passed to the encoder.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovN_imm(cmpOp cmp, flagsReg cr, iRegN dst, immN0 src) %{
+ match(Set dst (CMoveN (Binary cmp cr) (Binary dst src))); // dst is both an input and the result; src is limited to immN0.
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveN,$cmp $dst,$src" %}
+ ins_encode(z_enc_cmov_imm(cmp,dst,src)); // Immediate-source variant of the conditional-move encoder.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovI_reg(cmpOp cmp, flagsReg cr, iRegI dst, iRegI src) %{
+ match(Set dst (CMoveI (Binary cmp cr) (Binary dst src))); // dst is both an input and the result of the CMoveI.
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveI,$cmp $dst,$src" %}
+ ins_encode(z_enc_cmov_reg(cmp,dst,src)); // Same encoder class as cmovN_reg and cmovP_reg.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovI_imm(cmpOp cmp, flagsReg cr, iRegI dst, immI16 src) %{
+ match(Set dst (CMoveI (Binary cmp cr) (Binary dst src))); // dst is both an input and the result; src is limited to immI16.
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveI,$cmp $dst,$src" %}
+ ins_encode(z_enc_cmov_imm(cmp,dst,src)); // Same encoder class as cmovN_imm and cmovP_imm.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovP_reg(cmpOp cmp, flagsReg cr, iRegP dst, iRegP_N2P src) %{ // Conditional move, pointer, register source.
+ match(Set dst (CMoveP (Binary cmp cr) (Binary dst src))); // dst is both the fall-back value and the result
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveP,$cmp $dst,$src" %}
+ ins_encode(z_enc_cmov_reg(cmp,dst,src)); // shared register-source cmov encoding (defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovP_imm(cmpOp cmp, flagsReg cr, iRegP dst, immP0 src) %{ // Conditional move, pointer, immP0 constant source.
+ match(Set dst (CMoveP (Binary cmp cr) (Binary dst src))); // dst is both the fall-back value and the result
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveP,$cmp $dst,$src" %}
+ ins_encode(z_enc_cmov_imm(cmp,dst,src)); // shared immediate-source cmov encoding (defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovF_reg(cmpOpF cmp, flagsReg cr, regF dst, regF src) %{ // Conditional move, float: branch around a register copy.
+ match(Set dst (CMoveF (Binary cmp cr) (Binary dst src))); // dst is both the fall-back value and the result
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveF,$cmp $dst,$src" %}
+ ins_encode %{
+ // Don't emit code if operands are identical (same register).
+ if ($dst$$FloatRegister != $src$$FloatRegister) {
+ Label done;
+ __ z_brc(Assembler::inverse_float_condition((Assembler::branch_condition)$cmp$$cmpcode), done); // skip the copy when $cmp does not hold
+ __ z_ler($dst$$FloatRegister, $src$$FloatRegister); // dst = src
+ __ bind(done);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovD_reg(cmpOpF cmp, flagsReg cr, regD dst, regD src) %{ // Conditional move, double: branch around a register copy.
+ match(Set dst (CMoveD (Binary cmp cr) (Binary dst src))); // dst is both the fall-back value and the result
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveD,$cmp $dst,$src" %}
+ ins_encode %{
+ // Don't emit code if operands are identical (same register).
+ if ($dst$$FloatRegister != $src$$FloatRegister) {
+ Label done;
+ __ z_brc(Assembler::inverse_float_condition((Assembler::branch_condition)$cmp$$cmpcode), done); // skip the copy when $cmp does not hold
+ __ z_ldr($dst$$FloatRegister, $src$$FloatRegister); // dst = src
+ __ bind(done);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovL_reg(cmpOp cmp, flagsReg cr, iRegL dst, iRegL src) %{ // Conditional move, long, register source.
+ match(Set dst (CMoveL (Binary cmp cr) (Binary dst src))); // dst is both the fall-back value and the result
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveL,$cmp $dst,$src" %}
+ ins_encode(z_enc_cmov_reg(cmp,dst,src)); // shared register-source cmov encoding (defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmovL_imm(cmpOp cmp, flagsReg cr, iRegL dst, immL16 src) %{ // Conditional move, long, immL16 constant source.
+ match(Set dst (CMoveL (Binary cmp cr) (Binary dst src))); // dst is both the fall-back value and the result
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CMoveL,$cmp $dst,$src" %}
+ ins_encode(z_enc_cmov_imm(cmp,dst,src)); // shared immediate-source cmov encoding (defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------OS and Locking Instructions----------------------------------------
+
+// The instruct name 'tlsLoadP' is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this instruction.
+instruct tlsLoadP(threadRegP dst) %{ // ThreadLocal node: dst is constrained to threadRegP and no code is emitted.
+ match(Set dst (ThreadLocal));
+ ins_cost(0);
+ size(0); // zero bytes, consistent with the empty encoding below
+ ins_should_rematerialize(true);
+ format %{ "# $dst=ThreadLocal" %}
+ ins_encode(/* empty */);
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct checkCastPP(iRegP dst) %{ // CheckCastPP node: matched in place on dst, emits no machine code.
+ match(Set dst (CheckCastPP dst)); // input and output share one register
+ size(0); // zero bytes, consistent with the empty encoding below
+ format %{ "# checkcastPP of $dst" %}
+ ins_encode(/*empty*/);
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct castPP(iRegP dst) %{ // CastPP node: matched in place on dst, emits no machine code.
+ match(Set dst (CastPP dst)); // input and output share one register
+ size(0); // zero bytes, consistent with the empty encoding below
+ format %{ "# castPP of $dst" %}
+ ins_encode(/*empty*/);
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct castII(iRegI dst) %{ // CastII node: matched in place on dst, emits no machine code.
+ match(Set dst (CastII dst)); // input and output share one register
+ size(0); // zero bytes, consistent with the empty encoding below
+ format %{ "# castII of $dst" %}
+ ins_encode(/*empty*/);
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Conditional_store--------------------------------------------------
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// Sets flags (EQ) on success.
+
+// Implement LoadPLocked as a plain 64-bit load (LG). Must be ordered against
+// changes of the memory location by storePConditional.
+// It is unclear whether this rule is ever used.
+instruct loadPLocked(iRegP dst, memory mem) %{
+ match(Set dst (LoadPLocked mem));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "LG $dst,$mem\t # LoadPLocked" %}
+ opcode(LG_ZOPC, LG_ZOPC); // same opcode given for both opcode slots
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// As compareAndSwapP, but return flag register instead of boolean value in
+// int register.
+// This instruction is matched if UseTLAB is off. Needed to pass
+// option tests. Mem_ptr must be a memory operand, else this node
+// does not get Flag_needs_anti_dependence_check set by adlc. If this
+// is not set this node can be rematerialized which leads to errors.
+instruct storePConditional(indirect mem_ptr, rarg5RegP oldval, iRegP_N2P newval, flagsReg cr) %{
+ match(Set cr (StorePConditional mem_ptr (Binary oldval newval))); // result is the flags register itself
+ effect(KILL oldval); // oldval does not survive the instruction
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "storePConditional $oldval,$newval,$mem_ptr" %}
+ ins_encode(z_enc_casL(oldval, newval, mem_ptr)); // same encoding as compareAndSwapP_bool, without the cc-to-bool step
+ ins_pipe(pipe_class_dummy);
+%}
+
+// As compareAndSwapL, but return flag register instead of boolean value in
+// int register.
+// Used by sun/misc/AtomicLongCSImpl.java. Mem_ptr must be a memory
+// operand, else this node does not get
+// Flag_needs_anti_dependence_check set by adlc. If this is not set
+// this node can be rematerialized which leads to errors.
+instruct storeLConditional(indirect mem_ptr, rarg5RegL oldval, iRegL newval, flagsReg cr) %{
+ match(Set cr (StoreLConditional mem_ptr (Binary oldval newval))); // result is the flags register itself
+ effect(KILL oldval); // oldval does not survive the instruction
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "storeLConditional $oldval,$newval,$mem_ptr" %}
+ ins_encode(z_enc_casL(oldval, newval, mem_ptr)); // same encoding as compareAndSwapL_bool, without the cc-to-bool step
+ ins_pipe(pipe_class_dummy);
+%}
+
+// No flag versions for CompareAndSwap{P,I,L,N} because matcher can't match them.
+
+instruct compareAndSwapI_bool(iRegP mem_ptr, rarg5RegI oldval, iRegI newval, iRegI res, flagsReg cr) %{ // int CAS with an int result in res
+ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
+ effect(USE mem_ptr, USE_KILL oldval, KILL cr); // oldval is read and then destroyed; flags are clobbered
+ size(16);
+ format %{ "$res = CompareAndSwapI $oldval,$newval,$mem_ptr" %}
+ ins_encode(z_enc_casI(oldval, newval, mem_ptr), // two encodings emitted back to back: the CAS ...
+ z_enc_cctobool(res)); // ... then, presumably, condition code -> boolean in res (encoding defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct compareAndSwapL_bool(iRegP mem_ptr, rarg5RegL oldval, iRegL newval, iRegI res, flagsReg cr) %{ // long CAS with an int result in res
+ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
+ effect(USE mem_ptr, USE_KILL oldval, KILL cr); // oldval is read and then destroyed; flags are clobbered
+ size(18);
+ format %{ "$res = CompareAndSwapL $oldval,$newval,$mem_ptr" %}
+ ins_encode(z_enc_casL(oldval, newval, mem_ptr), // two encodings emitted back to back: the CAS ...
+ z_enc_cctobool(res)); // ... then, presumably, condition code -> boolean in res (encoding defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, flagsReg cr) %{ // pointer CAS with an int result in res
+ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+ effect(USE mem_ptr, USE_KILL oldval, KILL cr); // oldval is read and then destroyed; flags are clobbered
+ size(18);
+ format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %}
+ ins_encode(z_enc_casL(oldval, newval, mem_ptr), // pointers reuse the long CAS encoding
+ z_enc_cctobool(res)); // presumably condition code -> boolean in res (encoding defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct compareAndSwapN_bool(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, flagsReg cr) %{ // narrow-oop CAS with an int result in res
+ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
+ effect(USE mem_ptr, USE_KILL oldval, KILL cr); // oldval is read and then destroyed; flags are clobbered
+ size(16);
+ format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %}
+ ins_encode(z_enc_casI(oldval, newval, mem_ptr), // narrow oops reuse the int CAS encoding
+ z_enc_cctobool(res)); // presumably condition code -> boolean in res (encoding defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Atomic operations on memory (GetAndSet*, GetAndAdd*)---------------
+
+// Exploit: direct memory arithmetic (single ASI instruction) for GetAndAddI.
+// Prereqs: - instructions available
+// - instructions guarantee atomicity
+// - immediate operand to be added
+// - immediate operand is small enough (8-bit signed).
+// - result of instruction is not used
+instruct addI_mem_imm8_atomic_no_res(memoryRSY mem, Universe dummy, immI8 src, flagsReg cr) %{
+ match(Set dummy (GetAndAddI mem src)); // 'dummy' only absorbs the unused result
+ effect(KILL cr);
+ predicate(VM_Version::has_AtomicMemWithImmALUOps() && n->as_LoadStore()->result_not_used()); // checks the prereqs listed above
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "ASI [$mem],$src\t # GetAndAddI (atomic)" %}
+ opcode(ASI_ZOPC);
+ ins_encode(z_siyform(mem, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Fallback: direct memory arithmetic not available; GetAndAddI with a 16-bit immediate.
+// Disadvantages: - CS-Loop required, very expensive.
+// - more code generated (26 to xx bytes vs. 6 bytes)
+instruct addI_mem_imm16_atomic(memoryRSY mem, iRegI dst, immI16 src, iRegI tmp, flagsReg cr) %{
+ match(Set dst (GetAndAddI mem src));
+ effect(KILL cr, TEMP_DEF dst, TEMP tmp); // dst is written before all inputs are consumed; tmp holds the sum
+ ins_cost(MEMORY_REF_COST+100*DEFAULT_COST); // deliberately expensive so cheaper rules win when they match
+ format %{ "BEGIN ATOMIC {\n\t"
+ " LGF $dst,[$mem]\n\t"
+ " AHIK $tmp,$dst,$src\n\t"
+ " CSY $dst,$tmp,$mem\n\t"
+ " retry if failed\n\t"
+ "} END ATOMIC"
+ %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rtmp = $tmp$$Register;
+ int Isrc = $src$$constant;
+ Label retry;
+
+ // Iterate until update with incremented value succeeds.
+ __ z_lgf(Rdst, $mem$$Address); // current contents
+ __ bind(retry); // placed after the load: the loop relies on a failed CSY leaving the fresh memory value in Rdst
+ // Calculate incremented value.
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_ahik(Rtmp, Rdst, Isrc); // three-operand form: Rtmp = Rdst + Isrc
+ } else {
+ __ z_lr(Rtmp, Rdst); // two-operand fallback: copy, then add in place
+ __ z_ahi(Rtmp, Isrc);
+ }
+ // Swap into memory location.
+ __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addI_mem_imm32_atomic(memoryRSY mem, iRegI dst, immI src, iRegI tmp, flagsReg cr) %{ // GetAndAddI, full 32-bit immediate, CS retry loop
+ match(Set dst (GetAndAddI mem src));
+ effect(KILL cr, TEMP_DEF dst, TEMP tmp); // dst is written before all inputs are consumed; tmp holds the sum
+ ins_cost(MEMORY_REF_COST+200*DEFAULT_COST); // costlier than the imm16 variant so that one wins when it matches
+ format %{ "BEGIN ATOMIC {\n\t"
+ " LGF $dst,[$mem]\n\t"
+ " LR $tmp,$dst\n\t"
+ " AFI $tmp,$src\n\t"
+ " CSY $dst,$tmp,$mem\n\t"
+ " retry if failed\n\t"
+ "} END ATOMIC"
+ %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rtmp = $tmp$$Register;
+ int Isrc = $src$$constant;
+ Label retry;
+
+ // Iterate until update with incremented value succeeds.
+ __ z_lgf(Rdst, $mem$$Address); // current contents
+ __ bind(retry); // placed after the load: the loop relies on a failed CSY leaving the fresh memory value in Rdst
+ // Calculate incremented value.
+ __ z_lr(Rtmp, Rdst); // 32-bit copy (the format string above now says LR accordingly)
+ __ z_afi(Rtmp, Isrc);
+ // Swap into memory location.
+ __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addI_mem_reg_atomic(memoryRSY mem, iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ // GetAndAddI, register addend, CS retry loop
+ match(Set dst (GetAndAddI mem src));
+ effect(KILL cr, TEMP_DEF dst, TEMP tmp); // dst is written before all inputs are consumed; tmp holds the sum
+ ins_cost(MEMORY_REF_COST+100*DEFAULT_COST);
+ format %{ "BEGIN ATOMIC {\n\t"
+ " LGF $dst,[$mem]\n\t"
+ " ARK $tmp,$dst,$src\n\t"
+ " CSY $dst,$tmp,$mem\n\t"
+ " retry if failed\n\t"
+ "} END ATOMIC"
+ %}
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ Register Rdst = $dst$$Register;
+ Register Rtmp = $tmp$$Register;
+ Label retry;
+
+ // Iterate until update with incremented value succeeds.
+ __ z_lgf(Rdst, $mem$$Address); // current contents
+ __ bind(retry); // placed after the load: the loop relies on a failed CSY leaving the fresh memory value in Rdst
+ // Calculate incremented value.
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_ark(Rtmp, Rdst, Rsrc); // three-operand form: Rtmp = Rdst + Rsrc
+ } else {
+ __ z_lr(Rtmp, Rdst); // two-operand fallback: copy, then add in place
+ __ z_ar(Rtmp, Rsrc);
+ }
+ __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+// Exploit: direct memory arithmetic (single AGSI instruction) for GetAndAddL.
+// Prereqs: - instructions available
+// - instructions guarantee atomicity
+// - immediate operand to be added
+// - immediate operand is small enough (8-bit signed).
+// - result of instruction is not used
+instruct addL_mem_imm8_atomic_no_res(memoryRSY mem, Universe dummy, immL8 src, flagsReg cr) %{
+ match(Set dummy (GetAndAddL mem src)); // 'dummy' only absorbs the unused result
+ effect(KILL cr);
+ predicate(VM_Version::has_AtomicMemWithImmALUOps() && n->as_LoadStore()->result_not_used()); // checks the prereqs listed above
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "AGSI [$mem],$src\t # GetAndAddL (atomic)" %}
+ opcode(AGSI_ZOPC);
+ ins_encode(z_siyform(mem, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Fallback: direct memory arithmetic not available; GetAndAddL with a 16-bit immediate.
+// Disadvantages: - CS-Loop required, very expensive.
+// - more code generated (26 to xx bytes vs. 6 bytes)
+instruct addL_mem_imm16_atomic(memoryRSY mem, iRegL dst, immL16 src, iRegL tmp, flagsReg cr) %{
+ match(Set dst (GetAndAddL mem src));
+ effect(KILL cr, TEMP_DEF dst, TEMP tmp); // dst is written before all inputs are consumed; tmp holds the sum
+ ins_cost(MEMORY_REF_COST+100*DEFAULT_COST); // deliberately expensive so cheaper rules win when they match
+ format %{ "BEGIN ATOMIC {\n\t"
+ " LG $dst,[$mem]\n\t"
+ " AGHIK $tmp,$dst,$src\n\t"
+ " CSG $dst,$tmp,$mem\n\t"
+ " retry if failed\n\t"
+ "} END ATOMIC"
+ %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rtmp = $tmp$$Register;
+ int Isrc = $src$$constant;
+ Label retry;
+
+ // Iterate until update with incremented value succeeds.
+ __ z_lg(Rdst, $mem$$Address); // current contents
+ __ bind(retry); // placed after the load: the loop relies on a failed CSG leaving the fresh memory value in Rdst
+ // Calculate incremented value.
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_aghik(Rtmp, Rdst, Isrc); // three-operand form: Rtmp = Rdst + Isrc
+ } else {
+ __ z_lgr(Rtmp, Rdst); // two-operand fallback: copy, then add in place
+ __ z_aghi(Rtmp, Isrc);
+ }
+ __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_mem_imm32_atomic(memoryRSY mem, iRegL dst, immL32 src, iRegL tmp, flagsReg cr) %{ // GetAndAddL, 32-bit immediate, CS retry loop
+ match(Set dst (GetAndAddL mem src));
+ effect(KILL cr, TEMP_DEF dst, TEMP tmp); // dst is written before all inputs are consumed; tmp holds the sum
+ ins_cost(MEMORY_REF_COST+100*DEFAULT_COST); // NOTE(review): the int counterpart uses 200*DEFAULT_COST - confirm the difference is intended
+ format %{ "BEGIN ATOMIC {\n\t"
+ " LG $dst,[$mem]\n\t"
+ " LGR $tmp,$dst\n\t"
+ " AGFI $tmp,$src\n\t"
+ " CSG $dst,$tmp,$mem\n\t"
+ " retry if failed\n\t"
+ "} END ATOMIC"
+ %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rtmp = $tmp$$Register;
+ int Isrc = $src$$constant;
+ Label retry;
+
+ // Iterate until update with incremented value succeeds.
+ __ z_lg(Rdst, $mem$$Address); // current contents
+ __ bind(retry); // placed after the load: the loop relies on a failed CSG leaving the fresh memory value in Rdst
+ // Calculate incremented value.
+ __ z_lgr(Rtmp, Rdst); // copy, then add the immediate in place
+ __ z_agfi(Rtmp, Isrc);
+ __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_mem_reg_atomic(memoryRSY mem, iRegL dst, iRegL src, iRegL tmp, flagsReg cr) %{ // GetAndAddL, register addend, CS retry loop
+ match(Set dst (GetAndAddL mem src));
+ effect(KILL cr, TEMP_DEF dst, TEMP tmp); // dst is written before all inputs are consumed; tmp holds the sum
+ ins_cost(MEMORY_REF_COST+100*DEFAULT_COST);
+ format %{ "BEGIN ATOMIC {\n\t"
+ " LG $dst,[$mem]\n\t"
+ " AGRK $tmp,$dst,$src\n\t"
+ " CSG $dst,$tmp,$mem\n\t"
+ " retry if failed\n\t"
+ "} END ATOMIC"
+ %}
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ Register Rdst = $dst$$Register;
+ Register Rtmp = $tmp$$Register;
+ Label retry;
+
+ // Iterate until update with incremented value succeeds.
+ __ z_lg(Rdst, $mem$$Address); // current contents
+ __ bind(retry); // placed after the load: the loop relies on a failed CSG leaving the fresh memory value in Rdst
+ // Calculate incremented value.
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_agrk(Rtmp, Rdst, Rsrc); // three-operand form: Rtmp = Rdst + Rsrc
+ } else {
+ __ z_lgr(Rtmp, Rdst); // two-operand fallback: copy, then add in place
+ __ z_agr(Rtmp, Rsrc);
+ }
+ __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Increment int value in memory by $src with a single LAA, save old value in dst.
+instruct addI_mem_reg_atomic_z196(memoryRSY mem, iRegI dst, iRegI src) %{ // no flagsReg operand and no TEMP, unlike the CS-loop rules
+ match(Set dst (GetAndAddI mem src));
+ predicate(VM_Version::has_LoadAndALUAtomicV1()); // only when the CPU reports this facility
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // far cheaper than the CS-loop rules, so preferred when the predicate holds
+ size(6);
+ format %{ "LAA $dst,$src,[$mem]" %}
+ ins_encode %{ __ z_laa($dst$$Register, $src$$Register, $mem$$Address); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Increment long value in memory by $src with a single LAAG, save old value in dst.
+instruct addL_mem_reg_atomic_z196(memoryRSY mem, iRegL dst, iRegL src) %{ // no flagsReg operand and no TEMP, unlike the CS-loop rules
+ match(Set dst (GetAndAddL mem src));
+ predicate(VM_Version::has_LoadAndALUAtomicV1()); // only when the CPU reports this facility
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // far cheaper than the CS-loop rules, so preferred when the predicate holds
+ size(6);
+ format %{ "LAAG $dst,$src,[$mem]" %}
+ ins_encode %{ __ z_laag($dst$$Register, $src$$Register, $mem$$Address); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+instruct xchgI_reg_mem(memoryRSY mem, iRegI dst, iRegI tmp, flagsReg cr) %{ // Atomic exchange (GetAndSetI) of an int in memory
+ match(Set dst (GetAndSetI mem dst)); // dst is the value to store on input and the matched result on output
+ effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+ format %{ "XCHGI $dst,[$mem]\t # EXCHANGE (int, atomic), temp $tmp" %}
+ ins_encode(z_enc_SwapI(mem, dst, tmp)); // 32-bit swap encoding (defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct xchgL_reg_mem(memoryRSY mem, iRegL dst, iRegL tmp, flagsReg cr) %{ // Atomic exchange (GetAndSetL) of a long in memory
+ match(Set dst (GetAndSetL mem dst)); // dst is the value to store on input and the matched result on output
+ effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+ format %{ "XCHGL $dst,[$mem]\t # EXCHANGE (long, atomic), temp $tmp" %}
+ ins_encode(z_enc_SwapL(mem, dst, tmp)); // 64-bit swap encoding (defined elsewhere)
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{ // Atomic exchange (GetAndSetN) of a narrow oop in memory
+ match(Set dst (GetAndSetN mem dst)); // dst is the value to store on input and the matched result on output
+ effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+ format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %}
+ ins_encode(z_enc_SwapI(mem, dst, tmp)); // narrow oops reuse the int swap encoding
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct xchgP_reg_mem(memoryRSY mem, iRegP dst, iRegL tmp, flagsReg cr) %{ // Atomic exchange (GetAndSetP) of a pointer in memory
+ match(Set dst (GetAndSetP mem dst)); // dst is the value to store on input and the matched result on output
+ effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+ format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %}
+ ins_encode(z_enc_SwapL(mem, dst, tmp)); // pointers reuse the long swap encoding
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Arithmetic Instructions--------------------------------------------
+
+// The rules are sorted by right operand type and operand length. Please keep
+// it that way.
+// Left operand type is always reg. Left operand len is I, L, P
+// Right operand type is reg, imm, mem. Right operand len is S, I, L, P
+// Special instruction formats, e.g. multi-operand, are inserted at the end.
+
+// ADD
+
+// REG = REG + REG
+
+// Register Addition, int, two-operand form: dst = dst + src (AR).
+instruct addI_reg_reg_CISC(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (AddI dst src)); // destination doubles as the first addend
+ effect(KILL cr); // flags are clobbered
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AR $dst,$src\t # int CISC ALU" %}
+ opcode(AR_ZOPC);
+ ins_encode(z_rrform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation. Int add, three-operand form (ARK).
+instruct addI_reg_reg_RISC(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (AddI src1 src2)); // dst may differ from both sources
+ effect(KILL cr); // flags are clobbered
+ predicate(VM_Version::has_DistinctOpnds()); // rule only selectable when this facility is reported
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "ARK $dst,$src1,$src2\t # int RISC ALU" %}
+ opcode(ARK_ZOPC);
+ ins_encode(z_rrfform(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + IMM
+
+// Avoid use of LA(Y) for general ALU operation.
+// Immediate Addition, int, two-operand form: dst = dst + 16-bit signed constant (AHI).
+instruct addI_reg_imm16_CISC(iRegI dst, immI16 con, flagsReg cr) %{
+ match(Set dst (AddI dst con)); // destination doubles as the first addend
+ effect(KILL cr); // flags are clobbered
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AHI $dst,$con\t # int CISC ALU" %}
+ opcode(AHI_ZOPC);
+ ins_encode(z_riform_signed(dst, con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+// Immediate Addition, int, three-operand form: dst = src + 16-bit signed constant (AHIK).
+instruct addI_reg_imm16_RISC(iRegI dst, iRegI src, immI16 con, flagsReg cr) %{
+ match(Set dst (AddI src con)); // dst may differ from src
+ effect(KILL cr); // flags are clobbered
+ predicate( VM_Version::has_DistinctOpnds()); // rule only selectable when this facility is reported
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AHIK $dst,$src,$con\t # int RISC ALU" %}
+ opcode(AHIK_ZOPC);
+ ins_encode(z_rieform_d(dst, src, con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Addition, int: dst = dst + arbitrary 32-bit constant (AFI).
+instruct addI_reg_imm32(iRegI dst, immI src, flagsReg cr) %{
+ match(Set dst (AddI dst src)); // destination doubles as the first addend
+ effect(KILL cr); // flags are clobbered
+ ins_cost(DEFAULT_COST_HIGH); // costlier than the imm16 rules so those win for small constants
+ size(6);
+ format %{ "AFI $dst,$src" %}
+ opcode(AFI_ZOPC);
+ ins_encode(z_rilform_signed(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Addition, int, via LA: dst = src + unsigned 12-bit constant.
+instruct addI_reg_imm12(iRegI dst, iRegI src, uimmI12 con) %{ // no flagsReg operand / KILL cr, unlike the AR/AHI rules
+ match(Set dst (AddI src con));
+ predicate(PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST_LOW); // cheapest of the int add-immediate rules
+ size(4);
+ format %{ "LA $dst,$con(,$src)\t # int d12(,b)" %}
+ opcode(LA_ZOPC);
+ ins_encode(z_rxform_imm_reg(dst, con, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Addition, int, via LAY: dst = src + 20-bit constant (immI20).
+instruct addI_reg_imm20(iRegI dst, iRegI src, immI20 con) %{ // no flagsReg operand / KILL cr, unlike the AR/AHI rules
+ match(Set dst (AddI src con));
+ predicate(PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "LAY $dst,$con(,$src)\t # int d20(,b)" %}
+ opcode(LAY_ZOPC);
+ ins_encode(z_rxyform_imm_reg(dst, con, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addI_reg_reg_imm12(iRegI dst, iRegI src1, iRegI src2, uimmI12 con) %{ // int: dst = src1 + src2 + unsigned 12-bit constant, one LA
+ match(Set dst (AddI (AddI src1 src2) con)); // folds two chained adds into a single instruction
+ predicate( PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LA $dst,$con($src1,$src2)\t # int d12(x,b)" %}
+ opcode(LA_ZOPC);
+ ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addI_reg_reg_imm20(iRegI dst, iRegI src1, iRegI src2, immI20 con) %{ // int: dst = src1 + src2 + 20-bit constant, one LAY
+ match(Set dst (AddI (AddI src1 src2) con)); // folds two chained adds into a single instruction
+ predicate(PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "LAY $dst,$con($src1,$src2)\t # int d20(x,b)" %}
+ opcode(LAY_ZOPC);
+ ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + MEM
+
+instruct addI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{ // int: dst = dst + (int loaded from memory), load folded into the add
+ match(Set dst (AddI dst (LoadI src)));
+ effect(KILL cr); // flags are clobbered
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "A(Y) $dst, $src\t # int" %}
+ opcode(AY_ZOPC, A_ZOPC); // two opcodes supplied; presumably z_form_rt_mem_opt picks A or AY by displacement - TODO confirm
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MEM = MEM + IMM
+
+// Add 8-bit immediate to a 4-byte memory operand and store the result back (ASI), no register involved.
+instruct addI_mem_imm(memoryRSY mem, immI8 src, flagsReg cr) %{
+ match(Set mem (StoreI mem (AddI (LoadI mem) src))); // load-add-store on the same address collapsed into one instruction
+ effect(KILL cr); // flags are clobbered
+ predicate(VM_Version::has_MemWithImmALUOps()); // rule only selectable when this facility is reported
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "ASI $mem,$src\t # direct mem add 4" %}
+ opcode(ASI_ZOPC);
+ ins_encode(z_siyform(mem, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+// ADD (long)
+
+// REG = REG + REG
+
+instruct addL_reg_regI(iRegL dst, iRegI src, flagsReg cr) %{ // long += int: the ConvI2L is folded into the add (AGFR)
+ match(Set dst (AddL dst (ConvI2L src)));
+ effect(KILL cr); // flags are clobbered
+ size(4);
+ format %{ "AGFR $dst,$src\t # long<-int CISC ALU" %}
+ opcode(AGFR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_reg_reg_CISC(iRegL dst, iRegL src, flagsReg cr) %{ // long add, two-operand form: dst = dst + src (AGR)
+ match(Set dst (AddL dst src)); // destination doubles as the first addend
+ effect(KILL cr); // flags are clobbered
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AGR $dst, $src\t # long CISC ALU" %}
+ opcode(AGR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation. Long add, three-operand form (AGRK).
+instruct addL_reg_reg_RISC(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
+ match(Set dst (AddL src1 src2)); // dst may differ from both sources
+ effect(KILL cr); // flags are clobbered
+ predicate(VM_Version::has_DistinctOpnds()); // rule only selectable when this facility is reported
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "AGRK $dst,$src1,$src2\t # long RISC ALU" %}
+ opcode(AGRK_ZOPC);
+ ins_encode(z_rrfform(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + IMM
+
+instruct addL_reg_imm12(iRegL dst, iRegL src, uimmL12 con) %{ // long: dst = src + unsigned 12-bit constant via LA; no flags operand
+ match(Set dst (AddL src con));
+ predicate( PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LA $dst,$con(,$src)\t # long d12(,b)" %}
+ opcode(LA_ZOPC);
+ ins_encode(z_rxform_imm_reg(dst, con, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_reg_imm20(iRegL dst, iRegL src, immL20 con) %{ // long: dst = src + 20-bit constant via LAY; no flags operand
+ match(Set dst (AddL src con));
+ predicate(PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "LAY $dst,$con(,$src)\t # long d20(,b)" %}
+ opcode(LAY_ZOPC);
+ ins_encode(z_rxyform_imm_reg(dst, con, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_reg_imm32(iRegL dst, immL32 con, flagsReg cr) %{ // long: dst = dst + 32-bit constant (AGFI)
+ match(Set dst (AddL dst con)); // destination doubles as the first addend
+ effect(KILL cr); // flags are clobbered
+ ins_cost(DEFAULT_COST_HIGH); // costlier than the imm16 rules so those win for small constants
+ size(6);
+ format %{ "AGFI $dst,$con\t # long CISC ALU" %}
+ opcode(AGFI_ZOPC);
+ ins_encode(z_rilform_signed(dst, con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation. Long: dst = dst + 16-bit signed constant (AGHI).
+instruct addL_reg_imm16_CISC(iRegL dst, immL16 con, flagsReg cr) %{
+ match(Set dst (AddL dst con)); // destination doubles as the first addend
+ effect(KILL cr); // flags are clobbered
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AGHI $dst,$con\t # long CISC ALU" %}
+ opcode(AGHI_ZOPC);
+ ins_encode(z_riform_signed(dst, con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation. Long: dst = src + 16-bit signed constant (AGHIK).
+instruct addL_reg_imm16_RISC(iRegL dst, iRegL src, immL16 con, flagsReg cr) %{
+ match(Set dst (AddL src con)); // dst may differ from src
+ effect(KILL cr); // flags are clobbered
+ predicate( VM_Version::has_DistinctOpnds()); // rule only selectable when this facility is reported
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "AGHIK $dst,$src,$con\t # long RISC ALU" %}
+ opcode(AGHIK_ZOPC);
+ ins_encode(z_rieform_d(dst, src, con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + MEM
+
+instruct addL_Reg_memI(iRegL dst, memory src, flagsReg cr)%{ // long += int loaded from memory; LoadI and ConvI2L folded into AGF
+ match(Set dst (AddL dst (ConvI2L (LoadI src))));
+ effect(KILL cr); // flags are clobbered
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "AGF $dst, $src\t # long/int" %}
+ opcode(AGF_ZOPC, AGF_ZOPC); // same opcode given for both opcode slots
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{ // long += long loaded from memory; LoadL folded into AG
+ match(Set dst (AddL dst (LoadL src)));
+ effect(KILL cr); // flags are clobbered
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "AG $dst, $src\t # long" %}
+ opcode(AG_ZOPC, AG_ZOPC); // same opcode given for both opcode slots
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_reg_reg_imm12(iRegL dst, iRegL src1, iRegL src2, uimmL12 con) %{ // long: dst = src1 + src2 + unsigned 12-bit constant, one LA
+ match(Set dst (AddL (AddL src1 src2) con)); // folds two chained adds into a single instruction
+ predicate( PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LA $dst,$con($src1,$src2)\t # long d12(x,b)" %}
+ opcode(LA_ZOPC);
+ ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addL_reg_reg_imm20(iRegL dst, iRegL src1, iRegL src2, immL20 con) %{ // long: dst = src1 + src2 + 20-bit constant, one LAY
+ match(Set dst (AddL (AddL src1 src2) con)); // folds two chained adds into a single instruction
+ predicate(PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "LAY $dst,$con($src1,$src2)\t # long d20(x,b)" %}
+ opcode(LAY_ZOPC);
+ ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MEM = MEM + IMM
+
+// Add 8-bit immediate to an 8-byte memory operand and store the result back (AGSI), no register involved.
+instruct addL_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{
+ match(Set mem (StoreL mem (AddL (LoadL mem) src))); // load-add-store on the same address collapsed into one instruction
+ effect(KILL cr); // flags are clobbered
+ predicate(VM_Version::has_MemWithImmALUOps()); // rule only selectable when this facility is reported
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "AGSI $mem,$src\t # direct mem add 8" %}
+ opcode(AGSI_ZOPC);
+ ins_encode(z_siyform(mem, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+// REG = REG + REG
+
+// Ptr Addition via LA with zero displacement: dst = src1 + src2.
+instruct addP_reg_reg_LA(iRegP dst, iRegP_N2P src1, iRegL src2) %{ // no flagsReg operand / KILL cr, unlike the ALGR/ALGRK rules
+ match(Set dst (AddP src1 src2));
+ predicate( PreferLAoverADD); // the CISC/RISC pointer-add rules require !PreferLAoverADD
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "LA $dst,#0($src1,$src2)\t # ptr 0(x,b)" %}
+ opcode(LA_ZOPC);
+ ins_encode(z_rxform_imm_reg_reg(dst, 0x0, src1, src2)); // literal 0 passed as the displacement
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Ptr Addition, two-operand form: dst = dst + src (ALGR).
+// Avoid use of LA(Y) for general ALU operation.
+instruct addP_reg_reg_CISC(iRegP dst, iRegL src, flagsReg cr) %{
+ match(Set dst (AddP dst src)); // destination doubles as the base pointer
+ effect(KILL cr); // flags are clobbered
+ predicate(!PreferLAoverADD && !VM_Version::has_DistinctOpnds()); // used only when neither LA nor the three-operand form is chosen
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "ALGR $dst,$src\t # ptr CISC ALU" %}
+ opcode(ALGR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Ptr Addition, three-operand form: dst = src1 + src2 (ALGRK).
+// Avoid use of LA(Y) for general ALU operation.
+instruct addP_reg_reg_RISC(iRegP dst, iRegP_N2P src1, iRegL src2, flagsReg cr) %{
+ match(Set dst (AddP src1 src2)); // dst may differ from both sources
+ effect(KILL cr); // flags are clobbered
+ predicate(!PreferLAoverADD && VM_Version::has_DistinctOpnds()); // LA not preferred and the three-operand facility is reported
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "ALGRK $dst,$src1,$src2\t # ptr RISC ALU" %}
+ opcode(ALGRK_ZOPC);
+ ins_encode(z_rrfform(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG + IMM
+
+instruct addP_reg_imm12(iRegP dst, iRegP_N2P src, uimmL12 con) %{ // ptr: dst = src + unsigned 12-bit constant via LA; no flags operand
+ match(Set dst (AddP src con));
+ predicate( PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LA $dst,$con(,$src)\t # ptr d12(,b)" %}
+ opcode(LA_ZOPC);
+ ins_encode(z_rxform_imm_reg(dst, con, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation. Ptr: dst = dst + 16-bit signed constant (AGHI).
+instruct addP_reg_imm16_CISC(iRegP dst, immL16 src, flagsReg cr) %{
+ match(Set dst (AddP dst src)); // destination doubles as the base pointer
+ effect(KILL cr); // flags are clobbered
+ predicate(!PreferLAoverADD && !VM_Version::has_DistinctOpnds()); // used only when neither LA nor the three-operand form is chosen
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AGHI $dst,$src\t # ptr CISC ALU" %}
+ opcode(AGHI_ZOPC);
+ ins_encode(z_riform_signed(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation. Ptr: dst = src + 16-bit signed constant (ALGHSIK).
+instruct addP_reg_imm16_RISC(iRegP dst, iRegP_N2P src, immL16 con, flagsReg cr) %{
+ match(Set dst (AddP src con)); // dst may differ from src
+ effect(KILL cr); // flags are clobbered
+ predicate(!PreferLAoverADD && VM_Version::has_DistinctOpnds()); // LA not preferred and the three-operand facility is reported
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "ALGHSIK $dst,$src,$con\t # ptr RISC ALU" %}
+ opcode(ALGHSIK_ZOPC);
+ ins_encode(z_rieform_d(dst, src, con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addP_reg_imm20(iRegP dst, memoryRegP src, immL20 con) %{ // ptr: dst = src + 20-bit constant via LAY; no flags operand
+ match(Set dst (AddP src con)); // note: src is memoryRegP here, whereas addP_reg_imm12 takes iRegP_N2P
+ predicate(PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "LAY $dst,$con(,$src)\t # ptr d20(,b)" %}
+ opcode(LAY_ZOPC);
+ ins_encode(z_rxyform_imm_reg(dst, con, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Pointer Immediate Addition: dst = dst + 32-bit constant (AGFI). No predicate, so always selectable.
+instruct addP_reg_imm32(iRegP dst, immL32 src, flagsReg cr) %{
+ match(Set dst (AddP dst src)); // destination doubles as the base pointer
+ effect(KILL cr); // flags are clobbered
+ ins_cost(DEFAULT_COST_HIGH); // costlier than the smaller-immediate rules so those win when they match
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AGFI $dst,$src\t # ptr" %}
+ opcode(AGFI_ZOPC);
+ ins_encode(z_rilform_signed(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// REG = REG1 + REG2 + IMM
+
+instruct addP_reg_reg_imm12(iRegP dst, memoryRegP src1, iRegL src2, uimmL12 con) %{ // ptr: dst = src1 + src2 + unsigned 12-bit constant, one LA
+ match(Set dst (AddP (AddP src1 src2) con)); // folds two chained pointer adds into a single instruction
+ predicate( PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LA $dst,$con($src1,$src2)\t # ptr d12(x,b)" %}
+ opcode(LA_ZOPC);
+ ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addP_regN_reg_imm12(iRegP dst, iRegP_N2P src1, iRegL src2, uimmL12 con) %{ // as addP_reg_reg_imm12, but src1 is of class iRegP_N2P
+ match(Set dst (AddP (AddP src1 src2) con)); // same match rule as addP_reg_reg_imm12
+ predicate( PreferLAoverADD && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); // additionally requires zero narrow-oop base and shift
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LA $dst,$con($src1,$src2)\t # ptr d12(x,b)" %}
+ opcode(LA_ZOPC);
+ ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addP_reg_reg_imm20(iRegP dst, memoryRegP src1, iRegL src2, immL20 con) %{ // ptr: dst = src1 + src2 + 20-bit constant, one LAY
+ match(Set dst (AddP (AddP src1 src2) con)); // folds two chained pointer adds into a single instruction
+ predicate(PreferLAoverADD); // only when the PreferLAoverADD flag is set
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LAY $dst,$con($src1,$src2)\t # ptr d20(x,b)" %}
+ opcode(LAY_ZOPC);
+ ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con) %{ // as addP_reg_reg_imm20, but src1 is of class iRegP_N2P
+ match(Set dst (AddP (AddP src1 src2) con)); // same match rule as addP_reg_reg_imm20
+ predicate( PreferLAoverADD && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); // additionally requires zero narrow-oop base and shift
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LAY $dst,$con($src1,$src2)\t # ptr d20(x,b)" %}
+ opcode(LAY_ZOPC);
+ ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MEM = MEM + IMM
+
+// Add 8-bit immediate to an 8-byte pointer in memory and store the result back (AGSI), no register involved.
+instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{
+ match(Set mem (StoreP mem (AddP (LoadP mem) src))); // load-add-store on the same address collapsed into one instruction
+ effect(KILL cr); // flags are clobbered
+ predicate(VM_Version::has_MemWithImmALUOps()); // rule only selectable when this facility is reported
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "AGSI $mem,$src\t # direct mem add 8 (ptr)" %}
+ opcode(AGSI_ZOPC);
+ ins_encode(z_siyform(mem, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// SUB
+
+// Register Subtraction, int, two-operand form: dst = dst - src (SR).
+instruct subI_reg_reg_CISC(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (SubI dst src)); // destination doubles as the minuend
+ effect(KILL cr); // flags are clobbered
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "SR $dst,$src\t # int CISC ALU" %}
+ opcode(SR_ZOPC);
+ ins_encode(z_rrform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct subI_reg_reg_RISC(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (SubI src1 src2)); // Three-operand form: dst may differ from both inputs.
+ effect(KILL cr); // cr is declared clobbered.
+ predicate(VM_Version::has_DistinctOpnds()); // Only when VM_Version reports distinct-operand support.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "SRK $dst,$src1,$src2\t # int RISC ALU" %}
+ opcode(SRK_ZOPC);
+ ins_encode(z_rrfform(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct subI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+ match(Set dst (SubI dst (LoadI src))); // dst = dst - [src]; the int load is folded into the subtract.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "S(Y) $dst, $src\t # int" %}
+ opcode(SY_ZOPC, S_ZOPC); // Two opcodes are passed; presumably z_form_rt_mem_opt picks one by displacement -- TODO confirm.
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct subI_zero_reg(iRegI dst, immI_0 zero, iRegI src, flagsReg cr) %{
+ match(Set dst (SubI zero src)); // 0 - src, i.e. int negation; the zero operand is matched but not used by the encoding.
+ effect(KILL cr); // cr is declared clobbered.
+ size(2);
+ format %{ "NEG $dst, $src" %}
+ ins_encode %{ __ z_lcr($dst$$Register, $src$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+//
+
+// Long subtraction
+instruct subL_reg_reg_CISC(iRegL dst, iRegL src, flagsReg cr) %{
+ match(Set dst (SubL dst src)); // Two-operand form on long values: dst is both the left input and the result.
+ effect(KILL cr); // cr is declared clobbered.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "SGR $dst,$src\t # long CISC ALU" %}
+ opcode(SGR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+instruct subL_reg_reg_RISC(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
+ match(Set dst (SubL src1 src2)); // Three-operand form on long values: dst may differ from both inputs.
+ effect(KILL cr); // cr is declared clobbered.
+ predicate(VM_Version::has_DistinctOpnds()); // Only when VM_Version reports distinct-operand support.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "SGRK $dst,$src1,$src2\t # long RISC ALU" %}
+ opcode(SGRK_ZOPC);
+ ins_encode(z_rrfform(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct subL_reg_regI_CISC(iRegL dst, iRegI src, flagsReg cr) %{
+ match(Set dst (SubL dst (ConvI2L src))); // Long minus int: the int-to-long conversion is folded into the subtract.
+ effect(KILL cr); // cr is declared clobbered.
+ size(4);
+ format %{ "SGFR $dst, $src\t # long/int CISC ALU" %}
+ opcode(SGFR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct subL_Reg_memI(iRegL dst, memory src, flagsReg cr)%{
+ match(Set dst (SubL dst (ConvI2L (LoadI src)))); // Long minus an int loaded from memory; load and conversion are folded in.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "SGF $dst, $src\t # long/int" %}
+ opcode(SGF_ZOPC, SGF_ZOPC); // The same opcode is passed in both positions.
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct subL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+ match(Set dst (SubL dst (LoadL src))); // dst = dst - [src]; the long load is folded into the subtract.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "SG $dst, $src\t # long" %}
+ opcode(SG_ZOPC, SG_ZOPC); // The same opcode is passed in both positions.
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Moved declaration of negL_reg_reg before encode nodes, where it is used.
+
+// MUL
+
+// Register Multiplication
+instruct mulI_reg_reg(iRegI dst, iRegI src) %{
+ match(Set dst (MulI dst src)); // Two-operand int multiply: dst = dst * src. No flagsReg effect is declared.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "MSR $dst, $src" %}
+ opcode(MSR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Multiplication
+instruct mulI_reg_imm16(iRegI dst, immI16 con) %{
+ match(Set dst (MulI dst con)); // dst = dst * con, with con restricted to the immI16 operand.
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "MHI $dst,$con" %}
+ opcode(MHI_ZOPC);
+ ins_encode(z_riform_signed(dst,con)); // The immediate is encoded through the signed RI-form encoding class.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate (32bit) Multiplication
+instruct mulI_reg_imm32(iRegI dst, immI con) %{
+ match(Set dst (MulI dst con)); // dst = dst * con for any int constant (immI operand).
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "MSFI $dst,$con" %}
+ opcode(MSFI_ZOPC);
+ ins_encode(z_rilform_signed(dst,con)); // The immediate is encoded through the signed RIL-form encoding class.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulI_Reg_mem(iRegI dst, memory src)%{
+ match(Set dst (MulI dst (LoadI src))); // dst = dst * [src]; the int load is folded into the multiply.
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MS(Y) $dst, $src\t # int" %}
+ opcode(MSY_ZOPC, MS_ZOPC); // Two opcodes are passed; presumably z_form_rt_mem_opt picks one by displacement -- TODO confirm.
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+//
+
+instruct mulL_reg_regI(iRegL dst, iRegI src) %{
+ match(Set dst (MulL dst (ConvI2L src))); // Long times int: the int-to-long conversion is folded into the multiply.
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "MSGFR $dst,$src\t # long/int" %}
+ opcode(MSGFR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulL_reg_reg(iRegL dst, iRegL src) %{
+ match(Set dst (MulL dst src)); // Two-operand long multiply: dst = dst * src. No flagsReg effect is declared.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "MSGR $dst,$src\t # long" %}
+ opcode(MSGR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Multiplication
+instruct mulL_reg_imm16(iRegL dst, immL16 src) %{
+ match(Set dst (MulL dst src)); // dst = dst * src, with src restricted to the immL16 operand.
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "MGHI $dst,$src\t # long" %}
+ opcode(MGHI_ZOPC);
+ ins_encode(z_riform_signed(dst, src)); // The immediate is encoded through the signed RI-form encoding class.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate (32bit) Multiplication
+instruct mulL_reg_imm32(iRegL dst, immL32 con) %{
+ match(Set dst (MulL dst con)); // dst = dst * con, with con restricted to the immL32 operand.
+ ins_cost(DEFAULT_COST);
+ size(6);
+ format %{ "MSGFI $dst,$con" %}
+ opcode(MSGFI_ZOPC);
+ ins_encode(z_rilform_signed(dst,con)); // The immediate is encoded through the signed RIL-form encoding class.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulL_Reg_memI(iRegL dst, memory src)%{
+ match(Set dst (MulL dst (ConvI2L (LoadI src)))); // Long times an int loaded from memory; load and conversion are folded in.
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "MSGF $dst, $src\t # long" %}
+ opcode(MSGF_ZOPC, MSGF_ZOPC); // The same opcode is passed in both positions.
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulL_Reg_mem(iRegL dst, memory src)%{
+ match(Set dst (MulL dst (LoadL src))); // dst = dst * [src]; the long load is folded into the multiply.
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "MSG $dst, $src\t # long" %}
+ opcode(MSG_ZOPC, MSG_ZOPC); // The same opcode is passed in both positions.
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// DIV
+
+// Integer DIVMOD with Register, both quotient and mod results
+instruct divModI_reg_divmod(roddRegI dst1src1, revenRegI dst2, noOdd_iRegI src2, flagsReg cr) %{
+ match(DivModI dst1src1 src2); // dst1src1 (odd register) is dividend input and one result; dst2 (even register) is the other result.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ size(VM_Version::has_CompareBranch() ? 24 : 26); // The fused compare-and-branch variant is 2 bytes shorter.
+ format %{ "DIVMODI ($dst1src1, $dst2) $src2" %}
+ ins_encode %{
+ Register d1s1 = $dst1src1$$Register;
+ Register d2 = $dst2$$Register;
+ Register s2 = $src2$$Register;
+
+ assert_different_registers(d1s1, s2); // The divisor must survive the writes to d1s1 below.
+
+ Label do_div, done_div;
+ if (VM_Version::has_CompareBranch()) { // A divisor of -1 bypasses the divide instruction.
+ __ z_cij(s2, -1, Assembler::bcondNotEqual, do_div);
+ } else {
+ __ z_chi(s2, -1);
+ __ z_brne(do_div);
+ }
+ __ z_lcr(d1s1, d1s1); // Divisor is -1: the first result is the complemented (negated) dividend ...
+ __ clear_reg(d2, false, false); // ... and the second result is cleared to zero.
+ __ z_bru(done_div);
+ __ bind(do_div);
+ __ z_lgfr(d1s1, d1s1);
+ __ z_dsgfr(d2, s2); // Divide addressed through the even register d2; d1s1 is its odd partner.
+ __ bind(done_div);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+// Register Division
+instruct divI_reg_reg(roddRegI dst, iRegI src1, noOdd_iRegI src2, revenRegI tmp, flagsReg cr) %{
+ match(Set dst (DivI src1 src2)); // dst = src1 / src2; dst is the odd half of a pair whose even half (tmp) is killed.
+ effect(KILL tmp, KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ size(VM_Version::has_CompareBranch() ? 20 : 22); // The fused compare-and-branch variant is 2 bytes shorter.
+ format %{ "DIV_checked $dst, $src1,$src2\t # treats special case 0x80../-1" %}
+ ins_encode %{
+ Register dividend = $src1$$Register;
+ Register divisor = $src2$$Register;
+ Register quotient = $dst$$Register;
+
+ assert_different_registers(quotient, divisor);
+
+ Label divide, finished;
+ if (!VM_Version::has_CompareBranch()) { // A divisor of -1 bypasses the divide instruction.
+ __ z_chi(divisor, -1);
+ __ z_brne(divide);
+ } else {
+ __ z_cij(divisor, -1, Assembler::bcondNotEqual, divide);
+ }
+ __ z_lcr(quotient, dividend); // Divisor is -1: result is the complemented dividend.
+ __ z_bru(finished);
+ __ bind(divide);
+ __ z_lgfr(quotient, dividend);
+ __ z_dsgfr(quotient->predecessor()/* quotient is odd part of a register pair. */, divisor);
+ __ bind(finished);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Division
+instruct divI_reg_imm16(roddRegI dst, iRegI src1, immI16 src2, revenRegI tmp, flagsReg cr) %{
+ match(Set dst (DivI src1 src2)); // dst = src1 / constant; dst is the odd half of a pair whose even half (tmp) is killed.
+ effect(KILL tmp, KILL cr); // R0 is killed, too.
+ ins_cost(2 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "DIV_const $dst,$src1,$src2" %}
+ ins_encode %{
+ // The divisor is known at code-generation time, so only one of the two paths is emitted.
+ if ($src2$$constant == -1) {
+ __ z_lcr($dst$$Register, $src1$$Register);
+ } else {
+ __ z_lghi(Z_R0_scratch, $src2$$constant);
+ __ z_lgfr($dst$$Register, $src1$$Register);
+ __ z_dsgfr($dst$$Register->predecessor()/* Dst is odd part of a register pair. */, Z_R0_scratch);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Long DIVMOD with Register, both quotient and mod results
+instruct divModL_reg_divmod(roddRegL dst1src1, revenRegL dst2, iRegL src2, flagsReg cr) %{
+ match(DivModL dst1src1 src2); // dst1src1 (odd register) is dividend input and one result; dst2 (even register) is the other result.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ size(VM_Version::has_CompareBranch() ? 22 : 24); // The fused compare-and-branch variant is 2 bytes shorter.
+ format %{ "DIVMODL ($dst1src1, $dst2) $src2" %}
+ ins_encode %{
+ Register d1s1 = $dst1src1$$Register;
+ Register d2 = $dst2$$Register;
+ Register s2 = $src2$$Register;
+
+ Label do_div, done_div;
+ if (VM_Version::has_CompareBranch()) { // A divisor of -1 bypasses the divide instruction.
+ __ z_cgij(s2, -1, Assembler::bcondNotEqual, do_div);
+ } else {
+ __ z_cghi(s2, -1);
+ __ z_brne(do_div);
+ }
+ __ z_lcgr(d1s1, d1s1); // Divisor is -1: the first result is the complemented dividend.
+ // indicate unused result
+ (void) __ clear_reg(d2, true, false); // The second result is cleared to zero; clear_reg's return value is discarded.
+ __ z_bru(done_div);
+ __ bind(do_div);
+ __ z_dsgr(d2, s2); // Divide addressed through the even register d2; d1s1 is its odd partner.
+ __ bind(done_div);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Long Division
+instruct divL_reg_reg(roddRegL dst, iRegL src, revenRegL tmp, flagsReg cr) %{
+ match(Set dst (DivL dst src)); // dst = dst / src; dst is the odd half of a pair whose even half (tmp) is killed.
+ effect(KILL tmp, KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ size(VM_Version::has_CompareBranch() ? 18 : 20); // The fused compare-and-branch variant is 2 bytes shorter.
+ format %{ "DIVG_checked $dst, $src\t # long, treats special case 0x80../-1" %}
+ ins_encode %{
+ Register divisor = $src$$Register;
+ Register quotient = $dst$$Register;
+
+ Label finished;
+ __ z_lcgr(quotient, quotient); // Complement up front; this is already the answer when the divisor is -1.
+ if (!VM_Version::has_CompareBranch()) {
+ __ z_cghi(divisor, -1);
+ __ z_bre(finished);
+ } else {
+ __ z_cgij(divisor, -1, Assembler::bcondEqual, finished);
+ }
+ __ z_lcgr(quotient, quotient); // Not -1: undo the complement before dividing.
+ __ z_dsgr(quotient->predecessor()/* quotient is odd part of a register pair. */, divisor);
+ __ bind(finished);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Long Division
+instruct divL_reg_imm16(roddRegL dst, iRegL src1, immL16 src2, revenRegL tmp, flagsReg cr) %{
+ match(Set dst (DivL src1 src2)); // dst = src1 / constant; dst is the odd half of a pair whose even half (tmp) is killed.
+ effect(KILL tmp, KILL cr); // R0 is killed, too.
+ ins_cost(2 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "DIVG_const $dst,$src1,$src2\t # long" %}
+ ins_encode %{
+ if ($src2$$constant == -1) { // Known at code-generation time; only one path is emitted.
+ __ z_lcgr($dst$$Register, $src1$$Register);
+ } else {
+ __ z_lghi(Z_R0_scratch, $src2$$constant);
+ __ lgr_if_needed($dst$$Register, $src1$$Register);
+ __ z_dsgr($dst$$Register->predecessor()/* Dst is odd part of a register pair. */, Z_R0_scratch);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// REM
+
+// Integer Remainder
+// Register Remainder
+instruct modI_reg_reg(revenRegI dst, iRegI src1, noOdd_iRegI src2, roddRegI tmp, flagsReg cr) %{
+ match(Set dst (ModI src1 src2)); // dst = src1 % src2; dst is the even half of a pair whose odd half (tmp) is killed.
+ effect(KILL tmp, KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MOD_checked $dst,$src1,$src2" %}
+ ins_encode %{
+ Register a = $src1$$Register;
+ Register b = $src2$$Register;
+ Register t = $dst$$Register;
+ assert_different_registers(t->successor(), b); // The odd partner is overwritten with the dividend before b is used.
+
+ Label do_div, done_div;
+
+ if ((t->encoding() != b->encoding()) && (t->encoding() != a->encoding())) { // t aliases neither input: it can be cleared up front.
+ (void) __ clear_reg(t, true, false); // Does no harm. Operands are in other regs.
+ if (VM_Version::has_CompareBranch()) {
+ __ z_cij(b, -1, Assembler::bcondEqual, done_div);
+ } else {
+ __ z_chi(b, -1);
+ __ z_bre(done_div);
+ }
+ __ z_lgfr(t->successor(), a);
+ __ z_dsgfr(t/* t is even part of a register pair. */, b);
+ } else { // t aliases an input: test the divisor first and clear t only on the -1 path.
+ if (VM_Version::has_CompareBranch()) {
+ __ z_cij(b, -1, Assembler::bcondNotEqual, do_div);
+ } else {
+ __ z_chi(b, -1);
+ __ z_brne(do_div);
+ }
+ __ clear_reg(t, true, false);
+ __ z_bru(done_div);
+ __ bind(do_div);
+ __ z_lgfr(t->successor(), a);
+ __ z_dsgfr(t/* t is even part of a register pair. */, b);
+ }
+ __ bind(done_div);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Remainder
+instruct modI_reg_imm16(revenRegI dst, iRegI src1, immI16 src2, roddRegI tmp, flagsReg cr) %{
+ match(Set dst (ModI src1 src2)); // dst = src1 % constant; dst is the even half of a pair whose odd half (tmp) is killed.
+ effect(KILL tmp, KILL cr); // R0 is killed, too.
+ ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MOD_const $dst,$src1,$src2" %}
+ ins_encode %{
+ assert_different_registers($dst$$Register, $src1$$Register);
+ assert_different_registers($dst$$Register->successor(), $src1$$Register);
+ int divisor = $src2$$constant;
+
+ if (divisor != -1) { // Known at code-generation time; only one path is emitted.
+ __ z_lghi(Z_R0_scratch, divisor);
+ __ z_lgfr($dst$$Register->successor(), $src1$$Register);
+ __ z_dsgfr($dst$$Register/* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
+ } else {
+ __ clear_reg($dst$$Register, true, false); // Remainder for divisor -1 is zero.
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Long Remainder
+instruct modL_reg_reg(revenRegL dst, roddRegL src1, iRegL src2, flagsReg cr) %{
+ match(Set dst (ModL src1 src2)); // dst = src1 % src2; dst (even) and src1 (odd) are expected to form one register pair.
+ effect(KILL src1, KILL cr); // R0 is killed, too.
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MODG_checked $dst,$src1,$src2" %}
+ ins_encode %{
+ Register a = $src1$$Register;
+ Register b = $src2$$Register;
+ Register t = $dst$$Register;
+ assert(t->successor() == a, "(t,a) is an even-odd pair" );
+
+ Label do_div, done_div;
+ if (t->encoding() != b->encoding()) { // t does not alias the divisor: it can be cleared up front.
+ (void) __ clear_reg(t, true, false); // Does no harm. Dividend is in successor.
+ if (VM_Version::has_CompareBranch()) {
+ __ z_cgij(b, -1, Assembler::bcondEqual, done_div);
+ } else {
+ __ z_cghi(b, -1);
+ __ z_bre(done_div);
+ }
+ __ z_dsgr(t, b);
+ } else { // t is the divisor register: test it first and clear t only on the -1 path.
+ if (VM_Version::has_CompareBranch()) {
+ __ z_cgij(b, -1, Assembler::bcondNotEqual, do_div);
+ } else {
+ __ z_cghi(b, -1);
+ __ z_brne(do_div);
+ }
+ __ clear_reg(t, true, false);
+ __ z_bru(done_div);
+ __ bind(do_div);
+ __ z_dsgr(t, b);
+ }
+ __ bind(done_div);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Long Remainder
+instruct modL_reg_imm16(revenRegL dst, iRegL src1, immL16 src2, roddRegL tmp, flagsReg cr) %{
+ match(Set dst (ModL src1 src2)); // dst = src1 % constant; dst is the even half of a pair whose odd half (tmp) is killed.
+ effect(KILL tmp, KILL cr); // R0 is killed, too.
+ ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MODG_const $dst,$src1,$src2\t # long" %}
+ ins_encode %{
+ int divisor = $src2$$constant;
+ if (divisor != -1) { // Known at code-generation time; only one path is emitted.
+ __ z_lghi(Z_R0_scratch, divisor);
+ __ z_lgr($dst$$Register->successor(), $src1$$Register);
+ __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
+ } else {
+ __ clear_reg($dst$$Register, true, false); // Remainder for divisor -1 is zero.
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// SHIFT
+
+// Shift left logical
+
+// Register Shift Left variable
+instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
+ match(Set dst (LShiftI src nbits)); // dst = src << nbits with a variable shift count.
+ effect(KILL cr); // R1 is killed, too.
+ ins_cost(3 * DEFAULT_COST);
+ size(14);
+ format %{ "SLL $dst,$src,[$nbits] & 31\t# use RISC-like SLLG also for int" %}
+ ins_encode %{
+ __ z_lgr(Z_R1_scratch, $nbits$$Register); // Work on a copy so nbits itself is not modified.
+ __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1); // Mask the count with BitsPerJavaInteger-1.
+ __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Immediate
+// Constant shift count is masked in ideal graph already.
+instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
+ match(Set dst (LShiftI src nbits)); // dst = src << constant; no flagsReg effect is declared.
+ size(6);
+ format %{ "SLL $dst,$src,$nbits\t# use RISC-like SLLG also for int" %}
+ ins_encode %{
+ const int shift_count = $nbits$$constant & (BitsPerJavaInteger - 1); // Masked again here for the encoding.
+ __ z_sllg($dst$$Register, $src$$Register, shift_count, Z_R0);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Immediate by 1bit
+instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
+ match(Set dst (LShiftI src nbits)); // Shift left by exactly one (immI_1), implemented as src + src.
+ predicate(PreferLAoverADD); // Selected only when PreferLAoverADD is true.
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LA $dst,#0($src,$src)\t # SLL by 1 (int)" %}
+ ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long
+instruct sllL_reg_reg(iRegL dst, iRegL src1, iRegI nbits) %{
+ match(Set dst (LShiftL src1 nbits)); // dst = src1 << nbits on long values; the count register is passed unmasked to the encoding.
+ size(6);
+ format %{ "SLLG $dst,$src1,[$nbits]" %}
+ opcode(SLLG_ZOPC);
+ ins_encode(z_rsyform_reg_reg(dst, src1, nbits));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long Immediate
+instruct sllL_reg_imm(iRegL dst, iRegL src1, immI nbits) %{
+ match(Set dst (LShiftL src1 nbits)); // dst = src1 << constant on long values.
+ size(6);
+ format %{ "SLLG $dst,$src1,$nbits" %}
+ opcode(SLLG_ZOPC);
+ ins_encode(z_rsyform_const(dst, src1, nbits));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long Immediate by 1bit
+instruct sllL_reg_imm_1(iRegL dst, iRegL src1, immI_1 nbits) %{
+ match(Set dst (LShiftL src1 nbits)); // Shift left by exactly one (immI_1), implemented as src1 + src1.
+ predicate(PreferLAoverADD); // Selected only when PreferLAoverADD is true.
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LA $dst,#0($src1,$src1)\t # SLLG by 1 (long)" %}
+ ins_encode %{ __ z_la($dst$$Register, 0, $src1$$Register, $src1$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Shift right arithmetic
+
+// Register Arithmetic Shift Right
+instruct sraI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (RShiftI dst src)); // Two-operand form: dst = dst >> src, where src is the variable shift count.
+ effect(KILL cr); // R1 is killed, too.
+ ins_cost(3 * DEFAULT_COST);
+ size(12);
+ format %{ "SRA $dst,[$src] & 31" %}
+ ins_encode %{
+ __ z_lgr(Z_R1_scratch, $src$$Register); // Work on a copy so the count register is not modified.
+ __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1); // Mask the count with BitsPerJavaInteger-1.
+ __ z_sra($dst$$Register, 0, Z_R1_scratch);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Immediate
+// Constant shift count is masked in ideal graph already.
+instruct sraI_reg_imm(iRegI dst, immI src, flagsReg cr) %{
+ match(Set dst (RShiftI dst src)); // Two-operand form: dst = dst >> constant.
+ effect(KILL cr); // cr is declared clobbered.
+ size(4);
+ format %{ "SRA $dst,$src" %}
+ ins_encode %{
+ const int shift_count = $src$$constant & (BitsPerJavaInteger - 1); // Masked again here for the encoding.
+ __ z_sra($dst$$Register, shift_count, Z_R0);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Long
+instruct sraL_reg_reg(iRegL dst, iRegL src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (RShiftL src1 src2)); // dst = src1 >> src2 on long values; the count register is passed unmasked to the encoding.
+ effect(KILL cr); // cr is declared clobbered.
+ size(6);
+ format %{ "SRAG $dst,$src1,[$src2]" %}
+ opcode(SRAG_ZOPC);
+ ins_encode(z_rsyform_reg_reg(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Long Immediate
+instruct sraL_reg_imm(iRegL dst, iRegL src1, immI src2, flagsReg cr) %{
+ match(Set dst (RShiftL src1 src2)); // dst = src1 >> constant on long values.
+ effect(KILL cr); // cr is declared clobbered.
+ size(6);
+ format %{ "SRAG $dst,$src1,$src2" %}
+ opcode(SRAG_ZOPC);
+ ins_encode(z_rsyform_const(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Shift right logical
+
+// Register Shift Right
+instruct srlI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (URShiftI dst src)); // Two-operand form: dst = dst >>> src, where src is the variable shift count.
+ effect(KILL cr); // R1 is killed, too.
+ ins_cost(3 * DEFAULT_COST);
+ size(12);
+ format %{ "SRL $dst,[$src] & 31" %}
+ ins_encode %{
+ __ z_lgr(Z_R1_scratch, $src$$Register); // Work on a copy so the count register is not modified.
+ __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1); // Mask the count with BitsPerJavaInteger-1.
+ __ z_srl($dst$$Register, 0, Z_R1_scratch);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate
+// Constant shift count is masked in ideal graph already.
+instruct srlI_reg_imm(iRegI dst, immI src) %{
+ match(Set dst (URShiftI dst src)); // Two-operand form: dst = dst >>> constant; no flagsReg effect is declared.
+ size(4);
+ format %{ "SRL $dst,$src" %}
+ ins_encode %{
+ const int shift_count = $src$$constant & (BitsPerJavaInteger - 1); // Masked again here for the encoding.
+ __ z_srl($dst$$Register, shift_count, Z_R0);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Long
+instruct srlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+ match(Set dst (URShiftL src1 src2)); // dst = src1 >>> src2 on long values; the count register is passed unmasked to the encoding.
+ size(6);
+ format %{ "SRLG $dst,$src1,[$src2]" %}
+ opcode(SRLG_ZOPC);
+ ins_encode(z_rsyform_reg_reg(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Long Immediate
+instruct srlL_reg_imm(iRegL dst, iRegL src1, immI src2) %{
+ match(Set dst (URShiftL src1 src2)); // dst = src1 >>> constant on long values.
+ size(6);
+ format %{ "SRLG $dst,$src1,$src2" %}
+ opcode(SRLG_ZOPC);
+ ins_encode(z_rsyform_const(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate with a CastP2X
+instruct srlP_reg_imm(iRegL dst, iRegP_N2P src1, immI src2) %{
+ match(Set dst (URShiftL (CastP2X src1) src2)); // The pointer-to-long cast is folded in: the pointer register is shifted directly.
+ size(6);
+ format %{ "SRLG $dst,$src1,$src2\t # Cast ptr $src1 to long and shift" %}
+ opcode(SRLG_ZOPC);
+ ins_encode(z_rsyform_const(dst, src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Rotate Instructions------------------------------------------------
+
+// Rotate left 32bit.
+instruct rotlI_reg_immI8(iRegI dst, iRegI src, immI8 lshift, immI8 rshift) %{
+ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); // (src << lshift) | (src >>> rshift) on the same src.
+ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // Both shift counts must sum to a multiple of 32.
+ size(6);
+ format %{ "RLL $dst,$src,$lshift\t # ROTL32" %}
+ opcode(RLL_ZOPC);
+ ins_encode(z_rsyform_const(dst, src, lshift)); // The left-shift count is the rotate amount.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate left 64bit.
+instruct rotlL_reg_immI8(iRegL dst, iRegL src, immI8 lshift, immI8 rshift) %{
+ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); // (src << lshift) | (src >>> rshift) on the same src.
+ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); // Both shift counts must sum to a multiple of 64.
+ size(6);
+ format %{ "RLLG $dst,$src,$lshift\t # ROTL64" %}
+ opcode(RLLG_ZOPC);
+ ins_encode(z_rsyform_const(dst, src, lshift)); // The left-shift count is the rotate amount.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate right 32bit.
+instruct rotrI_reg_immI8(iRegI dst, iRegI src, immI8 rshift, immI8 lshift) %{
+ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); // (src >>> rshift) | (src << lshift) on the same src.
+ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // Both shift counts must sum to a multiple of 32.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "RLL $dst,$src,$lshift\t # ROTR32" %}
+ opcode(RLL_ZOPC); // RLL rotates LEFT, so a right rotation by rshift must be emitted as a left rotation by lshift.
+ ins_encode(z_rsyform_const(dst, src, lshift));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate right 64bit.
+instruct rotrL_reg_immI8(iRegL dst, iRegL src, immI8 rshift, immI8 lshift) %{
+ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); // (src >>> rshift) | (src << lshift) on the same src.
+ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); // Both shift counts must sum to a multiple of 64.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "RLLG $dst,$src,$lshift\t # ROTR64" %}
+ opcode(RLLG_ZOPC); // RLLG rotates LEFT, so a right rotation by rshift must be emitted as a left rotation by lshift.
+ ins_encode(z_rsyform_const(dst, src, lshift));
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Overflow Math Instructions-----------------------------------------
+
+instruct overflowAddI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+ match(Set cr (OverflowAddI op1 op2)); // Only the condition code is produced; there is no register result.
+ effect(DEF cr, USE op1, USE op2); // Both inputs are read-only.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AR $op1,$op2\t # overflow check int" %}
+ ins_encode %{
+ __ z_lr(Z_R0_scratch, $op1$$Register); // Do the add in the scratch register so op1 stays intact.
+ __ z_ar(Z_R0_scratch, $op2$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowAddI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+ match(Set cr (OverflowAddI op1 op2)); // Only the condition code is produced; there is no register result.
+ effect(DEF cr, USE op1, USE op2);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "AR $op1,$op2\t # overflow check int" %}
+ ins_encode %{
+ __ load_const_optimized(Z_R0_scratch, $op2$$constant); // The constant goes into the scratch register ...
+ __ z_ar(Z_R0_scratch, $op1$$Register); // ... and op1 is added to it, so op1 stays intact.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowAddL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+ match(Set cr (OverflowAddL op1 op2)); // Only the condition code is produced; there is no register result.
+ effect(DEF cr, USE op1, USE op2); // Both inputs are read-only.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "AGR $op1,$op2\t # overflow check long" %}
+ ins_encode %{
+ __ z_lgr(Z_R0_scratch, $op1$$Register); // Do the add in the scratch register so op1 stays intact.
+ __ z_agr(Z_R0_scratch, $op2$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowAddL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
+ match(Set cr (OverflowAddL op1 op2)); // Only the condition code is produced; there is no register result.
+ effect(DEF cr, USE op1, USE op2);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "AGR $op1,$op2\t # overflow check long" %}
+ ins_encode %{
+ __ load_const_optimized(Z_R0_scratch, $op2$$constant); // The constant goes into the scratch register ...
+ __ z_agr(Z_R0_scratch, $op1$$Register); // ... and op1 is added to it, so op1 stays intact.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowSubI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+ match(Set cr (OverflowSubI op1 op2)); // Only the condition code is produced; there is no register result.
+ effect(DEF cr, USE op1, USE op2); // Both inputs are read-only.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "SR $op1,$op2\t # overflow check int" %}
+ ins_encode %{
+ __ z_lr(Z_R0_scratch, $op1$$Register); // Do the subtract in the scratch register so op1 stays intact.
+ __ z_sr(Z_R0_scratch, $op2$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowSubI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+ match(Set cr (OverflowSubI op1 op2)); // Only the condition code is produced; there is no register result.
+ effect(DEF cr, USE op1, USE op2);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "SR $op1,$op2\t # overflow check int" %}
+ ins_encode %{
+ __ load_const_optimized(Z_R1_scratch, $op2$$constant); // Subtraction is not commutative, so the constant needs its own scratch register.
+ __ z_lr(Z_R0_scratch, $op1$$Register);
+ __ z_sr(Z_R0_scratch, Z_R1_scratch);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowSubL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+ match(Set cr (OverflowSubL op1 op2)); // Only the condition code is produced; there is no register result.
+ effect(DEF cr, USE op1, USE op2); // Both inputs are read-only.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "SGR $op1,$op2\t # overflow check long" %}
+ ins_encode %{
+ __ z_lgr(Z_R0_scratch, $op1$$Register); // Do the subtract in the scratch register so op1 stays intact.
+ __ z_sgr(Z_R0_scratch, $op2$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
+ match(Set cr (OverflowSubL op1 op2)); // Only the condition code is produced; there is no register result.
+ effect(DEF cr, USE op1, USE op2);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "SGR $op1,$op2\t # overflow check long" %}
+ ins_encode %{
+ __ load_const_optimized(Z_R1_scratch, $op2$$constant); // Subtraction is not commutative, so the constant needs its own scratch register.
+ __ z_lgr(Z_R0_scratch, $op1$$Register);
+ __ z_sgr(Z_R0_scratch, Z_R1_scratch);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
+ match(Set cr (OverflowSubI zero op2)); // Overflow check for 0 - op2, i.e. int negation.
+ effect(DEF cr, USE op2);
+ format %{ "NEG $op2\t# overflow check int" %}
+ ins_encode %{
+ __ clear_reg(Z_R0_scratch, false, false); // Scratch register holds the zero left operand.
+ __ z_sr(Z_R0_scratch, $op2$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
+ match(Set cr (OverflowSubL zero op2)); // Overflow check for 0 - op2, i.e. long negation.
+ effect(DEF cr, USE op2);
+ format %{ "NEGG $op2\t# overflow check long" %}
+ ins_encode %{
+ __ clear_reg(Z_R0_scratch, true, false); // Scratch register holds the zero left operand.
+ __ z_sgr(Z_R0_scratch, $op2$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// No intrinsics for multiplication, since there is no easy way
+// to check for overflow.
+
+
+//----------Floating Point Arithmetic Instructions-----------------------------
+
+// ADD
+
+// Add float single precision
+instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
+ match(Set dst (AddF dst src)); // Two-operand float add: dst = dst + src.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "AEBR $dst,$src" %}
+ opcode(AEBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addF_reg_mem(regF dst, memoryRX src, flagsReg cr)%{
+ match(Set dst (AddF dst (LoadF src))); // dst = dst + [src]; the float load is folded into the add.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "AEB $dst,$src\t # floatMemory" %}
+ opcode(AEB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Add float double precision
+instruct addD_reg_reg(regD dst, regD src, flagsReg cr) %{
+ match(Set dst (AddD dst src)); // Two-operand double add: dst = dst + src.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "ADBR $dst,$src" %}
+ opcode(ADBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct addD_reg_mem(regD dst, memoryRX src, flagsReg cr)%{
+ match(Set dst (AddD dst (LoadD src))); // dst = dst + [src]; the double load is folded into the add.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "ADB $dst,$src\t # doubleMemory" %}
+ opcode(ADB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// SUB
+
+// Sub float single precision
+instruct subF_reg_reg(regF dst, regF src, flagsReg cr) %{
+ match(Set dst (SubF dst src)); // Two-operand float subtract: dst = dst - src.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "SEBR $dst,$src" %}
+ opcode(SEBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct subF_reg_mem(regF dst, memoryRX src, flagsReg cr)%{
+ match(Set dst (SubF dst (LoadF src))); // dst = dst - [src]; the float load is folded into the subtract.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "SEB $dst,$src\t # floatMemory" %}
+ opcode(SEB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Sub float double precision
+instruct subD_reg_reg(regD dst, regD src, flagsReg cr) %{
+ match(Set dst (SubD dst src)); // Two-operand double subtract: dst = dst - src.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "SDBR $dst,$src" %}
+ opcode(SDBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct subD_reg_mem(regD dst, memoryRX src, flagsReg cr)%{
+ match(Set dst (SubD dst (LoadD src))); // dst = dst - [src]; the double load is folded into the subtract.
+ effect(KILL cr); // cr is declared clobbered.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "SDB $dst,$src\t # doubleMemory" %}
+ opcode(SDB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MUL
+
+// Mul float single precision
+instruct mulF_reg_reg(regF dst, regF src) %{
+ match(Set dst (MulF dst src)); // Two-operand float multiply: dst = dst * src.
+ // CC unchanged by MUL, hence no flagsReg operand or KILL effect.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "MEEBR $dst,$src" %}
+ opcode(MEEBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulF_reg_mem(regF dst, memoryRX src)%{
+ match(Set dst (MulF dst (LoadF src))); // dst = dst * [src]; the float load is folded into the multiply.
+ // CC unchanged by MUL, hence no flagsReg operand or KILL effect.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "MEEB $dst,$src\t # floatMemory" %}
+ opcode(MEEB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Mul float double precision
+instruct mulD_reg_reg(regD dst, regD src) %{
+ match(Set dst (MulD dst src)); // Two-operand double multiply: dst = dst * src.
+ // CC unchanged by MUL, hence no flagsReg operand or KILL effect.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "MDBR $dst,$src" %}
+ opcode(MDBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct mulD_reg_mem(regD dst, memoryRX src)%{
+ match(Set dst (MulD dst (LoadD src))); // dst = dst * [src]; the double load is folded into the multiply.
+ // CC unchanged by MUL, hence no flagsReg operand or KILL effect.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "MDB $dst,$src\t # doubleMemory" %}
+ opcode(MDB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// DIV
+
+// Div float single precision
+instruct divF_reg_reg(regF dst, regF src) %{
+ match(Set dst (DivF dst src)); // Two-operand float divide: dst = dst / src.
+ // CC unchanged by DIV, hence no flagsReg operand or KILL effect.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "DEBR $dst,$src" %}
+ opcode(DEBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct divF_reg_mem(regF dst, memoryRX src)%{
+ match(Set dst (DivF dst (LoadF src))); // dst = dst / [src]; the float load is folded into the divide.
+ // CC unchanged by DIV, hence no flagsReg operand or KILL effect.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "DEB $dst,$src\t # floatMemory" %}
+ opcode(DEB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Div float double precision
+instruct divD_reg_reg(regD dst, regD src) %{
+ match(Set dst (DivD dst src)); // Two-operand double divide: dst = dst / src.
+ // CC unchanged by DIV, hence no flagsReg operand or KILL effect.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "DDBR $dst,$src" %}
+ opcode(DDBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct divD_reg_mem(regD dst, memoryRX src)%{
+ match(Set dst (DivD dst (LoadD src))); // dst = dst / [src]; the double load is folded into the divide.
+ // CC unchanged by DIV, hence no flagsReg operand or KILL effect.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "DDB $dst,$src\t # doubleMemory" %}
+ opcode(DDB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ABS
+
+// Absolute float single precision
+instruct absF_reg(regF dst, regF src, flagsReg cr) %{
+ match(Set dst (AbsF src)); // dst = |src| (float); dst and src may be different registers.
+ effect(KILL cr); // cr is declared clobbered.
+ size(4);
+ format %{ "LPEBR $dst,$src\t float" %}
+ opcode(LPEBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Absolute float double precision
+instruct absD_reg(regD dst, regD src, flagsReg cr) %{
+ match(Set dst (AbsD src)); // dst = |src| (double); dst and src may be different registers.
+ effect(KILL cr); // cr is declared clobbered.
+ size(4);
+ format %{ "LPDBR $dst,$src\t double" %}
+ opcode(LPDBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// NEG(ABS)
+
+// Negative absolute float single precision
+instruct nabsF_reg(regF dst, regF src, flagsReg cr) %{
+ match(Set dst (NegF (AbsF src))); // dst = -|src| (float); negate and abs are fused into one instruction.
+ effect(KILL cr); // cr is declared clobbered.
+ size(4);
+ format %{ "LNEBR $dst,$src\t float" %}
+ opcode(LNEBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Negative absolute float double precision
+// Folds the two-node tree NegD(AbsD(src)) into a single 4-byte LNDBR.
+instruct nabsD_reg(regD dst, regD src, flagsReg cr) %{
+ match(Set dst (NegD (AbsD src)));
+ effect(KILL cr);
+ size(4);
+ format %{ "LNDBR $dst,$src\t double" %}
+ opcode(LNDBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// NEG
+
+// Negate float single precision: dst = NegF(src), emitted through the
+// assembler call z_lcebr (4 bytes). The condition code is declared killed.
+instruct negF_reg(regF dst, regF src, flagsReg cr) %{
+ match(Set dst (NegF src));
+ effect(KILL cr);
+ size(4);
+ format %{ "NegF $dst,$src\t float" %}
+ ins_encode %{ __ z_lcebr($dst$$FloatRegister, $src$$FloatRegister); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Negate float double precision: dst = NegD(src), emitted through the
+// assembler call z_lcdbr (4 bytes). The condition code is declared killed.
+instruct negD_reg(regD dst, regD src, flagsReg cr) %{
+ match(Set dst (NegD src));
+ effect(KILL cr);
+ size(4);
+ format %{ "NegD $dst,$src\t double" %}
+ ins_encode %{ __ z_lcdbr($dst$$FloatRegister, $src$$FloatRegister); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// SQRT
+
+// Sqrt float precision
+// Matches the widen/sqrt/narrow tree ConvD2F(SqrtD(ConvF2D(src))) and replaces
+// it with one single-precision SQEBR (4 bytes).
+instruct sqrtF_reg(regF dst, regF src) %{
+ match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+ // CC remains unchanged.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "SQEBR $dst,$src" %}
+ opcode(SQEBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Sqrt double precision
+// dst = SqrtD(src), register to register, as one 4-byte SQDBR.
+instruct sqrtD_reg(regD dst, regD src) %{
+ match(Set dst (SqrtD src));
+ // CC remains unchanged.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "SQDBR $dst,$src" %}
+ opcode(SQDBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Sqrt float precision with the operand folded in from memory.
+// The memory operand has to appear below a LoadF in the match tree (as in
+// divF_reg_mem); matching it directly as the float input of ConvF2D would
+// never select this rule for a value that lives in memory.
+instruct sqrtF_mem(regF dst, memoryRX src) %{
+ match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
+ // CC remains unchanged.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "SQEB $dst,$src\t # floatMemory" %}
+ opcode(SQEB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Sqrt double precision with the operand folded in from memory.
+// The memory operand has to appear below a LoadD in the match tree (as in
+// divD_reg_mem); matching it directly as the double input of SqrtD would
+// never select this rule for a value that lives in memory.
+instruct sqrtD_mem(regD dst, memoryRX src) %{
+ match(Set dst (SqrtD (LoadD src)));
+ // CC remains unchanged.
+ ins_cost(ALU_MEMORY_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "SQDB $dst,$src\t # doubleMemory" %}
+ opcode(SQDB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Logical Instructions-----------------------------------------------
+
+// Register And
+// dst = dst & src for AndI, as one 2-byte NR. The condition code is declared killed.
+instruct andI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (AndI dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST_LOW);
+ size(2);
+ format %{ "NR $dst,$src\t # int" %}
+ opcode(NR_ZOPC);
+ ins_encode(z_rrform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// dst = dst & LoadI(src). Two opcodes (NY and N) are handed to the encoding
+// class; presumably z_form_rt_mem_opt picks between them depending on the
+// displacement, which is why no fixed size is declared - confirm in the enc_class.
+instruct andI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+ match(Set dst (AndI dst (LoadI src)));
+ effect(KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "N(Y) $dst, $src\t # int" %}
+ opcode(NY_ZOPC, N_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate And
+// dst = dst & src for a uimmI constant, as one 6-byte NILF. This is the most
+// expensive AndI immediate form (DEFAULT_COST_HIGH), so the 4-byte NILH/NILL
+// rules are preferred whenever their operand classes match.
+instruct andI_reg_uimm32(iRegI dst, uimmI src, flagsReg cr) %{
+ match(Set dst (AndI dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST_HIGH);
+ size(6);
+ format %{ "NILF $dst,$src" %}
+ opcode(NILF_ZOPC);
+ ins_encode(z_rilform_unsigned(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// AndI with a constant of operand class uimmI_LH1, as one 4-byte NILH.
+// Only bits 16..31 of the constant are passed to the instruction; presumably
+// the operand class guarantees that all other bits are ones - confirm at the
+// operand definition.
+instruct andI_reg_uimmI_LH1(iRegI dst, uimmI_LH1 src, flagsReg cr) %{
+ match(Set dst (AndI dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "NILH $dst,$src" %}
+ ins_encode %{ __ z_nilh($dst$$Register, ($src$$constant >> 16) & 0xFFFF); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// AndI with a constant of operand class uimmI_LL1, as one 4-byte NILL.
+// Only bits 0..15 of the constant are passed to the instruction; presumably
+// the operand class guarantees that all other bits are ones - confirm at the
+// operand definition.
+instruct andI_reg_uimmI_LL1(iRegI dst, uimmI_LL1 src, flagsReg cr) %{
+ match(Set dst (AndI dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "NILL $dst,$src" %}
+ ins_encode %{ __ z_nill($dst$$Register, $src$$constant & 0xFFFF); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register And Long
+// dst = dst & src for AndL, as one 4-byte NGR. The condition code is declared killed.
+instruct andL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+ match(Set dst (AndL dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "NGR $dst,$src\t # long" %}
+ opcode(NGR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// dst = dst & LoadL(src). The same opcode (NG) is passed for both slots of
+// the encoding class, so the size is fixed at Z_DISP3_SIZE.
+instruct andL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+ match(Set dst (AndL dst (LoadL src)));
+ effect(KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "NG $dst, $src\t # long" %}
+ opcode(NG_ZOPC, NG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// AndL with a constant of operand class uimmL_LL1, as one 4-byte NILL.
+// Bits 0..15 of the constant are passed to the instruction; presumably the
+// operand class guarantees that all remaining bits are ones - confirm there.
+instruct andL_reg_uimmL_LL1(iRegL dst, uimmL_LL1 src, flagsReg cr) %{
+ match(Set dst (AndL dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "NILL $dst,$src\t # long" %}
+ ins_encode %{ __ z_nill($dst$$Register, $src$$constant & 0xFFFF); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// AndL with a constant of operand class uimmL_LH1, as one 4-byte NILH.
+// Bits 16..31 of the constant are passed to the instruction; presumably the
+// operand class guarantees that all remaining bits are ones - confirm there.
+instruct andL_reg_uimmL_LH1(iRegL dst, uimmL_LH1 src, flagsReg cr) %{
+ match(Set dst (AndL dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "NILH $dst,$src\t # long" %}
+ ins_encode %{ __ z_nilh($dst$$Register, ($src$$constant >> 16) & 0xFFFF); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// AndL with a constant of operand class uimmL_HL1, as one 4-byte NIHL.
+// Bits 32..47 of the constant are passed to the instruction; presumably the
+// operand class guarantees that all remaining bits are ones - confirm there.
+instruct andL_reg_uimmL_HL1(iRegL dst, uimmL_HL1 src, flagsReg cr) %{
+ match(Set dst (AndL dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "NIHL $dst,$src\t # long" %}
+ ins_encode %{ __ z_nihl($dst$$Register, ($src$$constant >> 32) & 0xFFFF); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// AndL with a constant of operand class uimmL_HH1, as one 4-byte NIHH.
+// Bits 48..63 of the constant are passed to the instruction; presumably the
+// operand class guarantees that all remaining bits are ones - confirm there.
+instruct andL_reg_uimmL_HH1(iRegL dst, uimmL_HH1 src, flagsReg cr) %{
+ match(Set dst (AndL dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "NIHH $dst,$src\t # long" %}
+ ins_encode %{ __ z_nihh($dst$$Register, ($src$$constant >> 48) & 0xFFFF); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// OR
+
+// Or Instructions
+// Register Or
+// dst = dst | src for OrI, as one 2-byte OR. The condition code is declared killed.
+// No ins_cost is given, so the ADL default cost applies.
+instruct orI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (OrI dst src));
+ effect(KILL cr);
+ size(2);
+ format %{ "OR $dst,$src" %}
+ opcode(OR_ZOPC);
+ ins_encode(z_rrform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// dst = dst | LoadI(src). Two opcodes (OY and O) are handed to the encoding
+// class; presumably z_form_rt_mem_opt picks between them depending on the
+// displacement, which is why no fixed size is declared - confirm in the enc_class.
+instruct orI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+ match(Set dst (OrI dst (LoadI src)));
+ effect(KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "O(Y) $dst, $src\t # int" %}
+ opcode(OY_ZOPC, O_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Or
+// dst = dst | con for a uimmI16 constant, as one 4-byte OILL.
+instruct orI_reg_uimm16(iRegI dst, uimmI16 con, flagsReg cr) %{
+ match(Set dst (OrI dst con));
+ effect(KILL cr);
+ size(4);
+ format %{ "OILL $dst,$con" %}
+ opcode(OILL_ZOPC);
+ ins_encode(z_riform_unsigned(dst,con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// dst = dst | con for a uimmI constant, as one 6-byte OILF. Costed higher
+// (DEFAULT_COST_HIGH) than the 16-bit OILL form above.
+instruct orI_reg_uimm32(iRegI dst, uimmI con, flagsReg cr) %{
+ match(Set dst (OrI dst con));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST_HIGH);
+ size(6);
+ format %{ "OILF $dst,$con" %}
+ opcode(OILF_ZOPC);
+ ins_encode(z_rilform_unsigned(dst,con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Or Long
+// dst = dst | src for OrL, as one 4-byte OGR. The condition code is declared killed.
+instruct orL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+ match(Set dst (OrL dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "OGR $dst,$src\t # long" %}
+ opcode(OGR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// dst = dst | LoadL(src). The same opcode (OG) is passed for both slots of
+// the encoding class, so the size is fixed at Z_DISP3_SIZE.
+instruct orL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+ match(Set dst (OrL dst (LoadL src)));
+ effect(KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "OG $dst, $src\t # long" %}
+ opcode(OG_ZOPC, OG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Or long
+// dst = dst | con for a uimmL16 constant, as one 4-byte OILL.
+instruct orL_reg_uimm16(iRegL dst, uimmL16 con, flagsReg cr) %{
+ match(Set dst (OrL dst con));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "OILL $dst,$con\t # long" %}
+ opcode(OILL_ZOPC);
+ ins_encode(z_riform_unsigned(dst,con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// dst = dst | con for a long constant of operand class uimmL32, as one
+// 6-byte OILF. The rule operates on a long register and matches OrL, in line
+// with its long immediate operand and with xorL_reg_uimm32; declared as
+// iRegI/OrI it could never cover a long OR.
+instruct orL_reg_uimm32(iRegL dst, uimmL32 con, flagsReg cr) %{
+ match(Set dst (OrL dst con));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST_HIGH);
+ size(6);
+ format %{ "OILF $dst,$con\t # long" %}
+ opcode(OILF_ZOPC);
+ ins_encode(z_rilform_unsigned(dst,con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// XOR
+
+// Register Xor
+// dst = dst ^ src for XorI, as one 2-byte XR. The condition code is declared killed.
+// No ins_cost is given, so the ADL default cost applies.
+instruct xorI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (XorI dst src));
+ effect(KILL cr);
+ size(2);
+ format %{ "XR $dst,$src" %}
+ opcode(XR_ZOPC);
+ ins_encode(z_rrform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// dst = dst ^ LoadI(src). Two opcodes (XY and X) are handed to the encoding
+// class; presumably z_form_rt_mem_opt picks between them depending on the
+// displacement, which is why no fixed size is declared - confirm in the enc_class.
+instruct xorI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+ match(Set dst (XorI dst (LoadI src)));
+ effect(KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "X(Y) $dst, $src\t # int" %}
+ opcode(XY_ZOPC, X_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Xor
+// dst = dst ^ src for a uimmI constant, as one 6-byte XILF.
+instruct xorI_reg_uimm32(iRegI dst, uimmI src, flagsReg cr) %{
+ match(Set dst (XorI dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST_HIGH);
+ size(6);
+ format %{ "XILF $dst,$src" %}
+ opcode(XILF_ZOPC);
+ ins_encode(z_rilform_unsigned(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Xor Long
+// dst = dst ^ src for XorL, as one 4-byte XGR. The condition code is declared killed.
+instruct xorL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+ match(Set dst (XorL dst src));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "XGR $dst,$src\t # long" %}
+ opcode(XGR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// dst = dst ^ LoadL(src). The same opcode (XG) is passed for both slots of
+// the encoding class, so the size is fixed at Z_DISP3_SIZE.
+instruct xorL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+ match(Set dst (XorL dst (LoadL src)));
+ effect(KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "XG $dst, $src\t # long" %}
+ opcode(XG_ZOPC, XG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Xor Long
+// dst = dst ^ con for a long constant of operand class uimmL32, as one 6-byte XILF.
+instruct xorL_reg_uimm32(iRegL dst, uimmL32 con, flagsReg cr) %{
+ match(Set dst (XorL dst con));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST_HIGH);
+ size(6);
+ format %{ "XILF $dst,$con\t # long" %}
+ opcode(XILF_ZOPC);
+ ins_encode(z_rilform_unsigned(dst,con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Convert to Boolean-------------------------------------------------
+
+// Convert integer to boolean.
+// Conv2B of an int: the result is 0 for a zero input and 1 otherwise,
+// computed branch-free in two instructions (2 + 4 bytes).
+instruct convI2B(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (Conv2B src));
+ effect(KILL cr);
+ ins_cost(3 * DEFAULT_COST);
+ size(6);
+ format %{ "convI2B $dst,$src" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+
+ // Rdst := -|Rsrc|. This is zero iff Rsrc is zero; otherwise the sign bit is set.
+ __ z_lnr(Rdst, Rsrc);
+ // Logical shift moves the sign bit down to bit 0, leaving 0 or 1.
+ __ z_srl(Rdst, 31);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Convert pointer to boolean.
+// Conv2B of a pointer: the result is 0 for a zero (null) input and 1
+// otherwise, computed branch-free with the 64-bit instruction forms (4 + 6 bytes).
+instruct convP2B(iRegI dst, iRegP_N2P src, flagsReg cr) %{
+ match(Set dst (Conv2B src));
+ effect(KILL cr);
+ ins_cost(3 * DEFAULT_COST);
+ size(10);
+ format %{ "convP2B $dst,$src" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+
+ // Rdst := -|Rsrc|. This is zero iff Rsrc is zero; otherwise the sign bit is set.
+ __ z_lngr(Rdst, Rsrc);
+ // Logical shift moves the 64-bit sign bit down to bit 0, leaving 0 or 1.
+ __ z_srlg(Rdst, Rdst, 63);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// CmpLTMask: dst becomes all ones if dst < src (signed), else zero.
+// Uses Z_R0_scratch as a temporary. Emits 4 + 4 + 4 + 6 = 18 bytes.
+instruct cmpLTMask_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (CmpLTMask dst src));
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST);
+ size(18);
+ format %{ "Set $dst CmpLTMask $dst,$src" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+
+ // Sign-extend both 32-bit operands and subtract in 64 bit, so the
+ // difference cannot overflow.
+ __ z_lgfr(Z_R0_scratch, Rsrc);
+ __ z_lgfr(Rdst, Rdst);
+ __ z_sgr(Rdst, Z_R0_scratch);
+ // Arithmetic shift replicates the sign of (dst - src) into every bit.
+ __ z_srag(Rdst, Rdst, 63);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// CmpLTMask against the constant zero: an arithmetic right shift by 31
+// replicates the sign bit of dst, giving all ones for dst < 0 and zero otherwise.
+instruct cmpLTMask_reg_zero(iRegI dst, immI_0 zero, flagsReg cr) %{
+ match(Set dst (CmpLTMask dst zero));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "Set $dst CmpLTMask $dst,$zero" %}
+ ins_encode %{ __ z_sra($dst$$Register, 31); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Arithmetic Conversion Instructions---------------------------------
+// The conversions operations are all Alpha sorted. Please keep it that way!
+
+// Narrow double to float: dst = ConvD2F(src), as one 4-byte LEDBR.
+instruct convD2F_reg(regF dst, regD src) %{
+ match(Set dst (ConvD2F src));
+ // CC remains unchanged.
+ size(4);
+ format %{ "LEDBR $dst,$src" %}
+ opcode(LEDBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Convert float to int, rounding toward zero. An unordered (NaN) input
+// produces 0, which is preloaded before the conversion is attempted.
+instruct convF2I_reg(iRegI dst, regF src, flagsReg cr) %{
+ match(Set dst (ConvF2I src));
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ size(16);
+ format %{ "convF2I $dst,$src" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ FloatRegister Fsrc = $src$$FloatRegister;
+ Label Lunordered;
+
+ // Preload the result for the unordered case: 0.
+ __ clear_reg(Rdst, false, false);
+ // Comparing the source with itself is unordered only for a NaN.
+ __ z_cebr(Fsrc, Fsrc);
+ // Skip the conversion and keep the preloaded zero if unordered.
+ __ z_brno(Lunordered);
+ __ z_cfebr(Rdst, Fsrc, Assembler::to_zero);
+ __ bind(Lunordered);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Convert double to int, rounding toward zero. An unordered (NaN) input
+// produces 0, which is preloaded before the conversion is attempted.
+instruct convD2I_reg(iRegI dst, regD src, flagsReg cr) %{
+ match(Set dst (ConvD2I src));
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ size(16);
+ format %{ "convD2I $dst,$src" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ FloatRegister Fsrc = $src$$FloatRegister;
+ Label Lunordered;
+
+ // Preload the result for the unordered case: 0.
+ __ clear_reg(Rdst, false, false);
+ // Comparing the source with itself is unordered only for a NaN.
+ __ z_cdbr(Fsrc, Fsrc);
+ // Skip the conversion and keep the preloaded zero if unordered.
+ __ z_brno(Lunordered);
+ __ z_cfdbr(Rdst, Fsrc, Assembler::to_zero);
+ __ bind(Lunordered);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Convert float to long, rounding toward zero. An unordered (NaN) input
+// produces 0, which is preloaded before the conversion is attempted.
+instruct convF2L_reg(iRegL dst, regF src, flagsReg cr) %{
+ match(Set dst (ConvF2L src));
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ size(16);
+ format %{ "convF2L $dst,$src" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ FloatRegister Fsrc = $src$$FloatRegister;
+ Label Lunordered;
+
+ // Preload the result for the unordered case: 0.
+ __ clear_reg(Rdst, true, false);
+ // Comparing the source with itself is unordered only for a NaN.
+ __ z_cebr(Fsrc, Fsrc);
+ // Skip the conversion and keep the preloaded zero if unordered.
+ __ z_brno(Lunordered);
+ __ z_cgebr(Rdst, Fsrc, Assembler::to_zero);
+ __ bind(Lunordered);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Convert double to long, rounding toward zero. An unordered (NaN) input
+// produces 0, which is preloaded before the conversion is attempted.
+instruct convD2L_reg(iRegL dst, regD src, flagsReg cr) %{
+ match(Set dst (ConvD2L src));
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ size(16);
+ format %{ "convD2L $dst,$src" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ FloatRegister Fsrc = $src$$FloatRegister;
+ Label Lunordered;
+
+ // Preload the result for the unordered case: 0.
+ __ clear_reg(Rdst, true, false);
+ // Comparing the source with itself is unordered only for a NaN.
+ __ z_cdbr(Fsrc, Fsrc);
+ // Skip the conversion and keep the preloaded zero if unordered.
+ __ z_brno(Lunordered);
+ __ z_cgdbr(Rdst, Fsrc, Assembler::to_zero);
+ __ bind(Lunordered);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Widen float to double: dst = ConvF2D(src), as one 4-byte LDEBR.
+instruct convF2D_reg(regD dst, regF src) %{
+ match(Set dst (ConvF2D src));
+ // CC remains unchanged.
+ size(4);
+ format %{ "LDEBR $dst,$src" %}
+ opcode(LDEBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Widen float to double with the float operand folded in from memory.
+// The memory operand has to appear below a LoadF in the match tree (as in
+// divF_reg_mem); matching it directly as the float input of ConvF2D would
+// never select this rule for a value that lives in memory.
+instruct convF2D_mem(regD dst, memoryRX src) %{
+ match(Set dst (ConvF2D (LoadF src)));
+ // CC remains unchanged.
+ size(6);
+ format %{ "LDEB $dst,$src" %}
+ opcode(LDEB_ZOPC);
+ ins_encode(z_form_rt_memFP(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Convert int to double: dst = ConvI2D(src), as one 4-byte CDFBR.
+instruct convI2D_reg(regD dst, iRegI src) %{
+ match(Set dst (ConvI2D src));
+ // CC remains unchanged.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "CDFBR $dst,$src" %}
+ opcode(CDFBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Optimization that saves up to two memory operations for each conversion.
+// Convert int to float directly from an integer register:
+// dst = ConvI2F(src), as one 4-byte CEFBR.
+instruct convI2F_ireg(regF dst, iRegI src) %{
+ match(Set dst (ConvI2F src));
+ // CC remains unchanged.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "CEFBR $dst,$src\t # convert int to float" %}
+ opcode(CEFBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Sign-extending int to long conversion: dst = ConvI2L(src), as one 4-byte LGFR.
+// No flags operand is declared for this rule.
+instruct convI2L_reg(iRegL dst, iRegI src) %{
+ match(Set dst (ConvI2L src));
+ size(4);
+ format %{ "LGFR $dst,$src\t # int->long" %}
+ opcode(LGFR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend convert int to long.
+// Folds AndL(ConvI2L(src), mask) into a single 4-byte LLGFR. The mask operand
+// class immL_32bits presumably only admits 0xFFFFFFFF - confirm at the operand.
+instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask) %{
+ match(Set dst (AndL (ConvI2L src) mask));
+ size(4);
+ format %{ "LLGFR $dst, $src \t # zero-extend int to long" %}
+ ins_encode %{ __ z_llgfr($dst$$Register, $src$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend convert int to long.
+// Memory form: folds AndL(ConvI2L(LoadI(src)), mask) into a single LLGF.
+// The mask operand class immL_32bits presumably only admits 0xFFFFFFFF -
+// confirm at the operand.
+instruct convI2L_mem_zex(iRegL dst, memory src, immL_32bits mask) %{
+ match(Set dst (AndL (ConvI2L (LoadI src)) mask));
+ // Uses load_const_optimized, so size can vary.
+ // NOTE(review): the encoding below is z_form_rt_mem_opt; confirm that the
+ // size note above still describes it.
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "LLGF $dst, $src \t # zero-extend int to long" %}
+ opcode(LLGF_ZOPC, LLGF_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend long
+// AndL(src, mask) on a long source as a single 4-byte LLGFR instead of an AND
+// with a 64-bit constant. The mask operand class immL_32bits presumably only
+// admits 0xFFFFFFFF - confirm at the operand.
+instruct zeroExtend_long(iRegL dst, iRegL src, immL_32bits mask) %{
+ match(Set dst (AndL src mask));
+ size(4);
+ format %{ "LLGFR $dst, $src \t # zero-extend long to long" %}
+ ins_encode %{ __ z_llgfr($dst$$Register, $src$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Short-to-int sign extension expressed as a shift pair:
+// RShiftI(LShiftI(src, amount), amount) becomes a single 4-byte LHR.
+// The operand class immI_16 presumably fixes amount to 16 - confirm at the operand.
+instruct rShiftI16_lShiftI16_reg(iRegI dst, iRegI src, immI_16 amount) %{
+ match(Set dst (RShiftI (LShiftI src amount) amount));
+ size(4);
+ format %{ "LHR $dst,$src\t short->int" %}
+ opcode(LHR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Byte-to-int sign extension expressed as a shift pair:
+// RShiftI(LShiftI(src, amount), amount) becomes a single 4-byte LBR.
+// The operand class immI_24 presumably fixes amount to 24 - confirm at the operand.
+instruct rShiftI24_lShiftI24_reg(iRegI dst, iRegI src, immI_24 amount) %{
+ match(Set dst (RShiftI (LShiftI src amount) amount));
+ size(4);
+ format %{ "LBR $dst,$src\t byte->int" %}
+ opcode(LBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MoveF2I from a float stack slot into an int register: the slot is simply
+// read with an integer load (L, 4 bytes); no conversion instruction is emitted.
+instruct MoveF2I_stack_reg(iRegI dst, stackSlotF src) %{
+ match(Set dst (MoveF2I src));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "L $dst,$src\t # MoveF2I" %}
+ opcode(L_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// javax.imageio.stream.ImageInputStreamImpl.toFloats([B[FII)
+// MoveI2F from an int stack slot into a float register: the slot is simply
+// read with a float load (LE); no conversion instruction is emitted.
+// No fixed size is declared yet (see TODO).
+instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+ match(Set dst (MoveI2F src));
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LE $dst,$src\t # MoveI2F" %}
+ opcode(LE_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MoveD2L from a double stack slot into a long register: the slot is simply
+// read with a 64-bit integer load (LG, 6 bytes); no conversion instruction is emitted.
+// The format lists the operands as destination, source, matching the
+// encoding and the sibling Move*_stack_reg rules.
+instruct MoveD2L_stack_reg(iRegL dst, stackSlotD src) %{
+ match(Set dst (MoveD2L src));
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "LG $dst,$src\t # MoveD2L" %}
+ opcode(LG_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MoveL2D from a long stack slot into a double register: the slot is simply
+// read with a double load (LD, 4 bytes); no conversion instruction is emitted.
+instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+ match(Set dst (MoveL2D src));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "LD $dst,$src\t # MoveL2D" %}
+ opcode(LD_ZOPC);
+ ins_encode(z_form_rt_mem(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MoveI2F from an int register into a float stack slot: the register is
+// stored with an integer store (ST, 4 bytes). Note the swapped argument order
+// in the encoding: the register comes first, the stack slot second.
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+ match(Set dst (MoveI2F src));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "ST $src,$dst\t # MoveI2F" %}
+ opcode(ST_ZOPC);
+ ins_encode(z_form_rt_mem(src, dst));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MoveD2L from a double register into a long stack slot: the register is
+// stored with a double store (STD, 4 bytes). The encoding takes the register
+// first and the stack slot second.
+instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+ match(Set dst (MoveD2L src));
+ // Explicitly restates the def/use roles already implied by the match rule.
+ effect(DEF dst, USE src);
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "STD $src,$dst\t # MoveD2L" %}
+ opcode(STD_ZOPC);
+ ins_encode(z_form_rt_mem(src,dst));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// MoveL2D from a long register into a double stack slot: the register is
+// stored with a 64-bit integer store (STG, 6 bytes). The encoding takes the
+// register first and the stack slot second.
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+ match(Set dst (MoveL2D src));
+ ins_cost(MEMORY_REF_COST);
+ size(6);
+ format %{ "STG $src,$dst\t # MoveL2D" %}
+ opcode(STG_ZOPC);
+ ins_encode(z_form_rt_mem(src,dst));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Convert long to float: dst = ConvL2F(src), as one 4-byte CEGBR.
+instruct convL2F_reg(regF dst, iRegL src) %{
+ match(Set dst (ConvL2F src));
+ // CC remains unchanged.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "CEGBR $dst,$src" %}
+ opcode(CEGBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Convert long to double: dst = ConvL2D(src), as one 4-byte CDGBR.
+instruct convL2D_reg(regD dst, iRegL src) %{
+ match(Set dst (ConvL2D src));
+ // CC remains unchanged.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "CDGBR $dst,$src" %}
+ opcode(CDGBR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Narrow long to int: dst = ConvL2I(src). Emission is delegated to
+// lr_if_needed; per the format text the copy is only generated "if needed"
+// (presumably skipped when dst and src are the same register - confirm in the
+// macro assembler), hence no fixed size is declared.
+instruct convL2I_reg(iRegI dst, iRegL src) %{
+ match(Set dst (ConvL2I src));
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "LR $dst,$src\t # long->int (if needed)" %}
+ ins_encode %{ __ lr_if_needed($dst$$Register, $src$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate
+// Folds ConvL2I(RShiftL(src, cnt)) into a single 6-byte SRAG. The shift count
+// is restricted to operand class immI_32_63 (presumably the range 32..63 -
+// confirm at the operand). The condition code is declared killed.
+instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt, flagsReg cr) %{
+ match(Set dst (ConvL2I (RShiftL src cnt)));
+ effect(KILL cr);
+ size(6);
+ format %{ "SRAG $dst,$src,$cnt" %}
+ opcode(SRAG_ZOPC);
+ ins_encode(z_rsyform_const(dst, src, cnt));
+ ins_pipe(pipe_class_trap);
+%}
+
+//----------TRAP based zero checks and range checks----------------------------
+
+// SIGTRAP based implicit range checks in compiled code.
+// A range check in the ideal world has one of the following shapes:
+// - (If le (CmpU length index)), (IfTrue throw exception)
+// - (If lt (CmpU index length)), (IfFalse throw exception)
+//
+// Match range check 'If le (CmpU length index)'
+// Trap-based form with a register length and a 16-bit unsigned constant
+// index: compare and branch collapse into one compare-and-trap (z_clfit).
+// Selected only when TrapBasedRangeChecks is on, the Bool test is 'le', the
+// If's probability passes the PROB_UNLIKELY(..) >= PROB_ALWAYS filter, and
+// the branch leads to an uncommon trap.
+instruct rangeCheck_iReg_uimmI16(cmpOpT cmp, iRegI length, uimmI16 index, label labl) %{
+ match(If cmp (CmpU length index));
+ effect(USE labl);
+ predicate(TrapBasedRangeChecks &&
+ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
+ PROB_UNLIKELY(_leaf->as_If ()->_prob) >= PROB_ALWAYS &&
+ Matcher::branches_to_uncommon_trap(_leaf));
+ ins_cost(1);
+ // TODO: s390 port size(FIXED_SIZE);
+
+ // Marks the node as a trap-based check for the rest of the compiler.
+ ins_is_TrapBasedCheckNode(true);
+
+ format %{ "RangeCheck len=$length cmp=$cmp idx=$index => trap $labl" %}
+ ins_encode %{ __ z_clfit($length$$Register, $index$$constant, $cmp$$cmpcode); %}
+ ins_pipe(pipe_class_trap);
+%}
+
+// Match range check 'If lt (CmpU index length)'
+// Trap-based form with index and length both in registers: compare and
+// branch collapse into one compare-and-trap (z_clrt).
+// Selected only when TrapBasedRangeChecks is on, the Bool test is 'lt', the
+// If's probability is at least PROB_ALWAYS, and the branch leads to an
+// uncommon trap. Unlike the immediate forms, this rule also declares KILL cr.
+instruct rangeCheck_iReg_iReg(cmpOpT cmp, iRegI index, iRegI length, label labl, flagsReg cr) %{
+ match(If cmp (CmpU index length));
+ effect(USE labl, KILL cr);
+ predicate(TrapBasedRangeChecks &&
+ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
+ _leaf->as_If ()->_prob >= PROB_ALWAYS &&
+ Matcher::branches_to_uncommon_trap(_leaf));
+ ins_cost(1);
+ // TODO: s390 port size(FIXED_SIZE);
+
+ // Marks the node as a trap-based check for the rest of the compiler.
+ ins_is_TrapBasedCheckNode(true);
+
+ format %{ "RangeCheck idx=$index cmp=$cmp len=$length => trap $labl" %}
+ ins_encode %{ __ z_clrt($index$$Register, $length$$Register, $cmp$$cmpcode); %}
+ ins_pipe(pipe_class_trap);
+%}
+
+// Match range check 'If lt (CmpU index length)'
+// Trap-based form with a register index and a 16-bit unsigned constant
+// length: compare and branch collapse into one compare-and-trap (z_clfit).
+// Selected only when TrapBasedRangeChecks is on, the Bool test is 'lt', the
+// If's probability is at least PROB_ALWAYS, and the branch leads to an
+// uncommon trap.
+instruct rangeCheck_uimmI16_iReg(cmpOpT cmp, iRegI index, uimmI16 length, label labl) %{
+ match(If cmp (CmpU index length));
+ effect(USE labl);
+ predicate(TrapBasedRangeChecks &&
+ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
+ _leaf->as_If ()->_prob >= PROB_ALWAYS &&
+ Matcher::branches_to_uncommon_trap(_leaf));
+ ins_cost(1);
+ // TODO: s390 port size(FIXED_SIZE);
+
+ // Marks the node as a trap-based check for the rest of the compiler.
+ ins_is_TrapBasedCheckNode(true);
+
+ format %{ "RangeCheck idx=$index cmp=$cmp len= $length => trap $labl" %}
+ ins_encode %{ __ z_clfit($index$$Register, $length$$constant, $cmp$$cmpcode); %}
+ ins_pipe(pipe_class_trap);
+%}
+
+// Implicit zero checks (more implicit null checks).
+// Pointer form: 'If ne (CmpP value 0)' becomes one 6-byte 64-bit
+// compare-immediate-and-trap (z_cgit) against 0.
+// Selected only when TrapBasedNullChecks is on, the Bool test is 'ne', the
+// If's probability is at least PROB_LIKELY_MAG(4), and the branch leads to an
+// uncommon trap.
+instruct zeroCheckP_iReg_imm0(cmpOpT cmp, iRegP_N2P value, immP0 zero, label labl) %{
+ match(If cmp (CmpP value zero));
+ effect(USE labl);
+ predicate(TrapBasedNullChecks &&
+ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
+ _leaf->as_If ()->_prob >= PROB_LIKELY_MAG(4) &&
+ Matcher::branches_to_uncommon_trap(_leaf));
+ size(6);
+
+ // Marks the node as a trap-based check for the rest of the compiler.
+ ins_is_TrapBasedCheckNode(true);
+
+ format %{ "ZeroCheckP value=$value cmp=$cmp zero=$zero => trap $labl" %}
+ ins_encode %{ __ z_cgit($value$$Register, 0, $cmp$$cmpcode); %}
+ ins_pipe(pipe_class_trap);
+%}
+
+// Implicit zero checks (more implicit null checks).
+// Narrow-oop form: 'If ne (CmpN value 0)' becomes one 6-byte 32-bit
+// compare-immediate-and-trap (z_cit) against 0.
+// Selected only when TrapBasedNullChecks is on, the Bool test is 'ne', the
+// If's probability is at least PROB_LIKELY_MAG(4), and the branch leads to an
+// uncommon trap.
+instruct zeroCheckN_iReg_imm0(cmpOpT cmp, iRegN_P2N value, immN0 zero, label labl) %{
+ match(If cmp (CmpN value zero));
+ effect(USE labl);
+ predicate(TrapBasedNullChecks &&
+ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
+ _leaf->as_If ()->_prob >= PROB_LIKELY_MAG(4) &&
+ Matcher::branches_to_uncommon_trap(_leaf));
+ size(6);
+
+ // Marks the node as a trap-based check for the rest of the compiler.
+ ins_is_TrapBasedCheckNode(true);
+
+ format %{ "ZeroCheckN value=$value cmp=$cmp zero=$zero => trap $labl" %}
+ ins_encode %{ __ z_cit($value$$Register, 0, $cmp$$cmpcode); %}
+ ins_pipe(pipe_class_trap);
+%}
+
+//----------Compare instructions-----------------------------------------------
+
+// INT signed
+
+// Compare Integers
+// Signed int compare of two registers; the result is the condition code
+// (cr is the defined operand). One 2-byte CR.
+instruct compI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+ match(Set cr (CmpI op1 op2));
+ size(2);
+ format %{ "CR $op1,$op2" %}
+ opcode(CR_ZOPC);
+ ins_encode(z_rrform(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed int compare of a register with an arbitrary int constant, as one
+// 6-byte CFI. Sets the condition code.
+instruct compI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+ match(Set cr (CmpI op1 op2));
+ size(6);
+ format %{ "CFI $op1,$op2" %}
+ opcode(CFI_ZOPC);
+ ins_encode(z_rilform_signed(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed int compare of a register with a 16-bit signed constant (immI16),
+// as one 4-byte CHI. Sets the condition code.
+instruct compI_reg_imm16(flagsReg cr, iRegI op1, immI16 op2) %{
+ match(Set cr (CmpI op1 op2));
+ size(4);
+ format %{ "CHI $op1,$op2" %}
+ opcode(CHI_ZOPC);
+ ins_encode(z_riform_signed(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed int compare against zero: loading the register onto itself with
+// LTR (2 bytes) sets the condition code without needing an immediate.
+// The zero operand is matched but not encoded.
+instruct compI_reg_imm0(flagsReg cr, iRegI op1, immI_0 zero) %{
+ match(Set cr (CmpI op1 zero));
+ ins_cost(DEFAULT_COST_LOW);
+ size(2);
+ format %{ "LTR $op1,$op1" %}
+ opcode(LTR_ZOPC);
+ ins_encode(z_rrform(op1, op1));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed int compare of a register with LoadI(op2). Two opcodes (CY and C)
+// are handed to the encoding class; presumably z_form_rt_mem_opt picks between
+// them depending on the displacement, which is why no fixed size is declared
+// - confirm in the enc_class.
+instruct compI_reg_mem(flagsReg cr, iRegI op1, memory op2)%{
+ match(Set cr (CmpI op1 (LoadI op2)));
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "C(Y) $op1, $op2\t # int" %}
+ opcode(CY_ZOPC, C_ZOPC);
+ ins_encode(z_form_rt_mem_opt(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// INT unsigned
+
+// Unsigned int compare of two registers, as one 2-byte CLR. Sets the condition code.
+instruct compU_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+ match(Set cr (CmpU op1 op2));
+ size(2);
+ format %{ "CLR $op1,$op2\t # unsigned" %}
+ opcode(CLR_ZOPC);
+ ins_encode(z_rrform(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Unsigned int compare of a register with a uimmI constant, as one 6-byte
+// CLFI. Sets the condition code.
+instruct compU_reg_uimm(flagsReg cr, iRegI op1, uimmI op2) %{
+ match(Set cr (CmpU op1 op2));
+ size(6);
+ format %{ "CLFI $op1,$op2\t # unsigned" %}
+ opcode(CLFI_ZOPC);
+ ins_encode(z_rilform_unsigned(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Unsigned int compare against zero, using the same 2-byte LTR as the signed
+// rule compI_reg_imm0. The zero operand is matched but not encoded.
+// NOTE(review): LTR derives the condition code from the signed value, so an
+// operand with the top bit set is reported as "low" rather than "high".
+// That is only equivalent to an unsigned compare with 0 for eq/ne tests -
+// confirm that no unsigned lt/gt/le/ge test can consume this result.
+instruct compU_reg_imm0(flagsReg cr, iRegI op1, immI_0 zero) %{
+ match(Set cr (CmpU op1 zero));
+ ins_cost(DEFAULT_COST_LOW);
+ size(2);
+ format %{ "LTR $op1,$op1\t # unsigned" %}
+ opcode(LTR_ZOPC);
+ ins_encode(z_rrform(op1, op1));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Unsigned int compare of a register with LoadI(op2). Two opcodes (CLY and
+// CL) are handed to the encoding class; presumably z_form_rt_mem_opt picks
+// between them depending on the displacement, which is why no fixed size is
+// declared - confirm in the enc_class.
+instruct compU_reg_mem(flagsReg cr, iRegI op1, memory op2)%{
+ match(Set cr (CmpU op1 (LoadI op2)));
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CL(Y) $op1, $op2\t # unsigned" %}
+ opcode(CLY_ZOPC, CL_ZOPC);
+ ins_encode(z_form_rt_mem_opt(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// LONG signed
+
+// Signed long compare of two registers, as one 4-byte CGR. Sets the condition code.
+instruct compL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+ match(Set cr (CmpL op1 op2));
+ size(4);
+ format %{ "CGR $op1,$op2\t # long" %}
+ opcode(CGR_ZOPC);
+ ins_encode(z_rreform(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed compare of a long register with an int register: the ConvI2L on the
+// second operand is folded into the compare (one 4-byte CGFR), so no separate
+// widening instruction is emitted.
+instruct compL_reg_regI(flagsReg cr, iRegL op1, iRegI op2) %{
+ match(Set cr (CmpL op1 (ConvI2L op2)));
+ size(4);
+ format %{ "CGFR $op1,$op2\t # long/int" %}
+ opcode(CGFR_ZOPC);
+ ins_encode(z_rreform(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare of a register with a constant of operand class immL32,
+// as one 6-byte CGFI. Sets the condition code.
+instruct compL_reg_imm32(flagsReg cr, iRegL op1, immL32 con) %{
+ match(Set cr (CmpL op1 con));
+ size(6);
+ format %{ "CGFI $op1,$con" %}
+ opcode(CGFI_ZOPC);
+ ins_encode(z_rilform_signed(op1, con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare of a register with a constant of operand class immL16,
+// as one 4-byte CGHI. Sets the condition code.
+instruct compL_reg_imm16(flagsReg cr, iRegL op1, immL16 con) %{
+ match(Set cr (CmpL op1 con));
+ size(4);
+ format %{ "CGHI $op1,$con" %}
+ opcode(CGHI_ZOPC);
+ ins_encode(z_riform_signed(op1, con));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare against zero: loading the register onto itself with
+// LTGR (4 bytes) sets the condition code without needing an immediate.
+// The zero operand (con) is matched but not encoded.
+instruct compL_reg_imm0(flagsReg cr, iRegL op1, immL_0 con) %{
+ match(Set cr (CmpL op1 con));
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LTGR $op1,$op1" %}
+ opcode(LTGR_ZOPC);
+ ins_encode(z_rreform(op1, op1));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed compare of a widened int against long zero: CmpL(ConvI2L(op1), 0)
+// becomes a single 4-byte LTGFR with op1 as both operands.
+// NOTE(review): the encoding names op1 as the target as well, so the
+// instruction writes op1 although the rule only declares it as an input -
+// confirm that this is intended for an iRegI operand.
+instruct compL_conv_reg_imm0(flagsReg cr, iRegI op1, immL_0 con) %{
+ match(Set cr (CmpL (ConvI2L op1) con));
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LTGFR $op1,$op1" %}
+ opcode(LTGFR_ZOPC);
+ ins_encode(z_rreform(op1, op1));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare of a register with LoadL(src). Despite its name, the
+// 'dst' operand is only read; the defined operand is cr. The same opcode (CG)
+// is passed for both slots of the encoding class, so the size is fixed at
+// Z_DISP3_SIZE.
+instruct compL_reg_mem(iRegL dst, memory src, flagsReg cr)%{
+ match(Set cr (CmpL dst (LoadL src)));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "CG $dst, $src\t # long" %}
+ opcode(CG_ZOPC, CG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Signed compare of a long register with a widened int from memory:
+// CmpL(dst, ConvI2L(LoadI(src))) is folded into one CGF. Despite its name,
+// the 'dst' operand is only read; the defined operand is cr. Size is fixed at
+// Z_DISP3_SIZE.
+instruct compL_reg_memI(iRegL dst, memory src, flagsReg cr)%{
+ match(Set cr (CmpL dst (ConvI2L (LoadI src))));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "CGF $dst, $src\t # long/int" %}
+ opcode(CGF_ZOPC, CGF_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// LONG unsigned
+
+// PTR unsigned
+
+// Pointer compare of two registers using the unsigned 64-bit compare CLGR
+// (4 bytes). Sets the condition code.
+instruct compP_reg_reg(flagsReg cr, iRegP_N2P op1, iRegP_N2P op2) %{
+ match(Set cr (CmpP op1 op2));
+ size(4);
+ format %{ "CLGR $op1,$op2\t # ptr" %}
+ opcode(CLGR_ZOPC);
+ ins_encode(z_rreform(op1, op2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Pointer compare against the null constant (immP0): loading the register
+// onto itself with LTGR (4 bytes) sets the condition code. The constant
+// operand is matched but not encoded.
+instruct compP_reg_imm0(flagsReg cr, iRegP_N2P op1, immP0 op2) %{
+ match(Set cr (CmpP op1 op2));
+ ins_cost(DEFAULT_COST_LOW);
+ size(4);
+ format %{ "LTGR $op1, $op1\t # ptr" %}
+ opcode(LTGR_ZOPC);
+ ins_encode(z_rreform(op1, op1));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Don't use LTGFR which performs sign extend.
+// Null check of a narrow oop without decoding it first: CmpP(DecodeN(op1), 0)
+// is tested directly on the 32-bit narrow value with a 2-byte LTR. The
+// predicate restricts this to the configuration with a NULL narrow-oop base
+// and a shift of 0.
+instruct compP_decode_reg_imm0(flagsReg cr, iRegN op1, immP0 op2) %{
+ match(Set cr (CmpP (DecodeN op1) op2));
+ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);
+ ins_cost(DEFAULT_COST_LOW);
+ size(2);
+ format %{ "LTR $op1, $op1\t # ptr" %}
+ opcode(LTR_ZOPC);
+ ins_encode(z_rrform(op1, op1));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Pointer compare of a register with LoadP(src), using the unsigned 64-bit
+// compare CLG. Despite its name, the 'dst' operand is only read; the defined
+// operand is cr. Size is fixed at Z_DISP3_SIZE.
+instruct compP_reg_mem(iRegP dst, memory src, flagsReg cr)%{
+ match(Set cr (CmpP dst (LoadP src)));
+ ins_cost(MEMORY_REF_COST);
+ size(Z_DISP3_SIZE);
+ format %{ "CLG $dst, $src\t # ptr" %}
+ opcode(CLG_ZOPC, CLG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Max and Min--------------------------------------------------------
+
+// Min Register with Register.
+// Branch-free variant built on load-on-condition (LOCR); only available when
+// VM_Version::has_LoadStoreConditional() holds.
+instruct z196_minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (MinI src1 src2));
+ effect(KILL cr);
+ predicate(VM_Version::has_LoadStoreConditional());
+ ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MinI $dst $src1,$src2\t MinI (z196 only)" %}
+ ins_encode %{
+ Register Rres = $dst$$Register;
+ Register Rlhs = $src1$$Register;
+ Register Rrhs = $src2$$Register;
+
+ if (Rlhs == Rrhs) {
+ // Both inputs share one register: that value is the minimum.
+ if (Rres != Rlhs) {
+ __ z_lgfr(Rres, Rlhs);
+ }
+ } else {
+ // Rkeep is the first compare operand. When the result register aliases
+ // one of the inputs, that input is Rkeep and already holds a candidate.
+ Register Rkeep = (Rres == Rrhs) ? Rrhs : Rlhs;
+ Register Rother = (Rres == Rrhs) ? Rlhs : Rrhs;
+
+ __ z_cr(Rkeep, Rother);
+ // Rkeep is not lower than Rother: Rother is the minimum.
+ __ z_locr(Rres, Rother, Assembler::bcondNotLow);
+ if (Rres != Rkeep) {
+ // Result register is disjoint from both inputs: move in either case.
+ __ z_locr(Rres, Rkeep, Assembler::bcondLow);
+ }
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Min Register with Register.
+// Variant built on the fused compare-and-branch instruction (CRJ); only
+// available when VM_Version::has_CompareBranch() holds.
+instruct z10_minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (MinI src1 src2));
+ effect(KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MinI $dst $src1,$src2\t MinI (z10 only)" %}
+ ins_encode %{
+ Register Rres = $dst$$Register;
+ Register Rlhs = $src1$$Register;
+ Register Rrhs = $src2$$Register;
+ Label Lfinished;
+
+ if (Rlhs == Rrhs) {
+ // Both inputs share one register: that value is the minimum.
+ if (Rres != Rlhs) {
+ __ z_lgfr(Rres, Rlhs);
+ }
+ } else {
+ // Rkeep is the candidate the result starts out with. When the result
+ // register aliases one of the inputs, that input is Rkeep.
+ Register Rkeep = (Rres == Rrhs) ? Rrhs : Rlhs;
+ Register Rother = (Rres == Rrhs) ? Rlhs : Rrhs;
+
+ if (Rres != Rkeep) {
+ // Result register is disjoint from both inputs: preset it.
+ __ z_lgfr(Rres, Rkeep);
+ }
+ // Done if the preset candidate is the lower one, else take the other.
+ __ z_crj(Rkeep, Rother, Assembler::bcondLow, Lfinished);
+ __ z_lgfr(Rres, Rother);
+ }
+ __ bind(Lfinished);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (MinI src1 src2)); // dst = min(src1, src2) via compare, branch and register load.
+ effect(KILL cr);
+ predicate(!VM_Version::has_CompareBranch()); // Complements the predicate of z10_minI_reg_reg.
+ ins_cost(3 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MinI $dst $src1,$src2\t MinI" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Register Rsrc2 = $src2$$Register;
+ Label done;
+
+ if (Rsrc1 == Rsrc2) { // Both operands share one register: no compare needed.
+ if (Rdst != Rsrc1) {
+ __ z_lgfr(Rdst, Rsrc1);
+ }
+ } else if (Rdst == Rsrc1) { // Rdst preset with src1.
+ __ z_cr(Rsrc1, Rsrc2);
+ __ z_brl(done); // src1 low: keep src1.
+ __ z_lgfr(Rdst, Rsrc2);
+ } else if (Rdst == Rsrc2) { // Rdst preset with src2.
+ __ z_cr(Rsrc2, Rsrc1);
+ __ z_brl(done); // src2 low: keep src2.
+ __ z_lgfr(Rdst, Rsrc1);
+ } else {
+ __ z_lgfr(Rdst, Rsrc1); // Preset Rdst with src1.
+ __ z_cr(Rsrc1, Rsrc2);
+ __ z_brl(done); // src1 low: keep src1.
+ __ z_lgfr(Rdst, Rsrc2);
+ }
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct z196_minI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+ match(Set dst (MinI src1 src2)); // dst = min(src1, constant), branch-free.
+ effect(KILL cr);
+ predicate(VM_Version::has_LoadStoreConditional()); // Guards the z_locr instructions emitted below.
+ ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MinI $dst $src1,$src2\t MinI const32 (z196 only)" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ int Isrc2 = $src2$$constant;
+
+ if (Rdst == Rsrc1) { // Rdst already holds src1.
+ __ load_const_optimized(Z_R0_scratch, Isrc2); // Constant goes to the scratch register.
+ __ z_cfi(Rsrc1, Isrc2);
+ __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotLow); // src1 not low: take the constant.
+ } else {
+ __ load_const_optimized(Rdst, Isrc2); // Preset Rdst with the constant.
+ __ z_cfi(Rsrc1, Isrc2);
+ __ z_locr(Rdst, Rsrc1, Assembler::bcondLow); // src1 low: take src1.
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct minI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+ match(Set dst (MinI src1 src2)); // dst = min(src1, constant); this rule carries no predicate.
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MinI $dst $src1,$src2\t MinI const32" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Label done;
+ if (Rdst != Rsrc1) { __ z_lgfr(Rdst, Rsrc1); } // Preset Rdst with src1.
+ __ z_cfi(Rsrc1, $src2$$constant);
+ __ z_brl(done); // src1 low: Rdst is already final.
+ __ z_lgfi(Rdst, $src2$$constant); // Otherwise the constant is the result.
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct z196_minI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+ match(Set dst (MinI src1 src2)); // dst = min(src1, 16-bit constant), branch-free.
+ effect(KILL cr);
+ predicate(VM_Version::has_LoadStoreConditional()); // Guards the z_locr instructions emitted below.
+ ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MinI $dst $src1,$src2\t MinI const16 (z196 only)" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ int Isrc2 = $src2$$constant;
+
+ if (Rdst == Rsrc1) { // Rdst already holds src1.
+ __ load_const_optimized(Z_R0_scratch, Isrc2); // Constant goes to the scratch register.
+ __ z_chi(Rsrc1, Isrc2);
+ __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotLow); // src1 not low: take the constant.
+ } else {
+ __ load_const_optimized(Rdst, Isrc2); // Preset Rdst with the constant.
+ __ z_chi(Rsrc1, Isrc2);
+ __ z_locr(Rdst, Rsrc1, Assembler::bcondLow); // src1 low: take src1.
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct minI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+ match(Set dst (MinI src1 src2)); // dst = min(src1, 16-bit constant); this rule carries no predicate.
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MinI $dst $src1,$src2\t MinI const16" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Label done;
+ if (Rdst != Rsrc1) { __ z_lgfr(Rdst, Rsrc1); } // Preset Rdst with src1.
+ __ z_chi(Rsrc1, $src2$$constant);
+ __ z_brl(done); // src1 low: Rdst is already final.
+ __ z_lghi(Rdst, $src2$$constant); // Otherwise the constant is the result.
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct z10_minI_reg_imm8(iRegI dst, iRegI src1, immI8 src2, flagsReg cr) %{
+ match(Set dst (MinI src1 src2)); // dst = min(src1, 8-bit constant) with a fused compare-and-branch.
+ effect(KILL cr);
+ predicate(VM_Version::has_CompareBranch()); // Guards the z_cij instruction emitted below.
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MinI $dst $src1,$src2\t MinI const8 (z10 only)" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Label done;
+ if (Rdst != Rsrc1) { __ z_lgfr(Rdst, Rsrc1); } // Preset Rdst with src1.
+ __ z_cij(Rsrc1, $src2$$constant, Assembler::bcondLow, done); // src1 low: Rdst is already final.
+ __ z_lghi(Rdst, $src2$$constant); // Otherwise the constant is the result.
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Max Register with Register (z196 variant: branch-free, uses conditional register loads).
+instruct z196_maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (MaxI src1 src2));
+ effect(KILL cr);
+ predicate(VM_Version::has_LoadStoreConditional()); // Guards the z_locr instructions emitted below.
+ ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MaxI $dst $src1,$src2\t MaxI (z196 only)" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Register Rsrc2 = $src2$$Register;
+
+ if (Rsrc1 == Rsrc2) { // Both operands share one register: no compare needed.
+ if (Rdst != Rsrc1) {
+ __ z_lgfr(Rdst, Rsrc1);
+ }
+ } else if (Rdst == Rsrc1) { // Rdst preset with src1.
+ __ z_cr(Rsrc1, Rsrc2); // Move src2 only if src1 is NotHigh.
+ __ z_locr(Rdst, Rsrc2, Assembler::bcondNotHigh);
+ } else if (Rdst == Rsrc2) { // Rdst preset with src2.
+ __ z_cr(Rsrc2, Rsrc1); // Move src1 only if src2 is NotHigh.
+ __ z_locr(Rdst, Rsrc1, Assembler::bcondNotHigh);
+ } else { // Rdst is disjoint from operands, move in either case.
+ __ z_cr(Rsrc1, Rsrc2);
+ __ z_locr(Rdst, Rsrc2, Assembler::bcondNotHigh); // src1 not high: take src2.
+ __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh); // src1 high: take src1.
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Max Register with Register (z10 variant: fused compare-and-branch around a register load).
+instruct z10_maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (MaxI src1 src2));
+ effect(KILL cr);
+ predicate(VM_Version::has_CompareBranch()); // Guards the z_crj instructions emitted below.
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MaxI $dst $src1,$src2\t MaxI (z10 only)" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Register Rsrc2 = $src2$$Register;
+ Label done;
+
+ if (Rsrc1 == Rsrc2) { // Both operands share one register: no compare needed.
+ if (Rdst != Rsrc1) {
+ __ z_lgfr(Rdst, Rsrc1);
+ }
+ } else if (Rdst == Rsrc1) { // Rdst preset with src1.
+ __ z_crj(Rsrc1, Rsrc2, Assembler::bcondHigh, done); // src1 high: keep src1.
+ __ z_lgfr(Rdst, Rsrc2);
+ } else if (Rdst == Rsrc2) { // Rdst preset with src2.
+ __ z_crj(Rsrc2, Rsrc1, Assembler::bcondHigh, done); // src2 high: keep src2.
+ __ z_lgfr(Rdst, Rsrc1);
+ } else {
+ __ z_lgfr(Rdst, Rsrc1); // Preset Rdst with src1.
+ __ z_crj(Rsrc1, Rsrc2, Assembler::bcondHigh, done); // src1 high: keep src1.
+ __ z_lgfr(Rdst, Rsrc2);
+ }
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ match(Set dst (MaxI src1 src2)); // dst = max(src1, src2) via compare, branch and register load.
+ effect(KILL cr);
+ predicate(!VM_Version::has_CompareBranch()); // Complements the predicate of z10_maxI_reg_reg.
+ ins_cost(3 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MaxI $dst $src1,$src2\t MaxI" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Register Rsrc2 = $src2$$Register;
+ Label done;
+
+ if (Rsrc1 == Rsrc2) { // Both operands share one register: no compare needed.
+ if (Rdst != Rsrc1) {
+ __ z_lgfr(Rdst, Rsrc1);
+ }
+ } else if (Rdst == Rsrc1) { // Rdst preset with src1.
+ __ z_cr(Rsrc1, Rsrc2);
+ __ z_brh(done); // src1 high: keep src1.
+ __ z_lgfr(Rdst, Rsrc2);
+ } else if (Rdst == Rsrc2) { // Rdst preset with src2.
+ __ z_cr(Rsrc2, Rsrc1);
+ __ z_brh(done); // src2 high: keep src2.
+ __ z_lgfr(Rdst, Rsrc1);
+ } else {
+ __ z_lgfr(Rdst, Rsrc1); // Preset Rdst with src1.
+ __ z_cr(Rsrc1, Rsrc2);
+ __ z_brh(done); // src1 high: keep src1.
+ __ z_lgfr(Rdst, Rsrc2);
+ }
+
+ __ bind(done);
+ %}
+
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct z196_maxI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+ match(Set dst (MaxI src1 src2)); // dst = max(src1, constant), branch-free.
+ effect(KILL cr);
+ predicate(VM_Version::has_LoadStoreConditional()); // Guards the z_locr instructions emitted below.
+ ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MaxI $dst $src1,$src2\t MaxI const32 (z196 only)" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ int Isrc2 = $src2$$constant;
+
+ if (Rdst == Rsrc1) { // Rdst already holds src1.
+ __ load_const_optimized(Z_R0_scratch, Isrc2); // Constant goes to the scratch register.
+ __ z_cfi(Rsrc1, Isrc2);
+ __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotHigh); // src1 not high: take the constant.
+ } else {
+ __ load_const_optimized(Rdst, Isrc2); // Preset Rdst with the constant.
+ __ z_cfi(Rsrc1, Isrc2);
+ __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh); // src1 high: take src1.
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct maxI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+ match(Set dst (MaxI src1 src2)); // dst = max(src1, constant); this rule carries no predicate.
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MaxI $dst $src1,$src2\t MaxI const32" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Label done;
+ if (Rdst != Rsrc1) { __ z_lgfr(Rdst, Rsrc1); } // Preset Rdst with src1.
+ __ z_cfi(Rsrc1, $src2$$constant);
+ __ z_brh(done); // src1 high: Rdst is already final.
+ __ z_lgfi(Rdst, $src2$$constant); // Otherwise the constant is the result.
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct z196_maxI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+ match(Set dst (MaxI src1 src2)); // dst = max(src1, 16-bit constant), branch-free.
+ effect(KILL cr);
+ predicate(VM_Version::has_LoadStoreConditional()); // Guards the z_locr instructions emitted below.
+ ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MaxI $dst $src1,$src2\t MaxI const16 (z196 only)" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ int Isrc2 = $src2$$constant;
+ if (Rdst == Rsrc1) { // Rdst already holds src1.
+ __ load_const_optimized(Z_R0_scratch, Isrc2); // Constant goes to the scratch register.
+ __ z_chi(Rsrc1, Isrc2);
+ __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotHigh); // src1 not high: take the constant.
+ } else {
+ __ load_const_optimized(Rdst, Isrc2); // Preset Rdst with the constant.
+ __ z_chi(Rsrc1, Isrc2);
+ __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh); // src1 high: take src1.
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct maxI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+ match(Set dst (MaxI src1 src2)); // dst = max(src1, 16-bit constant); this rule carries no predicate.
+ effect(KILL cr);
+ ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MaxI $dst $src1,$src2\t MaxI const16" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Label done;
+ if (Rdst != Rsrc1) { __ z_lgfr(Rdst, Rsrc1); } // Preset Rdst with src1.
+ __ z_chi(Rsrc1, $src2$$constant);
+ __ z_brh(done); // src1 high: Rdst is already final.
+ __ z_lghi(Rdst, $src2$$constant); // Otherwise the constant is the result.
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct z10_maxI_reg_imm8(iRegI dst, iRegI src1, immI8 src2, flagsReg cr) %{
+ match(Set dst (MaxI src1 src2)); // dst = max(src1, 8-bit constant) with a fused compare-and-branch.
+ effect(KILL cr);
+ predicate(VM_Version::has_CompareBranch()); // Guards the z_cij instruction emitted below.
+ ins_cost(DEFAULT_COST + BRANCH_COST);
+ // TODO: s390 port size(VARIABLE_SIZE); NOTE(review): format lacks the "(z10 only)" tag of z10_minI_reg_imm8.
+ format %{ "MaxI $dst $src1,$src2\t MaxI const8" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Label done;
+ if (Rdst != Rsrc1) { __ z_lgfr(Rdst, Rsrc1); } // Preset Rdst with src1.
+ __ z_cij(Rsrc1, $src2$$constant, Assembler::bcondHigh, done); // src1 high: Rdst is already final.
+ __ z_lghi(Rdst, $src2$$constant); // Otherwise the constant is the result.
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Abs---------------------------------------------------------------
+
+instruct absI_reg(iRegI dst, iRegI src, flagsReg cr) %{
+ match(Set dst (AbsI src)); // Integer absolute value, register to register.
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST_LOW);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LPR $dst, $src" %}
+ opcode(LPR_ZOPC); // Single instruction; operands are filled in by z_rrform.
+ ins_encode(z_rrform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct negabsI_reg(iRegI dst, iRegI src, immI_0 zero, flagsReg cr) %{
+ match(Set dst (SubI zero (AbsI src))); // Matches 0 - abs(src) as one instruction.
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST_LOW);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LNR $dst, $src" %}
+ opcode(LNR_ZOPC); // Single instruction; operands are filled in by z_rrform.
+ ins_encode(z_rrform(dst, src)); // The zero operand is only part of the match rule, it is not encoded.
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Float Compares----------------------------------------------------
+
+// Compare two float registers, generate condition code.
+instruct cmpF_cc(flagsReg cr, regF src1, regF src2) %{
+ match(Set cr (CmpF src1 src2)); // The condition code is the only result.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "FCMPcc $src1,$src2\t # float" %}
+ ins_encode %{ __ z_cebr($src1$$FloatRegister, $src2$$FloatRegister); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpD_cc(flagsReg cr, regD src1, regD src2) %{
+ match(Set cr (CmpD src1 src2)); // Double counterpart of cmpF_cc; the condition code is the only result.
+ ins_cost(ALU_REG_COST);
+ size(4);
+ format %{ "FCMPcc $src1,$src2 \t # double" %}
+ ins_encode %{ __ z_cdbr($src1$$FloatRegister, $src2$$FloatRegister); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpF_cc_mem(flagsReg cr, regF src1, memoryRX src2) %{
+ match(Set cr (CmpF src1 (LoadF src2))); // Folds the load of the second operand into the compare.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "FCMPcc_mem $src1,$src2\t # floatMemory" %}
+ opcode(CEB_ZOPC);
+ ins_encode(z_form_rt_memFP(src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpD_cc_mem(flagsReg cr, regD src1, memoryRX src2) %{
+ match(Set cr (CmpD src1 (LoadD src2))); // Folds the load of the second operand into the compare.
+ ins_cost(ALU_MEMORY_COST);
+ size(6);
+ format %{ "DCMPcc_mem $src1,$src2\t # doubleMemory" %}
+ opcode(CDB_ZOPC);
+ ins_encode(z_form_rt_memFP(src1, src2));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Compare float against an immFpm0 constant (presumably +/-0.0 - confirm in the operand definition), generate condition code.
+instruct cmpF0_cc(flagsReg cr, regF src1, immFpm0 src2) %{
+ match(Set cr (CmpF src1 src2));
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "LTEBR $src1,$src1\t # float" %}
+ opcode(LTEBR_ZOPC);
+ ins_encode(z_rreform(src1, src1)); // src1 is both operands; the constant src2 is not encoded.
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpD0_cc(flagsReg cr, regD src1, immDpm0 src2) %{
+ match(Set cr (CmpD src1 src2)); // Double counterpart of cmpF0_cc.
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "LTDBR $src1,$src1 \t # double" %}
+ opcode(LTDBR_ZOPC);
+ ins_encode(z_rreform(src1, src1)); // src1 is both operands; the constant src2 is not encoded.
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Three-way float compare: materializes -1, 0 or 1 in an integer register.
+instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsReg cr) %{
+ match(Set dst (CmpF3 src1 src2));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST * 5 + BRANCH_COST);
+ size(24);
+ format %{ "CmpF3 $dst,$src1,$src2" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ // The compare sets the condition code, which is then converted into
+ // -1 (less or unordered), 0 (equal) or 1 (greater).
+ __ z_cebr($src1$$FloatRegister, $src2$$FloatRegister);
+ if (!VM_Version::has_LoadStoreConditional()) {
+ // Branching variant: leave as soon as Rdst holds the result.
+ Label done;
+ (void) __ clear_reg(Rdst, true, false); // Return value of clear_reg is not used.
+ __ z_bre(done); // Equal: result stays 0.
+ __ z_lhi(Rdst, 1);
+ __ z_brh(done); // Greater: result is 1.
+ __ z_lhi(Rdst, -1);
+ __ bind(done);
+ } else {
+ // Branch-free variant: start with 0, then overwrite conditionally.
+ Register Rpos = Z_R0_scratch;
+ Register Rneg = Z_R1_scratch;
+ __ z_lghi(Rneg, -1);
+ __ z_lghi(Rpos, 1);
+ __ z_lghi(Rdst, 0);
+ __ z_locgr(Rdst, Rpos, Assembler::bcondHigh);
+ __ z_locgr(Rdst, Rneg, Assembler::bcondLowOrNotOrdered);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsReg cr) %{
+ match(Set dst (CmpD3 src1 src2));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST * 5 + BRANCH_COST);
+ size(24);
+ format %{ "CmpD3 $dst,$src1,$src2" %}
+ ins_encode %{
+ // Three-way double compare: materializes -1, 0 or 1 in an integer register.
+ Register Rdst = $dst$$Register;
+ // The compare sets the condition code, which is then converted into
+ // -1 (less or unordered), 0 (equal) or 1 (greater).
+ __ z_cdbr($src1$$FloatRegister, $src2$$FloatRegister);
+ if (!VM_Version::has_LoadStoreConditional()) {
+ // Branching variant: leave as soon as Rdst holds the result.
+ Label done;
+ (void) __ clear_reg(Rdst, true, false); // Return value of clear_reg is not used.
+ __ z_bre(done); // Equal: result stays 0.
+ __ z_lhi(Rdst, 1);
+ __ z_brh(done); // Greater: result is 1.
+ __ z_lhi(Rdst, -1);
+ __ bind(done);
+ } else {
+ // Branch-free variant: start with 0, then overwrite conditionally.
+ Register Rpos = Z_R0_scratch;
+ Register Rneg = Z_R1_scratch;
+ __ z_lghi(Rneg, -1);
+ __ z_lghi(Rpos, 1);
+ __ z_lghi(Rdst, 0);
+ __ z_locgr(Rdst, Rpos, Assembler::bcondHigh);
+ __ z_locgr(Rdst, Rneg, Assembler::bcondLowOrNotOrdered);
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------Branches---------------------------------------------------------
+// Jump
+
+// Direct Branch, short form (4 bytes).
+instruct branch(label labl) %{
+ match(Goto); // Same match rule as `branchFar'.
+ effect(USE labl);
+ ins_cost(BRANCH_COST);
+ size(4);
+ format %{ "BRU $labl" %}
+ ins_encode(z_enc_bru(labl));
+ ins_pipe(pipe_class_dummy);
+ // If set to 1 this indicates that the current instruction is a
+ // short variant of a long branch. This avoids using this
+ // instruction in first-pass matching. It will then only be used in
+ // the `Shorten_branches' pass.
+ ins_short_branch(1);
+%}
+
+// Direct Branch, long form (6 bytes).
+instruct branchFar(label labl) %{
+ match(Goto); // Same match rule as `branch'.
+ effect(USE labl);
+ ins_cost(BRANCH_COST);
+ size(6);
+ format %{ "BRUL $labl" %}
+ ins_encode(z_enc_brul(labl));
+ ins_pipe(pipe_class_dummy);
+ // This is not a short variant of a branch, but the long variant.
+ ins_short_branch(0);
+%}
+
+// Conditional Near Branch (4 bytes), branches on an already computed condition code.
+instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
+ // Same match rule as `branchConFar'.
+ match(If cmp cr);
+ effect(USE lbl);
+ ins_cost(BRANCH_COST);
+ size(4);
+ format %{ "branch_con_short,$cmp $cr, $lbl" %}
+ ins_encode(z_enc_branch_con_short(cmp, lbl));
+ ins_pipe(pipe_class_dummy);
+ // If set to 1 this indicates that the current instruction is a
+ // short variant of a long branch. This avoids using this
+ // instruction in first-pass matching. It will then only be used in
+ // the `Shorten_branches' pass.
+ ins_short_branch(1);
+%}
+
+// This is for cases when the z/Architecture conditional branch instruction
+// does not reach far enough. So we emit a far branch here, which is
+// more expensive.
+//
+// Conditional Far Branch (6 bytes)
+instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
+ // Same match rule as `branchCon'.
+ match(If cmp cr);
+ effect(USE cr, USE lbl); // NOTE(review): branchCon lists only USE lbl - confirm the difference is intended.
+ // Make more expensive to prefer compare_and_branch over separate instructions.
+ ins_cost(2 * BRANCH_COST);
+ size(6);
+ format %{ "branch_con_far,$cmp $cr, $lbl" %}
+ ins_encode(z_enc_branch_con_far(cmp, lbl));
+ ins_pipe(pipe_class_dummy);
+ // This is not a short variant of a branch, but the long variant.
+ ins_short_branch(0);
+%}
+
+instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
+ match(CountedLoopEnd cmp cr); // Counted-loop back branch on an already computed condition code.
+ effect(USE labl);
+ ins_cost(BRANCH_COST);
+ size(4);
+ format %{ "branch_con_short,$cmp $labl\t # counted loop end" %}
+ ins_encode(z_enc_branch_con_short(cmp, labl)); // Same encoding class as branchCon.
+ ins_pipe(pipe_class_dummy);
+ // If set to 1 this indicates that the current instruction is a
+ // short variant of a long branch. This avoids using this
+ // instruction in first-pass matching. It will then only be used in
+ // the `Shorten_branches' pass.
+ ins_short_branch(1);
+%}
+
+instruct branchLoopEndFar(cmpOp cmp, flagsReg cr, label labl) %{
+ match(CountedLoopEnd cmp cr); // Same match rule as branchLoopEnd; this is the 6-byte form.
+ effect(USE labl);
+ ins_cost(BRANCH_COST);
+ size(6);
+ format %{ "branch_con_far,$cmp $labl\t # counted loop end" %}
+ ins_encode(z_enc_branch_con_far(cmp, labl)); // Same encoding class as branchConFar.
+ ins_pipe(pipe_class_dummy);
+ // This is not a short variant of a branch, but the long variant.
+ ins_short_branch(0);
+%}
+
+//----------Compare and Branch (short distance)------------------------------
+
+// INT REG operands for loop counter processing (short compare-and-branch form).
+instruct testAndBranchLoopEnd_Reg(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+ match(CountedLoopEnd boolnode (CmpI src1 src2)); // Compare and loop-end branch are matched as one instruction.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "test_and_branch_loop_end,$boolnode $src1,$src2,$labl\t # counted loop end SHORT" %}
+ opcode(CRJ_ZOPC);
+ ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; testAndBranchLoopEnd_RegFar has the same match rule.
+%}
+
+// INT REG operands (short compare-and-branch form).
+instruct cmpb_RegI(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpI src1 src2)); // Compare and conditional branch are matched as one instruction.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CRJ_ZOPC);
+ ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegI_Far has the same match rule.
+%}
+
+// Unsigned INT REG operands (short compare-and-branch form).
+instruct cmpbU_RegI(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpU src1 src2)); // Compare and conditional branch are matched as one instruction.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CLRJ_ZOPC);
+ ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpbU_RegI_Far has the same match rule.
+%}
+
+// LONG REG operands (short compare-and-branch form).
+instruct cmpb_RegL(cmpOpT boolnode, iRegL src1, iRegL src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpL src1 src2)); // Compare and conditional branch are matched as one instruction.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CGRJ_ZOPC);
+ ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegL_Far has the same match rule.
+%}
+
+// PTR REG operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+instruct cmpb_RegPP(cmpOpT boolnode, iRegP src1, iRegP src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP src1 src2)); // Regular (uncompressed) pointer operands, short form.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CLGRJ_ZOPC);
+ ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegPP_Far has the same match rule.
+%}
+
+instruct cmpb_RegNN(cmpOpT boolnode, iRegN src1, iRegN src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP (DecodeN src1) (DecodeN src2))); // Both DecodeN nodes are absorbed; narrow registers are compared directly.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CLGRJ_ZOPC);
+ ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegNN_Far has the same match rule.
+%}
+
+// INT REG/IMM operands for loop counter processing (short compare-and-branch form, 8-bit immediate).
+instruct testAndBranchLoopEnd_Imm(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+ match(CountedLoopEnd boolnode (CmpI src1 src2)); // Compare and loop-end branch are matched as one instruction.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "test_and_branch_loop_end,$boolnode $src1,$src2,$labl\t # counted loop end SHORT" %}
+ opcode(CIJ_ZOPC);
+ ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; testAndBranchLoopEnd_ImmFar has the same match rule.
+%}
+
+// INT REG/IMM operands (short compare-and-branch form, 8-bit immediate).
+instruct cmpb_RegI_imm(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpI src1 src2)); // Compare and conditional branch are matched as one instruction.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CIJ_ZOPC);
+ ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegI_imm_Far has the same match rule.
+%}
+
+// Unsigned INT REG/IMM operands (short compare-and-branch form, unsigned 8-bit immediate).
+instruct cmpbU_RegI_imm(cmpOpT boolnode, iRegI src1, uimmI8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpU src1 src2)); // Compare and conditional branch are matched as one instruction.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CLIJ_ZOPC);
+ ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpbU_RegI_imm_Far has the same match rule.
+%}
+
+// LONG REG/IMM operands (short compare-and-branch form, 8-bit immediate).
+instruct cmpb_RegL_imm(cmpOpT boolnode, iRegL src1, immL8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpL src1 src2)); // Compare and conditional branch are matched as one instruction.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CGIJ_ZOPC);
+ ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegL_imm_Far has the same match rule.
+%}
+
+// PTR REG-imm operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+instruct cmpb_RegP_immP(cmpOpT boolnode, iRegP src1, immP8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP src1 src2)); // Regular pointer against an immP8 constant, short form.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CLGIJ_ZOPC);
+ ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegP_immP_Far has the same match rule.
+%}
+
+// Compare against zero only, do not mix N and P oops (encode/decode required). Short form.
+instruct cmpb_RegN_immP0(cmpOpT boolnode, iRegN src1, immP0 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP (DecodeN src1) src2)); // The DecodeN node is absorbed; the narrow register is compared directly.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CLGIJ_ZOPC);
+ ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegN_immP0_Far has the same match rule.
+%}
+
+instruct cmpb_RegN_imm(cmpOpT boolnode, iRegN src1, immN8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP (DecodeN src1) (DecodeN src2))); // Narrow register against an immN8 constant; both DecodeN nodes are absorbed.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+ opcode(CLGIJ_ZOPC);
+ ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(1); // Short variant; cmpb_RegN_immN_Far has the same match rule.
+%}
+
+
+//----------Compare and Branch (far distance)------------------------------
+
+// INT REG operands for loop counter processing (far form).
+instruct testAndBranchLoopEnd_RegFar(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+ match(CountedLoopEnd boolnode (CmpI src1 src2)); // Same match rule as testAndBranchLoopEnd_Reg.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "test_and_branch_loop_end,$boolnode $src1,$src2,$labl\t # counted loop end FAR" %}
+ opcode(CR_ZOPC, BRCL_ZOPC); // Compare opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// INT REG operands (far form).
+instruct cmpb_RegI_Far(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpI src1 src2)); // Same match rule as cmpb_RegI.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CR_ZOPC, BRCL_ZOPC); // Compare opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// Unsigned INT REG operands (far form).
+instruct cmpbU_RegI_Far(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpU src1 src2)); // Same match rule as cmpbU_RegI.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CLR_ZOPC, BRCL_ZOPC); // Compare opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// LONG REG operands (far form).
+instruct cmpb_RegL_Far(cmpOpT boolnode, iRegL src1, iRegL src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpL src1 src2)); // Same match rule as cmpb_RegL.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CGRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CGR_ZOPC, BRCL_ZOPC); // Compare opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// PTR REG operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+instruct cmpb_RegPP_Far(cmpOpT boolnode, iRegP src1, iRegP src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP src1 src2)); // Regular pointer operands, far form; same match rule as cmpb_RegPP.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CLGR_ZOPC, BRCL_ZOPC); // Compare opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+instruct cmpb_RegNN_Far(cmpOpT boolnode, iRegN src1, iRegN src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP (DecodeN src1) (DecodeN src2))); // Narrow oop operands, far form; same match rule as cmpb_RegNN.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CLGR_ZOPC, BRCL_ZOPC); // Compare opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// INT REG/IMM operands for loop counter processing (far form).
+instruct testAndBranchLoopEnd_ImmFar(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+ match(CountedLoopEnd boolnode (CmpI src1 src2)); // Same match rule as testAndBranchLoopEnd_Imm.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "test_and_branch_loop_end,$boolnode $src1,$src2,$labl\t # counted loop end FAR" %}
+ opcode(CHI_ZOPC, BRCL_ZOPC); // Compare-immediate opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// INT REG/IMM operands (far form).
+instruct cmpb_RegI_imm_Far(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpI src1 src2)); // Same match rule as cmpb_RegI_imm.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CHI_ZOPC, BRCL_ZOPC); // Compare-immediate opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// Unsigned INT REG/IMM operands (far form).
+instruct cmpbU_RegI_imm_Far(cmpOpT boolnode, iRegI src1, uimmI8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpU src1 src2)); // Same match rule as cmpbU_RegI_imm.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CLFI_ZOPC, BRCL_ZOPC); // Compare-immediate opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// LONG REG/IMM operands (far form).
+instruct cmpb_RegL_imm_Far(cmpOpT boolnode, iRegL src1, immL8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpL src1 src2)); // Same match rule as cmpb_RegL_imm.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CGIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CGHI_ZOPC, BRCL_ZOPC); // Compare-immediate opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// PTR REG-imm operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+instruct cmpb_RegP_immP_Far(cmpOpT boolnode, iRegP src1, immP8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP src1 src2)); // Regular pointer against an immP8 constant, far form; same match rule as cmpb_RegP_immP.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CLGFI_ZOPC, BRCL_ZOPC); // Compare-immediate opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// Compare against zero only, do not mix N and P oops (encode/decode required). Far form.
+instruct cmpb_RegN_immP0_Far(cmpOpT boolnode, iRegN src1, immP0 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP (DecodeN src1) src2)); // Same match rule as cmpb_RegN_immP0.
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CLGFI_ZOPC, BRCL_ZOPC); // Compare-immediate opcode plus BRCL for the branch part.
+ ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0); // Long variant.
+%}
+
+// Far compare-and-branch of two narrow oops: matches CmpP where both inputs
+// are DecodeN nodes, the second being an immN8 constant. Both DecodeN nodes
+// are absorbed by the match rule; the encoder receives the narrow operands.
+// Opcode pair is CLGFI + BRCL; the format prints the CLGIJ mnemonic.
+instruct cmpb_RegN_immN_Far(cmpOpT boolnode, iRegN src1, immN8 src2, label labl, flagsReg cr) %{
+ match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+ effect(USE labl, KILL cr);
+ predicate(VM_Version::has_CompareBranch());
+ ins_cost(BRANCH_COST+DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+ opcode(CLGFI_ZOPC, BRCL_ZOPC);
+ ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+ ins_pipe(pipe_class_dummy);
+ ins_short_branch(0);
+%}
+
+// ============================================================================
+// Long Compare
+
+// Due to a shortcoming in the ADLC, it mixes up expressions like:
+// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
+// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
+// are collapsed internally in the ADLC's dfa-gen code. The match for
+// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
+// foo match ends up with the wrong leaf. One fix is to not match both
+// reg-reg and reg-zero forms of long-compare. This is unfortunate because
+// both forms beat the trinary form of long-compare and both are very useful
+// on platforms which have few registers.
+
+// Manifest a CmpL3 result in an integer register. Very painful.
+// This is the test to avoid.
+//
+// After a 64-bit register compare (CGR), one of two code shapes is emitted:
+// - with VM_Version::has_LoadStoreConditional(): dst is preset to 0 and then
+// conditionally overwritten by LOCGR from Z_R0_scratch (1) or
+// Z_R1_scratch (-1); both scratch registers are written on this path.
+// - otherwise: dst is cleared and a short branch sequence to label 'done'
+// selects 0, 1 or -1.
+// The rule declares a fixed size(24).
+instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr) %{
+ match(Set dst (CmpL3 src1 src2));
+ effect(KILL cr);
+ ins_cost(DEFAULT_COST * 5 + BRANCH_COST);
+ size(24);
+ format %{ "CmpL3 $dst,$src1,$src2" %}
+ ins_encode %{
+ Label done;
+ // compare registers
+ __ z_cgr($src1$$Register, $src2$$Register);
+ // Convert condition code into -1,0,1, where
+ // -1 means less
+ // 0 means equal
+ // 1 means greater.
+ if (VM_Version::has_LoadStoreConditional()) {
+ // Branch-free variant: the LGHIs are issued after the compare and are
+ // relied upon to leave the condition code from CGR intact.
+ Register one = Z_R0_scratch;
+ Register minus_one = Z_R1_scratch;
+ __ z_lghi(minus_one, -1);
+ __ z_lghi(one, 1);
+ __ z_lghi( $dst$$Register, 0);
+ __ z_locgr($dst$$Register, one, Assembler::bcondHigh);
+ __ z_locgr($dst$$Register, minus_one, Assembler::bcondLow);
+ } else {
+ // Branching variant: start with 0, leave if equal, then try 1, leave if
+ // high, otherwise fall through to -1.
+ __ clear_reg($dst$$Register, true, false);
+ __ z_bre(done);
+ __ z_lhi($dst$$Register, 1);
+ __ z_brh(done);
+ __ z_lhi($dst$$Register, -1);
+ }
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// Safepoint Instruction
+
+// Placeholder for the operand-less SafePoint node. predicate(false) keeps the
+// rule from ever being selected; its encoding is enc_unimplemented().
+instruct safePoint() %{
+ match(SafePoint);
+ predicate(false);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "UNIMPLEMENTED Safepoint_ " %}
+ ins_encode(enc_unimplemented());
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Safepoint poll through the address held in $poll. A relocInfo::poll_type
+// relocation is recorded at the current code position immediately before
+// load_from_polling_page emits the access. The flags register is declared
+// killed; per the inline remark R0 is clobbered as well.
+instruct safePoint_poll(iRegP poll, flagsReg cr) %{
+ match(SafePoint poll);
+ effect(USE poll, KILL cr); // R0 is killed, too.
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "TM #0[,$poll],#111\t # Safepoint: poll for GC" %}
+ ins_encode %{
+ // Mark the code position where the load from the safepoint
+ // polling page was emitted as relocInfo::poll_type.
+ __ relocate(relocInfo::poll_type);
+ __ load_from_polling_page($poll$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+
+// Call Instructions
+
+// Call Java Static Instruction
+// Matches CallStaticJava; code emission is delegated to the
+// z_enc_java_static_call encoding class. No size() is declared (see TODO);
+// ins_alignment(2) is requested.
+instruct CallStaticJavaDirect_dynTOC(method meth) %{
+ match(CallStaticJava);
+ effect(USE meth);
+ ins_cost(CALL_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CALL,static dynTOC $meth; ==> " %}
+ ins_encode( z_enc_java_static_call(meth) );
+ ins_pipe(pipe_class_dummy);
+ ins_alignment(2);
+%}
+
+// Call Java Dynamic Instruction
+// Matches CallDynamicJava; code emission is delegated to the
+// z_enc_java_dynamic_call encoding class. No size() is declared (see TODO);
+// ins_alignment(2) is requested.
+instruct CallDynamicJavaDirect_dynTOC(method meth) %{
+ match(CallDynamicJava);
+ effect(USE meth);
+ ins_cost(CALL_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "CALL,dynamic dynTOC $meth; ==> " %}
+ ins_encode(z_enc_java_dynamic_call(meth));
+ ins_pipe(pipe_class_dummy);
+ ins_alignment(2);
+%}
+
+// Call Runtime Instruction
+// Matches CallRuntime and emits through z_enc_java_to_runtime_call.
+// Declares one constant (ins_num_consts(1)) and ins_alignment(2).
+instruct CallRuntimeDirect(method meth) %{
+ match(CallRuntime);
+ effect(USE meth);
+ ins_cost(CALL_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ ins_num_consts(1);
+ ins_alignment(2);
+ format %{ "CALL,runtime" %}
+ ins_encode( z_enc_java_to_runtime_call(meth) );
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Call runtime without safepoint - same as CallRuntime
+// Matches CallLeaf and uses the same z_enc_java_to_runtime_call encoding,
+// one declared constant and ins_alignment(2).
+instruct CallLeafDirect(method meth) %{
+ match(CallLeaf);
+ effect(USE meth);
+ ins_cost(CALL_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ ins_num_consts(1);
+ ins_alignment(2);
+ format %{ "CALL,runtime leaf $meth" %}
+ ins_encode( z_enc_java_to_runtime_call(meth) );
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Call runtime without safepoint - same as CallLeaf
+// Matches CallLeafNoFP and uses the same z_enc_java_to_runtime_call encoding,
+// one declared constant and ins_alignment(2).
+instruct CallLeafNoFPDirect(method meth) %{
+ match(CallLeafNoFP);
+ effect(USE meth);
+ ins_cost(CALL_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ ins_num_consts(1);
+ format %{ "CALL,runtime leaf nofp $meth" %}
+ ins_encode( z_enc_java_to_runtime_call(meth) );
+ ins_pipe(pipe_class_dummy);
+ ins_alignment(2);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+// Emits a single 2-byte BR to $jump_target. method_oop is constrained to the
+// inline_cache_regP operand class but is not referenced by the encoding.
+instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
+ match(TailCall jump_target method_oop);
+ ins_cost(CALL_COST);
+ size(2);
+ format %{ "Jmp $jump_target\t# $method_oop holds method oop" %}
+ ins_encode %{ __ z_br($jump_target$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Return Instruction
+// Matches Return and emits a 2-byte BR through Z_R14 (the link register, as
+// stated in the format string).
+instruct Ret() %{
+ match(Return);
+ size(2);
+ format %{ "BR(Z_R14) // branch to link register" %}
+ ins_encode %{ __ z_br(Z_R14); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Tail Jump; remove the return address; jump to target.
+// TailCall above leaves the return address around.
+// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
+// ex_oop (Exception Oop) is constrained to the rarg1RegP operand class, so it
+// is already in place at the jump. The encoding loads the return pc saved in
+// the frame at Z_SP into Z_ARG2 (the issuing pc) and then branches to
+// $jump_target; the two instructions take the declared 8 bytes.
+instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{
+ match(TailJump jump_target ex_oop);
+ ins_cost(CALL_COST);
+ size(8);
+ format %{ "TailJump $jump_target" %}
+ ins_encode %{
+ __ z_lg(Z_ARG2/* issuing pc */, _z_abi(return_pc), Z_SP);
+ __ z_br($jump_target$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+// The rule has size(0), cost 0 and an empty encoding; its only effect is to
+// bind the CreateEx result to the rarg1RegP operand.
+instruct CreateException(rarg1RegP ex_oop) %{
+ match(Set ex_oop (CreateEx));
+ ins_cost(0);
+ size(0);
+ format %{ "# exception oop; no code emitted" %}
+ ins_encode(/*empty*/);
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Rethrow exception: The exception oop will come in the first
+// argument position. Then JUMP (not call) to the rethrow stub code.
+// The address of OptoRuntime::rethrow_stub() is materialized in Z_R1_scratch
+// and reached with a plain BR, so no link register is set here.
+instruct RethrowException() %{
+ match(Rethrow);
+ ins_cost(CALL_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "Jmp rethrow_stub" %}
+ ins_encode %{
+ cbuf.set_insts_mark();
+ __ load_const_optimized(Z_R1_scratch, (address)OptoRuntime::rethrow_stub());
+ __ z_br(Z_R1_scratch);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Die now.
+// Matches Halt and emits z_illtrap(), declared as 2 bytes.
+instruct ShouldNotReachHere() %{
+ match(Halt);
+ ins_cost(CALL_COST);
+ size(2);
+ format %{ "ILLTRAP; ShouldNotReachHere" %}
+ ins_encode %{ __ z_illtrap(); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
+// array for an instance of the superklass. Set a hidden internal cache on a
+// hit (cache is checked with exposed code in gen_subtype_check()). Return
+// not zero for a miss or zero for a hit. The encoding ALSO sets flags.
+//
+// All operands are pinned to fixed argument-register classes (index: rarg1RegP,
+// sub: rarg2RegP, super: rarg3RegP); the rarg4RegP/rarg5RegP operands and the
+// flags are declared killed. The stub address is loaded into Z_ARG4 and the
+// stub is called via BASR with Z_R14 as link register.
+instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, flagsReg pcc,
+ rarg4RegP scratch1, rarg5RegP scratch2) %{
+ match(Set index (PartialSubtypeCheck sub super));
+ effect(KILL pcc, KILL scratch1, KILL scratch2);
+ ins_cost(10 * DEFAULT_COST);
+ size(12);
+ format %{ " CALL PartialSubtypeCheck\n" %}
+ ins_encode %{
+ AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check());
+ __ load_const_optimized(Z_ARG4, stub_address);
+ __ z_basr(Z_R14, Z_ARG4);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Flags-only variant of the partial subtype check: matches the check result
+// compared against zero and defines just the flags register (pcc). The index
+// register is not a result here; it is declared killed together with the two
+// scratch operands. The emitted stub call is the same load-address-into-Z_ARG4
+// plus BASR sequence.
+instruct partialSubtypeCheck_vs_zero(flagsReg pcc, rarg2RegP sub, rarg3RegP super, immP0 zero,
+ rarg1RegP index, rarg4RegP scratch1, rarg5RegP scratch2) %{
+ match(Set pcc (CmpI (PartialSubtypeCheck sub super) zero));
+ effect(KILL scratch1, KILL scratch2, KILL index);
+ ins_cost(10 * DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "CALL PartialSubtypeCheck_vs_zero\n" %}
+ ins_encode %{
+ AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check());
+ __ load_const_optimized(Z_ARG4, stub_address);
+ __ z_basr(Z_R14, Z_ARG4);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// inlined locking and unlocking
+
+// Inlined monitor enter: defines the flags register from FastLock by calling
+// compiler_fast_lock_object with two TEMP pointer registers. The final
+// argument is true only when UseBiasedLocking && !UseOptoBiasInlining.
+// NOTE(review): the format text mentions Z_ARG4/Z_ARG5, but tmp1/tmp2 are
+// plain iRegP TEMPs, not fixed registers - confirm the text is still accurate.
+instruct cmpFastLock(flagsReg pcc, iRegP_N2P oop, iRegP_N2P box, iRegP tmp1, iRegP tmp2) %{
+ match(Set pcc (FastLock oop box));
+ effect(TEMP tmp1, TEMP tmp2);
+ ins_cost(100);
+ // TODO: s390 port size(VARIABLE_SIZE); // Uses load_const_optimized.
+ format %{ "FASTLOCK $oop, $box; KILL Z_ARG4, Z_ARG5" %}
+ ins_encode %{ __ compiler_fast_lock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register,
+ UseBiasedLocking && !UseOptoBiasInlining); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Inlined monitor exit: defines the flags register from FastUnlock by calling
+// compiler_fast_unlock_object with two TEMP pointer registers. The final
+// argument is true only when UseBiasedLocking && !UseOptoBiasInlining.
+// NOTE(review): the format text mentions Z_ARG4/Z_ARG5, but tmp1/tmp2 are
+// plain iRegP TEMPs, not fixed registers - confirm the text is still accurate.
+instruct cmpFastUnlock(flagsReg pcc, iRegP_N2P oop, iRegP_N2P box, iRegP tmp1, iRegP tmp2) %{
+ match(Set pcc (FastUnlock oop box));
+ effect(TEMP tmp1, TEMP tmp2);
+ ins_cost(100);
+ // TODO: s390 port size(FIXED_SIZE); // emitted code depends on UseBiasedLocking being on/off.
+ format %{ "FASTUNLOCK $oop, $box; KILL Z_ARG4, Z_ARG5" %}
+ ins_encode %{ __ compiler_fast_unlock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register,
+ UseBiasedLocking && !UseOptoBiasInlining); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ClearArray with a constant count of operand class SSlenDW. The count is
+// passed to Clear_Array_Const as a compile-time constant; only the flags are
+// declared killed. Lowest cost (100) of the ClearArray rules.
+instruct inlineCallClearArrayConst(SSlenDW cnt, iRegP_N2P base, Universe dummy, flagsReg cr) %{
+ match(Set dummy (ClearArray cnt base));
+ effect(KILL cr);
+ ins_cost(100);
+ // TODO: s390 port size(VARIABLE_SIZE); // Variable in size due to varying #instructions.
+ format %{ "ClearArrayConst $cnt,$base" %}
+ ins_encode %{ __ Clear_Array_Const($cnt$$constant, $base$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ClearArray with an arbitrary immL constant count. Clear_Array_Const_Big
+// additionally needs the even/odd register pair srcA/srcL as temporaries;
+// per the inline remark R0 and R1 are clobbered as well. Cost 200.
+instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
+ match(Set dummy (ClearArray cnt base));
+ effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
+ ins_cost(200);
+ // TODO: s390 port size(VARIABLE_SIZE); // Variable in size due to optimized constant loader.
+ format %{ "ClearArrayConstBig $cnt,$base" %}
+ ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $srcA$$Register, $srcL$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ClearArray with the count in a register. Clear_Array receives the count and
+// base registers plus the even/odd register pair srcA/srcL as temporaries;
+// per the inline remark R0 and R1 are clobbered as well. Cost 300.
+instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
+ match(Set dummy (ClearArray cnt base));
+ effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
+ ins_cost(300);
+ // TODO: s390 port size(FIXED_SIZE); // z/Architecture: emitted code depends on PreferLAoverADD being on/off.
+ format %{ "ClearArrayVar $cnt,$base" %}
+ ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $srcA$$Register, $srcL$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// CompactStrings
+
+// String equals
+// StrEquals with LL encoding and the count in a register. Delegates to
+// array_equals with its first argument false (the AryEq rules pass true) and
+// the byte flag set; needs an even/odd register pair as temporaries.
+instruct string_equalsL(iRegP str1, iRegP str2, iRegI cnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
+ ins_cost(300);
+ format %{ "String Equals byte[] $str1,$str2,$cnt -> $result" %}
+ ins_encode %{
+ __ array_equals(false, $str1$$Register, $str2$$Register,
+ $cnt$$Register, $oddReg$$Register, $evenReg$$Register,
+ $result$$Register, true /* byte */);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrEquals with UU encoding (or encoding 'none') and the count in a register.
+// Same array_equals call as the LL rule, but with the byte flag cleared.
+instruct string_equalsU(iRegP str1, iRegP str2, iRegI cnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::none);
+ ins_cost(300);
+ format %{ "String Equals char[] $str1,$str2,$cnt -> $result" %}
+ ins_encode %{
+ __ array_equals(false, $str1$$Register, $str2$$Register,
+ $cnt$$Register, $oddReg$$Register, $evenReg$$Register,
+ $result$$Register, false /* byte */);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrEquals with a constant uimmI8 count, for LL or UU encoding. A non-zero
+// count is compared with one CLC; result is then preset to 1 and reset to 0
+// when the CLC reported inequality. With a zero count nothing is compared and
+// the result is simply 1. Z_R0_scratch is written on the LOCR path.
+instruct string_equals_imm(iRegP str1, iRegP str2, uimmI8 cnt, iRegI result, flagsReg cr) %{
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(KILL cr); // R0 is killed, too.
+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL || ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
+ ins_cost(100);
+ format %{ "String Equals byte[] $str1,$str2,$cnt -> $result" %}
+ ins_encode %{
+ const int cnt_imm = $cnt$$constant;
+ // The CLC length argument is passed as count - 1.
+ if (cnt_imm) { __ z_clc(0, cnt_imm - 1, $str1$$Register, 0, $str2$$Register); }
+ // Loading the default result happens after the compare and is relied upon
+ // to leave the condition code intact.
+ __ z_lhi($result$$Register, 1);
+ if (cnt_imm) {
+ if (VM_Version::has_LoadStoreConditional()) {
+ __ z_lhi(Z_R0_scratch, 0);
+ __ z_locr($result$$Register, Z_R0_scratch, Assembler::bcondNotEqual);
+ } else {
+ Label Lskip;
+ __ z_bre(Lskip);
+ __ clear_reg($result$$Register);
+ __ bind(Lskip);
+ }
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrEquals with a constant immI8 count for encoding 'none'. Same shape as the
+// LL/UU immediate rule, except that the count is doubled before it is turned
+// into the CLC length argument ((cnt << 1) - 1).
+instruct string_equalsC_imm(iRegP str1, iRegP str2, immI8 cnt, iRegI result, flagsReg cr) %{
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(KILL cr); // R0 is killed, too.
+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::none);
+ ins_cost(100);
+ format %{ "String Equals $str1,$str2,$cnt -> $result" %}
+ ins_encode %{
+ const int cnt_imm = $cnt$$constant; // positive immI8 (7 bits used)
+ if (cnt_imm) { __ z_clc(0, (cnt_imm << 1) - 1, $str1$$Register, 0, $str2$$Register); }
+ // Preset result to 1; reset to 0 below if the compare found a difference.
+ __ z_lhi($result$$Register, 1);
+ if (cnt_imm) {
+ if (VM_Version::has_LoadStoreConditional()) {
+ __ z_lhi(Z_R0_scratch, 0);
+ __ z_locr($result$$Register, Z_R0_scratch, Assembler::bcondNotEqual);
+ } else {
+ Label Lskip;
+ __ z_bre(Lskip);
+ __ clear_reg($result$$Register);
+ __ bind(Lskip);
+ }
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Array equals
+// AryEq with LL encoding. Delegates to array_equals with its first argument
+// true, no count register (noreg) and the byte flag set.
+instruct array_equalsB(iRegP ary1, iRegP ary2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (AryEq ary1 ary2));
+ effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
+ ins_cost(300);
+ format %{ "Array Equals $ary1,$ary2 -> $result" %}
+ ins_encode %{
+ __ array_equals(true, $ary1$$Register, $ary2$$Register,
+ noreg, $oddReg$$Register, $evenReg$$Register,
+ $result$$Register, true /* byte */);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// AryEq with UU encoding. Same array_equals call as the LL rule (first
+// argument true, no count register), but with the byte flag cleared.
+instruct array_equalsC(iRegP ary1, iRegP ary2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (AryEq ary1 ary2));
+ effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+ ins_cost(300);
+ format %{ "Array Equals $ary1,$ary2 -> $result" %}
+ ins_encode %{
+ __ array_equals(true, $ary1$$Register, $ary2$$Register,
+ noreg, $oddReg$$Register, $evenReg$$Register,
+ $result$$Register, false /* byte */);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// String CompareTo
+// StrComp with LL encoding. The two counts are pinned to rarg2RegI/rarg5RegI
+// and declared USE_KILL; result is TEMP_DEF. Delegates to string_compare with
+// StrIntrinsicNode::LL.
+instruct string_compareL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
+ ins_cost(300);
+ format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+ ins_encode %{
+ __ string_compare($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register,
+ $oddReg$$Register, $evenReg$$Register,
+ $result$$Register, StrIntrinsicNode::LL);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrComp with UU encoding (or encoding 'none'). Same operand constraints as
+// the LL rule; delegates to string_compare with StrIntrinsicNode::UU.
+instruct string_compareU(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrCompNode*)n)->encoding() == StrIntrinsicNode::none);
+ ins_cost(300);
+ format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+ ins_encode %{
+ __ string_compare($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register,
+ $oddReg$$Register, $evenReg$$Register,
+ $result$$Register, StrIntrinsicNode::UU);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrComp with LU encoding (byte[] against char[], per the format string).
+// Operands are passed to string_compare in their natural order together with
+// StrIntrinsicNode::LU.
+instruct string_compareLU(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
+ ins_cost(300);
+ format %{ "String Compare byte[],char[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+ ins_encode %{
+ __ string_compare($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register,
+ $oddReg$$Register, $evenReg$$Register,
+ $result$$Register, StrIntrinsicNode::LU);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrComp with UL encoding (char[] against byte[], per the format string).
+// Unlike the other StrComp rules, the operands are handed to string_compare
+// in swapped order (str2/cnt2 first), together with StrIntrinsicNode::UL.
+instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
+ ins_cost(300);
+ format %{ "String Compare char[],byte[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+ ins_encode %{
+ __ string_compare($str2$$Register, $str1$$Register,
+ $cnt2$$Register, $cnt1$$Register,
+ $oddReg$$Register, $evenReg$$Register,
+ $result$$Register, StrIntrinsicNode::UL);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// String IndexOfChar
+// StrIndexOfChar with the searched character in a register; only enabled when
+// CompactStrings is set. Calls string_indexof_char with the register form of
+// the character (the immediate argument is passed as 0 and unused) and
+// is_byte == false.
+instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ predicate(CompactStrings);
+ match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
+ effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+ ins_cost(200);
+ format %{ "String IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
+ ins_encode %{
+ __ string_indexof_char($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $ch$$Register, 0 /* unused, ch is in register */,
+ $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrIndexOf for a constant needle (immP) with an immI_1 needle count, UU
+// encoding (or encoding 'none'). The single UTF-16 character is assembled at
+// compile time from bytes 0 and 1 of the constant needle array, high byte
+// first; the little-endian case is Unimplemented(). The search itself is
+// string_indexof_char with the character as immediate and is_byte == false.
+instruct indexOf_imm1_U(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+ effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+ ins_cost(200);
+ format %{ "String IndexOf U [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+ ins_encode %{
+ immPOper *needleOper = (immPOper *)$needle;
+ const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+ ciTypeArray* needle_values = t->const_oop()->as_type_array(); // Pointer to live char *
+ jchar chr;
+#ifdef VM_LITTLE_ENDIAN
+ Unimplemented();
+#else
+ // Each byte is widened through unsigned char so it is not sign-extended.
+ chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
+ ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
+#endif
+ __ string_indexof_char($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ noreg, chr,
+ $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrIndexOf for a constant needle (immP) with an immI_1 needle count, LL
+// encoding. The character is taken at compile time from byte 0 of the
+// constant needle array and passed as immediate to string_indexof_char with
+// is_byte == true.
+instruct indexOf_imm1_L(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+ effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+ ins_cost(200);
+ format %{ "String IndexOf L [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+ ins_encode %{
+ immPOper *needleOper = (immPOper *)$needle;
+ const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+ ciTypeArray* needle_values = t->const_oop()->as_type_array(); // Pointer to live char *
+ jchar chr = (jchar)needle_values->element_value(0).as_byte();
+ __ string_indexof_char($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ noreg, chr,
+ $oddReg$$Register, $evenReg$$Register, true /*is_byte*/);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrIndexOf for a constant needle (immP) with an immI_1 needle count, UL
+// encoding. The character is taken at compile time from byte 0 of the
+// constant needle array and passed as immediate to string_indexof_char with
+// is_byte == false.
+instruct indexOf_imm1_UL(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+ effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+ ins_cost(200);
+ format %{ "String IndexOf UL [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+ ins_encode %{
+ immPOper *needleOper = (immPOper *)$needle;
+ const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+ ciTypeArray* needle_values = t->const_oop()->as_type_array(); // Pointer to live char *
+ jchar chr = (jchar)needle_values->element_value(0).as_byte();
+ __ string_indexof_char($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ noreg, chr,
+ $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// String IndexOf
+// StrIndexOf with the needle in a register and a constant immI16 needle
+// count, UU encoding (or encoding 'none'). haycnt is pinned to rarg2RegI and
+// declared USE_KILL. string_indexof gets noreg for the needle-count register
+// and the constant count instead.
+instruct indexOf_imm_U(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+ effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+ ins_cost(250);
+ format %{ "String IndexOf U [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+ ins_encode %{
+ __ string_indexof($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $needle$$Register, noreg, $needlecntImm$$constant,
+ $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UU);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrIndexOf with the needle in a register and a constant immI16 needle
+// count, LL encoding. string_indexof gets noreg for the needle-count register
+// and the constant count instead, together with StrIntrinsicNode::LL.
+instruct indexOf_imm_L(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+ effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+ ins_cost(250);
+ format %{ "String IndexOf L [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+ ins_encode %{
+ __ string_indexof($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $needle$$Register, noreg, $needlecntImm$$constant,
+ $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::LL);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrIndexOf with the needle in a register and a constant immI16 needle
+// count, UL encoding. string_indexof gets noreg for the needle-count register
+// and the constant count instead, together with StrIntrinsicNode::UL.
+instruct indexOf_imm_UL(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+ effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+ ins_cost(250);
+ format %{ "String IndexOf UL [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+ ins_encode %{
+ __ string_indexof($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $needle$$Register, noreg, $needlecntImm$$constant,
+ $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UL);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrIndexOf with both counts in registers, UU encoding (or encoding 'none').
+// haycnt/needlecnt are pinned to rarg2RegI/rarg5RegI and declared USE_KILL.
+// string_indexof gets the needle-count register and 0 as constant count.
+instruct indexOf_U(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+ effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+ ins_cost(300);
+ format %{ "String IndexOf U [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+ ins_encode %{
+ __ string_indexof($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $needle$$Register, $needlecnt$$Register, 0,
+ $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UU);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrIndexOf with both counts in registers, LL encoding. string_indexof gets
+// the needle-count register, 0 as constant count and StrIntrinsicNode::LL.
+instruct indexOf_L(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+ effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+ ins_cost(300);
+ format %{ "String IndexOf L [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+ ins_encode %{
+ __ string_indexof($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $needle$$Register, $needlecnt$$Register, 0,
+ $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::LL);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StrIndexOf with both counts in registers, UL encoding. string_indexof gets
+// the needle-count register, 0 as constant count and StrIntrinsicNode::UL.
+instruct indexOf_UL(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+ effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+ ins_cost(300);
+ format %{ "String IndexOf UL [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+ ins_encode %{
+ __ string_indexof($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $needle$$Register, $needlecnt$$Register, 0,
+ $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UL);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// char[] to byte[] compression
+// Matches StrCompressedCopy. dst (rarg5RegP) and len (roddRegI) are USE_KILL,
+// result is TEMP_DEF; the even partner register and tmp are temporaries.
+// Delegates to string_compress with a single tmp register.
+instruct string_compress(iRegP src, rarg5RegP dst, iRegI result, roddRegI len, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+ match(Set result (StrCompressedCopy src (Binary dst len)));
+ effect(TEMP_DEF result, USE_KILL dst, USE_KILL len, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+ ins_cost(300);
+ format %{ "String Compress $src->$dst($len) -> $result" %}
+ ins_encode %{
+ __ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
+ $evenReg$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// byte[] to char[] inflation. trot implementation is shorter, but slower than the unrolled icm(h) loop.
+//instruct string_inflate_trot(Universe dummy, iRegP src, revenRegP dst, roddRegI len, iRegI tmp, flagsReg cr) %{
+// match(Set dummy (StrInflatedCopy src (Binary dst len)));
+// effect(USE_KILL dst, USE_KILL len, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+// predicate(VM_Version::has_ETF2Enhancements());
+// ins_cost(300);
+// format %{ "String Inflate (trot) $dst,$src($len)" %}
+// ins_encode %{
+// __ string_inflate_trot($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register);
+// %}
+// ins_pipe(pipe_class_dummy);
+//%}
+
+// byte[] to char[] inflation
+// Matches StrInflatedCopy; produces no value (result operand is the Universe
+// dummy). src (rarg5RegP) and len (roddRegI) are USE_KILL; the even partner
+// register and tmp are temporaries. Delegates to string_inflate.
+instruct string_inflate(Universe dummy, rarg5RegP src, iRegP dst, roddRegI len, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+ match(Set dummy (StrInflatedCopy src (Binary dst len)));
+ effect(USE_KILL src, USE_KILL len, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+ ins_cost(300);
+ format %{ "String Inflate $src->$dst($len)" %}
+ ins_encode %{
+ __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $evenReg$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// StringCoding.java intrinsics
+// Matches HasNegatives on a byte[] (per the format string). ary1 (rarg5RegP)
+// is USE_KILL, result is TEMP_DEF; an even/odd register pair and tmp serve as
+// temporaries. Delegates to has_negatives.
+instruct has_negatives(rarg5RegP ary1, iRegI len, iRegI result, roddRegI oddReg, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+ match(Set result (HasNegatives ary1 len));
+ effect(TEMP_DEF result, USE_KILL ary1, TEMP oddReg, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+ ins_cost(300);
+ format %{ "has negatives byte[] $ary1($len) -> $result" %}
+ ins_encode %{
+ __ has_negatives($result$$Register, $ary1$$Register, $len$$Register,
+ $oddReg$$Register, $evenReg$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// encode char[] to byte[] in ISO_8859_1
+// Matches EncodeISOArray. Implemented with the same string_compress helper as
+// StrCompressedCopy, called here with an additional second temp register
+// (tmp2). src (rarg5RegP) and len (roddRegI) are USE_KILL; result is TEMP_DEF.
+instruct encode_iso_array(rarg5RegP src, iRegP dst, iRegI result, roddRegI len, revenRegI evenReg, iRegI tmp, iRegI tmp2, flagsReg cr) %{
+ match(Set result (EncodeISOArray src (Binary dst len)));
+ effect(TEMP_DEF result, USE_KILL src, USE_KILL len, TEMP evenReg, TEMP tmp, TEMP tmp2, KILL cr); // R0, R1 are killed, too.
+ ins_cost(300);
+ format %{ "Encode array $src->$dst($len) -> $result" %}
+ ins_encode %{
+ __ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
+ $evenReg$$Register, $tmp$$Register, $tmp2$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch (root_instr_name [preceding_instruction]*);
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+// [, ...]);
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace (instr_name([instruction_number.operand_name]*));
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser. An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == EAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(eRegI dst, eRegI src) %{
+// match(Set dst (CopyI src));
+// %}
+//
+// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+// match(Set dst (AddI dst src));
+// effect(KILL cr);
+// %}
+//
+// // Change (inc mov) to lea
+// peephole %{
+// // increment preceded by register-register move
+// peepmatch (incI_eReg movI);
+// // require that the destination register of the increment
+// // match the destination register of the move
+// peepconstraint (0.dst == 1.dst);
+// // construct a replacement instruction that sets
+// // the destination to (move's source register + one)
+// peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// Implementation no longer uses movX instructions since
+// machine-independent system no longer uses CopyX nodes.
+//
+// peephole %{
+// peepmatch (incI_eReg movI);
+// peepconstraint (0.dst == 1.dst);
+// peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+// peepmatch (decI_eReg movI);
+// peepconstraint (0.dst == 1.dst);
+// peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+// peepmatch (addI_eReg_imm movI);
+// peepconstraint (0.dst == 1.dst);
+// peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+// peepmatch (addP_eReg_imm movP);
+// peepconstraint (0.dst == 1.dst);
+// peepreplace (leaP_eReg_immI(0.dst 1.src 0.src));
+// %}
+
+
+// This peephole rule does not work, probably because ADLC can't handle two effects:
+// Effect 1 is defining 0.op1 and effect 2 is setting CC
+// condense a load from memory and subsequent test for zero
+// into a single, more efficient ICM instruction.
+// peephole %{
+// peepmatch (compI_iReg_imm0 loadI);
+// peepconstraint (1.dst == 0.op1);
+// peepreplace (loadtest15_iReg_mem(0.op1 0.op1 1.mem));
+// %}
+
+// // Change a spill followed by a reload of the same value to only the spill.
+// instruct storeI(memory mem, eRegI src) %{
+// match(Set mem (StoreI mem src));
+// %}
+//
+// instruct loadI(eRegI dst, memory mem) %{
+// match(Set dst (LoadI mem));
+// %}
+// In the rules below, instruction 0 is the load (root) and instruction 1 the preceding store. NOTE(review): the loadP/storeP rule names the store's memory operand 'dst', not 'mem' - confirm against storeP's operand list.
+peephole %{
+ peepmatch (loadI storeI);
+ peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
+ peepreplace (storeI(1.mem 1.mem 1.src));
+%}
+
+peephole %{
+ peepmatch (loadL storeL);
+ peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
+ peepreplace (storeL(1.mem 1.mem 1.src));
+%}
+
+peephole %{
+ peepmatch (loadP storeP);
+ peepconstraint (1.src == 0.dst, 1.dst == 0.mem);
+ peepreplace (storeP(1.dst 1.dst 1.src));
+%}
+
+//----------SUPERWORD RULES---------------------------------------------------
+
+// Expand rules for special cases
+
+instruct expand_storeF(stackSlotF mem, regF src) %{
+ // No match rule, false predicate, for expand only. Stores the float register src into the stack slot mem (STE).
+ effect(DEF mem, USE src);
+ predicate(false); // Never selected by the matcher; only instantiated from an expand block.
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "STE $src,$mem\t # replicate(float2stack)" %}
+ opcode(STE_ZOPC, STE_ZOPC);
+ ins_encode(z_form_rt_mem(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct expand_LoadLogical_I2L(iRegL dst, stackSlotF mem) %{
+ // No match rule, false predicate, for expand only. Loads the float stack slot mem into dst as an unsigned 32-bit value (LLGF).
+ effect(DEF dst, USE mem);
+ predicate(false); // Never selected by the matcher; only instantiated from an expand block.
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LLGF $dst,$mem\t # replicate(stack2reg(unsigned))" %}
+ opcode(LLGF_ZOPC, LLGF_ZOPC);
+ ins_encode(z_form_rt_mem(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int to packed int values (8 Bytes). Used from the expand block of Repl2F_reg_indirect.
+instruct expand_Repl2I_reg(iRegL dst, iRegL src) %{
+ // Dummy match rule, false predicate, for expand only.
+ match(Set dst (ConvI2L src));
+ predicate(false);
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "REPLIC2F $dst,$src\t # replicate(pack2F)" %}
+ ins_encode %{
+ if ($dst$$Register == $src$$Register) {
+ __ z_sllg(Z_R0_scratch, $src$$Register, 64-32); // Build the shifted copy in the scratch register so that src (== dst) stays intact.
+ __ z_ogr($dst$$Register, Z_R0_scratch);
+ } else {
+ __ z_sllg($dst$$Register, $src$$Register, 64-32);
+ __ z_ogr( $dst$$Register, $src$$Register); // dst = (src << 32) | src; assumes the upper 32 bits of src are zero - TODO confirm.
+ }
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replication
+
+// Exploit rotate_then_insert, if available
+// Replicate scalar byte to packed byte values (8 Bytes).
+instruct Repl8B_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{
+ match(Set dst (ReplicateB src));
+ effect(KILL cr);
+ predicate((n->as_Vector()->length() == 8)); // Only for vectors with 8 lanes.
+ format %{ "REPLIC8B $dst,$src\t # pack8B" %}
+ ins_encode %{
+ if ($dst$$Register != $src$$Register) {
+ __ z_lgr($dst$$Register, $src$$Register); // Work in dst; src is left untouched.
+ }
+ __ rotate_then_insert($dst$$Register, $dst$$Register, 48, 55, 8, false); // Presumably 1 byte -> 2 bytes (bits 48..63); verify against rotate_then_insert().
+ __ rotate_then_insert($dst$$Register, $dst$$Register, 32, 47, 16, false); // Presumably 2 bytes -> 4 bytes (bits 32..63).
+ __ rotate_then_insert($dst$$Register, $dst$$Register, 0, 31, 32, false); // Presumably 4 bytes -> 8 bytes (bits 0..63).
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar byte constant (neither 0 nor -1, see assert) to packed byte values (8 Bytes).
+instruct Repl8B_imm(iRegL dst, immB_n0m1 src) %{
+ match(Set dst (ReplicateB src));
+ predicate(n->as_Vector()->length() == 8); // Only for vectors with 8 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC8B $dst,$src\t # pack8B imm" %}
+ ins_encode %{
+ int64_t Isrc8 = $src$$constant & 0x000000ff; // Keep only the low byte of the constant.
+ int64_t Isrc16 = Isrc8 << 8 | Isrc8; // Byte duplicated into 16 bits.
+ int64_t Isrc32 = Isrc16 << 16 | Isrc16; // Byte duplicated into 32 bits.
+ assert(Isrc8 != 0x000000ff && Isrc8 != 0, "should be handled by other match rules.");
+
+ __ z_llilf($dst$$Register, Isrc32); // LLILF: set the low word (and clear the high word).
+ __ z_iihf($dst$$Register, Isrc32); // IIHF: insert the same pattern into the high word.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar byte constant 0 to packed byte values (8 Bytes); presumably z_laz(dst, 0, Z_R0) just loads 0 - verify.
+instruct Repl8B_imm0(iRegL dst, immI_0 src) %{
+ match(Set dst (ReplicateB src));
+ predicate(n->as_Vector()->length() == 8); // Only for vectors with 8 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC8B $dst,$src\t # pack8B imm0" %}
+ ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar byte constant -1 to packed byte values (8 Bytes): loading -1 as a 64-bit value sets all bits.
+instruct Repl8B_immm1(iRegL dst, immB_minus1 src) %{
+ match(Set dst (ReplicateB src));
+ predicate(n->as_Vector()->length() == 8); // Only for vectors with 8 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC8B $dst,$src\t # pack8B immm1" %}
+ ins_encode %{ __ z_lghi($dst$$Register, -1); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Exploit rotate_then_insert, if available
+// Replicate scalar short to packed short values (8 Bytes).
+instruct Repl4S_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{
+ match(Set dst (ReplicateS src));
+ effect(KILL cr);
+ predicate((n->as_Vector()->length() == 4)); // Only for vectors with 4 lanes.
+ format %{ "REPLIC4S $dst,$src\t # pack4S" %}
+ ins_encode %{
+ if ($dst$$Register != $src$$Register) {
+ __ z_lgr($dst$$Register, $src$$Register); // Work in dst; src is left untouched.
+ }
+ __ rotate_then_insert($dst$$Register, $dst$$Register, 32, 47, 16, false); // Presumably 1 short -> 2 shorts (bits 32..63); verify against rotate_then_insert().
+ __ rotate_then_insert($dst$$Register, $dst$$Register, 0, 31, 32, false); // Presumably 2 shorts -> 4 shorts (bits 0..63).
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar short constant (neither 0 nor -1, see assert) to packed short values (8 Bytes).
+instruct Repl4S_imm(iRegL dst, immS_n0m1 src) %{
+ match(Set dst (ReplicateS src));
+ predicate(n->as_Vector()->length() == 4); // Only for vectors with 4 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC4S $dst,$src\t # pack4S imm" %}
+ ins_encode %{
+ int64_t Isrc16 = $src$$constant & 0x0000ffff; // Keep only the low 16 bits of the constant.
+ int64_t Isrc32 = Isrc16 << 16 | Isrc16; // Short duplicated into 32 bits.
+ assert(Isrc16 != 0x0000ffff && Isrc16 != 0, "Repl4S_imm: (src == " INT64_FORMAT
+ ") should be handled by other match rules.", $src$$constant);
+
+ __ z_llilf($dst$$Register, Isrc32); // LLILF: set the low word (and clear the high word).
+ __ z_iihf($dst$$Register, Isrc32); // IIHF: insert the same pattern into the high word.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar short constant 0 to packed short values (8 Bytes); presumably z_laz(dst, 0, Z_R0) just loads 0 - verify.
+instruct Repl4S_imm0(iRegL dst, immI_0 src) %{
+ match(Set dst (ReplicateS src));
+ predicate(n->as_Vector()->length() == 4); // Only for vectors with 4 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC4S $dst,$src\t # pack4S imm0" %}
+ ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar short constant -1 to packed short values (8 Bytes): loading -1 as a 64-bit value sets all bits.
+instruct Repl4S_immm1(iRegL dst, immS_minus1 src) %{
+ match(Set dst (ReplicateS src));
+ predicate(n->as_Vector()->length() == 4); // Only for vectors with 4 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC4S $dst,$src\t # pack4S immm1" %}
+ ins_encode %{ __ z_lghi($dst$$Register, -1); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Exploit rotate_then_insert, if available.
+// Replicate scalar int to packed int values (8 Bytes).
+instruct Repl2I_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{
+ match(Set dst (ReplicateI src));
+ effect(KILL cr);
+ predicate((n->as_Vector()->length() == 2)); // Only for vectors with 2 lanes.
+ format %{ "REPLIC2I $dst,$src\t # pack2I" %}
+ ins_encode %{
+ if ($dst$$Register != $src$$Register) {
+ __ z_lgr($dst$$Register, $src$$Register); // Work in dst; src is left untouched.
+ }
+ __ rotate_then_insert($dst$$Register, $dst$$Register, 0, 31, 32, false); // Presumably copies the low word into the high word; verify against rotate_then_insert().
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int constant (neither 0 nor -1, see assert) to packed int values (8 Bytes).
+instruct Repl2I_imm(iRegL dst, immI_n0m1 src) %{
+ match(Set dst (ReplicateI src));
+ predicate(n->as_Vector()->length() == 2); // Only for vectors with 2 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC2I $dst,$src\t # pack2I imm" %}
+ ins_encode %{
+ int64_t Isrc32 = $src$$constant;
+ assert(Isrc32 != -1 && Isrc32 != 0, "should be handled by other match rules.");
+
+ __ z_llilf($dst$$Register, Isrc32); // LLILF: set the low word (and clear the high word).
+ __ z_iihf($dst$$Register, Isrc32); // IIHF: insert the same value into the high word.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int constant 0 to packed int values (8 Bytes); presumably z_laz(dst, 0, Z_R0) just loads 0 - verify.
+instruct Repl2I_imm0(iRegL dst, immI_0 src) %{
+ match(Set dst (ReplicateI src));
+ predicate(n->as_Vector()->length() == 2); // Only for vectors with 2 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC2I $dst,$src\t # pack2I imm0" %}
+ ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int constant -1 to packed int values (8 Bytes): loading -1 as a 64-bit value sets all bits.
+instruct Repl2I_immm1(iRegL dst, immI_minus1 src) %{
+ match(Set dst (ReplicateI src));
+ predicate(n->as_Vector()->length() == 2); // Only for vectors with 2 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC2I $dst,$src\t # pack2I immm1" %}
+ ins_encode %{ __ z_lghi($dst$$Register, -1); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar float to packed float values in GREG (8 Bytes), going through a stack slot (no FP support enhancements).
+
+instruct Repl2F_reg_indirect(iRegL dst, regF src, flagsReg cr) %{
+ match(Set dst (ReplicateF src));
+ effect(KILL cr);
+ predicate(!VM_Version::has_FPSupportEnhancements() && n->as_Vector()->length() == 2); // Counterpart of Repl2F_reg_direct.
+ format %{ "REPLIC2F $dst,$src\t # pack2F indirect" %}
+ expand %{
+ stackSlotF tmp;
+ iRegL tmp2;
+ expand_storeF(tmp, src);
+ expand_LoadLogical_I2L(tmp2, tmp);
+ expand_Repl2I_reg(dst, tmp2);
+ %}
+%}
+
+// Replicate scalar float to packed float values in GREG (8 Bytes).
+instruct Repl2F_reg_direct(iRegL dst, regF src, flagsReg cr) %{
+ match(Set dst (ReplicateF src));
+ effect(KILL cr);
+ predicate(VM_Version::has_FPSupportEnhancements() && n->as_Vector()->length() == 2); // Counterpart of Repl2F_reg_indirect.
+ format %{ "REPLIC2F $dst,$src\t # pack2F direct" %}
+ ins_encode %{
+ assert(VM_Version::has_FPSupportEnhancements(), "encoder should never be called on old H/W");
+ __ z_lgdr($dst$$Register, $src$$FloatRegister); // Copy the FPR contents into the GPR; the float sits in the high word.
+
+ __ z_srlg(Z_R0_scratch, $dst$$Register, 32); // Floats are left-justified in 64bit reg.
+ __ z_iilf($dst$$Register, 0); // Save a "result not ready" stall.
+ __ z_ogr($dst$$Register, Z_R0_scratch); // Merge the low-word copy into dst: both words now hold the float.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar float immediate to packed float values in GREG (8 Bytes).
+instruct Repl2F_imm(iRegL dst, immF src) %{
+ match(Set dst (ReplicateF src));
+ predicate(n->as_Vector()->length() == 2); // Only for vectors with 2 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC2F $dst,$src\t # pack2F imm" %}
+ ins_encode %{
+ union { // Anonymous union: read the bit pattern of the float constant as an int.
+ int Isrc32;
+ float Fsrc32;
+ };
+ Fsrc32 = $src$$constant;
+ __ z_llilf($dst$$Register, Isrc32); // LLILF: set the low word (and clear the high word).
+ __ z_iihf($dst$$Register, Isrc32); // IIHF: insert the same bit pattern into the high word.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar float immediate zeroes to packed float values in GREG (8 Bytes).
+// Do this only for 'real' zeroes, especially don't lose the sign of negative zeroes.
+instruct Repl2F_imm0(iRegL dst, immFp0 src) %{
+ match(Set dst (ReplicateF src));
+ predicate(n->as_Vector()->length() == 2); // Only for vectors with 2 lanes.
+ ins_should_rematerialize(true);
+ format %{ "REPLIC2F $dst,$src\t # pack2F imm0" %}
+ ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store
+
+// Store Aligned Packed Byte register to memory (8 Bytes). The vector lives in a 64-bit GPR, so a plain STG is used.
+instruct storeA8B(memory mem, iRegL src) %{
+ match(Set mem (StoreVector mem src));
+ predicate(n->as_StoreVector()->memory_size() == 8); // Only 8-byte vectors.
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "STG $src,$mem\t # ST(packed8B)" %}
+ opcode(STG_ZOPC, STG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(src, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load
+
+instruct loadV8(iRegL dst, memory mem) %{
+ match(Set dst (LoadVector mem));
+ predicate(n->as_LoadVector()->memory_size() == 8); // Only 8-byte vectors; they are loaded into a 64-bit GPR with LG.
+ ins_cost(MEMORY_REF_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "LG $dst,$mem\t # L(packed8B)" %}
+ opcode(LG_ZOPC, LG_ZOPC);
+ ins_encode(z_form_rt_mem_opt(dst, mem));
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------POPULATION COUNT RULES--------------------------------------------
+
+// Byte reverse
+
+instruct bytes_reverse_int(iRegI dst, iRegI src) %{
+ match(Set dst (ReverseBytesI src));
+ predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
+ ins_cost(DEFAULT_COST); // A single register-to-register instruction (LRVR).
+ size(4);
+ format %{ "LRVR $dst,$src\t# byte reverse int" %}
+ opcode(LRVR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct bytes_reverse_long(iRegL dst, iRegL src) %{
+ match(Set dst (ReverseBytesL src));
+ predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
+ ins_cost(DEFAULT_COST); // A single register-to-register instruction (LRVGR).
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "LRVGR $dst,$src\t# byte reverse long" %}
+ opcode(LRVGR_ZOPC);
+ ins_encode(z_rreform(dst, src));
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Leading zeroes
+
+// The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
+// returns the bit position of the leftmost 1 in the 64bit source register.
+// As the bits are numbered from left to right (0..63), the returned
+// position index is equivalent to the number of leading zeroes.
+// If no 1-bit is found (i.e. the register contains zero), the instruction
+// returns position 64. That's exactly what we need.
+
+instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
+ match(Set dst (CountLeadingZerosI src));
+ effect(KILL tmp, KILL cr); // tmp is the odd register paired with dst; FLOGR overwrites it.
+ predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported
+ ins_cost(3 * DEFAULT_COST);
+ size(14);
+ format %{ "SLLG $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
+ "IILH $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
+ "FLOGR $dst,$dst"
+ %}
+ ins_encode %{
+ // Performance experiments indicate that "FLOGR" is using some kind of
+ // iteration to find the leftmost "1" bit.
+ //
+ // The prior implementation zero-extended the 32-bit argument to 64 bit,
+ // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
+ // We could gain measurable speedup in micro benchmark:
+ //
+ // leading trailing
+ // z10: int 2.04 1.68
+ // long 1.00 1.02
+ // z196: int 0.99 1.23
+ // long 1.00 1.11
+ //
+ // By shifting the argument into the high-word instead of zero-extending it.
+ // No branch is needed for a zero argument: the "stop bit" inserted by IILH
+ // below makes FLOGR return 32 in that case.
+ //
+ // The previous (zero-extending) implementation is not part of this rule;
+ // it might be preferable again if "FLOGR" becomes less iterative.
+
+ // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
+ __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
+ __ z_iilh($dst$$Register, 0x8000); // Insert "stop bit" to force result 32 for zero src.
+ __ z_flogr($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
+ match(Set dst (CountLeadingZerosL src));
+ effect(KILL tmp, KILL cr); // tmp is the odd register paired with dst; FLOGR overwrites it.
+ predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "FLOGR $dst,$src \t# count leading zeros (long)" %}
+ ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// trailing zeroes
+
+// We transform the trailing zeroes problem to a leading zeroes problem
+// such that we can use the FLOGR instruction to our advantage.
+
+// With
+// tmp1 = src - 1
+// we flip all trailing zeroes to ones and the rightmost one to zero.
+// All other bits remain unchanged.
+// With the complement
+// tmp2 = ~src
+// we get all ones in the trailing zeroes positions. Thus,
+// tmp3 = tmp1 & tmp2
+// yields ones in the trailing zeroes positions and zeroes elsewhere.
+// Now we can apply FLOGR and get 64-(trailing zeroes).
+instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
+ match(Set dst (CountTrailingZerosI src));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported
+ ins_cost(8 * DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
+ format %{ "LLGFR $dst,$src \t# clear upper 32 bits (we are dealing with int)\n\t"
+ "LCGFR $tmp,$src \t# load 2's complement (32->64 bit)\n\t"
+ "AGHI $dst,-1 \t# tmp1 = src-1\n\t"
+ "AGHI $tmp,-1 \t# tmp2 = -src-1 = ~src\n\t"
+ "NGR $dst,$tmp \t# tmp3 = tmp1&tmp2\n\t"
+ "FLOGR $dst,$dst \t# count trailing zeros (int)\n\t"
+ "AHI $dst,-64 \t# tmp4 = 64-(trailing zeroes)-64\n\t"
+ "LCR $dst,$dst \t# res = -tmp4"
+ %}
+ ins_encode %{ // NOTE(review): the format above lists a different instruction sequence than the one emitted here.
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ // Rtmp is only needed for the zero-argument shortcut. With kill effect in
+ // match rule Rsrc = roddReg would be possible, saving one register.
+ Register Rtmp = $tmp$$Register;
+
+ assert_different_registers(Rdst, Rsrc, Rtmp);
+
+ // Algorithm:
+ // - Isolate the least significant (rightmost) set bit using (src & (-src)).
+ // All other bits in the result are zero.
+ // - Find the "leftmost one" bit position in the single-bit result from previous step.
+ // - 63-("leftmost one" bit position) gives the # of trailing zeros.
+ // NOTE(review): the code below additionally subtracts 1 and shifts left by 32, so it effectively computes 32-pos.
+ // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
+ Label done;
+ __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
+ __ z_lcgfr(Rtmp, Rsrc);
+ __ z_bre(done); // Taken very infrequently, good prediction, no BHT entry.
+
+ __ z_nr(Rtmp, Rsrc); // (src) & (-src) leaves nothing but least significant bit.
+ __ z_ahi(Rtmp, -1); // Subtract one to fill all trailing zero positions with ones.
+ // Use 32bit op to prevent borrow propagation (case Rdst = 0x80000000)
+ // into upper half of reg. Not relevant with sllg below.
+ __ z_sllg(Rdst, Rtmp, 32); // Shift interesting contents to upper half of register.
+ __ z_bre(done); // Shortcut for argument = 1, result will be 0.
+ // Depends on CC set by ahi above.
+ // Taken very infrequently, good prediction, no BHT entry.
+ // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
+ // after SLLG Rdst == 0(64bit)).
+ __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
+ __ add2reg(Rdst, -32); // 32-pos(leftmost1) is #trailing zeros
+ __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
+ __ bind(done);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
+ match(Set dst (CountTrailingZerosL src));
+ effect(TEMP_DEF dst, KILL tmp, KILL cr); // tmp is the odd register paired with dst; FLOGR overwrites it.
+ predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported
+ ins_cost(8 * DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
+ format %{ "LCGR $dst,$src \t# preserve src\n\t"
+ "NGR $dst,$src \t#\n\t"
+ "AGHI $dst,-1 \t# tmp1 = src-1\n\t"
+ "FLOGR $dst,$dst \t# count trailing zeros (long), kill $tmp\n\t"
+ "AHI $dst,-64 \t# tmp4 = 64-(trailing zeroes)-64\n\t"
+ "LCR $dst,$dst \t#"
+ %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
+
+ // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
+ __ z_lcgr(Rdst, Rsrc); // Rdst = -src
+ __ z_ngr(Rdst, Rsrc); // (src) & (-src) leaves nothing but the least significant set bit.
+ __ add2reg(Rdst, -1); // Ones in all trailing zero positions (all ones for src == 0).
+ __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
+ __ add2reg(Rdst, -64); // pos(leftmost1) - 64 = -(#trailing zeros)
+ __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+
+// bit count
+
+instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
+ match(Set dst (PopCountI src));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ predicate(UsePopCountInstruction && VM_Version::has_PopCount());
+ ins_cost(DEFAULT_COST);
+ size(24);
+ format %{ "POPCNT $dst,$src\t# pop count int" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ Register Rtmp = $tmp$$Register;
+
+ // Prefer compile-time assertion over run-time SIGILL.
+ assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
+ assert_different_registers(Rdst, Rtmp);
+
+ // Version 2: shows 10%(z196) improvement over original.
+ __ z_popcnt(Rdst, Rsrc); // Per-byte bit counts; only bytes 4..7 (the int) are summed below.
+ __ z_srlg(Rtmp, Rdst, 16); // calc byte4+byte6 and byte5+byte7
+ __ z_alr(Rdst, Rtmp); // into byte6 and byte7
+ __ z_srlg(Rtmp, Rdst, 8); // calc (byte4+byte6) + (byte5+byte7)
+ __ z_alr(Rdst, Rtmp); // into byte7
+ __ z_llgcr(Rdst, Rdst); // zero-extend sum
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
+ match(Set dst (PopCountL src));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ predicate(UsePopCountInstruction && VM_Version::has_PopCount());
+ ins_cost(DEFAULT_COST);
+ // TODO: s390 port size(FIXED_SIZE);
+ format %{ "POPCNT $dst,$src\t# pop count long" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ Register Rtmp = $tmp$$Register;
+
+ // Prefer compile-time assertion over run-time SIGILL.
+ assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
+ assert_different_registers(Rdst, Rtmp);
+
+ // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
+ __ z_popcnt(Rdst, Rsrc); // Per-byte bit counts in all 8 bytes.
+ __ z_ahhlr(Rdst, Rdst, Rdst); // Add the low word's counts onto the high word's counts.
+ __ z_sllg(Rtmp, Rdst, 16);
+ __ z_algr(Rdst, Rtmp); // Fold the sums two bytes to the left.
+ __ z_sllg(Rtmp, Rdst, 8);
+ __ z_algr(Rdst, Rtmp); // Total is now in the leftmost byte.
+ __ z_srlg(Rdst, Rdst, 56); // Move the total down to the rightmost byte, zero-filled.
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+
+// ============================================================================
+// TYPE PROFILING RULES
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/sharedRuntime_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,3552 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/debugInfoRec.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "registerSaver_s390.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_s390.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "opto/ad.hpp"
+#include "opto/runtime.hpp"
+#endif
+
+#ifdef PRODUCT // Product build: '__' forwards directly to masm.
+#define __ masm->
+#else // Otherwise: if Verbose is set, a block comment with file and line is emitted on every use of '__'.
+#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
+#endif // PRODUCT
+
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+#define RegisterSaver_LiveIntReg(regname) \
+ { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() } // Table entry {kind, encoding, VMReg} for an integer register.
+
+#define RegisterSaver_LiveFloatReg(regname) \
+ { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() } // Table entry {kind, encoding, VMReg} for a float register.
+
+// Registers which are not saved/restored, but which still get a frame slot.
+// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
+#define RegisterSaver_ExcludedIntReg(regname) \
+ { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
+
+// Registers which are not saved/restored, but which still get a frame slot.
+// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
+#define RegisterSaver_ExcludedFloatReg(regname) \
+ { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
+
+static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = { // Table for reg_set 'all_registers' (see live_reg_save_size()).
+ // Live registers which get spilled to the stack. Register positions
+ // in this array correspond directly to the stack layout.
+ //
+ // live float registers:
+ //
+ RegisterSaver_LiveFloatReg(Z_F0 ),
+ // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
+ RegisterSaver_LiveFloatReg(Z_F2 ),
+ RegisterSaver_LiveFloatReg(Z_F3 ),
+ RegisterSaver_LiveFloatReg(Z_F4 ),
+ RegisterSaver_LiveFloatReg(Z_F5 ),
+ RegisterSaver_LiveFloatReg(Z_F6 ),
+ RegisterSaver_LiveFloatReg(Z_F7 ),
+ RegisterSaver_LiveFloatReg(Z_F8 ),
+ RegisterSaver_LiveFloatReg(Z_F9 ),
+ RegisterSaver_LiveFloatReg(Z_F10),
+ RegisterSaver_LiveFloatReg(Z_F11),
+ RegisterSaver_LiveFloatReg(Z_F12),
+ RegisterSaver_LiveFloatReg(Z_F13),
+ RegisterSaver_LiveFloatReg(Z_F14),
+ RegisterSaver_LiveFloatReg(Z_F15),
+ // live integer registers:
+ // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
+ // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
+ RegisterSaver_LiveIntReg(Z_R2 ),
+ RegisterSaver_LiveIntReg(Z_R3 ),
+ RegisterSaver_LiveIntReg(Z_R4 ),
+ RegisterSaver_LiveIntReg(Z_R5 ),
+ RegisterSaver_LiveIntReg(Z_R6 ),
+ RegisterSaver_LiveIntReg(Z_R7 ),
+ RegisterSaver_LiveIntReg(Z_R8 ),
+ RegisterSaver_LiveIntReg(Z_R9 ),
+ RegisterSaver_LiveIntReg(Z_R10),
+ RegisterSaver_LiveIntReg(Z_R11),
+ RegisterSaver_LiveIntReg(Z_R12),
+ RegisterSaver_LiveIntReg(Z_R13),
+ // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
+ // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
+};
+
+static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = { // Table for reg_set 'all_integer_registers' (see live_reg_save_size()).
+ // Live registers which get spilled to the stack. Register positions
+ // in this array correspond directly to the stack layout.
+ //
+ // live float registers: All excluded, but they still get a stack slot to keep the same frame size.
+ //
+ RegisterSaver_ExcludedFloatReg(Z_F0 ),
+ // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
+ RegisterSaver_ExcludedFloatReg(Z_F2 ),
+ RegisterSaver_ExcludedFloatReg(Z_F3 ),
+ RegisterSaver_ExcludedFloatReg(Z_F4 ),
+ RegisterSaver_ExcludedFloatReg(Z_F5 ),
+ RegisterSaver_ExcludedFloatReg(Z_F6 ),
+ RegisterSaver_ExcludedFloatReg(Z_F7 ),
+ RegisterSaver_ExcludedFloatReg(Z_F8 ),
+ RegisterSaver_ExcludedFloatReg(Z_F9 ),
+ RegisterSaver_ExcludedFloatReg(Z_F10),
+ RegisterSaver_ExcludedFloatReg(Z_F11),
+ RegisterSaver_ExcludedFloatReg(Z_F12),
+ RegisterSaver_ExcludedFloatReg(Z_F13),
+ RegisterSaver_ExcludedFloatReg(Z_F14),
+ RegisterSaver_ExcludedFloatReg(Z_F15),
+ // live integer registers:
+ // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
+ // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
+ RegisterSaver_LiveIntReg(Z_R2 ),
+ RegisterSaver_LiveIntReg(Z_R3 ),
+ RegisterSaver_LiveIntReg(Z_R4 ),
+ RegisterSaver_LiveIntReg(Z_R5 ),
+ RegisterSaver_LiveIntReg(Z_R6 ),
+ RegisterSaver_LiveIntReg(Z_R7 ),
+ RegisterSaver_LiveIntReg(Z_R8 ),
+ RegisterSaver_LiveIntReg(Z_R9 ),
+ RegisterSaver_LiveIntReg(Z_R10),
+ RegisterSaver_LiveIntReg(Z_R11),
+ RegisterSaver_LiveIntReg(Z_R12),
+ RegisterSaver_LiveIntReg(Z_R13),
+ // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
+ // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
+};
+
+static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = { // Table for reg_set 'all_registers_except_r2' (see live_reg_save_size()).
+ // Live registers which get spilled to the stack. Register positions
+ // in this array correspond directly to the stack layout.
+ //
+ // live float registers:
+ //
+ RegisterSaver_LiveFloatReg(Z_F0 ),
+ // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
+ RegisterSaver_LiveFloatReg(Z_F2 ),
+ RegisterSaver_LiveFloatReg(Z_F3 ),
+ RegisterSaver_LiveFloatReg(Z_F4 ),
+ RegisterSaver_LiveFloatReg(Z_F5 ),
+ RegisterSaver_LiveFloatReg(Z_F6 ),
+ RegisterSaver_LiveFloatReg(Z_F7 ),
+ RegisterSaver_LiveFloatReg(Z_F8 ),
+ RegisterSaver_LiveFloatReg(Z_F9 ),
+ RegisterSaver_LiveFloatReg(Z_F10),
+ RegisterSaver_LiveFloatReg(Z_F11),
+ RegisterSaver_LiveFloatReg(Z_F12),
+ RegisterSaver_LiveFloatReg(Z_F13),
+ RegisterSaver_LiveFloatReg(Z_F14),
+ RegisterSaver_LiveFloatReg(Z_F15),
+ // live integer registers (R2 keeps its slot but is not saved):
+ // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
+ // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
+ RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
+ RegisterSaver_LiveIntReg(Z_R3 ),
+ RegisterSaver_LiveIntReg(Z_R4 ),
+ RegisterSaver_LiveIntReg(Z_R5 ),
+ RegisterSaver_LiveIntReg(Z_R6 ),
+ RegisterSaver_LiveIntReg(Z_R7 ),
+ RegisterSaver_LiveIntReg(Z_R8 ),
+ RegisterSaver_LiveIntReg(Z_R9 ),
+ RegisterSaver_LiveIntReg(Z_R10),
+ RegisterSaver_LiveIntReg(Z_R11),
+ RegisterSaver_LiveIntReg(Z_R12),
+ RegisterSaver_LiveIntReg(Z_R13),
+ // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
+ // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
+};
+
+// Live argument registers which get spilled to the stack: four float and five integer argument registers.
+static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
+ RegisterSaver_LiveFloatReg(Z_FARG1),
+ RegisterSaver_LiveFloatReg(Z_FARG2),
+ RegisterSaver_LiveFloatReg(Z_FARG3),
+ RegisterSaver_LiveFloatReg(Z_FARG4),
+ RegisterSaver_LiveIntReg(Z_ARG1),
+ RegisterSaver_LiveIntReg(Z_ARG2),
+ RegisterSaver_LiveIntReg(Z_ARG3),
+ RegisterSaver_LiveIntReg(Z_ARG4),
+ RegisterSaver_LiveIntReg(Z_ARG5)
+};
+
+static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = { // Table for reg_set 'all_volatile_registers' (see live_reg_save_size()).
+ // Live registers which get spilled to the stack. Register positions
+ // in this array correspond directly to the stack layout.
+ //
+ // live float registers:
+ //
+ RegisterSaver_LiveFloatReg(Z_F0 ),
+ // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
+ RegisterSaver_LiveFloatReg(Z_F2 ),
+ RegisterSaver_LiveFloatReg(Z_F3 ),
+ RegisterSaver_LiveFloatReg(Z_F4 ),
+ RegisterSaver_LiveFloatReg(Z_F5 ),
+ RegisterSaver_LiveFloatReg(Z_F6 ),
+ RegisterSaver_LiveFloatReg(Z_F7 ),
+ // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
+ // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
+ // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
+ // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
+ // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
+ // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
+ // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
+ // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
+ // live integer registers:
+ // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
+ // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
+ RegisterSaver_LiveIntReg(Z_R2 ),
+ RegisterSaver_LiveIntReg(Z_R3 ),
+ RegisterSaver_LiveIntReg(Z_R4 ),
+ RegisterSaver_LiveIntReg(Z_R5 ),
+ // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
+ // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
+ // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
+ // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
+ // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
+ // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
+ // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
+ // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
+ // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
+ // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
+};
+
+int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
+  int table_bytes = -1; // Byte size of the table selected by reg_set; stays -1 for an unknown set.
+  switch (reg_set) {
+    case arg_registers:           table_bytes = sizeof(RegisterSaver_LiveArgRegs);       break;
+    case all_volatile_registers:  table_bytes = sizeof(RegisterSaver_LiveVolatileRegs);  break;
+    case all_integer_registers:   table_bytes = sizeof(RegisterSaver_LiveIntRegs);       break;
+    case all_registers_except_r2: table_bytes = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
+    case all_registers:           table_bytes = sizeof(RegisterSaver_LiveRegs);          break;
+    default: ShouldNotReachHere();
+  }
+  return (table_bytes / sizeof(RegisterSaver::LiveRegType)) * reg_size; // One reg_size slot per table entry.
+}
+
+
+int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
+  return frame::z_abi_160_size + live_reg_save_size(reg_set); // z_abi_160 area plus the register save area.
+}
+
+
+// Spills the registers of reg_set into a newly pushed frame and returns the matching OopMap; return_pc is the register handed to save_return_pc().
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
+ // Record volatile registers as callee-save values in an OopMap so
+ // their save locations will be propagated to the caller frame's
+ // RegisterMap during StackFrameStream construction (needed for
+ // deoptimization; see compiledVFrame::create_stack_value).
+
+ // Calculate frame size.
+ const int frame_size_in_bytes = live_reg_frame_size(reg_set);
+ const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
+ const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set); // Equals the size of the z_abi_160 area.
+
+ // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
+ OopMap* map = new OopMap(frame_size_in_slots, 0);
+
+ int regstosave_num = 0;
+ const RegisterSaver::LiveRegType* live_regs = NULL;
+
+ switch (reg_set) { // Select the register table and its entry count.
+ case all_registers:
+ regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
+ live_regs = RegisterSaver_LiveRegs;
+ break;
+ case all_registers_except_r2:
+ regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
+ live_regs = RegisterSaver_LiveRegsWithoutR2;
+ break;
+ case all_integer_registers:
+ regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
+ live_regs = RegisterSaver_LiveIntRegs;
+ break;
+ case all_volatile_registers:
+ regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
+ live_regs = RegisterSaver_LiveVolatileRegs;
+ break;
+ case arg_registers:
+ regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
+ live_regs = RegisterSaver_LiveArgRegs;
+ break;
+ default: ShouldNotReachHere();
+ }
+
+ // Save return pc in old frame.
+ __ save_return_pc(return_pc);
+
+ // Push a new frame (includes stack linkage).
+ __ push_frame(frame_size_in_bytes);
+
+ // Register save area in new frame starts above z_abi_160 area.
+ int offset = register_save_offset;
+
+ Register first = noreg; // First register of the pending run of consecutive GPRs.
+ Register last = noreg; // Last register of that run.
+ int first_offset = -1; // Save-slot offset belonging to 'first'.
+ bool float_spilled = false; // Only updated inside DEBUG_ONLY; feeds the ordering assert below.
+
+ for (int i = 0; i < regstosave_num; i++, offset += reg_size) { // Every table entry owns one reg_size slot, saved or not.
+ int reg_num = live_regs[i].reg_num;
+ int reg_type = live_regs[i].reg_type;
+
+ switch (reg_type) {
+ case RegisterSaver::int_reg: {
+ Register reg = as_Register(reg_num);
+ if (last != reg->predecessor()) { // reg does not extend the pending run.
+ if (first != noreg) {
+ __ z_stmg(first, last, first_offset, Z_SP); // Flush the pending run first..last with a single z_stmg.
+ }
+ first = reg;
+ first_offset = offset;
+ DEBUG_ONLY(float_spilled = false);
+ }
+ last = reg;
+ assert(last != Z_R0, "r0 would require special treatment");
+ assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
+ break;
+ }
+
+ case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
+ continue; // Next iteration: offset still advances, but no callee-saved entry is recorded for this slot.
+
+ case RegisterSaver::float_reg: {
+ FloatRegister freg = as_FloatRegister(reg_num);
+ __ z_std(freg, offset, Z_SP); // Floats are stored one by one, directly into their slot.
+ DEBUG_ONLY(float_spilled = true);
+ break;
+ }
+
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+
+ // Second set_callee_saved is really a waste but we'll keep things as they were for now
+ map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg); // offset >> 2 converts bytes to 4-byte stack slots.
+ map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
+ }
+ assert(first != noreg, "Should spill at least one int reg.");
+ __ z_stmg(first, last, first_offset, Z_SP); // Store the last pending run of GPRs.
+
+ // And we're done.
+ return map;
+}
+
+
+// Rebuild the OopMap of a register save frame (the registers were already saved before).
+OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
+  // Select the register table that describes reg_set.
+  const RegisterSaver::LiveRegType* table = NULL;
+  int table_len = 0;
+
+  switch (reg_set) {
+    case arg_registers:
+      table     = RegisterSaver_LiveArgRegs;
+      table_len = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
+      break;
+    case all_volatile_registers:
+      table     = RegisterSaver_LiveVolatileRegs;
+      table_len = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
+      break;
+    case all_integer_registers:
+      table     = RegisterSaver_LiveIntRegs;
+      table_len = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
+      break;
+    case all_registers_except_r2:
+      table     = RegisterSaver_LiveRegsWithoutR2;
+      table_len = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
+      break;
+    case all_registers:
+      table     = RegisterSaver_LiveRegs;
+      table_len = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  // Frame geometry, in bytes and in 4-byte slots.
+  const int frame_bytes = live_reg_frame_size(reg_set);
+  const int frame_slots = frame_bytes / sizeof(jint);
+
+  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
+  OopMap* map = new OopMap(frame_slots, 0);
+
+  // Register save area in new frame starts above z_abi_160 area; every table entry owns one slot.
+  int slot_offset = frame_bytes - live_reg_save_size(reg_set);
+  for (int idx = 0; idx < table_len; idx++, slot_offset += reg_size) {
+    if (table[idx].reg_type >= RegisterSaver::excluded_reg) {
+      continue; // Excluded entries keep their slot but are not recorded in the map.
+    }
+    map->set_callee_saved(VMRegImpl::stack2reg(slot_offset>>2), table[idx].vmreg);
+    map->set_callee_saved(VMRegImpl::stack2reg((slot_offset + half_reg_size)>>2), table[idx].vmreg->next());
+  }
+  return map;
+}
+
+
+// Restore the registers of reg_set from the register save frame, then pop that frame and restore the return pc.
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
+ int offset;
+ const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set); // Equals the size of the z_abi_160 area.
+
+ Register first = noreg; // First register of the pending run of consecutive GPRs.
+ Register last = noreg; // Last register of that run.
+ int first_offset = -1; // Save-slot offset belonging to 'first'.
+ bool float_spilled = false; // Only updated inside DEBUG_ONLY; feeds the ordering assert below.
+
+ int regstosave_num = 0;
+ const RegisterSaver::LiveRegType* live_regs = NULL;
+
+ switch (reg_set) { // Select the register table and its entry count.
+ case all_registers:
+ regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
+ live_regs = RegisterSaver_LiveRegs;
+ break;
+ case all_registers_except_r2:
+ regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
+ live_regs = RegisterSaver_LiveRegsWithoutR2;
+ break;
+ case all_integer_registers:
+ regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
+ live_regs = RegisterSaver_LiveIntRegs;
+ break;
+ case all_volatile_registers:
+ regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
+ live_regs = RegisterSaver_LiveVolatileRegs;
+ break;
+ case arg_registers:
+ regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
+ live_regs = RegisterSaver_LiveArgRegs;
+ break;
+ default: ShouldNotReachHere();
+ }
+
+ // Restore all registers (ints and floats).
+
+ // Register save area in new frame starts above z_abi_160 area.
+ offset = register_save_offset;
+
+ for (int i = 0; i < regstosave_num; i++, offset += reg_size) { // Every table entry owns one reg_size slot.
+ int reg_num = live_regs[i].reg_num;
+ int reg_type = live_regs[i].reg_type;
+
+ switch (reg_type) {
+ case RegisterSaver::excluded_reg:
+ continue; // Nothing to reload; the loop increment still skips over the entry's slot.
+
+ case RegisterSaver::int_reg: {
+ Register reg = as_Register(reg_num);
+ if (last != reg->predecessor()) { // reg does not extend the pending run.
+ if (first != noreg) {
+ __ z_lmg(first, last, first_offset, Z_SP); // Reload the pending run first..last with a single z_lmg.
+ }
+ first = reg;
+ first_offset = offset;
+ DEBUG_ONLY(float_spilled = false);
+ }
+ last = reg;
+ assert(last != Z_R0, "r0 would require special treatment");
+ assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
+ break;
+ }
+
+ case RegisterSaver::float_reg: {
+ FloatRegister freg = as_FloatRegister(reg_num);
+ __ z_ld(freg, offset, Z_SP); // Floats are reloaded one by one from their slot.
+ DEBUG_ONLY(float_spilled = true);
+ break;
+ }
+
+ default:
+ ShouldNotReachHere();
+ }
+ }
+ assert(first != noreg, "Should spill at least one int reg.");
+ __ z_lmg(first, last, first_offset, Z_SP); // Reload the last pending run of GPRs.
+
+ // Pop the frame.
+ __ pop_frame();
+
+ // Restore the return pc.
+ __ restore_return_pc();
+}
+
+
+// Reload only the registers that might be holding a result (Z_RET, Z_FRET) from the all_registers save area.
+void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
+  // Note: no frame pop is emitted here; slots are addressed off Z_SP
+  // using the layout given by RegisterSaver_LiveRegs.
+  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
+                                   sizeof(RegisterSaver::LiveRegType);
+  const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
+
+  // Restore all result registers (ints and floats).
+  int offset = register_save_offset;
+  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
+    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
+    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
+    switch (reg_type) {
+      case RegisterSaver::excluded_reg:
+        continue; // Continue with next loop iteration.
+      case RegisterSaver::int_reg: {
+        if (as_Register(reg_num) == Z_RET) { // int result_reg
+          __ z_lg(as_Register(reg_num), offset, Z_SP);
+        }
+        break;
+      }
+      case RegisterSaver::float_reg: {
+        if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
+          __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
+        }
+        break;
+      }
+      default:
+        ShouldNotReachHere();
+    }
+  }
+}
+
+#if INCLUDE_CDS
+size_t SharedRuntime::trampoline_size() {
+ return MacroAssembler::load_const_size() + 2; // load_const plus 2 more bytes, presumably the z_br emitted by generate_trampoline() - keep both in sync.
+}
+
+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
+ // Think about using pc-relative branch.
+ __ load_const(Z_R1_scratch, destination); // Load the target address into the scratch register ...
+ __ z_br(Z_R1_scratch); // ... and branch to it.
+}
+#endif
+
+// ---------------------------------------------------------------------------
+void SharedRuntime::save_native_result(MacroAssembler * masm,
+                                       BasicType ret_type, int frame_slots) {
+  // Spill location of the native result: frame_slots stack slots above Z_SP.
+  Address result_slot(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
+
+  switch (ret_type) {
+    case T_DOUBLE:
+      __ freg2mem_opt(Z_FRET, result_slot);
+      break;
+    case T_FLOAT:
+      __ freg2mem_opt(Z_FRET, result_slot, false);
+      break;
+    case T_LONG:     // Save pointer types as long.
+    case T_VOID:
+    case T_ADDRESS:
+    case T_ARRAY:
+    case T_OBJECT:
+      __ reg2mem_opt(Z_RET, result_slot);
+      break;
+    case T_INT:      // Save shorter types as int. Do we need sign extension at restore??
+    case T_SHORT:
+    case T_CHAR:
+    case T_BYTE:
+    case T_BOOLEAN:
+      __ reg2mem_opt(Z_RET, result_slot, false);
+      break;
+  }
+}
+
+void SharedRuntime::restore_native_result(MacroAssembler *masm,
+                                          BasicType ret_type, int frame_slots) {
+  // Location the native result is reloaded from: frame_slots stack slots above Z_SP.
+  Address result_slot(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
+
+  switch (ret_type) {
+    case T_DOUBLE:
+      __ mem2freg_opt(Z_FRET, result_slot);
+      break;
+    case T_FLOAT:
+      __ mem2freg_opt(Z_FRET, result_slot, false);
+      break;
+    case T_LONG:     // Restore pointer types as long.
+    case T_VOID:
+    case T_ADDRESS:
+    case T_ARRAY:
+    case T_OBJECT:
+      __ mem2reg_opt(Z_RET, result_slot);
+      break;
+    case T_INT:      // Restore shorter types as int. Do we need sign extension at restore??
+    case T_SHORT:
+    case T_CHAR:
+    case T_BYTE:
+    case T_BOOLEAN:
+      __ mem2reg_opt(Z_RET, result_slot, false);
+      break;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go. Values in the VMRegPair regs array refer to 4-byte
+// quantities. Values less than VMRegImpl::stack0 are registers, those above
+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
+// as framesizes are fixed.
+// VMRegImpl::stack0 refers to the first slot 0(sp).
+// VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
+// up to RegisterImpl::number_of_registers are the 64-bit integer registers.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words, which are
+// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
+// units regardless of build.
+
+// The Java calling convention is a "shifted" version of the C ABI.
+// By skipping the first C ABI register we can call non-static jni methods
+// with small numbers of arguments without having to shuffle the arguments
+// at all. Since we control the java ABI we ought to at least get some
+// advantage out of it.
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+  // c2c calling conventions for compiled-compiled calls: fills regs[i] for each sig_bt[i], returns the stack slots used.
+
+  // An int/float occupies 1 slot here.
+  const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats.
+  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
+
+  const VMReg z_iarg_reg[5] = {
+    Z_R2->as_VMReg(),
+    Z_R3->as_VMReg(),
+    Z_R4->as_VMReg(),
+    Z_R5->as_VMReg(),
+    Z_R6->as_VMReg()
+  };
+  const VMReg z_farg_reg[4] = {
+    Z_F0->as_VMReg(),
+    Z_F2->as_VMReg(),
+    Z_F4->as_VMReg(),
+    Z_F6->as_VMReg()
+  };
+  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
+  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
+
+  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
+  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
+
+  // Running counters: next free stack slot, next integer arg register, next float arg register.
+  int stk = 0;
+  int ireg = 0;
+  int freg = 0;
+
+  for (int i = 0; i < total_args_passed; ++i) {
+    switch (sig_bt[i]) {
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+      case T_INT:
+        if (ireg < z_num_iarg_registers) {
+          // Put int in register.
+          regs[i].set1(z_iarg_reg[ireg]);
+          ++ireg;
+        } else {
+          // Put int on stack.
+          regs[i].set1(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_intfloat;
+        }
+        break;
+      case T_LONG:
+        assert(sig_bt[i+1] == T_VOID, "expecting half");
+        if (ireg < z_num_iarg_registers) {
+          // Put long in register.
+          regs[i].set2(z_iarg_reg[ireg]);
+          ++ireg;
+        } else {
+          // Put long on stack and align to 2 slots.
+          if (stk & 0x1) { ++stk; }
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_OBJECT:
+      case T_ARRAY:
+      case T_ADDRESS:
+        if (ireg < z_num_iarg_registers) {
+          // Put ptr in register.
+          regs[i].set2(z_iarg_reg[ireg]);
+          ++ireg;
+        } else {
+          // Put ptr on stack and align to 2 slots, because
+          // "64-bit pointers record oop-ishness on 2 aligned adjacent
+          // registers." (see OopFlow::build_oop_map).
+          if (stk & 0x1) { ++stk; }
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_FLOAT:
+        if (freg < z_num_farg_registers) {
+          // Put float in register.
+          regs[i].set1(z_farg_reg[freg]);
+          ++freg;
+        } else {
+          // Put float on stack.
+          regs[i].set1(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_intfloat;
+        }
+        break;
+      case T_DOUBLE:
+        assert(sig_bt[i+1] == T_VOID, "expecting half");
+        if (freg < z_num_farg_registers) {
+          // Put double in register.
+          regs[i].set2(z_farg_reg[freg]);
+          ++freg;
+        } else {
+          // Put double on stack and align to 2 slots.
+          if (stk & 0x1) { ++stk; }
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_VOID:
+        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+        // Do not count halves.
+        regs[i].set_bad();
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+  return round_to(stk, 2); // Stack slots used, passed through round_to(.., 2).
+}
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+                                        VMRegPair *regs,
+                                        VMRegPair *regs2,
+                                        int total_args_passed) {
+  assert(regs2 == NULL, "second VMRegPair array not used on this platform");
+
+  // Calling conventions for C runtime calls and calls to JNI native methods.
+  const VMReg z_iarg_reg[5] = {
+    Z_R2->as_VMReg(),
+    Z_R3->as_VMReg(),
+    Z_R4->as_VMReg(),
+    Z_R5->as_VMReg(),
+    Z_R6->as_VMReg()
+  };
+  const VMReg z_farg_reg[4] = {
+    Z_F0->as_VMReg(),
+    Z_F2->as_VMReg(),
+    Z_F4->as_VMReg(),
+    Z_F6->as_VMReg()
+  };
+  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
+  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
+
+  // Check calling conventions consistency.
+  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
+  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
+
+  // Avoid passing C arguments in the wrong stack slots.
+
+  // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
+  // 2 such slots, like 64 bit values do.
+  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
+  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
+
+  // Running counters: next free stack slot, next float arg register, next integer arg register.
+  // Leave room for C-compatible ABI
+  int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
+  int freg = 0;
+  int ireg = 0;
+
+  // We put the first 5 arguments into registers and the rest on the
+  // stack. Float arguments are already in their argument registers
+  // due to c2c calling conventions (see calling_convention).
+  for (int i = 0; i < total_args_passed; ++i) {
+    switch (sig_bt[i]) {
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+      case T_INT:
+        // Fall through, handle as long.
+      case T_LONG:
+      case T_OBJECT:
+      case T_ARRAY:
+      case T_ADDRESS:
+      case T_METADATA:
+        // Oops are already boxed if required (JNI).
+        if (ireg < z_num_iarg_registers) {
+          regs[i].set2(z_iarg_reg[ireg]);
+          ++ireg;
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_FLOAT:
+        if (freg < z_num_farg_registers) {
+          regs[i].set1(z_farg_reg[freg]);
+          ++freg;
+        } else {
+          regs[i].set1(VMRegImpl::stack2reg(stk+1)); // Float uses the second 4-byte slot of its 2-slot stack cell.
+          stk += inc_stk_for_intfloat;
+        }
+        break;
+      case T_DOUBLE:
+        assert(sig_bt[i+1] == T_VOID, "expecting half");
+        if (freg < z_num_farg_registers) {
+          regs[i].set2(z_farg_reg[freg]);
+          ++freg;
+        } else {
+          // Put double on stack.
+          regs[i].set2(VMRegImpl::stack2reg(stk));
+          stk += inc_stk_for_longdouble;
+        }
+        break;
+      case T_VOID:
+        // Do not count halves.
+        regs[i].set_bad();
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+  return round_to(stk, 2); // Stack slots used (including the ABI area skipped above), passed through round_to(.., 2).
+}
+
+////////////////////////////////////////////////////////////////////////
+//
+// Argument shufflers
+//
+////////////////////////////////////////////////////////////////////////
+
+//----------------------------------------------------------------------
+// The java_calling_convention describes stack locations as ideal slots on
+// a frame with no abi restrictions. Since we must observe abi restrictions
+// (like the placement of the register window) the slots must be biased by
+// the following value.
+//----------------------------------------------------------------------
+static int reg2slot(VMReg r) {
+  return SharedRuntime::out_preserve_stack_slots() + r->reg2stack(); // Bias the ideal slot number by the preserved out area.
+}
+
+static int reg2offset(VMReg r) {
+  return VMRegImpl::stack_slot_size * reg2slot(r); // Biased slot number converted to a byte offset.
+}
+
+static void verify_oop_args(MacroAssembler *masm, // Emits a verify_oop for every T_OBJECT/T_ARRAY argument when VerifyOops is set.
+ int total_args_passed,
+ const BasicType *sig_bt,
+ const VMRegPair *regs) {
+ if (!VerifyOops) { return; } // Nothing is emitted unless VerifyOops is set.
+
+ for (int i = 0; i < total_args_passed; i++) {
+ if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
+ VMReg r = regs[i].first();
+ assert(r->is_valid(), "bad oop arg");
+
+ if (r->is_stack()) { // NOTE(review): offset below adds wordSize and omits the bias applied by reg2offset(), which gen_special_dispatch uses for stack args - confirm.
+ __ z_lg(Z_R0_scratch,
+ Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
+ __ verify_oop(Z_R0_scratch);
+ } else {
+ __ verify_oop(r->as_Register()); // Oop lives in a register: verify it in place.
+ }
+ }
+ }
+}
+
+static void gen_special_dispatch(MacroAssembler *masm, // Emits the dispatch for a signature-polymorphic method handle intrinsic.
+ int total_args_passed,
+ vmIntrinsics::ID special_dispatch,
+ const BasicType *sig_bt,
+ const VMRegPair *regs) {
+ verify_oop_args(masm, total_args_passed, sig_bt, regs);
+
+ // Determine which registers hold the receiver and the trailing MemberName argument.
+ bool has_receiver = false;
+ Register receiver_reg = noreg;
+ int member_arg_pos = -1;
+ Register member_reg = noreg;
+ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
+
+ if (ref_kind != 0) {
+ member_arg_pos = total_args_passed - 1; // trailing MemberName argument
+ member_reg = Z_R9; // Known to be free at this point.
+ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+ } else {
+ guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
+ has_receiver = true; // ref_kind == 0 is only accepted for _invokeBasic; no member argument in that case.
+ }
+
+ if (member_reg != noreg) {
+ // Load the member_arg into register, if necessary.
+ assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
+ assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
+
+ VMReg r = regs[member_arg_pos].first();
+ assert(r->is_valid(), "bad member arg");
+
+ if (r->is_stack()) {
+ __ z_lg(member_reg, Address(Z_SP, reg2offset(r))); // Member arg is on the stack: load it into Z_R9.
+ } else {
+ // No data motion is needed.
+ member_reg = r->as_Register();
+ }
+ }
+
+ if (has_receiver) {
+ // Make sure the receiver is loaded into a register.
+ assert(total_args_passed > 0, "oob");
+ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+
+ VMReg r = regs[0].first();
+ assert(r->is_valid(), "bad receiver arg");
+
+ if (r->is_stack()) {
+ // Porting note: This assumes that compiled calling conventions always
+ // pass the receiver oop in a register. If this is not true on some
+ // platform, pick a temp and load the receiver from stack.
+ assert(false, "receiver always in a register");
+ receiver_reg = Z_R13; // Known to be free at this point.
+ __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r))); // Only reached when asserts are compiled out.
+ } else {
+ // No data motion is needed.
+ receiver_reg = r->as_Register();
+ }
+ }
+
+ // Figure out which address we are really jumping to:
+ MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
+ receiver_reg, member_reg,
+ /*for_compiler_entry:*/ true);
+}
+
+////////////////////////////////////////////////////////////////////////
+//
+// Argument shufflers
+//
+////////////////////////////////////////////////////////////////////////
+
+// Is a vector of 'size' bytes bigger than what is saved by default?
+// 8 bytes registers are saved by default on z/Architecture.
+bool SharedRuntime::is_wide_vector(int size) {
+  const bool wider_than_default = (size > 8);
+  assert(size <= 8, "%d bytes vectors are not supported", size); // Note, MaxVectorSize == 8 on this platform.
+  return wider_than_default;
+}
+
+//----------------------------------------------------------------------
+// An oop arg. Must pass a handle (the address of a stack slot holding the oop, or NULL for a NULL oop) not the oop itself
+//----------------------------------------------------------------------
+static void object_move(MacroAssembler *masm,
+ OopMap *map, // Receives a set_oop entry for the stack slot that holds the oop.
+ int oop_handle_offset, // First stack slot of the area where register oops are flushed.
+ int framesize_in_slots, // Size of the current frame in stack slots.
+ VMRegPair src,
+ VMRegPair dst,
+ bool is_receiver,
+ int *receiver_offset) { // Out: byte offset of the receiver's slot; only written when is_receiver is true.
+ int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
+
+ assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");
+
+ // Must pass a handle. First figure out the location we use as a handle.
+
+ if (src.first()->is_stack()) {
+ // Oop is already on the stack, put handle on stack or in register
+ // If handle will be on the stack, use temp reg to calculate it.
+ Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
+ Label skip;
+ int slot_in_older_frame = reg2slot(src.first());
+
+ guarantee(!is_receiver, "expecting receiver in register");
+ map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots)); // Slot number as seen from the current frame.
+
+ __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP); // rHandle = address of the slot holding the oop.
+ __ load_and_test_long(Z_R0, Address(rHandle)); // Load the oop itself to test it for NULL.
+ __ z_brne(skip);
+ // Use a NULL handle if oop is NULL.
+ __ clear_reg(rHandle, true, false);
+ __ bind(skip);
+
+ // Copy handle to the right place (register or stack).
+ if (dst.first()->is_stack()) {
+ __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
+ } // else
+ // nothing to do. rHandle uses the correct register
+ } else {
+ // Oop is passed in an input register. We must flush it to the stack.
+ const Register rOop = src.first()->as_Register();
+ const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
+ int oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; // One word-sized slot per register, indexed by its distance from Z_ARG1.
+ int oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
+ NearLabel skip;
+
+ if (is_receiver) {
+ *receiver_offset = oop_slot_offset;
+ }
+ map->set_oop(VMRegImpl::stack2reg(oop_slot));
+
+ // Flush Oop to stack, calculate handle.
+ __ z_stg(rOop, oop_slot_offset, Z_SP);
+ __ add2reg(rHandle, oop_slot_offset, Z_SP);
+
+ // If Oop == NULL, use a NULL handle.
+ __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
+ __ clear_reg(rHandle, true, false);
+ __ bind(skip);
+
+ // Copy handle to the right place (register or stack).
+ if (dst.first()->is_stack()) {
+ __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
+ } // else
+ // nothing to do here, since rHandle = dst.first()->as_Register in this case.
+ }
+}
+
+//----------------------------------------------------------------------
+// A float arg (32 bit): move it from src to dst. May have to do float reg to int reg conversion
+//----------------------------------------------------------------------
+static void float_move(MacroAssembler *masm,
+                       VMRegPair src,                // Source; stack slots are reached with frame_offset added.
+                       VMRegPair dst,                // Destination; stack slots are addressed off Z_SP directly.
+                       int framesize_in_slots,       // Size of the current frame in stack slots.
+                       int workspace_slot_offset) {  // Stack slot of the scratch area used for gpr <-> fpr transfers.
+  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
+  int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
+
+  // We do not accept an argument in a VMRegPair to be spread over two slots,
+  // no matter what physical location (reg or stack) the slots may have.
+  // We just check for the unaccepted slot to be invalid.
+  assert(!src.second()->is_valid(), "float in arg spread over two slots");
+  assert(!dst.second()->is_valid(), "float out arg spread over two slots");
+
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack -> stack. The easiest of the bunch.
+      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
+               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
+    } else {
+      // stack to reg
+      Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
+      if (dst.first()->is_Register()) {
+        __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
+      } else {
+        __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
+      }
+    }
+  } else if (src.first()->is_Register()) {
+    if (dst.first()->is_stack()) {
+      // gpr -> stack. Pass 'false' to reg2mem_opt (not to the Address), as the fpr -> stack path does.
+      __ reg2mem_opt(src.first()->as_Register(),
+                     Address(Z_SP, reg2offset(dst.first())), false);
+    } else {
+      if (dst.first()->is_Register()) {
+        // gpr -> gpr
+        __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
+                              src.first()->as_Register(), T_INT);
+      } else {
+        if (VM_Version::has_FPSupportEnhancements()) {
+          // gpr -> fpr. Exploit z10 capability of direct transfer.
+          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
+        } else {
+          // gpr -> fpr. Use work space on stack to transfer data.
+          Address stackaddr(Z_SP, workspace_offset);
+
+          __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
+          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
+        }
+      }
+    }
+  } else {
+    if (dst.first()->is_stack()) {
+      // fpr -> stack
+      __ freg2mem_opt(src.first()->as_FloatRegister(),
+                      Address(Z_SP, reg2offset(dst.first())), false);
+    } else {
+      if (dst.first()->is_Register()) {
+        if (VM_Version::has_FPSupportEnhancements()) {
+          // fpr -> gpr.
+          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
+        } else {
+          // fpr -> gpr. Use work space on stack to transfer data.
+          Address stackaddr(Z_SP, workspace_offset);
+
+          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
+          __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
+        }
+      } else {
+        // fpr -> fpr
+        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
+                               src.first()->as_FloatRegister(), T_FLOAT);
+      }
+    }
+  }
+}
+
+//----------------------------------------------------------------------
+// A double arg (64 bit): move it from src to dst. May have to do double reg to long reg conversion
+//----------------------------------------------------------------------
+static void double_move(MacroAssembler *masm,
+ VMRegPair src, // Source; stack slots are reached with frame_offset added.
+ VMRegPair dst, // Destination; stack slots are addressed off Z_SP directly.
+ int framesize_in_slots, // Size of the current frame in stack slots.
+ int workspace_slot_offset) { // Stack slot of the scratch area used for gpr <-> fpr transfers.
+ int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
+ int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
+
+ // Since src is always a java calling convention we know that the
+ // src pair is always either all registers or all stack (and aligned?)
+
+ if (src.first()->is_stack()) {
+ if (dst.first()->is_stack()) {
+ // stack -> stack. The easiest of the bunch.
+ __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
+ Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
+ } else {
+ // stack to reg
+ Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
+
+ if (dst.first()->is_Register()) {
+ __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
+ } else {
+ __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
+ }
+ }
+ } else if (src.first()->is_Register()) {
+ if (dst.first()->is_stack()) {
+ // gpr -> stack
+ __ reg2mem_opt(src.first()->as_Register(),
+ Address(Z_SP, reg2offset(dst.first())));
+ } else {
+ if (dst.first()->is_Register()) {
+ // gpr -> gpr
+ __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
+ src.first()->as_Register(), T_LONG);
+ } else {
+ if (VM_Version::has_FPSupportEnhancements()) {
+ // gpr -> fpr. Exploit z10 capability of direct transfer.
+ __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
+ } else {
+ // gpr -> fpr. Use work space on stack to transfer data.
+ Address stackaddr(Z_SP, workspace_offset);
+ __ reg2mem_opt(src.first()->as_Register(), stackaddr);
+ __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
+ }
+ }
+ }
+ } else { // src is neither a stack slot nor a GPR: treated as a float register.
+ if (dst.first()->is_stack()) {
+ // fpr -> stack
+ __ freg2mem_opt(src.first()->as_FloatRegister(),
+ Address(Z_SP, reg2offset(dst.first())));
+ } else {
+ if (dst.first()->is_Register()) {
+ if (VM_Version::has_FPSupportEnhancements()) {
+ // fpr -> gpr. Exploit z10 capability of direct transfer.
+ __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
+ } else {
+ // fpr -> gpr. Use work space on stack to transfer data.
+ Address stackaddr(Z_SP, workspace_offset);
+
+ __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
+ __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
+ }
+ } else {
+ // fpr -> fpr
+ // In theory these overlap but the ordering is such that this is likely a nop.
+ __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
+ src.first()->as_FloatRegister(), T_DOUBLE);
+ }
+ }
+ }
+}
+
+//----------------------------------------------------------------------
+// Move one long argument from its Java location to its C location.
+//
+// Longs are only expected on the stack or in a general purpose register;
+// anything else trips an assertion. Stack-resident sources are addressed
+// relative to Z_SP with the wrapper frame size added to their offset.
+//----------------------------------------------------------------------
+static void long_move(MacroAssembler *masm,
+                      VMRegPair src,
+                      VMRegPair dst,
+                      int framesize_in_slots) {
+  const int caller_bias = framesize_in_slots * VMRegImpl::stack_slot_size;
+
+  if (src.first()->is_stack()) {
+    Address from(Z_SP, reg2offset(src.first()) + caller_bias);
+
+    if (dst.first()->is_stack()) {
+      // stack -> stack: 8 byte storage-to-storage copy.
+      __ z_mvc(Address(Z_SP, reg2offset(dst.first())), from, sizeof(long));
+      return;
+    }
+
+    // stack -> gpr
+    assert(dst.first()->is_Register(), "long dst value must be in GPR");
+    __ mem2reg_opt(dst.first()->as_Register(), from);
+    return;
+  }
+
+  // The source is in a register from here on.
+  assert(src.first()->is_Register(), "long src value must be in GPR");
+  const Register from = src.first()->as_Register();
+
+  if (dst.first()->is_stack()) {
+    // gpr -> stack
+    __ reg2mem_opt(from, Address(Z_SP, reg2offset(dst.first())));
+    return;
+  }
+
+  // gpr -> gpr
+  assert(dst.first()->is_Register(), "long dst value must be in GPR");
+  __ move_reg_if_needed(dst.first()->as_Register(), T_LONG, from, T_LONG);
+}
+
+
+//----------------------------------------------------------------------
+// Move one int-like argument and widen it to 64 bits.
+//----------------------------------------------------------------------
+// The z/Architecture ABI stores integer-like items as 64 bit items, while
+// Java only provides 32 bits for such a parameter. Every value is sign
+// extended on the way. This is also fine for T_CHAR, the only unsigned
+// base type: it occupies just 16 bits of the int, so sign extending from
+// 32 to 64 bits cannot change its value.
+//
+// Note: for a register source that goes to the stack, the source register
+// itself is sign extended in place before it is stored.
+//----------------------------------------------------------------------
+static void move32_64(MacroAssembler *masm,
+                      VMRegPair src,
+                      VMRegPair dst,
+                      int framesize_in_slots) {
+  const int  caller_bias = framesize_in_slots * VMRegImpl::stack_slot_size;
+  const bool to_stack    = dst.first()->is_stack();
+
+  if (src.first()->is_stack()) {
+    // A storage-to-storage copy is not possible because of the sign
+    // extension; load into the destination register, or into the scratch
+    // register if the value has to go back to memory.
+    Address from(Z_SP, reg2offset(src.first()) + caller_bias);
+    const Register widened = to_stack ? Z_R0_scratch : dst.first()->as_Register();
+
+    __ mem2reg_signed_opt(widened, from);
+    if (to_stack) {
+      __ reg2mem_opt(widened, Address(Z_SP, reg2offset(dst.first())));
+    }
+    return;
+  }
+
+  // Register source: extend into the destination register, or in place
+  // if the result is stored to the stack afterwards.
+  const Register value   = src.first()->as_Register();
+  const Register widened = to_stack ? value : dst.first()->as_Register();
+
+  __ z_lgfr(widened, value);
+  if (to_stack) {
+    __ reg2mem_opt(widened, Address(Z_SP, reg2offset(dst.first())));
+  }
+}
+
+//----------------------------------------------------------------------
+// Spill the register arguments of a critical native to the argument save
+// area, or reload them from there.
+//
+//   stack_slots    size of the wrapper frame in stack slots (upper bound
+//                  for the save area, and bias for oops in the caller frame)
+//   total_in_args  number of entries in in_regs / in_sig_bt
+//   arg_save_area  first stack slot (relative to Z_SP) of the save area
+//   map            non-NULL: store the values and record array oops in map
+//                  NULL:     load the values back
+//   in_regs        Java locations of the incoming arguments
+//   in_sig_bt      basic types of the incoming arguments
+//
+// Save and restore walk the arguments in the same order and advance the
+// slot cursor identically, so both passes agree on the layout:
+// first all double word items (doubles in FPRs, everything in GPRs),
+// then all single word items (floats in FPRs).
+//----------------------------------------------------------------------
+static void save_or_restore_arguments(MacroAssembler *masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap *map,
+                                      VMRegPair *in_regs,
+                                      BasicType *in_sig_bt) {
+
+  const bool saving = (map != NULL);
+  int slot = arg_save_area;
+
+  // Handle double words first.
+  for (int i = 0; i < total_in_args; i++) {
+    const BasicType bt = in_sig_bt[i];
+
+    // GPR arguments are spilled as full 64 bit words, whatever their type.
+    // The save area sizing in generate_native_wrapper reserves a double
+    // slot for every int-like, long, and array argument held in a GPR.
+    const bool gpr_dword = in_regs[i].first()->is_Register() &&
+                           (bt == T_BOOLEAN || bt == T_BYTE || bt == T_SHORT ||
+                            bt == T_CHAR    || bt == T_INT  ||
+                            bt == T_LONG    || bt == T_ARRAY);
+
+    if (in_regs[i].first()->is_FloatRegister() && bt == T_DOUBLE) {
+      int offset = slot * VMRegImpl::stack_slot_size;
+      slot += VMRegImpl::slots_per_word;
+      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
+      const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
+      Address stackaddr(Z_SP, offset);
+      if (saving) {
+        __ freg2mem_opt(freg, stackaddr);
+      } else {
+        __ mem2freg_opt(freg, stackaddr);
+      }
+    } else if (gpr_dword) {
+      int offset = slot * VMRegImpl::stack_slot_size;
+      const Register reg = in_regs[i].first()->as_Register();
+      if (saving) {
+        __ z_stg(reg, offset, Z_SP);
+        if (bt == T_ARRAY) {
+          // The spilled array reference is an oop the GC must know about.
+          map->set_oop(VMRegImpl::stack2reg(slot));
+        }
+      } else {
+        __ z_lg(reg, offset, Z_SP);
+      }
+      // Advance the cursor on BOTH paths. Otherwise all GPR values would be
+      // stored to the same slot and the restore would read different slots.
+      slot += VMRegImpl::slots_per_word;
+      assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
+    }
+  }
+
+  // Save or restore single word registers.
+  for (int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_FloatRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int offset = slot * VMRegImpl::stack_slot_size;
+        slot++;
+        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
+        const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
+        Address stackaddr(Z_SP, offset);
+        if (saving) {
+          __ freg2mem_opt(freg, stackaddr, false);
+        } else {
+          __ mem2freg_opt(freg, stackaddr, false);
+        }
+      }
+    } else if (in_regs[i].first()->is_stack() &&
+               in_sig_bt[i] == T_ARRAY && saving) {
+      // Array passed on the stack: nothing to spill, but its slot in the
+      // caller's frame (beyond this frame's stack_slots) holds an oop.
+      int slot_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+      map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + stack_slots));
+    }
+  }
+}
+
+// Check GCLocker::needs_gc and enter the runtime if it's true. This
+// keeps a new JNI critical region from starting until a GC has been
+// forced. Save down any oops in registers and describe them in an OopMap.
+//
+//   stack_slots    size of the wrapper frame in stack slots
+//   total_in_args  number of entries in in_regs / in_sig_bt
+//   arg_save_area  first stack slot of the register argument save area
+//   oop_maps       receives the OopMap describing the frame at the
+//                  return address of the runtime call
+//   in_regs        Java locations of the incoming arguments
+//   in_sig_bt      basic types of the incoming arguments
+//
+// With StressCriticalJNINatives, the save/restore sequence is additionally
+// exercised unconditionally with the argument registers trashed in between.
+static void check_needs_gc_for_critical_native(MacroAssembler *masm,
+                                               const int stack_slots,
+                                               const int total_in_args,
+                                               const int arg_save_area,
+                                               OopMapSet *oop_maps,
+                                               VMRegPair *in_regs,
+                                               BasicType *in_sig_bt) {
+  __ block_comment("check GCLocker::needs_gc");
+  Label cont;
+
+  // Check GCLocker::_needs_gc flag. Nothing to do if the byte is zero.
+  __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
+  __ z_cli(0, Z_R1_scratch, 0);
+  __ z_bre(cont);
+
+  // Save down any values that are live in registers and call into the
+  // runtime to halt for a GC.
+  OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+  // No explicit pc is recorded in the frame anchor (noreg).
+  __ set_last_Java_frame(Z_SP, noreg);
+
+  __ block_comment("block_for_jni_critical");
+  __ z_lgr(Z_ARG1, Z_thread);
+
+  address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
+  __ call_c(entry_point);
+  // The map is registered at the code offset right behind the call.
+  oop_maps->add_gc_map(__ offset(), map);
+
+  __ reset_last_Java_frame();
+
+  // Reload all the register arguments.
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+
+  if (StressCriticalJNINatives) {
+    // Stress register saving. The map only selects the "save" mode of
+    // save_or_restore_arguments here; it is never added to oop_maps.
+    OopMap *stress_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, stress_map, in_regs, in_sig_bt);
+
+    // Destroy argument registers.
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        // Don't set CC.
+        __ clear_reg(in_regs[i].first()->as_Register(), true, false);
+      } else {
+        if (in_regs[i].first()->is_FloatRegister()) {
+          // Flip the sign to alter the register contents.
+          FloatRegister fr = in_regs[i].first()->as_FloatRegister();
+          __ z_lcdbr(fr, fr);
+        }
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+}
+
+// Move one pointer-sized argument from src to dst without any conversion.
+// Stack-resident sources are addressed relative to Z_SP with the wrapper
+// frame size (framesize_in_slots) added to their offset; stack-resident
+// destinations are addressed relative to Z_SP directly.
+static void move_ptr(MacroAssembler *masm,
+                     VMRegPair src,
+                     VMRegPair dst,
+                     int framesize_in_slots) {
+  const int  caller_bias = framesize_in_slots * VMRegImpl::stack_slot_size;
+  const bool to_stack    = dst.first()->is_stack();
+
+  if (src.first()->is_stack()) {
+    // stack -> reg, or stack -> stack via the scratch register.
+    const Register target = to_stack ? Z_R0_scratch : dst.first()->as_Register();
+
+    __ mem2reg_opt(target, Address(Z_SP, reg2offset(src.first()) + caller_bias));
+    if (to_stack) {
+      __ reg2mem_opt(target, Address(Z_SP, reg2offset(dst.first())));
+    }
+    return;
+  }
+
+  const Register from = src.first()->as_Register();
+  if (to_stack) {
+    // reg -> stack
+    __ reg2mem_opt(from, Address(Z_SP, reg2offset(dst.first())));
+  } else {
+    // reg -> reg
+    __ lgr_if_needed(dst.first()->as_Register(), from);
+  }
+}
+
+// Split an array argument into the two values a critical native expects:
+// the address of the array body and the array length. A null array
+// yields 0 for both.
+//
+//   reg                 Java location of the array reference
+//   in_elem_type        element type, determines the offset of the body
+//   body_arg            C location receiving the body address
+//   length_arg          C location receiving the length (sign extended)
+//   framesize_in_slots  wrapper frame size, needed for stack sources
+static void unpack_array_argument(MacroAssembler *masm,
+                                  VMRegPair reg,
+                                  BasicType in_elem_type,
+                                  VMRegPair body_arg,
+                                  VMRegPair length_arg,
+                                  int framesize_in_slots) {
+  const Register body_reg   = Z_tmp_2;  // Collects the body address.
+  const Register length_reg = Z_tmp_1;  // Collects the length.
+
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != body_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != body_reg,
+         "possible collision");
+
+  // Describe both temporaries as locations so the generic move helpers
+  // can be used to deliver the results.
+  VMRegPair body_pair;
+  VMRegPair length_pair;
+  body_pair.set_ptr(body_reg->as_VMReg());
+  length_pair.set_ptr(length_reg->as_VMReg());
+
+  // An array reference passed on the stack is loaded into body_reg first.
+  if (reg.first()->is_stack()) {
+    move_ptr(masm, reg, body_pair, framesize_in_slots);
+    reg = body_pair;
+  }
+  const Register array = reg.first()->as_Register();
+
+  NearLabel deliver;
+
+  // Preset both results with 0 (the answer for a null array). Don't set CC,
+  // result of clear_reg is not needed. body_reg must be left alone if it
+  // holds the array reference itself.
+  (void) __ clear_reg(length_reg, true, false);
+  if (array != body_reg) {
+    (void) __ clear_reg(body_reg, true, false);
+  }
+  __ compare64_and_branch(array, (RegisterOrConstant)0L, Assembler::bcondEqual, deliver);
+
+  // Non-null array: fetch the length and compute the address of the body.
+  __ z_lgf(length_reg, Address(array, arrayOopDesc::length_offset_in_bytes()));
+  __ add2reg(body_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), array);
+
+  __ bind(deliver);
+  move_ptr(masm, body_pair, body_arg, framesize_in_slots);
+  move32_64(masm, length_pair, length_arg, framesize_in_slots);
+}
+
+//----------------------------------------------------------------------
+// Wrap a JNI call.
+//----------------------------------------------------------------------
+#undef USE_RESIZE_FRAME
+nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
+ const methodHandle& method,
+ int compile_id,
+ BasicType *in_sig_bt,
+ VMRegPair *in_regs,
+ BasicType ret_type) {
+#ifdef COMPILER2
+ int total_in_args = method->size_of_parameters();
+ if (method->is_method_handle_intrinsic()) {
+ vmIntrinsics::ID iid = method->intrinsic_id();
+ intptr_t start = (intptr_t) __ pc();
+ int vep_offset = ((intptr_t) __ pc()) - start;
+
+ gen_special_dispatch(masm, total_in_args,
+ method->intrinsic_id(), in_sig_bt, in_regs);
+
+ int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
+
+ __ flush();
+
+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // No out slots at all, actually.
+
+ return nmethod::new_native_nmethod(method,
+ compile_id,
+ masm->code(),
+ vep_offset,
+ frame_complete,
+ stack_slots / VMRegImpl::slots_per_word,
+ in_ByteSize(-1),
+ in_ByteSize(-1),
+ (OopMapSet *) NULL);
+ }
+
+
+ ///////////////////////////////////////////////////////////////////////
+ //
+ // Precalculations before generating any code
+ //
+ ///////////////////////////////////////////////////////////////////////
+
+ bool is_critical_native = true;
+ address native_func = method->critical_native_function();
+ if (native_func == NULL) {
+ native_func = method->native_function();
+ is_critical_native = false;
+ }
+ assert(native_func != NULL, "must have function");
+
+ //---------------------------------------------------------------------
+ // We have received a description of where all the java args are located
+ // on entry to the wrapper. We need to convert these args to where
+ // the jni function will expect them. To figure out where they go
+ // we convert the java signature to a C signature by inserting
+ // the hidden arguments as arg[0] and possibly arg[1] (static method).
+ //
+ // The first hidden argument arg[0] is a pointer to the JNI environment.
+ // It is generated for every call.
+ // The second argument arg[1] to the JNI call, which is hidden for static
+ // methods, is the boxed lock object. For static calls, the lock object
+ // is the static method itself. The oop is constructed here. for instance
+ // calls, the lock is performed on the object itself, the pointer of
+ // which is passed as the first visible argument.
+ //---------------------------------------------------------------------
+
+ // Additionally, on z/Architecture we must convert integers
+ // to longs in the C signature. We do this in advance in order to have
+ // no trouble with indexes into the bt-arrays.
+ // So convert the signature and registers now, and adjust the total number
+ // of in-arguments accordingly.
+ bool method_is_static = method->is_static();
+ int total_c_args = total_in_args;
+
+ if (!is_critical_native) {
+ int n_hidden_args = method_is_static ? 2 : 1;
+ total_c_args += n_hidden_args;
+ } else {
+ // No JNIEnv*, no this*, but unpacked arrays (base+length).
+ for (int i = 0; i < total_in_args; i++) {
+ if (in_sig_bt[i] == T_ARRAY) {
+ total_c_args ++;
+ }
+ }
+ }
+
+ BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
+ VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+ BasicType* in_elem_bt = NULL;
+
+ // Create the signature for the C call:
+ // 1) add the JNIEnv*
+ // 2) add the class if the method is static
+ // 3) copy the rest of the incoming signature (shifted by the number of
+ // hidden arguments)
+
+ int argc = 0;
+ if (!is_critical_native) {
+ out_sig_bt[argc++] = T_ADDRESS;
+ if (method->is_static()) {
+ out_sig_bt[argc++] = T_OBJECT;
+ }
+
+ for (int i = 0; i < total_in_args; i++) {
+ out_sig_bt[argc++] = in_sig_bt[i];
+ }
+ } else {
+ Thread* THREAD = Thread::current();
+ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+ SignatureStream ss(method->signature());
+ int o = 0;
+ for (int i = 0; i < total_in_args; i++, o++) {
+ if (in_sig_bt[i] == T_ARRAY) {
+ // Arrays are passed as tuples (int, elem*).
+ Symbol* atype = ss.as_symbol(CHECK_NULL);
+ const char* at = atype->as_C_string();
+ if (strlen(at) == 2) {
+ assert(at[0] == '[', "must be");
+ switch (at[1]) {
+ case 'B': in_elem_bt[o] = T_BYTE; break;
+ case 'C': in_elem_bt[o] = T_CHAR; break;
+ case 'D': in_elem_bt[o] = T_DOUBLE; break;
+ case 'F': in_elem_bt[o] = T_FLOAT; break;
+ case 'I': in_elem_bt[o] = T_INT; break;
+ case 'J': in_elem_bt[o] = T_LONG; break;
+ case 'S': in_elem_bt[o] = T_SHORT; break;
+ case 'Z': in_elem_bt[o] = T_BOOLEAN; break;
+ default: ShouldNotReachHere();
+ }
+ }
+ } else {
+ in_elem_bt[o] = T_VOID;
+ }
+ if (in_sig_bt[i] != T_VOID) {
+ assert(in_sig_bt[i] == ss.type(), "must match");
+ ss.next();
+ }
+ }
+ assert(total_in_args == o, "must match");
+
+ for (int i = 0; i < total_in_args; i++) {
+ if (in_sig_bt[i] == T_ARRAY) {
+ // Arrays are passed as tuples (int, elem*).
+ out_sig_bt[argc++] = T_INT;
+ out_sig_bt[argc++] = T_ADDRESS;
+ } else {
+ out_sig_bt[argc++] = in_sig_bt[i];
+ }
+ }
+ }
+
+ ///////////////////////////////////////////////////////////////////////
+ // Now figure out where the args must be stored and how much stack space
+ // they require (neglecting out_preserve_stack_slots but providing space
+ // for storing the first five register arguments).
+ // It's weird, see int_stk_helper.
+ ///////////////////////////////////////////////////////////////////////
+
+ //---------------------------------------------------------------------
+ // Compute framesize for the wrapper.
+ //
+ // - We need to handlize all oops passed in registers.
+ // - We must create space for them here that is disjoint from the save area.
+ // - We always just allocate 5 words for storing down these object.
+ // This allows us to simply record the base and use the Ireg number to
+ // decide which slot to use.
+ // - Note that the reg number used to index the stack slot is the inbound
+ // number, not the outbound number.
+ // - We must shuffle args to match the native convention,
+ // and to include var-args space.
+ //---------------------------------------------------------------------
+
+ //---------------------------------------------------------------------
+ // Calculate the total number of stack slots we will need:
+ // - 1) abi requirements
+ // - 2) outgoing args
+ // - 3) space for inbound oop handle area
+ // - 4) space for handlizing a klass if static method
+ // - 5) space for a lock if synchronized method
+ // - 6) workspace (save rtn value, int<->float reg moves, ...)
+ // - 7) filler slots for alignment
+ //---------------------------------------------------------------------
+ // Here is how the space we have allocated will look like.
+ // Since we use resize_frame, we do not create a new stack frame,
+ // but just extend the one we got with our own data area.
+ //
+ // If an offset or pointer name points to a separator line, it is
+ // assumed that addressing with offset 0 selects storage starting
+ // at the first byte above the separator line.
+ //
+ //
+ // ... ...
+ // | caller's frame |
+ // FP-> |---------------------|
+ // | filler slots, if any|
+ // 7| #slots == mult of 2 |
+ // |---------------------|
+ // | work space |
+ // 6| 2 slots = 8 bytes |
+ // |---------------------|
+ // 5| lock box (if sync) |
+ // |---------------------| <- lock_slot_offset
+ // 4| klass (if static) |
+ // |---------------------| <- klass_slot_offset
+ // 3| oopHandle area |
+ // | (save area for |
+ // | critical natives) |
+ // | |
+ // | |
+ // |---------------------| <- oop_handle_offset
+ // 2| outbound memory |
+ // ... ...
+ // | based arguments |
+ // |---------------------|
+ // | vararg |
+ // ... ...
+ // | area |
+ // |---------------------| <- out_arg_slot_offset
+ // 1| out_preserved_slots |
+ // ... ...
+ // | (z_abi spec) |
+ // SP-> |---------------------| <- FP_slot_offset (back chain)
+ // ... ...
+ //
+ //---------------------------------------------------------------------
+
+ // *_slot_offset indicates offset from SP in #stack slots
+ // *_offset indicates offset from SP in #bytes
+
+ int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
+ SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
+
+ // Now the space for the inbound oop handle area.
+ int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
+ if (is_critical_native) {
+ // Critical natives may have to call out so they need a save area
+ // for register arguments.
+ int double_slots = 0;
+ int single_slots = 0;
+ for (int i = 0; i < total_in_args; i++) {
+ if (in_regs[i].first()->is_Register()) {
+ const Register reg = in_regs[i].first()->as_Register();
+ switch (in_sig_bt[i]) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ case T_SHORT:
+ case T_CHAR:
+ case T_INT:
+ // Fall through.
+ case T_ARRAY:
+ case T_LONG: double_slots++; break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ if (in_regs[i].first()->is_FloatRegister()) {
+ switch (in_sig_bt[i]) {
+ case T_FLOAT: single_slots++; break;
+ case T_DOUBLE: double_slots++; break;
+ default: ShouldNotReachHere();
+ }
+ }
+ }
+ } // for
+ total_save_slots = double_slots * 2 + round_to(single_slots, 2); // Round to even.
+ }
+
+ int oop_handle_slot_offset = stack_slots;
+ stack_slots += total_save_slots; // 3)
+
+ int klass_slot_offset = 0;
+ int klass_offset = -1;
+ if (method_is_static && !is_critical_native) { // 4)
+ klass_slot_offset = stack_slots;
+ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
+ stack_slots += VMRegImpl::slots_per_word;
+ }
+
+ int lock_slot_offset = 0;
+ int lock_offset = -1;
+ if (method->is_synchronized()) { // 5)
+ lock_slot_offset = stack_slots;
+ lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size;
+ stack_slots += VMRegImpl::slots_per_word;
+ }
+
+ int workspace_slot_offset= stack_slots; // 6)
+ stack_slots += 2;
+
+ // Now compute actual number of stack words we need.
+ // Round to align stack properly.
+ stack_slots = round_to(stack_slots, // 7)
+ frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
+ int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
+
+
+ ///////////////////////////////////////////////////////////////////////
+ // Now we can start generating code
+ ///////////////////////////////////////////////////////////////////////
+
+ unsigned int wrapper_CodeStart = __ offset();
+ unsigned int wrapper_UEPStart;
+ unsigned int wrapper_VEPStart;
+ unsigned int wrapper_FrameDone;
+ unsigned int wrapper_CRegsSet;
+ Label handle_pending_exception;
+ Label ic_miss;
+
+ //---------------------------------------------------------------------
+ // Unverified entry point (UEP)
+ //---------------------------------------------------------------------
+ wrapper_UEPStart = __ offset();
+
+ // check ic: object class <-> cached class
+ if (!method_is_static) __ nmethod_UEP(ic_miss);
+ // Fill with nops (alignment of verified entry point).
+ __ align(CodeEntryAlignment);
+
+ //---------------------------------------------------------------------
+ // Verified entry point (VEP)
+ //---------------------------------------------------------------------
+ wrapper_VEPStart = __ offset();
+
+ __ save_return_pc();
+ __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
+#ifndef USE_RESIZE_FRAME
+ __ push_frame(frame_size_in_bytes); // Create a new frame for the wrapper.
+#else
+ __ resize_frame(-frame_size_in_bytes, Z_R0_scratch); // No new frame for the wrapper.
+ // Just resize the existing one.
+#endif
+
+ wrapper_FrameDone = __ offset();
+
+ __ verify_thread();
+
+ // Native nmethod wrappers never take possession of the oop arguments.
+ // So the caller will gc the arguments.
+ // The only thing we need an oopMap for is if the call is static.
+ //
+ // An OopMap for lock (and class if static), and one for the VM call itself
+ OopMapSet *oop_maps = new OopMapSet();
+ OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+ if (is_critical_native) {
+ check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
+ oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
+ }
+
+
+ //////////////////////////////////////////////////////////////////////
+ //
+ // The Grand Shuffle
+ //
+ //////////////////////////////////////////////////////////////////////
+ //
+ // We immediately shuffle the arguments so that for any vm call we have
+ // to make from here on out (sync slow path, jvmti, etc.) we will have
+ // captured the oops from our caller and have a valid oopMap for them.
+ //
+ //--------------------------------------------------------------------
+ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
+ // (derived from JavaThread* which is in Z_thread) and, if static,
+ // the class mirror instead of a receiver. This pretty much guarantees that
+ // register layout will not match. We ignore these extra arguments during
+ // the shuffle. The shuffle is described by the two calling convention
+ // vectors we have in our possession. We simply walk the java vector to
+ // get the source locations and the c vector to get the destinations.
+ //
+ // This is a trick. We double the stack slots so we can claim
+ // the oops in the caller's frame. Since we are sure to have
+ // more args than the caller doubling is enough to make
+ // sure we can capture all the incoming oop args from the caller.
+ //--------------------------------------------------------------------
+
+ // Record sp-based slot for receiver on stack for non-static methods.
+ int receiver_offset = -1;
+
+ //--------------------------------------------------------------------
+ // We move the arguments backwards because the floating point registers
+ // destination will always be to a register with a greater or equal
+ // register number or the stack.
+ // jix is the index of the incoming Java arguments.
+ // cix is the index of the outgoing C arguments.
+ //--------------------------------------------------------------------
+
+#ifdef ASSERT
+ bool reg_destroyed[RegisterImpl::number_of_registers];
+ bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+ for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
+ reg_destroyed[r] = false;
+ }
+ for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
+ freg_destroyed[f] = false;
+ }
+#endif // ASSERT
+
+ for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
+#ifdef ASSERT
+ if (in_regs[jix].first()->is_Register()) {
+ assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
+ } else {
+ if (in_regs[jix].first()->is_FloatRegister()) {
+ assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
+ }
+ }
+ if (out_regs[cix].first()->is_Register()) {
+ reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
+ } else {
+ if (out_regs[cix].first()->is_FloatRegister()) {
+ freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
+ }
+ }
+#endif // ASSERT
+
+ switch (in_sig_bt[jix]) {
+ // Due to casting, small integers should only occur in pairs with type T_LONG.
+ case T_BOOLEAN:
+ case T_CHAR:
+ case T_BYTE:
+ case T_SHORT:
+ case T_INT:
+ // Move int and do sign extension.
+ move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
+ break;
+
+ case T_LONG :
+ long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
+ break;
+
+ case T_ARRAY:
+ if (is_critical_native) {
+ int body_arg = cix;
+ cix -= 2; // Point to length arg.
+ unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
+ break;
+ }
+ // else fallthrough
+ case T_OBJECT:
+ assert(!is_critical_native, "no oop arguments");
+ object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
+ ((jix == 0) && (!method_is_static)),
+ &receiver_offset);
+ break;
+ case T_VOID:
+ break;
+
+ case T_FLOAT:
+ float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
+ break;
+
+ case T_DOUBLE:
+ assert(jix+1 < total_in_args && in_sig_bt[jix+1] == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
+ double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
+ break;
+
+ case T_ADDRESS:
+ assert(false, "found T_ADDRESS in java args");
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ //--------------------------------------------------------------------
+ // Pre-load a static method's oop into ARG2.
+ // Used both by locking code and the normal JNI call code.
+ //--------------------------------------------------------------------
+ if (method_is_static && !is_critical_native) {
+ __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
+
+ // Now handlize the static class mirror in ARG2. It's known not-null.
+ __ z_stg(Z_ARG2, klass_offset, Z_SP);
+ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
+ __ add2reg(Z_ARG2, klass_offset, Z_SP);
+ }
+
+ // Get JNIEnv* which is first argument to native.
+ if (!is_critical_native) {
+ __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // We have all of the arguments setup at this point.
+ // We MUST NOT touch any outgoing regs from this point on.
+ // So if we must call out we must push a new frame.
+ //////////////////////////////////////////////////////////////////////
+
+
+ // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
+ // Both values represent the same position.
+ __ get_PC(Z_R10); // PC into register
+ wrapper_CRegsSet = __ offset(); // and into into variable.
+
+ // Z_R10 now has the pc loaded that we will use when we finally call to native.
+
+ // We use the same pc/oopMap repeatedly when we call out.
+ oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
+
+ // Lock a synchronized method.
+
+ if (method->is_synchronized()) {
+ assert(!is_critical_native, "unhandled");
+
+ // ATTENTION: args and Z_R10 must be preserved.
+ Register r_oop = Z_R11;
+ Register r_box = Z_R12;
+ Register r_tmp1 = Z_R13;
+ Register r_tmp2 = Z_R7;
+ Label done;
+
+ // Load the oop for the object or class. R_carg2_classorobject contains
+ // either the handlized oop from the incoming arguments or the handlized
+ // class mirror (if the method is static).
+ __ z_lg(r_oop, 0, Z_ARG2);
+
+ lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
+ // Get the lock box slot's address.
+ __ add2reg(r_box, lock_offset, Z_SP);
+
+#ifdef ASSERT
+ if (UseBiasedLocking)
+ // Making the box point to itself will make it clear it went unused
+ // but also be obviously invalid.
+ __ z_stg(r_box, 0, r_box);
+#endif // ASSERT
+
+ // Try fastpath for locking.
+ // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!)
+ __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
+ __ z_bre(done);
+
+ //-------------------------------------------------------------------------
+ // None of the above fast optimizations worked so we have to get into the
+ // slow case of monitor enter. Inline a special case of call_VM that
+ // disallows any pending_exception.
+ //-------------------------------------------------------------------------
+
+ Register oldSP = Z_R11;
+
+ __ z_lgr(oldSP, Z_SP);
+
+ RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
+
+ // Prepare arguments for call.
+ __ z_lg(Z_ARG1, 0, Z_ARG2); // Ynboxed class mirror or unboxed object.
+ __ add2reg(Z_ARG2, lock_offset, oldSP);
+ __ z_lgr(Z_ARG3, Z_thread);
+
+ __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
+
+ // Do the call.
+ __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
+ __ call(Z_R1_scratch);
+
+ __ reset_last_Java_frame();
+
+ RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
+#ifdef ASSERT
+ { Label L;
+ __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
+ __ z_bre(L);
+ __ stop("no pending exception allowed on exit from IR::monitorenter");
+ __ bind(L);
+ }
+#endif
+ __ bind(done);
+ } // lock for synchronized methods
+
+
+ //////////////////////////////////////////////////////////////////////
+ // Finally just about ready to make the JNI call.
+ //////////////////////////////////////////////////////////////////////
+
+ // Use that pc we placed in Z_R10 a while back as the current frame anchor.
+ __ set_last_Java_frame(Z_SP, Z_R10);
+
+ // Transition from _thread_in_Java to _thread_in_native.
+ __ set_thread_state(_thread_in_native);
+
+
+ //////////////////////////////////////////////////////////////////////
+ // This is the JNI call.
+ //////////////////////////////////////////////////////////////////////
+
+ __ call_c(native_func);
+
+
+ //////////////////////////////////////////////////////////////////////
+ // We have survived the call once we reach here.
+ //////////////////////////////////////////////////////////////////////
+
+
+ //--------------------------------------------------------------------
+ // Unpack native results.
+ //--------------------------------------------------------------------
+ // For int-types, we do any needed sign-extension required.
+ // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
+ // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
+ // blocking or unlocking.
+ // An OOP result (handle) is done specially in the slow-path code.
+ //--------------------------------------------------------------------
+ switch (ret_type) { //GLGLGL
+ case T_VOID: break; // Nothing to do!
+ case T_FLOAT: break; // Got it where we want it (unless slow-path)
+ case T_DOUBLE: break; // Got it where we want it (unless slow-path)
+ case T_LONG: break; // Got it where we want it (unless slow-path)
+ case T_OBJECT: break; // Really a handle.
+ // Cannot de-handlize until after reclaiming jvm_lock.
+ case T_ARRAY: break;
+
+ case T_BOOLEAN: // 0 -> false(0); !0 -> true(1)
+ __ z_lngfr(Z_RET, Z_RET); // Force sign bit on except for zero.
+ __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
+ break;
+ case T_BYTE: __ z_lgbr(Z_RET, Z_RET); break; // sign extension
+ case T_CHAR: __ z_llghr(Z_RET, Z_RET); break; // unsigned result
+ case T_SHORT: __ z_lghr(Z_RET, Z_RET); break; // sign extension
+ case T_INT: __ z_lgfr(Z_RET, Z_RET); break; // sign-extend for beauty.
+
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+
+
+ // Switch thread to "native transition" state before reading the synchronization state.
+ // This additional state is necessary because reading and testing the synchronization
+ // state is not atomic w.r.t. GC, as this scenario demonstrates:
+ // - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
+ // - VM thread changes sync state to synchronizing and suspends threads for GC.
+ // - Thread A is resumed to finish this native method, but doesn't block here since it
+ // didn't see any synchronization in progress, and escapes.
+
+ // Transition from _thread_in_native to _thread_in_native_trans.
+ __ set_thread_state(_thread_in_native_trans);
+
+ // Safepoint synchronization
+ //--------------------------------------------------------------------
+ // Must we block?
+ //--------------------------------------------------------------------
+ // Block, if necessary, before resuming in _thread_in_Java state.
+ // In order for GC to work, don't clear the last_Java_sp until after blocking.
+ //--------------------------------------------------------------------
+ Label after_transition;
+ {
+ Label no_block, sync;
+
+ save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
+
+ if (os::is_MP()) {
+ if (UseMembar) {
+ // Force this write out before the read below.
+ __ z_fence();
+ } else {
+ // Write serialization page so VM thread can do a pseudo remote membar.
+ // We use the current thread pointer to calculate a thread specific
+ // offset to write to within the page. This minimizes bus traffic
+ // due to cache line collision.
+ __ serialize_memory(Z_thread, Z_R1, Z_R2);
+ }
+ }
+ __ generate_safepoint_check(sync, Z_R1, true);
+
+ __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
+ __ z_bre(no_block);
+
+ // Block. Save any potential method result value before the operation and
+ // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
+ // lets us share the oopMap we used when we went native rather than create
+ // a distinct one for this pc.
+ //
+ __ bind(sync);
+ __ z_acquire();
+
+ address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
+ : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
+
+ __ call_VM_leaf(entry_point, Z_thread);
+
+ if (is_critical_native) {
+ restore_native_result(masm, ret_type, workspace_slot_offset);
+ __ z_bru(after_transition); // No thread state transition here.
+ }
+ __ bind(no_block);
+ restore_native_result(masm, ret_type, workspace_slot_offset);
+ }
+
+ //--------------------------------------------------------------------
+ // Thread state is thread_in_native_trans. Any safepoint blocking has
+ // already happened so we can now change state to _thread_in_Java.
+ //--------------------------------------------------------------------
+ // Transition from _thread_in_native_trans to _thread_in_Java.
+ __ set_thread_state(_thread_in_Java);
+ __ bind(after_transition);
+
+
+ //--------------------------------------------------------------------
+ // Reguard any pages if necessary.
+ // Protect native result from being destroyed.
+ //--------------------------------------------------------------------
+
+ Label no_reguard;
+
+ __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
+ JavaThread::stack_guard_yellow_reserved_disabled);
+
+ __ z_bre(no_reguard);
+
+ save_native_result(masm, ret_type, workspace_slot_offset);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
+ restore_native_result(masm, ret_type, workspace_slot_offset);
+
+ __ bind(no_reguard);
+
+
+ // Synchronized methods (slow path only)
+ // No pending exceptions for now.
+ //--------------------------------------------------------------------
+ // Handle possibly pending exception (will unlock if necessary).
+ // Native result is, if any is live, in Z_FRES or Z_RES.
+ //--------------------------------------------------------------------
+ // Unlock
+ //--------------------------------------------------------------------
+ if (method->is_synchronized()) {
+ const Register r_oop = Z_R11;
+ const Register r_box = Z_R12;
+ const Register r_tmp1 = Z_R13;
+ const Register r_tmp2 = Z_R7;
+ Label done;
+
+ // Get unboxed oop of class mirror or object ...
+ int offset = method_is_static ? klass_offset : receiver_offset;
+
+ assert(offset != -1, "");
+ __ z_lg(r_oop, offset, Z_SP);
+
+ // ... and address of lock object box.
+ __ add2reg(r_box, lock_offset, Z_SP);
+
+ // Try fastpath for unlocking.
+ __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
+ __ z_bre(done);
+
+ // Slow path for unlocking.
+ // Save and restore any potential method result value around the unlocking operation.
+ const Register R_exc = Z_R11;
+
+ save_native_result(masm, ret_type, workspace_slot_offset);
+
+ // Must save pending exception around the slow-path VM call. Since it's a
+ // leaf call, the pending exception (if any) can be kept in a register.
+ __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
+ assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
+
+ // Must clear pending-exception before re-entering the VM. Since this is
+ // a leaf call, pending-exception-oop can be safely kept in a register.
+ __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
+
+ // Inline a special case of call_VM that disallows any pending_exception.
+
+ // Get locked oop from the handle we passed to jni.
+ __ z_lg(Z_ARG1, offset, Z_SP);
+ __ add2reg(Z_ARG2, lock_offset, Z_SP);
+ __ z_lgr(Z_ARG3, Z_thread);
+
+ __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
+
+ __ call(Z_R1_scratch);
+
+#ifdef ASSERT
+ {
+ Label L;
+ __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
+ __ z_bre(L);
+ __ stop("no pending exception allowed on exit from IR::monitorexit");
+ __ bind(L);
+ }
+#endif
+
+ // Check_forward_pending_exception jumps to forward_exception if any pending
+ // exception is set. The forward_exception routine expects to see the
+ // exception in pending_exception and not in a register. Kind of clumsy,
+ // since all folks who branch to forward_exception must have tested
+ // pending_exception first and hence have it in a register already.
+ __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
+ restore_native_result(masm, ret_type, workspace_slot_offset);
+ __ z_bru(done);
+ __ z_illtrap(0x66);
+
+ __ bind(done);
+ }
+
+
+ //--------------------------------------------------------------------
+ // Clear "last Java frame" SP and PC.
+ //--------------------------------------------------------------------
+ __ verify_thread(); // Z_thread must be correct.
+
+ __ reset_last_Java_frame();
+
+ // Unpack oop result
+ if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+ NearLabel L;
+ __ compare64_and_branch(Z_RET, (RegisterOrConstant)0L, Assembler::bcondEqual, L);
+ __ z_lg(Z_RET, 0, Z_RET);
+ __ bind(L);
+ __ verify_oop(Z_RET);
+ }
+
+ if (CheckJNICalls) {
+ // clear_pending_jni_exception_check
+ __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
+ }
+
+ // Reset handle block.
+ if (!is_critical_native) {
+ __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
+ __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
+
+ // Check for pending exceptions.
+ __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
+ __ z_brne(handle_pending_exception);
+ }
+
+
+ //////////////////////////////////////////////////////////////////////
+ // Return
+ //////////////////////////////////////////////////////////////////////
+
+
+#ifndef USE_RESIZE_FRAME
+ __ pop_frame(); // Pop wrapper frame.
+#else
+ __ resize_frame(frame_size_in_bytes, Z_R0_scratch); // Revert stack extension.
+#endif
+ __ restore_return_pc(); // This is the way back to the caller.
+ __ z_br(Z_R14);
+
+
+ //////////////////////////////////////////////////////////////////////
+ // Out-of-line calls to the runtime.
+ //////////////////////////////////////////////////////////////////////
+
+
+ if (!is_critical_native) {
+
+ //---------------------------------------------------------------------
+ // Handler for pending exceptions (out-of-line).
+ //---------------------------------------------------------------------
+ // Since this is a native call, we know the proper exception handler
+ // is the empty function. We just pop this frame and then jump to
+ // forward_exception_entry. Z_R14 will contain the native caller's
+ // return PC.
+ __ bind(handle_pending_exception);
+ __ pop_frame();
+ __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
+ __ restore_return_pc();
+ __ z_br(Z_R1_scratch);
+
+ //---------------------------------------------------------------------
+ // Handler for a cache miss (out-of-line)
+ //---------------------------------------------------------------------
+ __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
+ }
+ __ flush();
+
+
+ //////////////////////////////////////////////////////////////////////
+ // end of code generation
+ //////////////////////////////////////////////////////////////////////
+
+
+ nmethod *nm = nmethod::new_native_nmethod(method,
+ compile_id,
+ masm->code(),
+ (int)(wrapper_VEPStart-wrapper_CodeStart),
+ (int)(wrapper_FrameDone-wrapper_CodeStart),
+ stack_slots / VMRegImpl::slots_per_word,
+ (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
+ in_ByteSize(lock_offset),
+ oop_maps);
+
+ if (is_critical_native) {
+ nm->set_lazy_critical_native(true);
+ }
+
+ return nm;
+#else
+ ShouldNotReachHere();
+ return NULL;
+#endif // COMPILER2
+}
+
+static address gen_c2i_adapter(MacroAssembler *masm, // Emits the c2i (compiled-to-interpreter) adapter; returns its entry pc.
+ int total_args_passed, // Number of sig_bt/regs entries to process.
+ int comp_args_on_stack, // Not referenced in this function.
+ const BasicType *sig_bt, // Basic type of each argument.
+ const VMRegPair *regs, // Source location (register or stack slot) of each argument.
+ Label &skip_fixup) { // Bound in here: continuation point after call-site patching.
+ // Before we get into the guts of the C2I adapter, see if we should be here
+ // at all. We've come from compiled code and are attempting to jump to the
+ // interpreter, which means the caller made a static call to get here
+ // (vcalls always get a compiled target if there is one). Check for a
+ // compiled target. If there is one, we need to patch the caller's call.
+
+ // These two defs MUST MATCH code in generate_i2c2i_adapters!
+ const Register ientry = Z_R11;
+ const Register code = Z_R11;
+
+ address c2i_entrypoint;
+ Label patch_callsite;
+
+ // Regular (verified) c2i entry point.
+ c2i_entrypoint = __ pc();
+
+ // Call patching needed?
+ __ load_and_test_long(Z_R0_scratch, method_(code));
+ __ z_lg(ientry, method_(interpreter_entry)); // Preload interpreter entry (also if patching).
+ __ z_brne(patch_callsite); // Patch required if code != NULL (compiled target exists).
+
+ __ bind(skip_fixup); // Return point from patch_callsite.
+
+ // Since all args are passed on the stack, total_args_passed*wordSize is the
+ // space we need. We need ABI scratch area but we use the caller's since
+ // it has already been allocated.
+
+ const int abi_scratch = frame::z_top_ijava_frame_abi_size;
+ int extraspace = round_to(total_args_passed, 2)*wordSize + abi_scratch; // Arg count is rounded up to an even number of words.
+ Register sender_SP = Z_R10;
+ Register value = Z_R12; // Not referenced below.
+
+ // Remember the senderSP so we can pop the interpreter arguments off of the stack.
+ // In addition, frame manager expects initial_caller_sp in Z_R10.
+ __ z_lgr(sender_SP, Z_SP);
+
+ // This should always fit in 14 bit immediate.
+ __ resize_frame(-extraspace, Z_R0_scratch);
+
+ // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
+ // args. This essentially moves the callers ABI scratch area from the top to the
+ // bottom of the arg area.
+
+ int st_off = extraspace - wordSize; // Store offset from Z_SP; decreases by wordSize per slot written.
+
+ // Now write the args into the outgoing interpreter space.
+ for (int i = 0; i < total_args_passed; i++) {
+ VMReg r_1 = regs[i].first();
+ VMReg r_2 = regs[i].second();
+ if (!r_1->is_valid()) {
+ assert(!r_2->is_valid(), "");
+ continue; // An invalid entry does not consume an interpreter slot.
+ }
+ if (r_1->is_stack()) {
+ // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
+ // We must account for it here.
+ int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+
+ if (!r_2->is_valid()) {
+ __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
+ } else {
+ // Longs are given 2 64-bit slots in the interpreter,
+ // but the data is passed in only 1 slot (the one at the lower offset).
+ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+#ifdef ASSERT
+ __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
+#endif
+ st_off -= wordSize;
+ }
+ __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
+ }
+ } else {
+ if (r_1->is_Register()) {
+ if (!r_2->is_valid()) {
+ __ z_st(r_1->as_Register(), st_off, Z_SP);
+ } else {
+ // Longs are given 2 64-bit slots in the interpreter, but the
+ // data is passed in only 1 slot (the one at the lower offset).
+ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+#ifdef ASSERT
+ __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
+#endif
+ st_off -= wordSize;
+ }
+ __ z_stg(r_1->as_Register(), st_off, Z_SP);
+ }
+ } else {
+ assert(r_1->is_FloatRegister(), "");
+ if (!r_2->is_valid()) {
+ __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
+ } else {
+ // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
+ // data is passed in only 1 slot.
+ // One of these should get known junk...
+#ifdef ASSERT
+ __ z_lzdr(Z_F1);
+ __ z_std(Z_F1, st_off, Z_SP);
+#endif
+ st_off-=wordSize;
+ __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
+ }
+ }
+ }
+ st_off -= wordSize;
+ }
+
+
+ // Jump to the interpreter just as if interpreter was doing it.
+ __ add2reg(Z_esp, st_off, Z_SP);
+
+ // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
+ __ z_br(ientry);
+
+
+ // Prevent illegal entry to out-of-line code.
+ __ z_illtrap(0x22);
+
+ // Generate out-of-line runtime call to patch caller,
+ // then continue as interpreted.
+
+ // If you lose the race you go interpreted.
+ // We don't see any possible endless c2i -> i2c -> c2i ...
+ // transitions no matter how rare.
+ __ bind(patch_callsite);
+
+ RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); // Argument registers must survive the leaf call.
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
+ RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
+ __ z_bru(skip_fixup);
+
+ // end of out-of-line code
+
+ return c2i_entrypoint;
+}
+
+// Generate the i2c adapter: move the arguments from the interpreter's layout (addressed via Z_esp)
+// to the locations given by regs, then branch to the method's from_compiled entry. On entry:
+// Z_thread r8 - JavaThread*
+// Z_method r9 - callee's method (method to be invoked)
+// Z_esp r7 - operand (or expression) stack pointer of caller. One slot above last arg.
+// Z_SP r15 - SP prepared by call stub such that caller's outgoing args are near top
+//
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
+ int total_args_passed,
+ int comp_args_on_stack,
+ const BasicType *sig_bt,
+ const VMRegPair *regs) {
+ const Register value = Z_R12; // Not referenced below.
+ const Register ld_ptr= Z_esp;
+
+ int ld_offset = total_args_passed * wordSize; // Load offset from ld_ptr; decreases as arguments are consumed.
+
+ // Cut-out for having no stack args.
+ if (comp_args_on_stack) {
+ // Sig words on the stack are greater than VMRegImpl::stack0. Those in
+ // registers are below. By subtracting stack0, we either get a negative
+ // number (all values in registers) or the maximum stack slot accessed.
+ // Convert VMRegImpl (4 byte) stack slots to words.
+ int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
+ // Round up to minimum stack alignment, in wordSize.
+ comp_words_on_stack = round_to(comp_words_on_stack, 2);
+
+ __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
+ }
+
+ // Now generate the shuffle code. Register args are loaded directly; stack
+ // args are copied memory-to-memory (z_mvc).
+ for (int i = 0; i < total_args_passed; i++) {
+ if (sig_bt[i] == T_VOID) {
+ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+ continue; // Second half of a long/double: ld_offset was already adjusted for it.
+ }
+
+ // Pick up 0, 1 or 2 words from ld_ptr.
+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
+ "scrambled load targets?");
+ VMReg r_1 = regs[i].first();
+ VMReg r_2 = regs[i].second();
+ if (!r_1->is_valid()) {
+ assert(!r_2->is_valid(), "");
+ continue;
+ }
+ if (r_1->is_FloatRegister()) {
+ if (!r_2->is_valid()) {
+ __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
+ ld_offset-=wordSize;
+ } else {
+ // Skip the unused interpreter slot.
+ __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
+ ld_offset -= 2 * wordSize;
+ }
+ } else {
+ if (r_1->is_stack()) {
+ // Must do a memory to memory move.
+ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+
+ if (!r_2->is_valid()) {
+ __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
+ } else {
+ // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
+ // data is passed in only 1 slot.
+ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+ ld_offset -= wordSize;
+ }
+ __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
+ }
+ } else {
+ if (!r_2->is_valid()) {
+ // Not sure we need to do this but it shouldn't hurt.
+ if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
+ __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
+ } else {
+ __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
+ }
+ } else {
+ // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
+ // data is passed in only 1 slot.
+ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+ ld_offset -= wordSize;
+ }
+ __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
+ }
+ }
+ ld_offset -= wordSize; // Common decrement for all non-float-register arguments.
+ }
+ }
+
+ // Jump to the compiled code just as if compiled code was doing it.
+ // Load target address from method oop:
+ __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
+
+ // Store method oop into thread->callee_target.
+ // 6243940: We might end up in handle_wrong_method if
+ // the callee is deoptimized as we race thru here. If that
+ // happens we don't want to take a safepoint because the
+ // caller frame will look interpreted and arguments are now
+ // "compiled" so it is much better to make this transition
+ // invisible to the stack walking code. Unfortunately, if
+ // we try and find the callee by normal means a safepoint
+ // is possible. So we stash the desired callee in the thread
+ // and the vm will find it there should this case occur.
+ __ z_stg(Z_method, thread_(callee_target));
+
+ __ z_br(Z_R1_scratch);
+}
+
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, // Emits the i2c, unverified c2i and c2i entries.
+ int total_args_passed,
+ int comp_args_on_stack,
+ const BasicType *sig_bt,
+ const VMRegPair *regs,
+ AdapterFingerPrint* fingerprint) { // Passed through to AdapterHandlerLibrary::new_entry.
+ __ align(CodeEntryAlignment);
+ address i2c_entry = __ pc();
+ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+ address c2i_unverified_entry;
+
+ Label skip_fixup; // Bound inside gen_c2i_adapter.
+ {
+ Label ic_miss;
+ const int klass_offset = oopDesc::klass_offset_in_bytes();
+ const int holder_klass_offset = CompiledICHolder::holder_klass_offset();
+ const int holder_method_offset = CompiledICHolder::holder_method_offset();
+
+ // Out-of-line call to ic_miss handler.
+ __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
+
+ // Unverified Entry Point UEP
+ __ align(CodeEntryAlignment);
+ c2i_unverified_entry = __ pc();
+
+ // Check the pointers.
+ if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
+ __ z_ltgr(Z_ARG1, Z_ARG1);
+ __ z_bre(ic_miss);
+ }
+ __ verify_oop(Z_ARG1);
+
+ // Check ic: object class <-> cached class
+ // Compress cached class for comparison. That's more efficient.
+ if (UseCompressedClassPointers) {
+ __ z_lg(Z_R11, holder_klass_offset, Z_method); // Z_R11 is overwritten a few instructions down anyway.
+ __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
+ } else {
+ __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
+ }
+ __ z_brne(ic_miss); // Cache miss: call runtime to handle this.
+
+ // This def MUST MATCH code in gen_c2i_adapter!
+ const Register code = Z_R11;
+
+ __ z_lg(Z_method, holder_method_offset, Z_method); // Replace the holder in Z_method by the method loaded from it.
+ __ load_and_test_long(Z_R0, method_(code));
+ __ z_brne(ic_miss); // code != NULL: also routed to the ic_miss handler.
+
+ // Fallthru to VEP. Duplicate LTG, but saved taken branch.
+ }
+
+ address c2i_entry;
+ c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+}
+
+// This function returns the adjust size (in number of words) to a c2i adapter
+// activation for use during deoptimization.
+//
+// Actually only compiled frames need to be adjusted, but it
+// doesn't harm to adjust entry and interpreter frames, too.
+//
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
+ assert(callee_locals >= callee_parameters, "test and remove; got more parms than locals");
+ const int extra_local_words = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
+ // The abi adjustment is accounted for here rather than in push_skeleton_frames.
+ const int abi_words = frame::z_parent_ijava_frame_abi_size / BytesPerWord;
+ return extra_local_words + abi_words;
+}
+
+uint SharedRuntime::out_preserve_stack_slots() {
+ return (uint)(frame::z_jit_out_preserve_size / VMRegImpl::stack_slot_size); // Preserve area expressed in VMReg stack slots.
+}
+
+//
+// Frame generation for deopt and uncommon trap blobs.
+// push_skeleton_frame: push one frame whose return pc and size are read from *pcs_reg and *frame_sizes_reg.
+static void push_skeleton_frame(MacroAssembler* masm,
+ /* Unchanged */
+ Register frame_sizes_reg,
+ Register pcs_reg,
+ /* Invalidate */
+ Register frame_size_reg,
+ Register pc_reg) {
+ BLOCK_COMMENT(" push_skeleton_frame {");
+ __ z_lg(pc_reg, 0, pcs_reg);
+ __ z_lg(frame_size_reg, 0, frame_sizes_reg);
+ __ z_stg(pc_reg, _z_abi(return_pc), Z_SP); // Store the pc into the current top frame before pushing the new one.
+ Register fp = pc_reg; // The pc has been stored; the register is reused under the name fp.
+ __ push_frame(frame_size_reg, fp);
+#ifdef ASSERT
+ // The magic is required for successful walking skeletal frames.
+ __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
+ __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
+ // Fill other slots that are supposedly not necessary with eye catchers.
+ __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
+ __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
+ // The sender_sp of the bottom frame is set before pushing it.
+ // The sender_sp of non bottom frames is their caller's top_frame_sp, which
+ // is unknown here. Luckily it is not needed before filling the frame in
+ // layout_activation(). We assert this by setting an eye catcher (see
+ // comments on sender_sp in frame_s390.hpp).
+ __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP); // Note: relative to Z_SP, not fp.
+#endif // ASSERT
+ BLOCK_COMMENT(" } push_skeleton_frame");
+}
+
+// Loop through the UnrollBlock info and create new frames.
+static void push_skeleton_frames(MacroAssembler* masm, bool deopt, // deopt is not referenced in this function.
+ /* read */
+ Register unroll_block_reg,
+ /* invalidate */
+ Register frame_sizes_reg,
+ Register number_of_frames_reg,
+ Register pcs_reg,
+ Register tmp1,
+ Register tmp2) {
+ BLOCK_COMMENT("push_skeleton_frames {");
+ // _number_of_frames is of type int (deoptimization.hpp).
+ __ z_lgf(number_of_frames_reg,
+ Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+ __ z_lg(pcs_reg,
+ Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+ __ z_lg(frame_sizes_reg,
+ Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+
+ // stack: (caller_of_deoptee, ...).
+
+ // If caller_of_deoptee is a compiled frame, then we extend it to make
+ // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
+ // See also Deoptimization::last_frame_adjust() above.
+ // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.
+
+ __ z_lgf(Z_R1_scratch,
+ Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
+ __ z_lgr(tmp1, Z_SP); // Save the sender sp before extending the frame.
+ __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
+ // The oldest skeletal frame requires a valid sender_sp to make it walkable
+ // (it is required to find the original pc of caller_of_deoptee if it is marked
+ // for deoptimization - see nmethod::orig_pc_addr()).
+ __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
+
+ // Now push the new interpreter frames.
+ Label loop, loop_entry;
+
+ // Make sure that there is at least one entry in the array.
+ DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
+ __ asm_assert_ne("array_size must be > 0", 0x205);
+
+ __ z_bru(loop_entry); // The first iteration skips the pointer increments below.
+
+ __ bind(loop);
+
+ __ add2reg(frame_sizes_reg, wordSize);
+ __ add2reg(pcs_reg, wordSize);
+
+ __ bind(loop_entry);
+
+ // Allocate a new frame, fill in the pc.
+ push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
+
+ __ z_aghi(number_of_frames_reg, -1); // Emit AGHI, because it sets the condition code
+ __ z_brne(loop); // Repeat until the frame counter reaches zero.
+
+ // Set the top frame's return pc.
+ __ add2reg(pcs_reg, wordSize); // Advance to the pcs entry following the last pushed frame's entry.
+ __ z_lg(Z_R0_scratch, 0, pcs_reg);
+ __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
+ BLOCK_COMMENT("} push_skeleton_frames");
+}
+
+//------------------------------generate_deopt_blob----------------------------
+void SharedRuntime::generate_deopt_blob() {
+ // Allocate space for the code.
+ ResourceMark rm;
+ // Setup code generation tools.
+ CodeBuffer buffer("deopt_blob", 2048, 1024);
+ InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
+ Label exec_mode_initialized;
+ OopMap* map = NULL;
+ OopMapSet *oop_maps = new OopMapSet();
+
+ unsigned int start_off = __ offset();
+ Label cont;
+
+ // --------------------------------------------------------------------------
+ // Normal entry (non-exception case)
+ //
+ // We have been called from the deopt handler of the deoptee.
+ // Z_R14 points behind the call in the deopt handler. We adjust
+ // it such that it points to the start of the deopt handler.
+ // The return_pc has been stored in the frame of the deoptee and
+ // will replace the address of the deopt_handler in the call
+ // to Deoptimization::fetch_unroll_info below.
+ // The (int) cast is necessary, because -((unsigned int)14)
+ // is an unsigned int.
+ __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler());
+
+ const Register exec_mode_reg = Z_tmp_1;
+
+ // stack: (deoptee, caller of deoptee, ...)
+
+ // pushes an "unpack" frame
+ // R14 contains the return address pointing into the deoptimized
+ // nmethod that was valid just before the nmethod was deoptimized.
+ // save R14 into the deoptee frame. the `fetch_unroll_info'
+ // procedure called below will read it from there.
+ map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+
+ // note the entry point.
+ __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
+ __ z_bru(exec_mode_initialized);
+
+#ifndef COMPILER1
+ int reexecute_offset = 1; // odd offset will produce odd pc, which triggers an hardware trap
+#else
+ // --------------------------------------------------------------------------
+ // Reexecute entry
+ // - Z_R14 = Deopt Handler in nmethod
+
+ int reexecute_offset = __ offset() - start_off;
+
+ // No need to update map as each call to save_live_registers will produce identical oopmap
+ (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+
+ __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
+ __ z_bru(exec_mode_initialized);
+#endif
+
+
+ // --------------------------------------------------------------------------
+ // Exception entry. We reached here via a branch. Registers on entry:
+ // - Z_EXC_OOP (Z_ARG1) = exception oop
+ // - Z_EXC_PC (Z_ARG2) = the exception pc.
+
+ int exception_offset = __ offset() - start_off;
+
+ // all registers are dead at this entry point, except for Z_EXC_OOP, and
+ // Z_EXC_PC which contain the exception oop and exception pc
+ // respectively. Set them in TLS and fall thru to the
+ // unpack_with_exception_in_tls entry point.
+
+ // Store exception oop and pc in thread (location known to GC).
+ // Need this since the call to "fetch_unroll_info()" may safepoint.
+ __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
+ __ z_stg(Z_EXC_PC, Address(Z_thread, JavaThread::exception_pc_offset()));
+
+ // fall through
+
+ int exception_in_tls_offset = __ offset() - start_off;
+
+ // new implementation because exception oop is now passed in JavaThread
+
+ // Prolog for exception case
+ // All registers must be preserved because they might be used by LinearScan
+ // Exception oop and throwing PC are passed in JavaThread.
+
+ // Load throwing pc from JavaThread and use it as the return address of the current frame.
+ __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
+
+ // Save everything in sight.
+ (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
+
+ // Now it is safe to overwrite any register
+
+ // Clear the exception pc field in JavaThread
+ __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
+
+ // Deopt during an exception. Save exec mode for unpack_frames.
+ __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
+
+
+#ifdef ASSERT
+ // verify that there is really an exception oop in JavaThread
+ __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
+ __ verify_oop(Z_ARG1);
+
+ // verify that there is no pending exception
+ __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
+ "must not have pending exception here", __LINE__);
+#endif
+
+ // --------------------------------------------------------------------------
+ // At this point, the live registers are saved and
+ // the exec_mode_reg has been set up correctly.
+ __ bind(exec_mode_initialized);
+
+ // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
+
+ {
+ const Register unroll_block_reg = Z_tmp_2;
+
+ // we need to set `last_Java_frame' because `fetch_unroll_info' will
+ // call `last_Java_frame()'. however we can't block and no gc will
+ // occur so we don't need an oopmap. the value of the pc in the
+ // frame is not particularly important. it just needs to identify the blob.
+
+ // Don't set last_Java_pc anymore here (is implicitly NULL then).
+ // the correct PC is retrieved in pd_last_frame() in that case.
+ __ set_last_Java_frame(/*sp*/Z_SP, noreg);
+ // With EscapeAnalysis turned on, this call may safepoint
+ // despite it's marked as "leaf call"!
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
+ // Set an oopmap for the call site this describes all our saved volatile registers
+ int offs = __ offset();
+ oop_maps->add_gc_map(offs, map);
+
+ __ reset_last_Java_frame();
+ // save the return value.
+ __ z_lgr(unroll_block_reg, Z_RET);
+ // restore the return registers that have been saved
+ // (among other registers) by save_live_registers(...).
+ RegisterSaver::restore_result_registers(masm);
+
+ // reload the exec mode from the UnrollBlock (it might have changed)
+ __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+
+ // In excp_deopt_mode, restore and clear exception oop which we
+ // stored in the thread during exception entry above. The exception
+ // oop will be the return value of this stub.
+ NearLabel skip_restore_excp;
+ __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
+ __ z_lg(Z_RET, thread_(exception_oop));
+ __ clear_mem(thread_(exception_oop), 8);
+ __ bind(skip_restore_excp);
+
+ // remove the "unpack" frame
+ __ pop_frame();
+
+ // stack: (deoptee, caller of deoptee, ...).
+
+ // pop the deoptee's frame
+ __ pop_frame();
+
+ // stack: (caller_of_deoptee, ...).
+
+ // loop through the `UnrollBlock' info and create interpreter frames.
+ push_skeleton_frames(masm, true/*deopt*/,
+ unroll_block_reg,
+ Z_tmp_3,
+ Z_tmp_4,
+ Z_ARG5,
+ Z_ARG4,
+ Z_ARG3);
+
+ // stack: (skeletal interpreter frame, ..., optional skeletal
+ // interpreter frame, caller of deoptee, ...).
+ }
+
+ // push an "unpack" frame taking care of float / int return values.
+ __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
+
+ // stack: (unpack frame, skeletal interpreter frame, ..., optional
+ // skeletal interpreter frame, caller of deoptee, ...).
+
+ // spill live volatile registers since we'll do a call.
+ __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
+ __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
+
+ // let the unpacker layout information in the skeletal frames just allocated.
+ __ get_PC(Z_RET);
+ __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
+ Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
+
+ __ reset_last_Java_frame();
+
+ // restore the volatiles saved above.
+ __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
+ __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
+
+ // pop the "unpack" frame.
+ __ pop_frame();
+ __ restore_return_pc();
+
+ // stack: (top interpreter frame, ..., optional interpreter frame,
+ // caller of deoptee, ...).
+
+ __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_esp();
+
+ // return to the interpreter entry point.
+ __ z_br(Z_R14);
+
+ // Make sure all code is generated
+ masm->flush();
+
+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+}
+
+
+#ifdef COMPILER2
+//------------------------------generate_uncommon_trap_blob--------------------
+void SharedRuntime::generate_uncommon_trap_blob() {
+ // Emit the stub kept in _uncommon_trap_blob: call Deoptimization::uncommon_trap, replace the
+ // deoptee frame by skeletal interpreter frames, call Deoptimization::unpack_frames, branch to Z_R14.
+ ResourceMark rm; // Allocate space for the code
+ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); // Setup code generation tools
+ InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
+
+ Register unroll_block_reg = Z_tmp_1;
+ Register klass_index_reg = Z_ARG2;
+ Register unc_trap_reg = Z_ARG2; // same register as klass_index_reg; used for the second VM call
+
+ // stack: (deoptee, caller_of_deoptee, ...).
+
+ // push a dummy "unpack" frame and call
+ // `Deoptimization::uncommon_trap' to pack the compiled frame into a
+ // vframe array and return the `UnrollBlock' information.
+
+ // save R14 to compiled frame.
+ __ save_return_pc();
+ // push the "unpack_frame".
+ __ push_frame_abi160(0);
+
+ // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
+
+ // set the "unpack" frame as last_Java_frame.
+ // `Deoptimization::uncommon_trap' expects it and considers its
+ // sender frame as the deoptee frame.
+ __ get_PC(Z_R1_scratch);
+ __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
+
+ __ z_lgr(klass_index_reg, Z_ARG1); // passed implicitly as ARG2
+ __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap); // passed implicitly as ARG3
+ BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
+
+ __ reset_last_Java_frame();
+
+ // pop the "unpack" frame
+ __ pop_frame();
+
+ // stack: (deoptee, caller_of_deoptee, ...).
+
+ // save the return value (the `UnrollBlock' pointer) in a register that is still used below.
+ __ z_lgr(unroll_block_reg, Z_RET);
+
+ // pop the deoptee frame.
+ __ pop_frame();
+
+ // stack: (caller_of_deoptee, ...).
+
+#ifdef ASSERT
+ assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
+ assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
+ const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
+#ifndef VM_LITTLE_ENDIAN
+ + 3 // presumably selects the least significant byte of a 4-byte unpack_kind field
+#endif
+ ;
+ if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
+ __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
+ } else {
+ __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
+ }
+ __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
+#endif
+
+ __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
+
+ // allocate new interpreter frame(s) and possibly resize the caller's frame
+ // (no more adapters !)
+ push_skeleton_frames(masm, false/*deopt*/,
+ unroll_block_reg,
+ Z_tmp_2,
+ Z_tmp_3,
+ Z_tmp_4,
+ Z_ARG5,
+ Z_ARG4);
+
+ // stack: (skeletal interpreter frame, ..., optional skeletal
+ // interpreter frame, (resized) caller of deoptee, ...).
+
+ // push a dummy "unpack" frame taking care of float return values.
+ // call `Deoptimization::unpack_frames' to layout information in the
+ // interpreter frames just created
+
+ // push the "unpack" frame; its size is what the blob reports as frame size below.
+ const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
+
+ // stack: (unpack frame, skeletal interpreter frame, ..., optional
+ // skeletal interpreter frame, (resized) caller of deoptee, ...).
+
+ // set the "unpack" frame as last_Java_frame
+ __ get_PC(Z_R1_scratch);
+ __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
+
+ // indicate it is the uncommon trap case (exec mode is passed implicitly as ARG2)
+ BLOCK_COMMENT("call Deoptimization::unpack_frames()");
+ __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
+ // let the unpacker layout information in the skeletal frames just allocated.
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
+
+ __ reset_last_Java_frame();
+ // pop the "unpack" frame
+ __ pop_frame();
+ // restore LR from top interpreter frame
+ __ restore_return_pc();
+
+ // stack: (top interpreter frame, ..., optional interpreter frame,
+ // (resized) caller of deoptee, ...).
+
+ __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_esp();
+
+ // return to the interpreter entry point
+ __ z_br(Z_R14);
+
+ masm->flush();
+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
+}
+#endif // COMPILER2
+
+
+//------------------------------generate_handler_blob------
+// Generate a special Compile2Runtime blob that saves all registers, sets up an
+// oopmap, calls call_ptr with the thread and then either forwards a pending
+// exception or returns via Z_R14. poll_type == POLL_AT_RETURN marks a return poll.
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
+ assert(StubRoutines::forward_exception_entry() != NULL,
+ "must be generated before");
+
+ ResourceMark rm;
+ OopMapSet *oop_maps = new OopMapSet();
+ OopMap* map;
+
+ // Allocate space for the code. Setup code generation tools.
+ CodeBuffer buffer("handler_blob", 2048, 1024);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+
+ unsigned int start_off = __ offset();
+ address call_pc = NULL; // NOTE(review): not referenced again in this function
+ int frame_size_in_bytes; // NOTE(review): never assigned or read in this function
+
+ bool cause_return = (poll_type == POLL_AT_RETURN);
+ // For polls that are not at a return, take the return address (Z_R14) from the thread's saved_exception_pc.
+ if (!cause_return)
+ __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
+
+ // Save registers, fpu state, and flags
+ map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+
+ // The following is basically a call_VM. However, we need the precise
+ // address of the call in order to generate an oopmap. Hence, we do all the
+ // work ourselves.
+ __ set_last_Java_frame(Z_SP, noreg);
+
+ // call into the runtime to handle the safepoint poll
+ __ call_VM_leaf(call_ptr, Z_thread);
+
+
+ // Set an oopmap for the call site. This oopmap will map all
+ // oop-registers and debug-info registers as callee-saved. This
+ // will allow deoptimization at this safepoint to find all possible
+ // debug-info recordings, as well as let GC find all oops.
+ // The map is registered at the code offset right behind the call, relative to start_off.
+ oop_maps->add_gc_map((int)(__ offset()-start_off), map);
+
+ Label noException;
+
+ __ reset_last_Java_frame();
+
+ __ load_and_test_long(Z_R1, thread_(pending_exception)); // sets the condition tested by the branch below
+ __ z_bre(noException);
+
+ // Pending exception case, used (sporadically) by
+ // api/java_lang/Thread.State/index#ThreadState et al.
+ RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
+
+ // Jump to forward_exception_entry, with the issuing PC in Z_R14
+ // so it looks like the original nmethod called forward_exception_entry.
+ __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
+ __ z_br(Z_R1_scratch);
+
+ // No exception case
+ __ bind(noException);
+
+ // Normal exit, restore registers and exit.
+ RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
+
+ __ z_br(Z_R14);
+
+ // Make sure all code is generated
+ masm->flush();
+
+ // Fill-out other meta info; the frame size is passed in words (bytes / wordSize).
+ return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
+}
+
+
+//
+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
+//
+// Generate a stub that calls into vm to find out the proper destination
+// of a Java call. All the argument registers are live at this point
+// but since this is generic code we don't know what they are and the caller
+// must do any gc of the args.
+// destination is the VM entry called with (Z_thread, Z_method); name labels the code buffer and the stub.
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
+ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+ // allocate space for the code
+ ResourceMark rm;
+
+ CodeBuffer buffer(name, 1000, 512);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+
+ OopMapSet *oop_maps = new OopMapSet();
+ OopMap* map = NULL;
+
+ unsigned int start_off = __ offset();
+
+ map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+
+ // We must save a PC from within the stub as return PC
+ // C code doesn't store the LR where we expect the PC,
+ // so we would run into trouble upon stack walking.
+ __ get_PC(Z_R1_scratch);
+
+ unsigned int frame_complete = __ offset(); // code offset right behind get_PC; also used for the oopmap below
+
+ __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
+
+ __ call_VM_leaf(destination, Z_thread, Z_method);
+
+
+ // Set an oopmap for the call site.
+ // We need this not only for callee-saved registers, but also for volatile
+ // registers that the compiler might be keeping live across a safepoint.
+ // Note: the map is keyed by frame_complete (behind get_PC), not by the offset behind the call.
+ oop_maps->add_gc_map((int)(frame_complete-start_off), map);
+
+ // clear last_Java_sp
+ __ reset_last_Java_frame();
+
+ // check for pending exceptions
+ Label pending;
+ __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
+ __ z_brne(pending);
+
+ __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
+ RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
+
+ // get the returned method
+ __ get_vm_result_2(Z_method);
+
+ // We are back to the original state on entry and ready to go.
+ __ z_br(Z_R1_scratch);
+
+ // Pending exception after the safepoint
+
+ __ bind(pending);
+
+ RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
+
+ // exception pending => remove activation and forward to exception handler
+ // NOTE(review): the next move relies on Z_R0 (loaded above) surviving restore_live_registers - confirm.
+ __ z_lgr(Z_R2, Z_R0); // pending_exception
+ __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
+ __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
+ __ z_br(Z_R1_scratch);
+
+ // -------------
+ // make sure all code is generated
+ masm->flush();
+
+ // return the blob
+ // The frame size is passed in words (live_reg_frame_size in bytes / wordSize).
+ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
+ oop_maps, true);
+
+}
+
+//------------------------------Montgomery multiplication------------------------
+//
+// Subtract 0:b from carry:a, i.e. a[0..len-1] -= b[0..len-1] in place with borrow propagation,
+// 8-byte words processed from index 0 upwards. Return carry minus the borrow out of the last word.
+static unsigned long
+sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
+ unsigned long i, c = 8 * (unsigned long)(len - 1); // c starts as the byte offset of the last word
+ __asm__ __volatile__ (
+ "SLGR %[i], %[i] \n" // initialize to 0 and pre-set carry
+ "LGHI 0, 8 \n" // index increment (for BRXLG)
+ "LGR 1, %[c] \n" // index limit (for BRXLG)
+ "0: \n"
+ "LG %[c], 0(%[i],%[a]) \n" // c is reused as scratch for the current word of a
+ "SLBG %[c], 0(%[i],%[b]) \n" // subtract with borrow
+ "STG %[c], 0(%[i],%[a]) \n"
+ "BRXLG %[i], 0, 0b \n" // while ((i+=8)<=limit);
+ "SLBGR %[c], %[c] \n" // save carry - 1
+ : [i]"=&a"(i), [c]"+r"(c) // i: byte index; c: limit on entry, 0 or -1 (borrow) on exit
+ : [a]"a"(a), [b]"a"(b)
+ : "cc", "memory", "r0", "r1" // r0/r1 hold the BRXLG increment/limit pair
+ );
+ return carry + c;
+}
+
+// Multiply (unsigned) Long A[A_ind] by Long B[B_ind], accumulating the double-
+// length result into the accumulator formed of T0 (low word), T1, and T2 (high word).
+inline void MACC(unsigned long A[], long A_ind,
+ unsigned long B[], long B_ind,
+ unsigned long &T0, unsigned long &T1, unsigned long &T2) {
+ long A_si = 8 * A_ind, // byte offset of A[A_ind]
+ B_si = 8 * B_ind; // byte offset of B[B_ind]
+ __asm__ __volatile__ (
+ "LG 1, 0(%[A_si],%[A]) \n" // r1 = A[A_ind]
+ "MLG 0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
+ "ALGR %[T0], 1 \n" // T0 += low product word (r1)
+ "LGHI 1, 0 \n" // r1 = 0
+ "ALCGR %[T1], 0 \n" // T1 += high product word (r0) + carry
+ "ALCGR %[T2], 1 \n" // T2 += 0 + carry
+ : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
+ : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
+ : "cc", "r0", "r1" // r0/r1 are used explicitly as the product register pair
+ );
+}
+
+// As MACC, but add twice the double-length product A[A_ind] * B[B_ind] into the
+// accumulator T0 (low word), T1, T2 (high word).
+inline void MACC2(unsigned long A[], long A_ind,
+ unsigned long B[], long B_ind,
+ unsigned long &T0, unsigned long &T1, unsigned long &T2) {
+ const unsigned long zero = 0; // register operand used to add just the carry into T2
+ long A_si = 8 * A_ind, // byte offset of A[A_ind]
+ B_si = 8 * B_ind; // byte offset of B[B_ind]
+ __asm__ __volatile__ (
+ "LG 1, 0(%[A_si],%[A]) \n" // r1 = A[A_ind]
+ "MLG 0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
+ "ALGR %[T0], 1 \n" // first addition: T0 += low product word (r1)
+ "ALCGR %[T1], 0 \n" // T1 += high product word (r0) + carry
+ "ALCGR %[T2], %[zero] \n" // T2 += 0 + carry
+ "ALGR %[T0], 1 \n" // second addition of the same product
+ "ALCGR %[T1], 0 \n"
+ "ALCGR %[T2], %[zero] \n"
+ : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
+ : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
+ : "cc", "r0", "r1" // r0/r1 are used explicitly as the product register pair
+ );
+}
+
+// Fast Montgomery multiplication on arrays of 8-byte words. For the derivation
+// of the algorithm see "A Cryptographic Library for the Motorola DSP56000,
+// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
+static void
+montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
+                    unsigned long m[], unsigned long inv, int len) {
+  // Triple-precision accumulator: acc_lo is the current column, acc_mid/acc_hi hold the carries.
+  unsigned long acc_lo = 0, acc_mid = 0, acc_hi = 0;
+
+  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+
+  // Columns 0 .. len-1: sum the a*b and m*n terms of the column, then choose
+  // m[col] such that the low accumulator word becomes zero.
+  for (int col = 0; col < len; col++) {
+    for (int k = 0; k < col; k++) {
+      MACC(a, k, b, col - k, acc_lo, acc_mid, acc_hi);
+      MACC(m, k, n, col - k, acc_lo, acc_mid, acc_hi);
+    }
+    MACC(a, col, b, 0, acc_lo, acc_mid, acc_hi);
+    m[col] = acc_lo * inv;
+    MACC(m, col, n, 0, acc_lo, acc_mid, acc_hi);
+
+    assert(acc_lo == 0, "broken Montgomery multiply");
+
+    acc_lo = acc_mid; acc_mid = acc_hi; acc_hi = 0;
+  }
+
+  // Columns len .. 2*len-1: sum the remaining terms; the low word of each column is stored into m.
+  for (int col = len; col < 2 * len; col++) {
+    for (int k = col - len + 1; k < len; k++) {
+      MACC(a, k, b, col - k, acc_lo, acc_mid, acc_hi);
+      MACC(m, k, n, col - k, acc_lo, acc_mid, acc_hi);
+    }
+    m[col - len] = acc_lo;
+    acc_lo = acc_mid; acc_mid = acc_hi; acc_hi = 0;
+  }
+
+  // Keep subtracting n from m for as long as a carry word is left over.
+  for (; acc_lo != 0; acc_lo = sub(m, n, acc_lo, len)) { }
+}
+
+// Fast Montgomery squaring. Compared with Montgomery multiplication this
+// needs asymptotically 25% fewer multiplies, so it should be up to 25%
+// faster. However, its loop control is more complex and it may actually
+// run slower on some machines.
+static void
+montgomery_square(unsigned long a[], unsigned long n[],
+                  unsigned long m[], unsigned long inv, int len) {
+  unsigned long acc_lo = 0, acc_mid = 0, acc_hi = 0; // Triple-precision accumulator
+
+  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+
+  for (int col = 0; col < len; col++) {
+    const int half = (col + 1) / 2;
+    int k = 0;
+    for (; k < half; k++) {
+      MACC2(a, k, a, col - k, acc_lo, acc_mid, acc_hi);
+      MACC(m, k, n, col - k, acc_lo, acc_mid, acc_hi);
+    }
+    // Even columns additionally get the single diagonal term a[k] * a[k].
+    if (col % 2 == 0) {
+      MACC(a, k, a, k, acc_lo, acc_mid, acc_hi);
+    }
+    for (; k < col; k++) {
+      MACC(m, k, n, col - k, acc_lo, acc_mid, acc_hi);
+    }
+    m[col] = acc_lo * inv;
+    MACC(m, col, n, 0, acc_lo, acc_mid, acc_hi);
+
+    assert(acc_lo == 0, "broken Montgomery square");
+
+    acc_lo = acc_mid; acc_mid = acc_hi; acc_hi = 0;
+  }
+
+  for (int col = len; col < 2*len; col++) {
+    const int first = col - len + 1;
+    const int half = first + (len - first) / 2;
+    int k = first;
+    for (; k < half; k++) {
+      MACC2(a, k, a, col - k, acc_lo, acc_mid, acc_hi);
+      MACC(m, k, n, col - k, acc_lo, acc_mid, acc_hi);
+    }
+    if (col % 2 == 0) {
+      MACC(a, k, a, k, acc_lo, acc_mid, acc_hi);
+    }
+    for (; k < len; k++) {
+      MACC(m, k, n, col - k, acc_lo, acc_mid, acc_hi);
+    }
+    m[col - len] = acc_lo;
+    acc_lo = acc_mid; acc_mid = acc_hi; acc_hi = 0;
+  }
+
+  while (acc_lo != 0) {
+    acc_lo = sub(m, n, acc_lo, len);
+  }
+}
+
+// The threshold at which squaring is advantageous was determined
+// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
+// Value seems to be ok for other platforms, too.
+#define MONTGOMERY_SQUARING_THRESHOLD 64
+
+// Copy len longwords from s to d. The destination array is written in
+// reverse order: s[0] ends up in d[len-1] and s[len-1] in d[0]. The word
+// swap inside a longword is only required (and unimplemented) on little endian.
+static void reverse_words(unsigned long *s, unsigned long *d, int len) {
+  unsigned long *out = d + len;  // one past the last slot, pre-decremented before each store
+  for (const unsigned long *in = s; len > 0; len--, in++) {
+    const unsigned long word = *in;
+    // Swap words in a longword on little endian machines.
+#ifdef VM_LITTLE_ENDIAN
+    Unimplemented();
+#endif
+    *--out = word;
+  }
+}
+
+
+// Montgomery-multiply the jint arrays a_ints and b_ints (modulus n_ints, len jints each) into m_ints.
+void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
+ jint len, jlong inv,
+ jint *m_ints) {
+ len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
+ assert(len % 2 == 0, "array length in montgomery_multiply must be even");
+ int longwords = len/2;
+
+ // Make very sure we don't use so much space that the stack might
+ // overflow. 512 jints correspond to a 16384-bit integer and
+ // will use here a total of 8k bytes of stack space.
+ // The size is kept as size_t: narrowing it to int could wrap a huge request to a small value.
+ size_t total_allocation = (size_t)longwords * sizeof (unsigned long) * 4;
+ guarantee(total_allocation <= 8192, "must be");
+ unsigned long *scratch = (unsigned long *)alloca(total_allocation);
+
+ // Local scratch arrays
+ unsigned long
+ *a = scratch + 0 * longwords,
+ *b = scratch + 1 * longwords,
+ *n = scratch + 2 * longwords,
+ *m = scratch + 3 * longwords;
+
+ reverse_words((unsigned long *)a_ints, a, longwords);
+ reverse_words((unsigned long *)b_ints, b, longwords);
+ reverse_words((unsigned long *)n_ints, n, longwords);
+ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
+ reverse_words(m, (unsigned long *)m_ints, longwords); // copy the result back in the caller's word order
+}
+
+// Montgomery-square the jint array a_ints (modulus n_ints, len jints each) into m_ints.
+void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
+ jint len, jlong inv,
+ jint *m_ints) {
+ len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
+ assert(len % 2 == 0, "array length in montgomery_square must be even");
+ int longwords = len/2;
+
+ // Make very sure we don't use so much space that the stack might
+ // overflow. 512 jints correspond to a 16384-bit integer and
+ // will use here a total of 6k bytes of stack space.
+ // The size is kept as size_t: narrowing it to int could wrap a huge request to a small value.
+ size_t total_allocation = (size_t)longwords * sizeof (unsigned long) * 3;
+ guarantee(total_allocation <= 8192, "must be");
+ unsigned long *scratch = (unsigned long *)alloca(total_allocation);
+
+ // Local scratch arrays
+ unsigned long
+ *a = scratch + 0 * longwords,
+ *n = scratch + 1 * longwords,
+ *m = scratch + 2 * longwords;
+
+ reverse_words((unsigned long *)a_ints, a, longwords);
+ reverse_words((unsigned long *)n_ints, n, longwords);
+ if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
+ ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
+ } else {
+ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
+ }
+ reverse_words(m, (unsigned long *)m_ints, longwords); // copy the result back in the caller's word order
+}
+
+// SpinPause with C linkage; this implementation performs no work and reports 0.
+extern "C" int SpinPause() {
+  return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,2563 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "registerSaver_s390.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/instanceOop.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+
+// Declaration and definition of StubGenerator (no .hpp file).
+// For a more detailed description of the stub routine structure
+// see the comment in stubRoutines.hpp.
+
+#ifdef PRODUCT
+#define __ _masm->
+#else
+#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#endif
+
+#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// -----------------------------------------------------------------------
+// Stub Code definitions
+
+class StubGenerator: public StubCodeGenerator {
+ private:
+
+ //----------------------------------------------------------------------
+ // Call stubs are used to call Java from C.
+
+ //
+ // Arguments:
+ //
+ // R2 - call wrapper address : address
+ // R3 - result : intptr_t*
+ // R4 - result type : BasicType
+ // R5 - method : method
+ // R6 - frame mgr entry point : address
+ // [SP+160] - parameter block : intptr_t*
+ // [SP+172] - parameter count in words : int
+ // [SP+176] - thread : Thread*
+ //
+ address generate_call_stub(address& return_address) {
+ // Set up a new C frame, copy Java arguments, call frame manager
+ // or native_entry, and process result.
+
+ StubCodeMark mark(this, "StubRoutines", "call_stub");
+ address start = __ pc();
+
+ Register r_arg_call_wrapper_addr = Z_ARG1;
+ Register r_arg_result_addr = Z_ARG2;
+ Register r_arg_result_type = Z_ARG3;
+ Register r_arg_method = Z_ARG4;
+ Register r_arg_entry = Z_ARG5;
+
+ // offsets to fp
+ #define d_arg_thread 176
+ #define d_arg_argument_addr 160
+ #define d_arg_argument_count 168+4
+
+ Register r_entryframe_fp = Z_tmp_1;
+ Register r_top_of_arguments_addr = Z_ARG4;
+ Register r_new_arg_entry = Z_R14;
+
+ // macros for frame offsets
+ #define call_wrapper_address_offset \
+ _z_entry_frame_locals_neg(call_wrapper_address)
+ #define result_address_offset \
+ _z_entry_frame_locals_neg(result_address)
+ #define result_type_offset \
+ _z_entry_frame_locals_neg(result_type)
+ #define arguments_tos_address_offset \
+ _z_entry_frame_locals_neg(arguments_tos_address)
+
+ {
+ //
+ // STACK on entry to call_stub:
+ //
+ // F1 [C_FRAME]
+ // ...
+ //
+
+ Register r_argument_addr = Z_tmp_3;
+ Register r_argumentcopy_addr = Z_tmp_4;
+ Register r_argument_size_in_bytes = Z_ARG5;
+ Register r_frame_size = Z_R1;
+
+ Label arguments_copied;
+
+ // Save non-volatile registers to ABI of caller frame.
+ BLOCK_COMMENT("save registers, push frame {");
+ __ z_stmg(Z_R6, Z_R14, 16, Z_SP);
+ __ z_std(Z_F8, 96, Z_SP);
+ __ z_std(Z_F9, 104, Z_SP);
+ __ z_std(Z_F10, 112, Z_SP);
+ __ z_std(Z_F11, 120, Z_SP);
+ __ z_std(Z_F12, 128, Z_SP);
+ __ z_std(Z_F13, 136, Z_SP);
+ __ z_std(Z_F14, 144, Z_SP);
+ __ z_std(Z_F15, 152, Z_SP);
+
+ //
+ // Push ENTRY_FRAME including arguments:
+ //
+ // F0 [TOP_IJAVA_FRAME_ABI]
+ // [outgoing Java arguments]
+ // [ENTRY_FRAME_LOCALS]
+ // F1 [C_FRAME]
+ // ...
+ //
+
+ // Calculate new frame size and push frame.
+ #define abi_plus_locals_size \
+ (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size)
+ if (abi_plus_locals_size % BytesPerWord == 0) {
+ // Preload constant part of frame size.
+ __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord);
+ // Keep copy of our frame pointer (caller's SP).
+ __ z_lgr(r_entryframe_fp, Z_SP);
+ // Add space required by arguments to frame size.
+ __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP);
+ // Move Z_ARG5 early, it will be used as a local.
+ __ z_lgr(r_new_arg_entry, r_arg_entry);
+ // Convert frame size from words to bytes.
+ __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord);
+ __ push_frame(r_frame_size, r_entryframe_fp,
+ false/*don't copy SP*/, true /*frame size sign inverted*/);
+ } else {
+ guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)");
+ }
+ BLOCK_COMMENT("} save, push");
+
+ // Load argument registers for call.
+ BLOCK_COMMENT("prepare/copy arguments {");
+ __ z_lgr(Z_method, r_arg_method);
+ __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp);
+
+ // Calculate top_of_arguments_addr which will be tos (not prepushed) later.
+ // Simply use SP + frame::top_ijava_frame_size.
+ __ add2reg(r_top_of_arguments_addr,
+ frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);
+
+ // Initialize call_stub locals (step 1).
+ if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) &&
+ (result_address_offset + BytesPerWord == result_type_offset) &&
+ (result_type_offset + BytesPerWord == arguments_tos_address_offset)) {
+
+ __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr,
+ call_wrapper_address_offset, r_entryframe_fp);
+ } else {
+ __ z_stg(r_arg_call_wrapper_addr,
+ call_wrapper_address_offset, r_entryframe_fp);
+ __ z_stg(r_arg_result_addr,
+ result_address_offset, r_entryframe_fp);
+ __ z_stg(r_arg_result_type,
+ result_type_offset, r_entryframe_fp);
+ __ z_stg(r_top_of_arguments_addr,
+ arguments_tos_address_offset, r_entryframe_fp);
+ }
+
+ // Copy Java arguments.
+
+ // Any arguments to copy?
+ __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
+ __ z_bre(arguments_copied);
+
+ // Prepare loop and copy arguments in reverse order.
+ {
+ // Calculate argument size in bytes.
+ __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);
+
+ // Get addr of first incoming Java argument.
+ __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);
+
+ // Let r_argumentcopy_addr point to last outgoing Java argument.
+ __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.
+
+ // Let r_argument_addr point to last incoming Java argument.
+ __ add2reg_with_index(r_argument_addr, -BytesPerWord,
+ r_argument_size_in_bytes, r_argument_addr);
+
+ // Now loop while Z_R1 > 0 and copy arguments.
+ {
+ Label next_argument;
+ __ bind(next_argument);
+ // Mem-mem move.
+ __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
+ __ add2reg(r_argument_addr, -BytesPerWord);
+ __ add2reg(r_argumentcopy_addr, BytesPerWord);
+ __ z_brct(Z_R1, next_argument);
+ }
+ } // End of argument copy loop.
+
+ __ bind(arguments_copied);
+ }
+ BLOCK_COMMENT("} arguments");
+
+ BLOCK_COMMENT("call {");
+ {
+ // Call frame manager or native entry.
+
+ //
+ // Register state on entry to frame manager / native entry:
+ //
+ // Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed)
+ // Lesp = (SP) + copied_arguments_offset - 8
+ // Z_method - method
+ // Z_thread - JavaThread*
+ //
+
+ // Here, the usual SP is the initial_caller_sp.
+ __ z_lgr(Z_R10, Z_SP);
+
+ // Z_esp points to the slot below the last argument.
+ __ z_lgr(Z_esp, r_top_of_arguments_addr);
+
+ //
+ // Stack on entry to frame manager / native entry:
+ //
+ // F0 [TOP_IJAVA_FRAME_ABI]
+ // [outgoing Java arguments]
+ // [ENTRY_FRAME_LOCALS]
+ // F1 [C_FRAME]
+ // ...
+ //
+
+ // Do a light-weight C-call here, r_new_arg_entry holds the address
+ // of the interpreter entry point (frame manager or native entry)
+ // and save runtime-value of return_pc in return_address
+ // (call by reference argument).
+ return_address = __ call_stub(r_new_arg_entry);
+ }
+ BLOCK_COMMENT("} call");
+
+ {
+ BLOCK_COMMENT("restore registers {");
+ // Returned from frame manager or native entry.
+ // Now pop frame, process result, and return to caller.
+
+ //
+ // Stack on exit from frame manager / native entry:
+ //
+ // F0 [ABI]
+ // ...
+ // [ENTRY_FRAME_LOCALS]
+ // F1 [C_FRAME]
+ // ...
+ //
+ // Just pop the topmost frame ...
+ //
+
+ Label ret_is_object;
+ Label ret_is_long;
+ Label ret_is_float;
+ Label ret_is_double;
+
+ // Restore frame pointer.
+ __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
+ // Pop frame. Done here to minimize stalls.
+ __ z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
+
+ // Reload some volatile registers which we've spilled before the call
+ // to frame manager / native entry.
+ // Access all locals via frame pointer, because we know nothing about
+ // the topmost frame's size.
+ __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
+ __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);
+
+ // Restore non-volatiles.
+ __ z_lmg(Z_R6, Z_R14, 16, Z_SP);
+ __ z_ld(Z_F8, 96, Z_SP);
+ __ z_ld(Z_F9, 104, Z_SP);
+ __ z_ld(Z_F10, 112, Z_SP);
+ __ z_ld(Z_F11, 120, Z_SP);
+ __ z_ld(Z_F12, 128, Z_SP);
+ __ z_ld(Z_F13, 136, Z_SP);
+ __ z_ld(Z_F14, 144, Z_SP);
+ __ z_ld(Z_F15, 152, Z_SP);
+ BLOCK_COMMENT("} restore");
+
+ //
+ // Stack on exit from call_stub:
+ //
+ // 0 [C_FRAME]
+ // ...
+ //
+ // No call_stub frames left.
+ //
+
+ // All non-volatiles have been restored at this point!!
+
+ //------------------------------------------------------------------------
+ // The following code makes some assumptions on the T_<type> enum values.
+ // The enum is defined in globalDefinitions.hpp.
+ // The validity of the assumptions is tested as far as possible.
+ // The assigned values should not be shuffled
+ // T_BOOLEAN==4 - lowest used enum value
+ // T_NARROWOOP==16 - largest used enum value
+ //------------------------------------------------------------------------
+ BLOCK_COMMENT("process result {");
+ Label firstHandler;
+ int handlerLen= 8;
+#ifdef ASSERT
+ char assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
+ __ z_chi(r_arg_result_type, T_BOOLEAN);
+ __ asm_assert_low(assertMsg, 0x0234);
+ __ z_chi(r_arg_result_type, T_NARROWOOP);
+ __ asm_assert_high(assertMsg, 0x0235);
+#endif
+ __ add2reg(r_arg_result_type, -T_BOOLEAN); // Remove offset.
+ __ z_larl(Z_R1, firstHandler); // location of first handler
+ __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
+ __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);
+
+ __ align(handlerLen);
+ __ bind(firstHandler);
+ // T_BOOLEAN:
+ guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp");
+ __ z_st(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_CHAR:
+ guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_st(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_FLOAT:
+ guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_ste(Z_FRET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_DOUBLE:
+ guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_std(Z_FRET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_BYTE:
+ guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_st(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_SHORT:
+ guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_st(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_INT:
+ guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_st(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_LONG:
+ guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_stg(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_OBJECT:
+ guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_stg(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_ARRAY:
+ guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_stg(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_VOID:
+ guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_stg(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_ADDRESS:
+ guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_stg(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ // T_NARROWOOP:
+ guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp");
+ __ z_st(Z_RET, 0, r_arg_result_addr);
+ __ z_br(Z_R14); // Return to caller.
+ __ align(handlerLen);
+ BLOCK_COMMENT("} process result");
+ }
+ return start;
+ }
+
+ // Return point for a Java call if there's an exception thrown in
+ // Java code. The exception is caught and transformed into a
+ // pending exception stored in JavaThread that can be tested from
+ // within the VM.
+ address generate_catch_exception() {
+ StubCodeMark mark(this, "StubRoutines", "catch_exception");
+ // Emits the catch_exception stub; the address captured next is the stub's entry and is returned.
+ address start = __ pc();
+
+ //
+ // Registers alive
+ //
+ // Z_thread
+ // Z_ARG1 - address of pending exception
+ // Z_ARG2 - return address in call stub
+ //
+
+ const Register exception_file = Z_R0;
+ const Register exception_line = Z_R1;
+ // The file name and line number of this generator source location are recorded in the thread.
+ __ load_const_optimized(exception_file, (void*)__FILE__);
+ __ load_const_optimized(exception_line, (void*)__LINE__);
+ // Make the incoming exception (Z_ARG1) the thread's pending exception.
+ __ z_stg(Z_ARG1, thread_(pending_exception));
+ // Store into `char *'.
+ __ z_stg(exception_file, thread_(exception_file));
+ // Store into `int'.
+ __ z_st(exception_line, thread_(exception_line));
+
+ // Complete return to VM. (The assert is a generation-time check; it emits no stub code.)
+ assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
+
+ // Continue in call stub.
+ __ z_br(Z_ARG2);
+
+ return start;
+ }
+
+ // Continuation point for runtime calls returning with a pending
+ // exception. The pending exception check happened in the runtime
+ // or native call stub. The pending exception in Thread is
+ // converted into a Java-level exception.
+ //
+ // Read:
+ // Z_R14: pc the runtime library callee wants to return to.
+ // Since the exception occurred in the callee, the return pc
+ // from the point of view of Java is the exception pc.
+ //
+ // Invalidate:
+ // Volatile registers (except below).
+ //
+ // Update:
+ // Z_ARG1: exception
+ // (Z_R14 is unchanged and is live out).
+ //
+ address generate_forward_exception() {
+ StubCodeMark mark(this, "StubRoutines", "forward_exception");
+ address start = __ pc();
+ // Local shorthand for the byte offset of Thread's pending exception field; #undef'ed at the end of this method.
+ #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
+#ifdef ASSERT
+ // Get pending exception oop.
+ __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);
+
+ // Make sure that this code is only executed if there is a pending exception.
+ {
+ Label L;
+ __ z_ltgr(Z_ARG1, Z_ARG1);
+ __ z_brne(L);
+ __ stop("StubRoutines::forward exception: no pending exception (1)");
+ __ bind(L);
+ }
+
+ __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
+#endif
+ // The handler lookup below is a leaf call bracketed by save_return_pc/push_frame and pop_frame/restore_return_pc.
+ __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
+ __ save_return_pc();
+ __ push_frame_abi160(0);
+ // Find exception handler.
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
+ Z_thread,
+ Z_ARG2);
+ // Copy handler's address.
+ __ z_lgr(Z_R1, Z_RET);
+ __ pop_frame();
+ __ restore_return_pc();
+
+ // Set up the arguments for the exception handler:
+ // - Z_ARG1: exception oop
+ // - Z_ARG2: exception pc
+
+ // Load pending exception oop.
+ __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);
+
+ // The exception pc is the return address in the caller,
+ // must load it into Z_ARG2
+ __ z_lgr(Z_ARG2, Z_R14);
+
+#ifdef ASSERT
+ // Make sure exception is set.
+ { Label L;
+ __ z_ltgr(Z_ARG1, Z_ARG1);
+ __ z_brne(L);
+ __ stop("StubRoutines::forward exception: no pending exception (2)");
+ __ bind(L);
+ }
+#endif
+ // Clear the pending exception.
+ __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
+ // Jump to exception handler
+ __ z_br(Z_R1 /*handler address*/);
+
+ return start;
+
+ #undef pending_exception_offset
+ }
+
+ // Continuation point for throwing of implicit exceptions that are
+ // not handled in the current activation. Fabricates an exception
+ // oop and initiates normal exception dispatching in this
+ // frame. Only callee-saved registers are preserved (through the
+ // normal RegisterMap handling). If the compiler
+ // needs all registers to be preserved between the fault point and
+ // the exception handler then it must assume responsibility for that
+ // in AbstractCompiler::continuation_for_implicit_null_exception or
+ // continuation_for_implicit_division_by_zero_exception. All other
+ // implicit exceptions (e.g., NullPointerException or
+ // AbstractMethodError on entry) are either at call sites or
+ // otherwise assume that stack unwinding will be initiated, so
+ // caller saved registers were assumed volatile in the compiler.
+
+ // Note that we generate only this stub into a RuntimeStub, because
+ // it needs to be properly traversed and ignored during GC, so we
+ // change the meaning of the "__" macro within this method.
+
+ // Note: the routine set_pc_not_at_call_for_caller in
+ // SharedRuntime.cpp requires that this code be generated into a
+ // RuntimeStub.
+#undef __
+#define __ masm->
+
+ address generate_throw_exception(const char* name, address runtime_entry,
+                                  bool restore_saved_exception_pc,
+                                  Register arg1 = noreg, Register arg2 = noreg) {
+   // Builds a RuntimeStub named 'name' that calls runtime_entry(Z_thread, arg1, arg2)
+   // and then continues at StubRoutines::forward_exception_entry(). Returns the
+   // stub's entry point. restore_saved_exception_pc == true is not implemented.
+   // Z_R1 is overwritten by get_PC() below, before call_VM_leaf() reads arg1/arg2.
+   // Catch a clashing register choice when the stub is generated (debug builds).
+   assert_different_registers(arg1, Z_R1);
+   assert_different_registers(arg2, Z_R1);
+
+   int insts_size = 256;
+   int locs_size  = 0;
+   CodeBuffer      code(name, insts_size, locs_size);
+   MacroAssembler* masm = new MacroAssembler(&code);
+   int framesize_in_bytes;
+   address start = __ pc();
+
+   __ save_return_pc();
+   framesize_in_bytes = __ push_frame_abi160(0);
+
+   address frame_complete_pc = __ pc();
+   if (restore_saved_exception_pc) {
+     __ unimplemented("StubGenerator::throw_exception", 74);
+   }
+
+   // Note that we always have a runtime stub frame on the top of stack at this point.
+   __ get_PC(Z_R1);
+   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);
+
+   // Do the call.
+   BLOCK_COMMENT("call runtime_entry");
+   __ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2);
+   __ reset_last_Java_frame();
+
+#ifdef ASSERT
+   // Make sure that this code is only executed if there is a pending exception.
+   { Label L;
+     __ z_lg(Z_R0, in_bytes(Thread::pending_exception_offset()), Z_thread);
+     __ z_ltgr(Z_R0, Z_R0);
+     __ z_brne(L);
+     __ stop("StubRoutines::throw_exception: no pending exception");
+     __ bind(L);
+   }
+#endif
+
+   __ pop_frame();
+   __ restore_return_pc();
+   __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
+   __ z_br(Z_R1);
+
+   RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete_pc - start,
+                         framesize_in_bytes/wordSize, NULL /*oop_maps*/, false);
+   return stub->entry_point();
+ }
+
+#undef __
+#ifdef PRODUCT
+#define __ _masm->
+#else
+#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#endif
+
+ //----------------------------------------------------------------------
+ // The following routine generates a subroutine to throw an asynchronous
+ // UnknownError when an unsafe access gets a fault that could not be
+ // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.)
+ //
+ // Arguments:
+ // trapping PC: ??
+ //
+ // Results:
+ // Posts an asynchronous exception, skips the trapping instruction.
+ //
+ address generate_handler_for_unsafe_access() {
+   StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
+   // The stub body is a single MacroAssembler::unimplemented() marker.
+   const address stub_entry = __ pc();
+   __ unimplemented("StubRoutines::handler_for_unsafe_access", 86);
+   // Callers receive the address at which that marker was emitted.
+   return stub_entry;
+ }
+
+ // Support for uint StubRoutine::zarch::partial_subtype_check(Klass
+ // sub, Klass super);
+ //
+ // Arguments:
+ // ret : Z_RET, returned
+ // sub : Z_ARG2, argument, not changed
+ // super: Z_ARG3, argument, not changed
+ //
+ // raddr: Z_R14, blown by call
+ //
+ address generate_partial_subtype_check() {
+ StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
+ Label miss;
+ // Result protocol of the emitted stub: Z_RET == 0 on a match, Z_RET == 1 on a miss; returns the stub's entry address.
+ address start = __ pc();
+
+ const Register Rsubklass = Z_ARG2; // subklass
+ const Register Rsuperklass = Z_ARG3; // superklass
+
+ // No args, but tmp registers that are killed.
+ const Register Rlength = Z_ARG4; // cache array length
+ const Register Rarray_ptr = Z_ARG5; // Current value from cache array.
+
+ if (UseCompressedOops) {
+ assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
+ }
+
+ // Always take the slow path (see SPARC).
+ __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
+ Rarray_ptr, Rlength, NULL, &miss);
+
+ // Match falls through here.
+ __ clear_reg(Z_RET); // Zero indicates a match. Set EQ flag in CC.
+ __ z_br(Z_R14);
+
+ __ BIND(miss);
+ __ load_const_optimized(Z_RET, 1); // One indicates a miss.
+ __ z_ltgr(Z_RET, Z_RET); // Set NE flag in CC.
+ __ z_br(Z_R14);
+
+ return start;
+ }
+
+ // Return address of code to be called from code generated by
+ // MacroAssembler::verify_oop.
+ //
+ // Don't generate, rather use C++ code.
+ address generate_verify_oop_subroutine() {
+   // Deliberately no StubCodeMark here: nothing is emitted, and creating a
+   // mark would notify the oprofile jvmti agent about dynamic code generation.
+   // A stub without code (code_size == 0) confuses opjitconv.
+   // Mark that would otherwise be used:
+   //   StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
+   //
+   // There is no stub, hence no entry address to hand out.
+   return NULL;
+ }
+
+ // Generate pre-write barrier for array.
+ //
+ // Input:
+ // addr - register containing starting address
+ // count - register containing element count
+ //
+ // The input registers are overwritten.
+ void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
+ // Code is emitted for G1 (G1SATBCTLogging) only; the card table and ModRef barrier sets emit nothing here.
+ BarrierSet* const bs = Universe::heap()->barrier_set();
+ switch (bs->kind()) {
+ case BarrierSet::G1SATBCTLogging:
+ // With G1, don't generate the call if we statically know that the target is uninitialized.
+ if (!dest_uninitialized) {
+ // Is marking active?
+ Label filtered;
+ Register Rtmp1 = Z_R0;
+ const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_active());
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
+ } else {
+ guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
+ }
+ __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
+ // Marking is active: call the runtime pre-barrier with (addr, count); RegisterSaver::arg_registers are saved/restored around it.
+ // __ push_frame_abi160(0);
+ (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), addr, count);
+ (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
+ // __ pop_frame();
+
+ __ bind(filtered);
+ }
+ break;
+ case BarrierSet::CardTableForRS:
+ case BarrierSet::CardTableExtension:
+ case BarrierSet::ModRef:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ // Generate post-write barrier for array.
+ //
+ // Input:
+ // addr - register containing starting address
+ // count - register containing element count
+ //
+ // The input registers are overwritten.
+ void gen_write_ref_array_post_barrier(Register addr, Register count, bool branchToEnd) { // branchToEnd: finish by falling through / branching to the end of the emitted code; otherwise return via Z_R14.
+ BarrierSet* const bs = Universe::heap()->barrier_set();
+ switch (bs->kind()) {
+ case BarrierSet::G1SATBCTLogging:
+ {
+ if (branchToEnd) {
+ // __ push_frame_abi160(0);
+ (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
+ (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers);
+ // __ pop_frame();
+ } else {
+ // Tail call: call c and return to stub caller.
+ address entry_point = CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
+ if (Z_ARG1 != addr) __ z_lgr(Z_ARG1, addr); // NOTE(review): if count were Z_ARG1, this move would overwrite it before the next line reads it.
+ if (Z_ARG2 != count) __ z_lgr(Z_ARG2, count);
+ __ load_const(Z_R1, entry_point);
+ __ z_br(Z_R1); // Branch without linking, callee will return to stub caller.
+ }
+ }
+ break;
+ case BarrierSet::CardTableForRS:
+ case BarrierSet::CardTableExtension:
+ // These cases formerly known as
+ // void array_store_check(Register addr, Register count, bool branchToEnd).
+ {
+ NearLabel doXC, done;
+ CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+ assert_different_registers(Z_R0, Z_R1, addr, count);
+
+ // Nothing to do if count <= 0.
+ if (branchToEnd) {
+ __ compare64_and_branch(count, (intptr_t) 0, Assembler::bcondNotHigh, done);
+ } else {
+ __ z_ltgr(count, count);
+ __ z_bcr(Assembler::bcondNotPositive, Z_R14);
+ }
+
+ // Note: We can't combine the shifts. We could lose a carry
+ // from calculating the array end address.
+ // count = (count-1)*BytesPerHeapOop + addr
+ // Count holds addr of last oop in array then.
+ __ z_sllg(count, count, LogBytesPerHeapOop);
+ __ add2reg_with_index(count, -BytesPerHeapOop, count, addr);
+
+ // Get base address of card table.
+ __ load_const_optimized(Z_R1, (address)ct->byte_map_base);
+
+ // count = (count>>shift) - (addr>>shift)
+ __ z_srlg(addr, addr, CardTableModRefBS::card_shift);
+ __ z_srlg(count, count, CardTableModRefBS::card_shift);
+
+ // Prefetch first elements of card table for update.
+ if (VM_Version::has_Prefetch()) {
+ __ z_pfd(0x02, 0, addr, Z_R1);
+ }
+
+ // Special case: clear just one byte.
+ __ clear_reg(Z_R0, true, false); // Zero value that is stored into the first card table byte below.
+ __ z_sgr(count, addr); // Count = n-1 now, CC used for brc below.
+ __ z_stc(Z_R0, 0, addr, Z_R1); // Must preserve CC from z_sgr.
+ if (branchToEnd) {
+ __ z_brz(done);
+ } else {
+ __ z_bcr(Assembler::bcondZero, Z_R14);
+ }
+
+ __ z_cghi(count, 255);
+ __ z_brnh(doXC);
+
+ // MVCLE: clear a long area.
+ // Start addr of card table range = base + addr.
+ // # bytes in card table range = (count + 1)
+ __ add2reg_with_index(Z_R0, 0, Z_R1, addr);
+ __ add2reg(Z_R1, 1, count);
+
+ // dirty hack:
+ // There are just two callers. Both pass
+ // count in Z_ARG3 = Z_R4
+ // addr in Z_ARG2 = Z_R3
+ // ==> use Z_ARG2 as src len reg = 0
+ // Z_ARG1 as src addr (ignored)
+ assert(count == Z_ARG3, "count: unexpected register number");
+ assert(addr == Z_ARG2, "addr: unexpected register number");
+ __ clear_reg(Z_ARG2, true, false);
+
+ __ MacroAssembler::move_long_ext(Z_R0, Z_ARG1, 0);
+
+ if (branchToEnd) {
+ __ z_bru(done);
+ } else {
+ __ z_bcr(Assembler::bcondAlways, Z_R14);
+ }
+
+ // XC: clear a short area.
+ Label XC_template; // Instr template, never exec directly!
+ __ bind(XC_template);
+ __ z_xc(0, 0, addr, 0, addr);
+
+ __ bind(doXC);
+ // start addr of card table range = base + addr
+ // end addr of card table range = base + addr + count
+ __ add2reg_with_index(addr, 0, Z_R1, addr);
+
+ if (VM_Version::has_ExecuteExtensions()) {
+ __ z_exrl(count, XC_template); // Execute XC with var. len.
+ } else {
+ __ z_larl(Z_R1, XC_template);
+ __ z_ex(count, 0, Z_R0, Z_R1); // Execute XC with var. len.
+ }
+ if (!branchToEnd) {
+ __ z_br(Z_R14);
+ }
+
+ __ bind(done);
+ }
+ break;
+ case BarrierSet::ModRef:
+ if (!branchToEnd) { __ z_br(Z_R14); }
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+
+ // This is to test that the count register contains a non-negative int value, i.e. the upper 33 bits of the 64-bit register are zero.
+ // Required because C2 does not respect int to long conversion for stub calls.
+ void assert_positive_int(Register count) {
+#ifdef ASSERT
+ __ z_srag(Z_R0, count, 31); // Just leave the sign (must be zero) in Z_R0.
+ __ asm_assert_eq("missing zero extend", 0xAFFE); // Checks the CC left by the shift above; NOTE(review): confirm asm_assert_eq semantics. Z_R0 is clobbered in debug builds.
+#endif
+ }
+
+ // Generate overlap test for array copy stubs.
+ // If no actual overlap is detected, control is transferred to the
+ // "normal" copy stub (entry address passed in disjoint_copy_target).
+ // Otherwise, execution continues with the code generated by the
+ // caller of array_overlap_test.
+ //
+ // Input:
+ // Z_ARG1 - from
+ // Z_ARG2 - to
+ // Z_ARG3 - element count
+ void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
+ __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
+ disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);
+ // Here "to" is above "from" (unsigned compare): compute the end of the "from" range in Z_R1 for the second test.
+ Register index = Z_ARG3;
+ if (log2_elem_size > 0) {
+ __ z_sllg(Z_R1, Z_ARG3, log2_elem_size); // byte count
+ index = Z_R1;
+ }
+ __ add2reg_with_index(Z_R1, 0, index, Z_ARG1); // First byte after "from" range.
+ // No overlap if the "from" range ends at or before "to".
+ __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh,
+ disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);
+
+ // Destructive overlap: let caller generate code for that.
+ }
+
+ // Generate stub for disjoint array copy. If "aligned" is true, the
+ // "from" and "to" addresses are assumed to be heapword aligned.
+ //
+ // Arguments for generated stub:
+ // from: Z_ARG1
+ // to: Z_ARG2
+ // count: Z_ARG3 treated as signed
+ void generate_disjoint_copy(bool aligned, int element_size,
+ bool branchToEnd,
+ bool restoreArgs) {
+ // This is the zarch specific stub generator for general array copy tasks.
+ // It has the following prereqs and features:
+ //
+ // - No destructive overlap allowed (else unpredictable results).
+ // - Destructive overlap does not exist if the leftmost byte of the target
+ // does not coincide with any of the source bytes (except the leftmost).
+ //
+ // Register usage upon entry:
+ // Z_ARG1 == Z_R2 : address of source array
+ // Z_ARG2 == Z_R3 : address of target array
+ // Z_ARG3 == Z_R4 : length of operands (# of elements on entry)
+ //
+ // Register usage within the generator:
+ // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len).
+ // Used as pair register operand in complex moves, scratch registers anyway.
+ // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg).
+ // Same as R0/R1, but no scratch register.
+ // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd and may get overwritten by the stub routine.
+ // The code paths that alter them restore them before leaving only if restoreArgs is true.
+ // Note: the "aligned" parameter is not referenced by this generator.
+ Register save_reg = Z_ARG4; // (= Z_R5), holds original target operand address for restore.
+
+ {
+ Register llen_reg = Z_R1; // Holds left operand len (odd reg).
+ Register laddr_reg = Z_R0; // Holds left operand addr (even reg), overlaps with data_reg.
+ Register rlen_reg = Z_R5; // Holds right operand len (odd reg), overlaps with save_reg.
+ Register raddr_reg = Z_R4; // Holds right operand addr (even reg), overlaps with len_reg.
+
+ Register data_reg = Z_R0; // Holds copied data chunk in alignment process and copy loop.
+ Register len_reg = Z_ARG3; // Holds operand len (#elements at entry, #bytes shortly after).
+ Register dst_reg = Z_ARG2; // Holds left (target) operand addr.
+ Register src_reg = Z_ARG1; // Holds right (source) operand addr.
+
+ Label doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate;
+ Label doMVCUnrolled;
+ NearLabel doMVC, doMVCgeneral, done;
+ Label MVC_template;
+ address pcMVCblock_b, pcMVCblock_e;
+
+ bool usedMVCLE = true;
+ bool usedMVCLOOP = true;
+ bool usedMVCUnrolled = false;
+ bool usedMVC = false;
+ bool usedMVCgeneral = false;
+
+ int stride;
+ Register stride_reg;
+ Register ix_reg;
+
+ assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2");
+ unsigned int log2_size = exact_log2(element_size);
+
+ switch (element_size) {
+ case 1: BLOCK_COMMENT("ARRAYCOPY DISJOINT byte {"); break;
+ case 2: BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break;
+ case 4: BLOCK_COMMENT("ARRAYCOPY DISJOINT int {"); break;
+ case 8: BLOCK_COMMENT("ARRAYCOPY DISJOINT long {"); break;
+ default: BLOCK_COMMENT("ARRAYCOPY DISJOINT {"); break;
+ }
+
+ assert_positive_int(len_reg);
+
+ BLOCK_COMMENT("preparation {");
+
+ // No copying if len <= 0.
+ if (branchToEnd) {
+ __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
+ } else {
+ if (VM_Version::has_CompareBranch()) {
+ __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
+ } else {
+ __ z_ltgr(len_reg, len_reg);
+ __ z_bcr(Assembler::bcondNotPositive, Z_R14);
+ }
+ }
+
+ // Prefetch just one cache line. Speculative opt for short arrays.
+ // Do not use Z_R1 in prefetch. Is undefined here.
+ if (VM_Version::has_Prefetch()) {
+ __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
+ __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
+ }
+
+ BLOCK_COMMENT("} preparation");
+
+ // Save args only if really needed.
+ // Keep len test local to branch. Is generated only once.
+
+ BLOCK_COMMENT("mode selection {");
+
+ // Special handling for arrays with only a few elements.
+ // Nothing fancy: just an executed MVC.
+ if (log2_size > 0) {
+ __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
+ }
+ if (element_size != 8) {
+ __ z_cghi(len_reg, 256/element_size);
+ __ z_brnh(doMVC);
+ usedMVC = true;
+ }
+ if (element_size == 8) { // Long and oop arrays are always aligned.
+ __ z_cghi(len_reg, 256/element_size);
+ __ z_brnh(doMVCUnrolled);
+ usedMVCUnrolled = true;
+ }
+
+ // Prefetch another cache line. We, for sure, have more than one line to copy.
+ if (VM_Version::has_Prefetch()) {
+ __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
+ __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
+ }
+
+ if (restoreArgs) {
+ // Remember entry value of ARG2 to restore all arguments later from that knowledge.
+ __ z_lgr(save_reg, dst_reg);
+ }
+
+ __ z_cghi(len_reg, 4096/element_size);
+ if (log2_size == 0) {
+ __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
+ }
+ __ z_brnh(doMVCLOOP);
+
+ // Fall through to MVCLE case.
+
+ BLOCK_COMMENT("} mode selection");
+
+ // MVCLE: for long arrays
+ // DW aligned: Best performance for sizes > 4kBytes.
+ // unaligned: Least complex for sizes > 256 bytes.
+ if (usedMVCLE) {
+ BLOCK_COMMENT("mode MVCLE {");
+
+ // Setup registers for mvcle.
+ //__ z_lgr(llen_reg, len_reg);// r1 <- r4 #bytes already in Z_R1, aka llen_reg.
+ __ z_lgr(laddr_reg, dst_reg); // r0 <- r3
+ __ z_lgr(raddr_reg, src_reg); // r4 <- r2
+ __ z_lgr(rlen_reg, llen_reg); // r5 <- r1
+
+ __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0); // special: bypass cache
+ // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache.
+ // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);
+
+ if (restoreArgs) {
+ // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs.
+ // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required.
+ // Len_reg (Z_ARG3) is destroyed and must be restored.
+ __ z_slgr(laddr_reg, dst_reg); // copied #bytes
+ if (log2_size > 0) {
+ __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
+ } else {
+ __ z_lgr(Z_ARG3, laddr_reg);
+ }
+ }
+ if (branchToEnd) {
+ __ z_bru(done);
+ } else {
+ __ z_br(Z_R14);
+ }
+ BLOCK_COMMENT("} mode MVCLE");
+ }
+ // No fallthru possible here.
+
+ // MVCUnrolled: for short, aligned arrays.
+
+ if (usedMVCUnrolled) {
+ BLOCK_COMMENT("mode MVC unrolled {");
+ stride = 8;
+
+ // Generate unrolled MVC instructions.
+ for (int ii = 32; ii > 1; ii--) {
+ __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy
+ if (branchToEnd) {
+ __ z_bru(done);
+ } else {
+ __ z_br(Z_R14);
+ }
+ }
+
+ pcMVCblock_b = __ pc();
+ __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy
+ if (branchToEnd) {
+ __ z_bru(done);
+ } else {
+ __ z_br(Z_R14);
+ }
+
+ pcMVCblock_e = __ pc();
+ Label MVC_ListEnd;
+ __ bind(MVC_ListEnd);
+
+ // This is an absolute fast path:
+ // - Array len in bytes must be not greater than 256.
+ // - Array len in bytes must be an integer mult of DW
+ // to save expensive handling of trailing bytes.
+ // - Argument restore is not done,
+ // i.e. previous code must not alter arguments (this code doesn't either).
+
+ __ bind(doMVCUnrolled);
+
+ // Avoid mul, prefer shift where possible.
+ // Combine shift right (for #DW) with shift left (for block size).
+ // Set CC for zero test below (asm_assert).
+ // Note: #bytes comes in Z_R1, #DW in len_reg.
+ unsigned int MVCblocksize = pcMVCblock_e - pcMVCblock_b;
+ unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).
+
+ if (log2_size > 0) { // Len was scaled into Z_R1.
+ switch (MVCblocksize) {
+
+ case 8: logMVCblocksize = 3;
+ __ z_ltgr(Z_R0, Z_R1); // #bytes is index
+ break; // reasonable size, use shift
+
+ case 16: logMVCblocksize = 4;
+ __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size);
+ break; // reasonable size, use shift
+
+ default: logMVCblocksize = 0;
+ __ z_ltgr(Z_R0, len_reg); // #DW for mul
+ break; // all other sizes: use mul
+ }
+ } else {
+ guarantee(log2_size, "doMVCUnrolled: only for DW entities");
+ }
+
+ // This test (and branch) is redundant. Previous code makes sure that
+ // - element count > 0
+ // - element size == 8.
+ // Thus, len reg should never be zero here. We insert an asm_assert() here,
+ // just to double-check and to be on the safe side.
+ __ asm_assert(false, "zero len cannot occur", 99);
+
+ __ z_larl(Z_R1, MVC_ListEnd); // Get addr of last instr block.
+ // Avoid mul, prefer shift where possible.
+ if (logMVCblocksize == 0) {
+ __ z_mghi(Z_R0, MVCblocksize);
+ }
+ __ z_slgr(Z_R1, Z_R0);
+ __ z_br(Z_R1);
+ BLOCK_COMMENT("} mode MVC unrolled");
+ }
+ // No fallthru possible here.
+
+ // MVC execute template
+ // Must always generate. Usage may be switched on below.
+ // There is no suitable place after here to put the template.
+ __ bind(MVC_template);
+ __ z_mvc(0,0,dst_reg,0,src_reg); // Instr template, never exec directly!
+
+
+ // MVC Loop: for medium-sized arrays
+
+ // Only for DW aligned arrays (src and dst).
+ // #bytes to copy must be at least 256!!!
+ // Non-aligned cases handled separately.
+ stride = 256;
+ stride_reg = Z_R1; // Holds #bytes when control arrives here.
+ ix_reg = Z_ARG3; // Alias for len_reg.
+
+
+ if (usedMVCLOOP) {
+ BLOCK_COMMENT("mode MVC loop {");
+ __ bind(doMVCLOOP);
+
+ __ z_lcgr(ix_reg, Z_R1); // Ix runs from -(n-2)*stride to 1*stride (inclusive).
+ __ z_llill(stride_reg, stride);
+ __ add2reg(ix_reg, 2*stride); // Thus: increment ix by 2*stride.
+
+ __ bind(doMVCLOOPiterate);
+ __ z_mvc(0, stride-1, dst_reg, 0, src_reg);
+ __ add2reg(dst_reg, stride);
+ __ add2reg(src_reg, stride);
+ __ bind(doMVCLOOPcount);
+ __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate);
+
+ // Don't use add2reg() here, since we must set the condition code!
+ __ z_aghi(ix_reg, -2*stride); // Compensate incr from above: zero diff means "all copied".
+
+ if (restoreArgs) {
+ __ z_lcgr(Z_R1, ix_reg); // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
+ __ z_brnz(doMVCgeneral); // We're not done yet, ix_reg is not zero.
+
+ // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
+ __ z_slgr(dst_reg, save_reg); // copied #bytes
+ __ z_slgr(src_reg, dst_reg); // = ARG1 (now restored)
+ if (log2_size) {
+ __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
+ } else {
+ __ z_lgr(Z_ARG3, dst_reg);
+ }
+ __ z_lgr(Z_ARG2, save_reg); // ARG2 now restored.
+
+ if (branchToEnd) {
+ __ z_bru(done);
+ } else {
+ __ z_br(Z_R14);
+ }
+
+ } else {
+ if (branchToEnd) {
+ __ z_brz(done); // CC set by aghi instr.
+ } else {
+ __ z_bcr(Assembler::bcondZero, Z_R14); // We're all done if zero.
+ }
+
+ __ z_lcgr(Z_R1, ix_reg); // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
+ // __ z_bru(doMVCgeneral); // fallthru
+ }
+ usedMVCgeneral = true;
+ BLOCK_COMMENT("} mode MVC loop");
+ }
+ // Fallthru to doMVCgeneral
+
+ // MVCgeneral: for short, unaligned arrays, after other copy operations
+
+ // Somewhat expensive due to use of EX instruction, but simple.
+ if (usedMVCgeneral) {
+ BLOCK_COMMENT("mode MVC general {");
+ __ bind(doMVCgeneral);
+
+ __ add2reg(len_reg, -1, Z_R1); // Get #bytes-1 for EXECUTE.
+ if (VM_Version::has_ExecuteExtensions()) {
+ __ z_exrl(len_reg, MVC_template); // Execute MVC with variable length.
+ } else {
+ __ z_larl(Z_R1, MVC_template); // Get addr of instr template.
+ __ z_ex(len_reg, 0, Z_R0, Z_R1); // Execute MVC with variable length.
+ } // penalty: 9 ticks
+
+ if (restoreArgs) {
+ // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg
+ __ z_slgr(dst_reg, save_reg); // Copied #bytes without the "doMVCgeneral" chunk
+ __ z_slgr(src_reg, dst_reg); // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk
+ __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet.
+ if (log2_size) {
+ __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3
+ } else {
+ __ z_lgr(Z_ARG3, dst_reg);
+ }
+ __ z_lgr(Z_ARG2, save_reg); // ARG2 now restored.
+ }
+
+ if (usedMVC) {
+ if (branchToEnd) {
+ __ z_bru(done);
+ } else {
+ __ z_br(Z_R14);
+ }
+ } else {
+ if (!branchToEnd) __ z_br(Z_R14);
+ }
+ BLOCK_COMMENT("} mode MVC general");
+ }
+ // Fallthru possible if following block not generated.
+
+ // MVC: for short, unaligned arrays
+
+ // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks.
+ // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4.
+ if (usedMVC) {
+ BLOCK_COMMENT("mode MVC {");
+ __ bind(doMVC);
+
+ // get #bytes-1 for EXECUTE
+ if (log2_size) {
+ __ add2reg(Z_R1, -1); // Length was scaled into Z_R1.
+ } else {
+ __ add2reg(Z_R1, -1, len_reg); // Length was not scaled.
+ }
+
+ if (VM_Version::has_ExecuteExtensions()) {
+ __ z_exrl(Z_R1, MVC_template); // Execute MVC with variable length.
+ } else {
+ __ z_lgr(Z_R0, Z_R5); // Save ARG4, may be unnecessary.
+ __ z_larl(Z_R5, MVC_template); // Get addr of instr template.
+ __ z_ex(Z_R1, 0, Z_R0, Z_R5); // Execute MVC with variable length.
+ __ z_lgr(Z_R5, Z_R0); // Restore ARG4, may be unnecessary.
+ }
+
+ if (!branchToEnd) {
+ __ z_br(Z_R14);
+ }
+ BLOCK_COMMENT("} mode MVC");
+ }
+
+ __ bind(done);
+
+ switch (element_size) {
+ case 1: BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break;
+ case 2: BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break;
+ case 4: BLOCK_COMMENT("} ARRAYCOPY DISJOINT int "); break;
+ case 8: BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break;
+ default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT "); break;
+ }
+ }
+ }
+
+ // Generate stub for conjoint array copy. If "aligned" is true, the
+ // "from" and "to" addresses are assumed to be heapword aligned.
+ //
+ // Arguments for generated stub:
+ // from: Z_ARG1
+ // to: Z_ARG2
+ // count: Z_ARG3 treated as signed
+ void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {
+ // Emits a backward (high-to-low offset) copy of Z_ARG3 elements of element_size bytes each. "aligned" is not consulted in this body. If !branchToEnd, the emitted code ends with a return via Z_R14.
+ // This is the zarch specific stub generator for general array copy tasks.
+ // It has the following prereqs and features:
+ //
+ // - Destructive overlap exists and is handled by reverse copy.
+ // - Destructive overlap exists if the leftmost byte of the target
+ // does coincide with any of the source bytes (except the leftmost).
+ // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride)
+ // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine.
+ // - Z_ARG3 is USED but preserved by the stub routine.
+ // - Z_ARG4 is used as index register and is thus KILLed.
+ //
+ {
+ Register stride_reg = Z_R1; // Stride & compare value in loop (always -8, i.e. one DW, see "stride" below).
+ Register data_reg = Z_R0; // Holds value of currently processed element.
+ Register ix_reg = Z_ARG4; // Holds byte index of currently processed element.
+ Register len_reg = Z_ARG3; // Holds length (in #elements) of arrays.
+ Register dst_reg = Z_ARG2; // Holds left operand addr.
+ Register src_reg = Z_ARG1; // Holds right operand addr.
+
+ assert(256%element_size == 0, "Element size must be power of 2.");
+ assert(element_size <= 8, "Can't handle more than DW units.");
+
+ switch (element_size) {
+ case 1: BLOCK_COMMENT("ARRAYCOPY CONJOINT byte {"); break;
+ case 2: BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break;
+ case 4: BLOCK_COMMENT("ARRAYCOPY CONJOINT int {"); break;
+ case 8: BLOCK_COMMENT("ARRAYCOPY CONJOINT long {"); break;
+ default: BLOCK_COMMENT("ARRAYCOPY CONJOINT {"); break;
+ }
+
+ assert_positive_int(len_reg);
+
+ if (VM_Version::has_Prefetch()) {
+ __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
+ __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
+ }
+
+ unsigned int log2_size = exact_log2(element_size);
+ if (log2_size) {
+ __ z_sllg(ix_reg, len_reg, log2_size); // ix_reg = #elements scaled to #bytes.
+ } else {
+ __ z_lgr(ix_reg, len_reg); // Byte elements: #bytes == #elements.
+ }
+
+ // Optimize reverse copy loop.
+ // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks.
+ // NOTE(review): presumably "unaligned DW access (fetch or store) is not DW-atomic, but should be alignment-atomic" is meant - TODO confirm against the PoP.
+ // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.
+
+ Label countLoop1;
+ Label copyLoop1;
+ Label skipBY;
+ Label skipHW;
+ int stride = -8;
+
+ __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.
+
+ if (element_size == 8) // Nothing to do here.
+ __ z_bru(countLoop1);
+ else { // Do not generate dead code.
+ __ z_tmll(ix_reg, 7); // Check the "odd" bits, i.e. the low three bits of the byte count.
+ __ z_bre(countLoop1); // There are none, very good!
+ }
+
+ if (log2_size == 0) { // Handle leftover Byte.
+ __ z_tmll(ix_reg, 1);
+ __ z_bre(skipBY);
+ __ z_lb(data_reg, -1, ix_reg, src_reg); // Copy the byte at offset ix_reg-1.
+ __ z_stcy(data_reg, -1, ix_reg, dst_reg);
+ __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
+ __ bind(skipBY);
+ // fallthru
+ }
+ if (log2_size <= 1) { // Handle leftover HW.
+ __ z_tmll(ix_reg, 2);
+ __ z_bre(skipHW);
+ __ z_lhy(data_reg, -2, ix_reg, src_reg); // Copy the halfword at offset ix_reg-2.
+ __ z_sthy(data_reg, -2, ix_reg, dst_reg);
+ __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
+ __ bind(skipHW);
+ __ z_tmll(ix_reg, 4); // Is a 4-byte leftover remaining?
+ __ z_bre(countLoop1); // No, go straight to the DW loop.
+ // fallthru
+ }
+ if (log2_size <= 2) { // There are just 4 bytes (left) that need to be copied.
+ __ z_ly(data_reg, -4, ix_reg, src_reg);
+ __ z_sty(data_reg, -4, ix_reg, dst_reg);
+ __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
+ __ z_bru(countLoop1);
+ }
+
+ // Unreachable: every path emitted above ends with a branch to countLoop1. Trap if control gets here anyway.
+ __ z_illtrap(0x99);
+ __ bind(copyLoop1);
+ __ z_lg(data_reg, 0, ix_reg, src_reg); // Copy the DW at byte offset ix_reg.
+ __ z_stg(data_reg, 0, ix_reg, dst_reg);
+ __ bind(countLoop1); // Loop entry: the index is decremented before the first DW is copied.
+ __ z_brxhg(ix_reg, stride_reg, copyLoop1); // ix_reg += stride_reg; branch while the sum is higher than the compare value (stride_reg, see register comment above).
+
+ if (!branchToEnd)
+ __ z_br(Z_R14); // Return to caller; otherwise control falls out of the emitted code.
+
+ switch (element_size) {
+ case 1: BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break;
+ case 2: BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break;
+ case 4: BLOCK_COMMENT("} ARRAYCOPY CONJOINT int "); break;
+ case 8: BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break;
+ default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT "); break;
+ }
+ }
+ }
+
+ // Generate stub for disjoint byte copy. If "aligned" is true, the
+ // "from" and "to" addresses are assumed to be heapword aligned.
+ address generate_disjoint_byte_copy(bool aligned, const char * name) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for byte arrays without destructive overlap.
+   // Prereqs and features are listed at generate_disjoint_copy.
+   const int element_size = 1; // bytes per element
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+   generate_disjoint_copy(aligned, element_size, false, false);
+   return __ addr_at(entry_off);
+ }
+
+
+ address generate_disjoint_short_copy(bool aligned, const char * name) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for short arrays without destructive overlap; prereqs and features are listed at generate_disjoint_copy.
+   const int element_size = 2; // bytes per element
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+   generate_disjoint_copy(aligned, element_size, false, false);
+   return __ addr_at(entry_off);
+ }
+
+
+ address generate_disjoint_int_copy(bool aligned, const char * name) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for int arrays without destructive overlap; prereqs and features are listed at generate_disjoint_copy.
+   const int element_size = 4; // bytes per element
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+   generate_disjoint_copy(aligned, element_size, false, false);
+   return __ addr_at(entry_off);
+ }
+
+
+ address generate_disjoint_long_copy(bool aligned, const char * name) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for long arrays without destructive overlap; prereqs and features are listed at generate_disjoint_copy.
+   const int element_size = 8; // bytes per element
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+   generate_disjoint_copy(aligned, element_size, false, false);
+   return __ addr_at(entry_off);
+ }
+
+
+ address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for oop arrays without destructive overlap; prereqs and features are listed at generate_disjoint_copy.
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+   const Register to_reg    = Z_ARG2; // destination address, handed to both barriers
+   const Register count_reg = Z_ARG3; // element count, handed to both barriers
+
+   // Oops are 4 bytes wide when compressed, 8 bytes otherwise.
+   unsigned int oop_size = 8;
+   if (UseCompressedOops) { oop_size = 4; }
+   gen_write_ref_array_pre_barrier(to_reg, count_reg, dest_uninitialized);
+   generate_disjoint_copy(aligned, oop_size, true, true);
+   gen_write_ref_array_post_barrier(to_reg, count_reg, false);
+   return __ addr_at(entry_off);
+ }
+
+
+ address generate_conjoint_byte_copy(bool aligned, const char * name) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for byte arrays that may overlap; prereqs and features are listed at generate_conjoint_copy.
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+
+   // The matching disjoint stub takes over when the arrays do not overlap destructively.
+   address disjoint_stub;
+   if (aligned) { disjoint_stub = StubRoutines::arrayof_jbyte_disjoint_arraycopy(); }
+   else         { disjoint_stub = StubRoutines::jbyte_disjoint_arraycopy(); }
+   array_overlap_test(disjoint_stub, 0); // Branch away to disjoint_stub if disjoint.
+   generate_conjoint_copy(aligned, 1, false);
+   return __ addr_at(entry_off);
+ }
+
+
+ address generate_conjoint_short_copy(bool aligned, const char * name) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for short arrays that may overlap; prereqs and features are listed at generate_conjoint_copy.
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+
+   // The matching disjoint stub takes over when the arrays do not overlap destructively.
+   address disjoint_stub;
+   if (aligned) { disjoint_stub = StubRoutines::arrayof_jshort_disjoint_arraycopy(); }
+   else         { disjoint_stub = StubRoutines::jshort_disjoint_arraycopy(); }
+   array_overlap_test(disjoint_stub, 1); // Branch away to disjoint_stub if disjoint.
+   generate_conjoint_copy(aligned, 2, false);
+   return __ addr_at(entry_off);
+ }
+
+ address generate_conjoint_int_copy(bool aligned, const char * name) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for int arrays that may overlap; prereqs and features are listed at generate_conjoint_copy.
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+
+   // The matching disjoint stub takes over when the arrays do not overlap destructively.
+   address disjoint_stub;
+   if (aligned) { disjoint_stub = StubRoutines::arrayof_jint_disjoint_arraycopy(); }
+   else         { disjoint_stub = StubRoutines::jint_disjoint_arraycopy(); }
+   array_overlap_test(disjoint_stub, 2); // Branch away to disjoint_stub if disjoint.
+   generate_conjoint_copy(aligned, 4, false);
+
+   return __ addr_at(entry_off);
+ }
+
+ address generate_conjoint_long_copy(bool aligned, const char * name) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for long arrays that may overlap; prereqs and features are listed at generate_conjoint_copy.
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+
+   // The matching disjoint stub takes over when the arrays do not overlap destructively.
+   address disjoint_stub;
+   if (aligned) { disjoint_stub = StubRoutines::arrayof_jlong_disjoint_arraycopy(); }
+   else         { disjoint_stub = StubRoutines::jlong_disjoint_arraycopy(); }
+   array_overlap_test(disjoint_stub, 3); // Branch away to disjoint_stub if disjoint.
+   generate_conjoint_copy(aligned, 8, false);
+
+   return __ addr_at(entry_off);
+ }
+
+ address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
+   StubCodeMark mark(this, "StubRoutines", name);
+   // zarch stub for oop arrays that may overlap; prereqs and features are listed at generate_conjoint_copy.
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+   const Register to_reg    = Z_ARG2; // destination address, handed to both barriers
+   const Register count_reg = Z_ARG3; // element count, handed to both barriers
+
+   // Oops are 4 bytes wide (shift 2) when compressed, 8 bytes wide (shift 3) otherwise.
+   unsigned int oop_size      = 8;
+   unsigned int log2_oop_size = 3;
+   if (UseCompressedOops) { oop_size = 4; log2_oop_size = 2; }
+   address disjoint_stub;
+   if (aligned) { disjoint_stub = StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized); }
+   else         { disjoint_stub = StubRoutines::oop_disjoint_arraycopy(dest_uninitialized); }
+
+   // Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier.
+   array_overlap_test(disjoint_stub, log2_oop_size); // Branch away to disjoint_stub if disjoint.
+   gen_write_ref_array_pre_barrier(to_reg, count_reg, dest_uninitialized);
+   generate_conjoint_copy(aligned, oop_size, true); // Must preserve ARG2, ARG3.
+   gen_write_ref_array_post_barrier(to_reg, count_reg, false);
+   return __ addr_at(entry_off);
+ }
+
+
+ void generate_arraycopy_stubs() {
+ // Generates every arraycopy stub of this file and stores its entry address in the corresponding StubRoutines field.
+ // Note: the disjoint stubs must be generated first, some of
+ // the conjoint stubs use them (as the branch target handed to array_overlap_test).
+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy (false, "jbyte_disjoint_arraycopy");
+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy (false, "jint_disjoint_arraycopy");
+ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy (false, "jlong_disjoint_arraycopy");
+ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy (false, "oop_disjoint_arraycopy", false);
+ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy (false, "oop_disjoint_arraycopy_uninit", true);
+ // Disjoint stubs again, this time in the "arrayof" flavor (aligned == true).
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy (true, "arrayof_jbyte_disjoint_arraycopy");
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy (true, "arrayof_jint_disjoint_arraycopy");
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy (true, "arrayof_jlong_disjoint_arraycopy");
+ StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy (true, "arrayof_oop_disjoint_arraycopy", false);
+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy (true, "arrayof_oop_disjoint_arraycopy_uninit", true);
+ // Conjoint stubs (aligned == false). They read the disjoint entry points assigned above.
+ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy (false, "jbyte_arraycopy");
+ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
+ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy (false, "jint_arraycopy");
+ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy (false, "jlong_arraycopy");
+ StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy (false, "oop_arraycopy", false);
+ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy (false, "oop_arraycopy_uninit", true);
+ // Conjoint stubs in the "arrayof" flavor (aligned == true).
+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy (true, "arrayof_jbyte_arraycopy");
+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
+ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy (true, "arrayof_jint_arraycopy");
+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy (true, "arrayof_jlong_arraycopy");
+ StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy (true, "arrayof_oop_arraycopy", false);
+ StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy (true, "arrayof_oop_arraycopy_uninit", true);
+ }
+
+ void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
+
+   // Emits the stub behind these C signatures:
+   //   int      SafeFetch32(int* adr, int errValue);
+   //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
+   //
+   // Register contract of the emitted code:
+   //   Z_ARG1 = adr
+   //   Z_ARG2 = errValue
+   //   Z_RET  = *adr or errValue
+   //
+   // size selects the width of the load in bytes (4 or 8). The three
+   // out-parameters receive the stub entry, the pc of the load that may
+   // fault, and the pc recorded as continuation point.
+
+   StubCodeMark mark(this, "StubRoutines", name);
+
+   // The load that may fault is the very first instruction of the stub,
+   // so the entry point and the fault pc are the same address.
+   const address load_pc = __ pc();
+   *fault_pc = load_pc;
+   *entry    = load_pc;
+
+   // Load *adr into Z_ARG2, replacing errValue there.
+   if (size == 4) {
+     __ z_lgf(Z_ARG2, 0, Z_ARG1); // Sign extended int32_t.
+   } else if (size == 8) {
+     __ z_lg(Z_ARG2, 0, Z_ARG1);  // int64_t
+   } else {
+     ShouldNotReachHere();
+   }
+
+   // Return errValue or *adr, whichever Z_ARG2 holds at this point.
+   *continuation_pc = __ pc();
+   __ z_lgr(Z_RET, Z_ARG2);
+   __ z_br(Z_R14);
+ }
+
+ // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
+ //
+ // Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
+ // Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
+ // For in-place encryption/decryption, ARG1 and ARG2 can point
+ // to the same piece of storage.
+ // Z_ARG3 - Crypto key address (expanded key). The first n bits of
+ // the expanded key constitute the original AES-<n> key (see below).
+ //
+ // Z_RET - return value. First unprocessed byte offset in src buffer.
+ //
+ // Some remarks:
+ // The crypto key, as passed from the caller to these encryption stubs,
+ // is a so-called expanded key. It is derived from the original key
+ // by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule
+ // With the expanded key, the cipher/decipher task is decomposed in
+ // multiple, less complex steps, called rounds. Sun SPARC and Intel
+ // processors obviously implement support for those less complex steps.
+ // z/Architecture provides instructions for full cipher/decipher complexity.
+ // Therefore, we need the original, not the expanded key here.
+ // Luckily, the first n bits of an AES-<n> expanded key are formed
+ // by the original key itself. That takes us out of trouble. :-)
+ // The key length (in bytes) relation is as follows:
+ //   original  | expanded  | rounds | key bit | keylen
+ //   key bytes | key bytes |        | length  | in words
+ //      16     |    176    |   11   |   128   |   44
+ //      24     |    208    |   13   |   192   |   52
+ //      32     |    240    |   15   |   256   |   60
+ //
+ // The crypto instructions used in the AES* stubs have some specific register requirements.
+ // Z_R0 holds the crypto function code. Please refer to the KM/KMC instruction
+ // description in the "z/Architecture Principles of Operation" manual for details.
+ // Z_R1 holds the parameter block address. The parameter block contains the cryptographic key
+ // (KM instruction) and the chaining value (KMC instruction).
+ // dst must designate an even-numbered register, holding the address of the output message.
+ // src must designate an even/odd register pair, holding the address/length of the original message
+
+ // Helper function which generates code to
+ // - load the function code in register fCode (== Z_R0)
+ // - load the data block length (depends on cipher function) in register srclen.
+ // - is_decipher switches between cipher/decipher function codes
+ // - There is no set_len parameter: the data block length is loaded into srclen on every path.
+ void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {
+ // keylen: expanded key length in 4-byte words. fCode/srclen: receive function code and data block length. The callers in this file pass Z_R0 for both keylen and fCode.
+ BLOCK_COMMENT("Set fCode {"); {
+ Label fCode_set;
+ int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher; // Added to the base function code below.
+ bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk)
+ && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk); // If true, srclen is loaded once after fCode_set.
+ // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
+ __ z_cghi(keylen, 52); // The branches below consume this CC; the z_lghi loads in between are relied upon to leave it unchanged.
+ __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
+ if (!identical_dataBlk_len) {
+ __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
+ }
+ __ z_brh(fCode_set); // keyLen > 52: AES256
+
+ __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
+ if (!identical_dataBlk_len) {
+ __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk);
+ }
+ __ z_bre(fCode_set); // keyLen == 52: AES192
+
+ __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
+ if (!identical_dataBlk_len) {
+ __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
+ }
+ // __ z_brl(fCode_set); // keyLen < 52: AES128 // fallthru
+ __ bind(fCode_set);
+ if (identical_dataBlk_len) {
+ __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk); // All three block lengths are equal, so one load serves every key size.
+ }
+ }
+ BLOCK_COMMENT("} Set fCode");
+ }
+
+ // Push a parameter block for the cipher/decipher instruction on the stack.
+ // NOTE:
+ // Before returning, the stub has to copy the chaining value from
+ // the parmBlk, where it was updated by the crypto instruction, back
+ // to the chaining value array the address of which was passed in the cv argument.
+ // As all the available registers are used and modified by KMC, we need to save
+ // the key length across the KMC instruction. We do so by spilling it to the stack,
+ // just preceding the parmBlk (at (parmBlk - 8)).
+ void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
+ const int AES_parmBlk_align = 32; // The parameter block address is rounded down to a multiple of this.
+ const int AES_parmBlk_addspace = AES_parmBlk_align; // Must be multiple of AES_parmblk_align.
+ int cv_len, key_len;
+ int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
+ Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
+ // Emitted code: dispatch on keylen, place an aligned parmBlk below Z_SP, spill keylen at parmBlk-8 and Z_SP at parmBlk-16, copy cv then key into parmBlk, set fCode.
+ BLOCK_COMMENT("push parmBlk {");
+ if (VM_Version::has_Crypto_AES() ) { __ z_cghi(keylen, 52); }
+ if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); } // keyLen > 52: AES256
+ if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); } // keyLen == 52: AES192
+ if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); } // keyLen < 52: AES128
+
+ // Security net: requested AES function not available on this CPU.
+ // NOTE:
+ // As of now (March 2015), this safety net is not required. JCE policy files limit the
+ // cryptographic strength of the keys used to 128 bit. If we have AES hardware support
+ // at all, we have at least AES-128.
+ __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);
+
+ if (VM_Version::has_Crypto_AES128()) {
+ __ bind(parmBlk_128);
+ cv_len = VM_Version::Cipher::_AES128_dataBlk;
+ key_len = VM_Version::Cipher::_AES128_parmBlk_C - cv_len; // The parmBlk holds cv followed by key.
+ __ z_lay(parmBlk, -(VM_Version::Cipher::_AES128_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP); // Z_SP - parmBlk size - 1, rounded down by the next instruction.
+ __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // align parameter block
+
+ // Resize the frame to accommodate for the aligned parameter block and other stuff.
+ // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+ __ z_stg(keylen, -8, parmBlk); // Spill keylen for later use.
+ __ z_stg(Z_SP, -16, parmBlk); // Spill SP for easy revert.
+ __ z_aghi(parmBlk, -AES_parmBlk_addspace); // Additional space for keylen, etc..
+ __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+ __ z_aghi(parmBlk, AES_parmBlk_addspace); // Restore parameter block address.
+
+ __ z_mvc(0, cv_len-1, parmBlk, 0, cv); // Copy cv.
+ __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key.
+ __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
+ if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
+ __ z_bru(parmBlk_set); // Fallthru otherwise.
+ }
+ }
+
+ if (VM_Version::has_Crypto_AES192()) { // Same sequence as for AES128, with the AES192 sizes.
+ __ bind(parmBlk_192);
+ cv_len = VM_Version::Cipher::_AES192_dataBlk;
+ key_len = VM_Version::Cipher::_AES192_parmBlk_C - cv_len;
+ __ z_lay(parmBlk, -(VM_Version::Cipher::_AES192_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
+ __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block.
+
+ // Resize the frame to accommodate for the aligned parameter block and other stuff.
+ // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+ __ z_stg(keylen, -8, parmBlk); // Spill keylen for later use.
+ __ z_stg(Z_SP, -16, parmBlk); // Spill SP for easy revert.
+ __ z_aghi(parmBlk, -AES_parmBlk_addspace); // Additional space for keylen, etc..
+ __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+ __ z_aghi(parmBlk, AES_parmBlk_addspace); // Restore parameter block address.
+
+ __ z_mvc(0, cv_len-1, parmBlk, 0, cv); // Copy cv.
+ __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key.
+ __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
+ if (VM_Version::has_Crypto_AES256()) {
+ __ z_bru(parmBlk_set); // Fallthru otherwise.
+ }
+ }
+
+ if (VM_Version::has_Crypto_AES256()) { // Same sequence as for AES128, with the AES256 sizes.
+ __ bind(parmBlk_256);
+ cv_len = VM_Version::Cipher::_AES256_dataBlk;
+ key_len = VM_Version::Cipher::_AES256_parmBlk_C - cv_len;
+ __ z_lay(parmBlk, -(VM_Version::Cipher::_AES256_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
+ __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block.
+
+ // Resize the frame to accommodate for the aligned parameter block and other stuff.
+ // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+ __ z_stg(keylen, -8, parmBlk); // Spill keylen for later use.
+ __ z_stg(Z_SP, -16, parmBlk); // Spill SP for easy revert.
+ __ z_aghi(parmBlk, -AES_parmBlk_addspace); // Additional space for keylen, etc..
+ __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+ __ z_aghi(parmBlk, AES_parmBlk_addspace); // Restore parameter block address.
+
+ __ z_mvc(0, cv_len-1, parmBlk, 0, cv); // Copy cv.
+ __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key.
+ __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
+ // __ z_bru(parmBlk_set); // fallthru
+ }
+
+ __ bind(parmBlk_set); // All generated variants meet here with parmBlk and fCode set.
+ BLOCK_COMMENT("} push parmBlk");
+ }
+
+ // Pop a parameter block from the stack. The chaining value portion of the parameter block
+ // is copied back to the cv array as it is needed for subsequent cipher steps.
+ // The keylen value as well as the original SP (before resizing) was pushed to the stack
+ // when pushing the parameter block.
+ void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {
+ // Emitted code: copy the chaining value from the start of parmBlk back to cv, then reload Z_SP from parmBlk-16. The "key" argument is not used by this body.
+ BLOCK_COMMENT("pop parmBlk {");
+ bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
+ (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
+ if (identical_dataBlk_len) { // One cv length fits all key sizes: no need to reload keylen or to dispatch.
+ int cv_len = VM_Version::Cipher::_AES128_dataBlk;
+ __ z_mvc(0, cv_len-1, cv, 0, parmBlk); // Copy cv.
+ } else {
+ int cv_len;
+ Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
+ __ z_lg(keylen, -8, parmBlk); // restore keylen
+ __ z_cghi(keylen, 52);
+ if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256); // keyLen > 52: AES256
+ if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192); // keyLen == 52: AES192
+ // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128); // keyLen < 52: AES128 // fallthru
+
+ // Security net: there is no one here. If we would need it, we should have
+ // fallen into it already when pushing the parameter block.
+ if (VM_Version::has_Crypto_AES128()) {
+ __ bind(parmBlk_128);
+ cv_len = VM_Version::Cipher::_AES128_dataBlk;
+ __ z_mvc(0, cv_len-1, cv, 0, parmBlk); // Copy cv.
+ if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
+ __ z_bru(parmBlk_set); // Skip the other variants; fallthru if none is generated.
+ }
+ }
+
+ if (VM_Version::has_Crypto_AES192()) {
+ __ bind(parmBlk_192);
+ cv_len = VM_Version::Cipher::_AES192_dataBlk;
+ __ z_mvc(0, cv_len-1, cv, 0, parmBlk); // Copy cv.
+ if (VM_Version::has_Crypto_AES256()) {
+ __ z_bru(parmBlk_set); // Skip the AES256 variant; fallthru if it is not generated.
+ }
+ }
+
+ if (VM_Version::has_Crypto_AES256()) {
+ __ bind(parmBlk_256);
+ cv_len = VM_Version::Cipher::_AES256_dataBlk;
+ __ z_mvc(0, cv_len-1, cv, 0, parmBlk); // Copy cv.
+ // __ z_bru(parmBlk_set); // fallthru
+ }
+ __ bind(parmBlk_set);
+ }
+ __ z_lg(Z_SP, -16, parmBlk); // Revert resize_frame_absolute.
+ BLOCK_COMMENT("} pop parmBlk");
+ }
+
+ // Compute AES encrypt function.
+ address generate_AES_encryptBlock(const char* name) {
+   __ align(CodeEntryAlignment);
+   StubCodeMark mark(this, "StubRoutines", name);
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+
+   // Incoming arguments. The source byte array arrives in Z_ARG1, the very
+   // register KM takes the message address from, so it gets no alias here.
+   const Register to_arr  = Z_ARG2; // destination byte array
+   const Register key_arr = Z_ARG3; // expanded key array
+
+   // Register assignment for the KM instruction.
+   const Register key_words = Z_R0;   // Expanded key array length, only until func_code is set.
+   const Register func_code = Z_R0;   // crypto function code
+   const Register parm_blk  = Z_R1;   // parameter block address (points to crypto key)
+   const Register msg       = Z_ARG1; // is Z_R2
+   const Register msg_len   = Z_ARG2; // Overwrites destination address.
+   const Register out       = Z_ARG3; // Overwrites expanded key address.
+
+   // Fetch the length of the expanded key (in 4-byte words).
+   __ z_lgf(key_words, Address(key_arr, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+   // Move the arguments to where the crypto instruction expects them.
+   __ z_lgr(parm_blk, key_arr); // crypto key (in T_INT array).
+   __ z_lgr(out, to_arr);       // Destination address goes to the even register.
+
+   // Function code ends up in Z_R0, data block length in Z_ARG2.
+   generate_load_AES_fCode(key_words, func_code, msg_len, false);
+
+   __ km(out, msg); // Cipher the message.
+   __ z_br(Z_R14);
+
+   return __ addr_at(entry_off);
+ }
+
+ // Compute AES decrypt function.
+ address generate_AES_decryptBlock(const char* name) {
+   __ align(CodeEntryAlignment);
+   StubCodeMark mark(this, "StubRoutines", name);
+   const unsigned int entry_off = __ offset(); // Entry point (the return value), kept as offset until return.
+
+   // Incoming arguments. The source byte array arrives in Z_ARG1, the very
+   // register KM takes the message address from, so it gets no alias here.
+   const Register to_arr  = Z_ARG2; // destination byte array
+   const Register key_arr = Z_ARG3; // expanded key array, not preset at entry!!!
+
+   // Register assignment for the KM instruction.
+   const Register key_words = Z_R0;   // Expanded key array length, only until func_code is set.
+   const Register func_code = Z_R0;   // crypto function code
+   const Register parm_blk  = Z_R1;   // parameter block address (points to crypto key)
+   const Register msg       = Z_ARG1; // is Z_R2
+   const Register msg_len   = Z_ARG2; // Overwrites destination address.
+   const Register out       = Z_ARG3; // Overwrites key address.
+
+   // Fetch the length of the expanded key (in 4-byte words).
+   __ z_lgf(key_words, Address(key_arr, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+   // Move the arguments to where the crypto instruction expects them.
+   __ z_lgr(parm_blk, key_arr); // Copy crypto key address.
+   __ z_lgr(out, to_arr);       // Destination address goes to the even register.
+
+   // Function code (decipher variant) ends up in Z_R0, data block length in Z_ARG2.
+   generate_load_AES_fCode(key_words, func_code, msg_len, true);
+
+   __ km(out, msg); // Decipher the message.
+   __ z_br(Z_R14);
+
+   return __ addr_at(entry_off);
+ }
+
+ // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate
+ // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires
+ // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some
+ // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing.
+ // Stack space, on the contrary, is deallocated automatically when we return from the stub to the caller.
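+ // *** WARNING ***
+ // NOTE(review): an earlier wording said the stack pointer is not updated; that looks stale.
+ // generate_push_parmBlk calls resize_frame_absolute with a parmBlk-derived address as the new SP,
+ // and generate_pop_parmBlk reloads the spilled Z_SP afterwards. Still, no function calls are allowed
+ // and nobody else must use the stack range where the parameter block is located.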
+ // We align the parameter block to the next available octoword.
+ //
+ // Compute chained AES encrypt function.
+ address generate_cipherBlockChaining_AES_encrypt(const char* name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
+ // Emitted code: push a parameter block (cv + key), run KMC over the message, pop the block (copies cv back), return msglen in Z_RET.
+ Register from = Z_ARG1; // source byte array (clear text)
+ Register to = Z_ARG2; // destination byte array (ciphered)
+ Register key = Z_ARG3; // expanded key array.
+ Register cv = Z_ARG4; // chaining value
+ const Register msglen = Z_ARG5; // Total length of the msg to be encrypted. Value must be returned
+ // in Z_RET upon completion of this stub. Is 32-bit integer.
+
+ const Register keylen = Z_R0; // Expanded key length, as read from key array. Temp only.
+ const Register fCode = Z_R0; // crypto function code
+ const Register parmBlk = Z_R1; // parameter block address (points to crypto key)
+ const Register src = Z_ARG1; // is Z_R2
+ const Register srclen = Z_ARG2; // Overwrites destination address.
+ const Register dst = Z_ARG3; // Overwrites key address.
+
+ // Read key len of expanded key (in 4-byte words).
+ __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+ // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
+ // Construct function code in Z_R0.
+ generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, false);
+
+ // Prepare other registers for instruction.
+ // __ z_lgr(src, from); // Not needed, registers are the same.
+ __ z_lgr(dst, to); // key (Z_ARG3) is dead by now: it was copied into the parameter block.
+ __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required.
+
+ __ kmc(dst, src); // Cipher the message.
+ // Copy the chaining value back to cv and reload Z_SP.
+ generate_pop_parmBlk(keylen, parmBlk, key, cv);
+
+ __ z_llgfr(Z_RET, msglen); // We pass the offsets as ints, not as longs as required.
+ __ z_br(Z_R14);
+
+ return __ addr_at(start_off);
+ }
+
+ // Compute chained AES decrypt function.
+ //
+ // Emits the stub for chained (cipher block chaining) AES decryption and
+ // returns the stub's entry address.
+ // name: stub name handed to StubCodeMark.
+ // The emitted code expects its operands in Z_ARG1..Z_ARG5 as named by the
+ // register declarations below and returns the zero-extended message length in Z_RET.
+ address generate_cipherBlockChaining_AES_decrypt(const char* name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
+
+ Register from = Z_ARG1; // source byte array (ciphered)
+ Register to = Z_ARG2; // destination byte array (clear text)
+ Register key = Z_ARG3; // expanded key array, not preset at entry!!!
+ Register cv = Z_ARG4; // chaining value
+ const Register msglen = Z_ARG5; // Total length of the msg to be decrypted. Value must be returned
+ // in Z_RET upon completion of this stub. Is 32-bit integer.
+
+ // keylen and fCode share Z_R0: the key length is only needed until the
+ // function code has been constructed.
+ const Register keylen = Z_R0; // Expanded key length, as read from key array. Temp only.
+ const Register fCode = Z_R0; // crypto function code
+ const Register parmBlk = Z_R1; // parameter block address (points to crypto key)
+ // The operand registers of the crypto instruction alias the incoming argument registers.
+ const Register src = Z_ARG1; // is Z_R2
+ const Register srclen = Z_ARG2; // Overwrites destination address.
+ const Register dst = Z_ARG3; // Overwrites key address.
+
+ // Read key len of expanded key (in 4-byte words).
+ // The displacement is (length offset - element base offset), i.e. key is treated as
+ // addressing the first array element rather than the array header.
+ __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+ // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
+ // Construct function code in Z_R0.
+ generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, true);
+
+ // Prepare other registers for instruction.
+ // Order matters: dst (Z_ARG3) must be loaded from 'to' (Z_ARG2) before srclen overwrites Z_ARG2.
+ // __ z_lgr(src, from); // Not needed, registers are the same.
+ __ z_lgr(dst, to);
+ // msglen is a 32-bit int. Zero-extend it (as the encrypt stub does) instead of copying
+ // all 64 bits, so undefined upper register bits never reach the length operand.
+ __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required.
+
+ __ kmc(dst, src); // Decipher the message.
+
+ generate_pop_parmBlk(keylen, parmBlk, key, cv);
+
+ // Return value: message length, zero-extended from 32 to 64 bits.
+ __ z_llgfr(Z_RET, msglen); // We pass the offsets as ints, not as longs as required.
+ __ z_br(Z_R14);
+
+ return __ addr_at(start_off);
+ }
+
+
+ // Call interface for all SHA* stubs.
+ //
+ // Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
+ // Z_ARG2 - current SHA state. Ptr to state area. This area serves as
+ // parameter block as required by the crypto instruction.
+ // Z_ARG3 - current byte offset in source data block.
+ // Z_ARG4 - last byte offset in source data block.
+ // (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed.
+ //
+ // Z_RET - return value. First unprocessed byte offset in src buffer.
+ //
+ // A few notes on the call interface:
+ // - All stubs, whether they are single-block or multi-block, are assumed to
+ // digest an integer multiple of the data block length of data. All data
+ // blocks are digested using the intermediate message digest (KIMD) instruction.
+ // Special end processing, as done by the KLMD instruction, seems to be
+ // emulated by the calling code.
+ //
+ // - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is
+ // already accounted for.
+ //
+ // - The current SHA state (the intermediate message digest value) is contained
+ // in an area addressed by Z_ARG2. The area size depends on the SHA variant
+ // and is accessible via the enum VM_Version::MsgDigest::_SHA<n>_parmBlk_I
+ //
+ // - The single-block stub is expected to digest exactly one data block, starting
+ // at the address passed in Z_ARG1.
+ //
+ // - The multi-block stub is expected to digest all data blocks which start in
+ // the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference
+ // (srcLimit-srcOff), rounded up to the next multiple of the data block length,
+ // gives the number of blocks to digest. It must be assumed that the calling code
+ // provides for a large enough source data buffer.
+ //
+ // Compute SHA-1 function.
+ address generate_SHA1_stub(bool multiBlock, const char* name) {
+ // Emits the SHA-1 digest stub and returns its entry address.
+ // multiBlock: false - the stub digests exactly one data block;
+ // true - the stub digests all blocks in [srcOff, srcLimit).
+ // name: stub name handed to StubCodeMark.
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
+
+ const Register srcBuff = Z_ARG1; // Points to first block to process (offset already added).
+ const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter for kimd register pairs.
+ const Register srcOff = Z_ARG3; // int
+ const Register srcLimit = Z_ARG4; // Only passed in multiBlock case. int
+
+ const Register SHAState_local = Z_R1;
+ const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before.
+ Label useKLMD, rtn;
+
+ __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1); // function code
+ // Copy the state address to Z_R1 first: Z_ARG2 is reused as srcBufLen below.
+ __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block
+
+ if (multiBlock) { // Process everything from offset to limit.
+
+ // The following description is valid if we get a raw (unpimped) source data buffer,
+ // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
+ // the calling convention for these stubs is different. We leave the description in
+ // to inform the reader what must be happening hidden in the calling code.
+ //
+ // The data block to be processed can have arbitrary length, i.e. its length does not
+ // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
+ // two different paths. If the length is an integer multiple, we use KIMD, saving us
+ // to copy the SHA state back and forth. If the length is odd, we copy the SHA state
+ // to the stack, execute a KLMD instruction on it and copy the result back to the
+ // caller's SHA state location.
+
+ // Total #srcBuff bytes to process: (srcLimit - srcOff), rounded up to the next
+ // multiple of the data block length. srcLimit is then reused for the return value.
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference
+ __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1); // round up
+ __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
+ __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value.
+ __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit.
+ } else {
+ __ z_lgfr(srcBufLen, srcLimit); // Exact difference. srcLimit passed as int.
+ __ z_sgfr(srcBufLen, srcOff); // SrcOff passed as int, now properly casted to long.
+ __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1); // round up
+ __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff);
+ __ z_lgr(srcLimit, srcOff); // SrcLimit temporarily holds return value.
+ __ z_agr(srcLimit, srcBufLen);
+ }
+
+ // Integral #blocks to digest?
+ // As a result of the calculations above, srcBufLen MUST be an integer
+ // multiple of _SHA1_dataBlk, or else we are in big trouble.
+ // The useKLMD path below stops the VM to guard against that.
+ __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1);
+ __ z_brc(Assembler::bcondNotAllZero, useKLMD);
+
+ // Process all full blocks.
+ __ kimd(srcBuff);
+
+ __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer.
+ } else { // Process one data block only.
+ __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk); // #srcBuff bytes to process
+ __ kimd(srcBuff);
+ __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff); // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed.
+ }
+
+ __ bind(rtn);
+ __ z_br(Z_R14);
+
+ if (multiBlock) {
+ __ bind(useKLMD);
+
+#if 1
+ // Security net: this stub is believed to be called for full-sized data blocks only.
+ // No KLMD-based handling of a partial trailing block is generated; stop instead.
+ __ stop_static("SHA1 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
+#endif
+ }
+
+ return __ addr_at(start_off);
+ }
+
+ // Compute SHA-256 function.
+ address generate_SHA256_stub(bool multiBlock, const char* name) {
+ // Emits the SHA-256 digest stub and returns its entry address.
+ // multiBlock: false - the stub digests exactly one data block;
+ // true - the stub digests all blocks in [srcOff, srcLimit).
+ // name: stub name handed to StubCodeMark.
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
+
+ const Register srcBuff = Z_ARG1;
+ const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter.
+ const Register SHAState_local = Z_R1;
+ const Register SHAState_save = Z_ARG3; // Not referenced in this stub.
+ const Register srcOff = Z_ARG3;
+ const Register srcLimit = Z_ARG4;
+ const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before.
+ Label useKLMD, rtn;
+
+ __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256); // function code
+ // Copy the state address to Z_R1 first: Z_ARG2 is reused as srcBufLen below.
+ __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block
+
+ if (multiBlock) { // Process everything from offset to limit.
+ // The following description is valid if we get a raw (unpimped) source data buffer,
+ // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
+ // the calling convention for these stubs is different. We leave the description in
+ // to inform the reader what must be happening hidden in the calling code.
+ //
+ // The data block to be processed can have arbitrary length, i.e. its length does not
+ // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
+ // two different paths. If the length is an integer multiple, we use KIMD, saving us
+ // to copy the SHA state back and forth. If the length is odd, we copy the SHA state
+ // to the stack, execute a KLMD instruction on it and copy the result back to the
+ // caller's SHA state location.
+
+ // Total #srcBuff bytes to process: (srcLimit - srcOff), rounded up to the next
+ // multiple of the data block length. srcLimit is then reused for the return value.
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference
+ __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up
+ __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
+ __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value.
+ __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit.
+ } else {
+ __ z_lgfr(srcBufLen, srcLimit); // exact difference
+ __ z_sgfr(srcBufLen, srcOff);
+ __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up
+ __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff);
+ __ z_lgr(srcLimit, srcOff); // Srclimit temporarily holds return value.
+ __ z_agr(srcLimit, srcBufLen);
+ }
+
+ // Integral #blocks to digest?
+ // As a result of the calculations above, srcBufLen MUST be an integer
+ // multiple of _SHA256_dataBlk, or else we are in big trouble.
+ // The useKLMD path below stops the VM to guard against that.
+ __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1);
+ __ z_brc(Assembler::bcondNotAllZero, useKLMD);
+
+ // Process all full blocks.
+ __ kimd(srcBuff);
+
+ __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer.
+ } else { // Process one data block only.
+ __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk); // #srcBuff bytes to process
+ __ kimd(srcBuff);
+ __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff); // Offset of first unprocessed byte in buffer.
+ }
+
+ __ bind(rtn);
+ __ z_br(Z_R14);
+
+ if (multiBlock) {
+ __ bind(useKLMD);
+#if 1
+ // Security net: this stub is believed to be called for full-sized data blocks only.
+ // No KLMD-based handling of a partial trailing block is generated; stop instead.
+ __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
+#endif
+ }
+
+ return __ addr_at(start_off);
+ }
+
+ // Compute SHA-512 function.
+ address generate_SHA512_stub(bool multiBlock, const char* name) {
+ // Emits the SHA-512 digest stub and returns its entry address.
+ // multiBlock: false - the stub digests exactly one data block;
+ // true - the stub digests all blocks in [srcOff, srcLimit).
+ // name: stub name handed to StubCodeMark.
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
+
+ const Register srcBuff = Z_ARG1;
+ const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter.
+ const Register SHAState_local = Z_R1;
+ const Register SHAState_save = Z_ARG3; // Not referenced in this stub.
+ const Register srcOff = Z_ARG3;
+ const Register srcLimit = Z_ARG4;
+ const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before.
+ Label useKLMD, rtn;
+
+ __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512); // function code
+ // Copy the state address to Z_R1 first: Z_ARG2 is reused as srcBufLen below.
+ __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block
+
+ if (multiBlock) { // Process everything from offset to limit.
+ // The following description is valid if we get a raw (unpimped) source data buffer,
+ // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above,
+ // the calling convention for these stubs is different. We leave the description in
+ // to inform the reader what must be happening hidden in the calling code.
+ //
+ // The data block to be processed can have arbitrary length, i.e. its length does not
+ // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement
+ // two different paths. If the length is an integer multiple, we use KIMD, saving us
+ // to copy the SHA state back and forth. If the length is odd, we copy the SHA state
+ // to the stack, execute a KLMD instruction on it and copy the result back to the
+ // caller's SHA state location.
+
+ // Total #srcBuff bytes to process: (srcLimit - srcOff), rounded up to the next
+ // multiple of the data block length. srcLimit is then reused for the return value.
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference
+ __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up
+ __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
+ __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value.
+ __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit.
+ } else {
+ __ z_lgfr(srcBufLen, srcLimit); // exact difference
+ __ z_sgfr(srcBufLen, srcOff);
+ __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up
+ __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff);
+ __ z_lgr(srcLimit, srcOff); // Srclimit temporarily holds return value.
+ __ z_agr(srcLimit, srcBufLen);
+ }
+
+ // Integral #blocks to digest?
+ // As a result of the calculations above, srcBufLen MUST be an integer
+ // multiple of _SHA512_dataBlk, or else we are in big trouble.
+ // The useKLMD path below stops the VM to guard against that.
+ __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1);
+ __ z_brc(Assembler::bcondNotAllZero, useKLMD);
+
+ // Process all full blocks.
+ __ kimd(srcBuff);
+
+ __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer.
+ } else { // Process one data block only.
+ __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk); // #srcBuff bytes to process
+ __ kimd(srcBuff);
+ __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff); // Offset of first unprocessed byte in buffer.
+ }
+
+ __ bind(rtn);
+ __ z_br(Z_R14);
+
+ if (multiBlock) {
+ __ bind(useKLMD);
+#if 1
+ // Security net: this stub is believed to be called for full-sized data blocks only.
+ // No KLMD-based handling of a partial trailing block is generated; stop instead.
+ __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0);
+#endif
+ }
+
+ return __ addr_at(start_off);
+ }
+
+
+
+ // Arguments:
+ // Z_ARG1 - int crc
+ // Z_ARG2 - byte* buf
+ // Z_ARG3 - int length (of buffer)
+ //
+ // Result:
+ // Z_RET - int crc result
+ //
+ // Compute CRC32 function.
+ address generate_CRC32_updateBytes(const char* name) {
+ // Emits the CRC32 update stub and returns its entry address.
+ // name: stub name handed to StubCodeMark.
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
+
+ // arguments to kernel_crc32:
+ Register crc = Z_ARG1; // Current checksum, preset by caller or result from previous call, int.
+ Register data = Z_ARG2; // source byte array
+ Register dataLen = Z_ARG3; // #bytes to process, int
+ Register table = Z_ARG4; // crc table address
+ const Register t0 = Z_R10; // work reg for kernel* emitters
+ const Register t1 = Z_R11; // work reg for kernel* emitters
+ const Register t2 = Z_R12; // work reg for kernel* emitters
+ const Register t3 = Z_R13; // work reg for kernel* emitters
+
+ assert_different_registers(crc, data, dataLen, table);
+
+ // We pass these values as ints, not as longs as required by C calling convention.
+ // Crc used as int.
+ __ z_llgfr(dataLen, dataLen);
+
+ // The table address is not an incoming argument; it is loaded into Z_ARG4 here.
+ StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
+
+ __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide 6*8 bytes of add'l space for spilling the work registers.
+ __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP); // Spill regs 10..13 to make them available as work registers.
+ __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3);
+ __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP); // Restore regs 10..13 from stack.
+ __ resize_frame(+(6*8), Z_R0, true); // Resize frame back by the same 6*8 bytes.
+
+ __ z_llgfr(Z_RET, crc); // Updated crc is function result. No copying required, just zero upper 32 bits.
+ __ z_br(Z_R14); // Result already in Z_RET == Z_ARG1.
+
+ return __ addr_at(start_off);
+ }
+
+
+ // Arguments:
+ // Z_ARG1 - x address
+ // Z_ARG2 - x length
+ // Z_ARG3 - y address
+ // Z_ARG4 - y length
+ // Z_ARG5 - z address
+ // 160[Z_SP] - z length
+ address generate_multiplyToLen() {
+ // Emits the multiplyToLen stub and returns its entry address.
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+
+ const address entry = __ pc();
+
+ // Incoming arguments. The z length is passed on the stack:
+ // Address zlen(Z_SP, _z_abi(remaining_cargs));
+ const Register x_addr = Z_ARG1;
+ const Register x_len = Z_ARG2;
+ const Register y_addr = Z_ARG3;
+ const Register y_len = Z_ARG4;
+ const Register z_addr = Z_ARG5;
+
+ BLOCK_COMMENT("Entry:");
+
+ // The lengths arrive as 32-bit values; zero-extend them to 64 bits.
+ __ z_llgfr(x_len, x_len);
+ __ z_llgfr(y_len, y_len);
+
+ // The five temp registers will be saved on stack in multiply_to_len().
+ __ multiply_to_len(x_addr, x_len, y_addr, y_len, z_addr,
+ Z_tmp_1, Z_tmp_2, Z_tmp_3, Z_tmp_4, Z_R9);
+
+ __ z_br(Z_R14); // Return to caller.
+
+ return entry;
+ }
+
+ void generate_initial() {
+ // Generates the initial set of stubs and initializes their entry points.
+ // Invoked by the StubGenerator constructor when 'all' is false.
+
+ // Entry points that exist in all platforms.
+ // Note: This is code that could be shared among different
+ // platforms - however the benefit seems to be smaller than the
+ // disadvantage of having a much more complicated generator
+ // structure. See also comment in stubRoutines.hpp.
+ StubRoutines::_forward_exception_entry = generate_forward_exception();
+
+ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
+ StubRoutines::_catch_exception_entry = generate_catch_exception();
+
+ // Build this early so it's available for the interpreter.
+ // Generate the stub exactly once: a second generation would only overwrite
+ // the entry point and leave the first copy unused in the code buffer.
+ StubRoutines::_throw_StackOverflowError_entry =
+ generate_throw_exception("StackOverflowError throw_exception",
+ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
+
+ //----------------------------------------------------------------------
+ // Entry points that are platform specific.
+
+ if (UseCRC32Intrinsics) {
+ // Publish the z/Architecture CRC table and generate the CRC32 update stub.
+ StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table;
+ StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
+ }
+
+ // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+ StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table;
+ }
+
+
+ void generate_all() {
+ // Generates the stubs listed below and initializes their entry points.
+ // Invoked by the StubGenerator constructor when 'all' is true; the stubs
+ // produced by generate_initial() are not generated here.
+
+ StubRoutines::zarch::_partial_subtype_check = generate_partial_subtype_check();
+
+ // These entry points require SharedInfo::stack0 to be set up in non-core builds.
+ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
+ StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
+ StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
+
+ StubRoutines::zarch::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
+
+ // Support for verify_oop (must happen after universe_init).
+ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();
+
+ // Arraycopy stubs used by compilers.
+ generate_arraycopy_stubs();
+
+ // safefetch stubs
+ // Each call fills in three addresses: stub entry, faulting pc and continuation pc.
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc);
+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, &StubRoutines::_safefetchN_fault_pc, &StubRoutines::_safefetchN_continuation_pc);
+
+ // Generate AES intrinsics code.
+ if (UseAESIntrinsics) {
+ StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock("AES_encryptBlock");
+ StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock("AES_decryptBlock");
+ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt("AES_encryptBlock_chaining");
+ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt("AES_decryptBlock_chaining");
+ }
+
+ // Generate SHA1/SHA256/SHA512 intrinsics code.
+ // For each variant, a single-block stub (multiBlock == false) and a
+ // multi-block stub (multiBlock == true) are generated.
+ if (UseSHA1Intrinsics) {
+ StubRoutines::_sha1_implCompress = generate_SHA1_stub(false, "SHA1_singleBlock");
+ StubRoutines::_sha1_implCompressMB = generate_SHA1_stub(true, "SHA1_multiBlock");
+ }
+ if (UseSHA256Intrinsics) {
+ StubRoutines::_sha256_implCompress = generate_SHA256_stub(false, "SHA256_singleBlock");
+ StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(true, "SHA256_multiBlock");
+ }
+ if (UseSHA512Intrinsics) {
+ StubRoutines::_sha512_implCompress = generate_SHA512_stub(false, "SHA512_singleBlock");
+ StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(true, "SHA512_multiBlock");
+ }
+
+#ifdef COMPILER2
+ if (UseMultiplyToLenIntrinsic) {
+ StubRoutines::_multiplyToLen = generate_multiplyToLen();
+ }
+ // The Montgomery entry points are not generated stubs; they refer directly
+ // to the SharedRuntime functions.
+ if (UseMontgomeryMultiplyIntrinsic) {
+ StubRoutines::_montgomeryMultiply
+ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
+ }
+ if (UseMontgomerySquareIntrinsic) {
+ StubRoutines::_montgomerySquare
+ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
+ }
+#endif
+ }
+
+ public:
+ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
+ // Generates either the full stub set (all == true) or the initial one
+ // (all == false) into the given code buffer.
+
+ // Replace the standard masm with a special one:
+ _masm = new MacroAssembler(code);
+
+ // The stub counter must be seeded before any stub is generated:
+ // stub_prolog() increments it for every stub.
+ if (all) {
+ _stub_count = 0x200;
+ generate_all();
+ } else {
+ _stub_count = 0x100;
+ generate_initial();
+ }
+ }
+
+ private:
+ int _stub_count;
+ void stub_prolog(StubCodeDesc* cdesc) {
+ // Emits the per-stub prolog: in ASSERT builds, identifying data words;
+ // in all builds, header alignment.
+#ifdef ASSERT
+ // Put extra information in the stub code, to make it more readable:
+ // the descriptor address (high word first, then low word), followed by
+ // the running stub number.
+ // [RGV] Check if there is a dependency on the size of this prolog.
+ const intptr_t desc_bits = (intptr_t)cdesc;
+ __ emit_32(desc_bits >> 32);
+ __ emit_32(desc_bits);
+ __ emit_32(++_stub_count);
+#endif
+ align(true);
+ }
+
+ void align(bool at_header = false) {
+ // Pads the code up to the next alignment boundary.
+ // at_header == true: align to CodeEntryAlignment, padding with zero halfwords.
+ // at_header == false: align to min(32, CodeEntryAlignment), padding with nops.
+ //
+ // z/Architecture cache line size is 256 bytes.
+ // There is no obvious benefit in aligning stub
+ // code to cache lines. Use CodeEntryAlignment instead.
+ const unsigned int entry_alignment = CodeEntryAlignment;
+ const unsigned int boundary = at_header ? entry_alignment
+ : MIN2<unsigned int>(32, entry_alignment);
+
+ while ((intptr_t)(__ pc()) % boundary != 0) {
+ if (at_header) {
+ __ emit_16(0);
+ } else {
+ __ z_nop();
+ }
+ }
+ }
+
+};
+
+void StubGenerator_generate(CodeBuffer* code, bool all) {
+ // All work happens in the StubGenerator constructor, which generates either
+ // the full (all == true) or the initial (all == false) stub set into 'code'.
+ StubGenerator stub_generator(code, all);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/stubRoutines_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+
+// Implementation of the platform-specific part of StubRoutines - for
+// a description of how to extend it, see the stubRoutines.hpp file.
+
+address StubRoutines::zarch::_handler_for_unsafe_access_entry = NULL;
+
+address StubRoutines::zarch::_partial_subtype_check = NULL;
+
+// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+address StubRoutines::zarch::_trot_table_addr = NULL; // ASSERT builds check it is set and TROT_ALIGNMENT-aligned in generate_load_trot_table_addr().
+
+int StubRoutines::zarch::_atomic_memory_operation_lock = StubRoutines::zarch::unlocked;
+
+#define __ masm->
+
+void StubRoutines::zarch::generate_load_crc_table_addr(MacroAssembler* masm, Register table) {
+  // Emits code that loads the CRC lookup table address into 'table'; ASSERT builds add two self-checks.
+  __ load_absolute_address(table, StubRoutines::_crc_table_adr);
+#ifdef ASSERT
+  assert(_crc_table_adr != NULL, "CRC lookup table address must be initialized by now");
+  {
+    Label addr_ok; // Check 1: 'table' must match the address as materialized by load_const_optimized.
+    __ load_const_optimized(Z_R0, StubRoutines::_crc_table_adr);
+    __ z_cgr(table, Z_R0);
+    __ z_bre(addr_ok);
+    __ z_illtrap();
+    __ asm_assert_eq("crc_table: external word relocation required for load_absolute_address", 0x33);
+    __ bind(addr_ok);
+  }
+  {
+    Label data_ok; // Check 2: the word at offset 4 must be 0x77073096 (the value of _crc_table[0][1] in this file).
+    __ load_const_optimized(Z_R0, 0x77073096UL);
+    __ z_cl(Z_R0, Address(table, 4));
+    __ z_bre(data_ok);
+    __ z_l(Z_R0, Address(table, 4)); // On mismatch, fetch the word actually found; the expected constant is known.
+    __ z_illtrap();
+    __ asm_assert_eq("crc_table: address or contents seems to be messed up", 0x22);
+    __ bind(data_ok);
+  }
+#endif
+}
+
+// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+void StubRoutines::zarch::generate_load_trot_table_addr(MacroAssembler* masm, Register table) {
+  // Emits code that loads the translate table address into 'table'; ASSERT builds add self-checks.
+  RelocationHolder rspec = external_word_Relocation::spec((address)_trot_table);
+  __ relocate(rspec);
+  __ load_absolute_address(table, _trot_table_addr);
+#ifdef ASSERT
+  assert(_trot_table_addr != NULL, "Translate table address must be initialized by now");
+  assert((p2i(_trot_table_addr) & (TROT_ALIGNMENT-1)) == 0, "Translate table alignment error");
+  for (int i = 0; i < 256; i++) { // Generation-time check: halfword i of the table must hold the value i.
+    assert(i == *((jshort*)(_trot_table_addr+2*i)), "trot_table[%d] = %d", i, *((jshort*)(_trot_table_addr+2*i)));
+  }
+  {
+    Label L; // Emitted check: 'table' must match the address as materialized by load_const_optimized.
+    __ load_const_optimized(Z_R0, StubRoutines::zarch::_trot_table_addr);
+    __ z_cgr(table, Z_R0); // safety net
+    __ z_bre(L);
+    __ z_illtrap();
+    __ asm_assert_eq("trot_table: external word relocation does not work for load_absolute_address", 0x33);
+    __ bind(L);
+  }
+  {
+    Label L; // Emitted check: the doubleword at offset 8 must be 0x0004000500060007 (the value of _trot_table[1]).
+    __ load_const_optimized(Z_R0, 0x0004000500060007UL);
+    __ z_clg(Z_R0, Address(table, 8)); // safety net
+    __ z_bre(L);
+    __ z_lg(Z_R0, Address(table, 8)); // Load data from memory, we know the constant we compared against.
+    __ z_illtrap();
+    __ asm_assert_eq("trot_table: address or contents seems to be messed up", 0x22);
+    __ bind(L);
+  }
+#endif
+}
+
+
+/**
+ * trot_table[]: 256 halfword values 0x0000..0x00ff in ascending order, packed four per jlong (see the ASSERT loop in generate_load_trot_table_addr).
+ */
+
+jlong StubRoutines::zarch::_trot_table[TROT_COLUMN_SIZE] = {
+ 0x0000000100020003UL, 0x0004000500060007UL, 0x00080009000a000bUL, 0x000c000d000e000fUL,
+ 0x0010001100120013UL, 0x0014001500160017UL, 0x00180019001a001bUL, 0x001c001d001e001fUL,
+ 0x0020002100220023UL, 0x0024002500260027UL, 0x00280029002a002bUL, 0x002c002d002e002fUL,
+ 0x0030003100320033UL, 0x0034003500360037UL, 0x00380039003a003bUL, 0x003c003d003e003fUL,
+ 0x0040004100420043UL, 0x0044004500460047UL, 0x00480049004a004bUL, 0x004c004d004e004fUL,
+ 0x0050005100520053UL, 0x0054005500560057UL, 0x00580059005a005bUL, 0x005c005d005e005fUL,
+ 0x0060006100620063UL, 0x0064006500660067UL, 0x00680069006a006bUL, 0x006c006d006e006fUL,
+ 0x0070007100720073UL, 0x0074007500760077UL, 0x00780079007a007bUL, 0x007c007d007e007fUL,
+ 0x0080008100820083UL, 0x0084008500860087UL, 0x00880089008a008bUL, 0x008c008d008e008fUL,
+ 0x0090009100920093UL, 0x0094009500960097UL, 0x00980099009a009bUL, 0x009c009d009e009fUL,
+ 0x00a000a100a200a3UL, 0x00a400a500a600a7UL, 0x00a800a900aa00abUL, 0x00ac00ad00ae00afUL,
+ 0x00b000b100b200b3UL, 0x00b400b500b600b7UL, 0x00b800b900ba00bbUL, 0x00bc00bd00be00bfUL,
+ 0x00c000c100c200c3UL, 0x00c400c500c600c7UL, 0x00c800c900ca00cbUL, 0x00cc00cd00ce00cfUL,
+ 0x00d000d100d200d3UL, 0x00d400d500d600d7UL, 0x00d800d900da00dbUL, 0x00dc00dd00de00dfUL,
+ 0x00e000e100e200e3UL, 0x00e400e500e600e7UL, 0x00e800e900ea00ebUL, 0x00ec00ed00ee00efUL,
+ 0x00f000f100f200f3UL, 0x00f400f500f600f7UL, 0x00f800f900fa00fbUL, 0x00fc00fd00fe00ffUL
+ };
+
+
+// crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.h. The first table is always present; seven more follow when CRC32_BYFOUR is defined.
+juint StubRoutines::zarch::_crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE] = {
+ {
+ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+ 0x2d02ef8dUL
+#ifdef CRC32_BYFOUR
+ },
+ {
+ 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+ 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+ 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+ 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+ 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+ 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+ 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+ 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+ 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+ 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+ 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+ 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+ 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+ 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+ 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+ 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+ 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+ 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+ 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+ 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+ 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+ 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+ 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+ 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+ 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+ 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+ 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+ 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+ 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+ 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+ 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+ 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+ 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+ 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+ 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+ 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+ 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+ 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+ 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+ 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+ 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+ 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+ 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+ 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+ 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+ 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+ 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+ 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+ 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+ 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+ 0x9324fd72UL
+ },
+ {
+ 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+ 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+ 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+ 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+ 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+ 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+ 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+ 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+ 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+ 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+ 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+ 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+ 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+ 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+ 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+ 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+ 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+ 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+ 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+ 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+ 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+ 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+ 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+ 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+ 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+ 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+ 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+ 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+ 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+ 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+ 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+ 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+ 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+ 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+ 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+ 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+ 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+ 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+ 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+ 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+ 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+ 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+ 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+ 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+ 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+ 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+ 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+ 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+ 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+ 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+ 0xbe9834edUL
+ },
+ {
+ 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+ 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+ 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+ 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+ 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+ 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+ 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+ 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+ 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+ 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+ 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+ 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+ 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+ 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+ 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+ 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+ 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+ 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+ 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+ 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+ 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+ 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+ 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+ 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+ 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+ 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+ 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+ 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+ 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+ 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+ 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+ 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+ 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+ 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+ 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+ 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+ 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+ 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+ 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+ 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+ 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+ 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+ 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+ 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+ 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+ 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+ 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+ 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+ 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+ 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+ 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+ 0xde0506f1UL
+ },
+ {
+ 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
+ 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
+ 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
+ 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
+ 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
+ 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
+ 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
+ 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
+ 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
+ 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
+ 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
+ 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
+ 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
+ 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
+ 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
+ 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
+ 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
+ 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
+ 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
+ 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
+ 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
+ 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
+ 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
+ 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
+ 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
+ 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
+ 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
+ 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
+ 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
+ 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
+ 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
+ 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
+ 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
+ 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
+ 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
+ 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
+ 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
+ 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
+ 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
+ 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
+ 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
+ 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
+ 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
+ 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
+ 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
+ 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
+ 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
+ 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
+ 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
+ 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
+ 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
+ 0x8def022dUL
+ },
+ {
+ 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
+ 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
+ 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
+ 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
+ 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
+ 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
+ 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
+ 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
+ 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
+ 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
+ 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
+ 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
+ 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
+ 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
+ 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
+ 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
+ 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
+ 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
+ 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
+ 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
+ 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
+ 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
+ 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
+ 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
+ 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
+ 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
+ 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
+ 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
+ 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
+ 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
+ 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
+ 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
+ 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
+ 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
+ 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
+ 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
+ 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
+ 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
+ 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
+ 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
+ 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
+ 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
+ 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
+ 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
+ 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
+ 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
+ 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
+ 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
+ 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
+ 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
+ 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
+ 0x72fd2493UL
+ },
+ {
+ 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
+ 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
+ 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
+ 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
+ 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
+ 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
+ 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
+ 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
+ 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
+ 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
+ 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
+ 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
+ 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
+ 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
+ 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
+ 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
+ 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
+ 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
+ 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
+ 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
+ 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
+ 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
+ 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
+ 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
+ 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
+ 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
+ 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
+ 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
+ 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
+ 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
+ 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
+ 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
+ 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
+ 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
+ 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
+ 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
+ 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
+ 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
+ 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
+ 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
+ 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
+ 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
+ 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
+ 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
+ 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
+ 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
+ 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
+ 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
+ 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
+ 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
+ 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
+ 0xed3498beUL
+ },
+ {
+ 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
+ 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
+ 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
+ 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
+ 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
+ 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
+ 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
+ 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
+ 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
+ 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
+ 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
+ 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
+ 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
+ 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
+ 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
+ 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
+ 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
+ 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
+ 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
+ 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
+ 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
+ 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
+ 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
+ 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
+ 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
+ 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
+ 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
+ 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
+ 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
+ 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
+ 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
+ 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
+ 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
+ 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
+ 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
+ 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
+ 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
+ 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
+ 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
+ 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
+ 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
+ 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
+ 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
+ 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
+ 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
+ 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
+ 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
+ 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
+ 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
+ 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
+ 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
+ 0xf10605deUL
+#endif
+ }
+};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/stubRoutines_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP
+#define CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP
+
+// This file holds the platform specific parts of the StubRoutines
+// definition. See stubRoutines.hpp for a description on how to extend it.
+
+static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } // True iff return_pc equals _call_stub_return_address.
+
+enum { // Platform dependent constants: code size limits (unit presumably bytes -- TODO confirm).
+ // TODO: May be able to shrink this a lot
+ code_size1 = 20000, // Simply increase if too small (assembler will crash if too small).
+ code_size2 = 20000 // Simply increase if too small (assembler will crash if too small).
+};
+
+// MethodHandles adapters: platform dependent code size (unit presumably bytes -- TODO confirm).
+enum method_handles_platform_dependent_constants {
+ method_handles_adapters_code_size = 5000
+};
+
+#define CRC32_COLUMN_SIZE 256 // Second dimension of zarch::_crc_table.
+#define CRC32_BYFOUR // Selects CRC32_TABLES == 8 below.
+#ifdef CRC32_BYFOUR
+ #define CRC32_TABLES 8
+#else
+ #define CRC32_TABLES 1
+#endif
+
+// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+#define TROT_ALIGNMENT 8 // Required by instruction,
+ // guaranteed by jlong table element type.
+#define TROT_COLUMN_SIZE (256*sizeof(jchar)/sizeof(jlong)) // Number of jlong elements needed to hold 256 jchar entries.
+
+class zarch { // s390 platform state for stub routines: entry addresses, a lock word, CRC32 and TROT tables.
+ friend class StubGenerator;
+
+ public:
+ enum { nof_instance_allocators = 10 };
+
+ // allocator lock values
+ enum {
+ unlocked = 0,
+ locked = 1
+ };
+
+ private:
+ static address _handler_for_unsafe_access_entry;
+
+ static int _atomic_memory_operation_lock;
+
+ static address _partial_subtype_check;
+ static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
+
+ // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+ static address _trot_table_addr;
+ static jlong _trot_table[TROT_COLUMN_SIZE];
+
+ public:
+ // Global lock for everyone who needs to use atomic_compare_and_exchange
+ // or atomic_increment -- should probably use more locks for more
+ // scalability -- for instance one for each eden space or group of.
+
+ // Address of the lock for atomic_compare_and_exchange.
+ static int* atomic_memory_operation_lock_addr() { return &_atomic_memory_operation_lock; }
+
+ // Accessor and mutator for _atomic_memory_operation_lock.
+ static int atomic_memory_operation_lock() { return _atomic_memory_operation_lock; }
+ static void set_atomic_memory_operation_lock(int value) { _atomic_memory_operation_lock = value; }
+
+ static address handler_for_unsafe_access_entry() { return _handler_for_unsafe_access_entry; }
+
+ static address partial_subtype_check() { return _partial_subtype_check; }
+
+ static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
+
+ // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+ static void generate_load_trot_table_addr(MacroAssembler* masm, Register table);
+};
+
+#endif // CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/templateInterpreterGenerator_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,2398 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/abstractInterpreter.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "interpreter/templateTable.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+
+
+// Size of interpreter code. Increase if too small. If it is too small,
+// the interpreter will fail with a guarantee
+// ("not enough space for interpreter generation").
+// Run with +PrintInterpreter to get the VM to print out the size.
+// Max size with JVMTI
+int TemplateInterpreter::InterpreterCodeSize = 320*K;
+
+#undef __
+#ifdef PRODUCT
+ #define __ _masm->
+#else
+ #define __ _masm-> // Same expansion as in PRODUCT; the verbose variant below is disabled.
+// #define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#endif
+
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":") // Two statements: do not use as the body of an unbraced if/else.
+
+#define oop_tmp_offset _z_ijava_state_neg(oop_tmp) // Used below as displacement from Z_fp to the oop_tmp slot.
+
+//-----------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_slow_signature_handler() {
+ //
+ // New slow_signature handler that respects the z/Architecture
+ // C calling conventions.
+ //
+ // We get called by the native entry code with our output register
+ // area == 8. First we call InterpreterRuntime::get_signature to get
+ // the pointer to the signature string (kept in the d_signature spill
+ // slot) and InterpreterRuntime::get_result_handler to get the
+ // result_handler in Z_RET (== Z_ARG1, the first C-argument). Since
+ // native_entry will copy the jni-pointer to the first C-argument
+ // slot later on, it's OK to occupy it temporarily. Then we copy the
+ // argument list on the java expression stack into native varargs
+ // format on the native stack and load arguments into argument registers.
+ // Integer arguments in the varargs vector will be sign-extended to 8 bytes.
+ //
+ // On entry:
+ // Z_ARG1 - intptr_t* Address of java argument list in memory.
+ // Z_state - cppInterpreter* Address of interpreter state for
+ // this method (NOTE(review): not referenced in the code below -- confirm).
+ // Z_method
+ //
+ // On exit (just before return instruction):
+ // Z_RET contains the address of the result_handler.
+ // Z_ARG2 is not updated for static methods and contains "this" otherwise.
+ // Z_ARG3-Z_ARG5 contain the first 3 arguments of types other than float and double.
+ // Z_FARG1-Z_FARG4 contain the first 4 arguments of type float or double.
+
+ const int LogSizeOfCase = 3; // Each case in the jump tables below occupies 1 << 3 == 8 bytes.
+
+ const int max_fp_register_arguments = Argument::n_float_register_parameters;
+ const int max_int_register_arguments = Argument::n_register_parameters - 2; // First 2 are reserved.
+
+ const Register arg_java = Z_tmp_2;
+ const Register arg_c = Z_tmp_3;
+ const Register signature = Z_R1_scratch; // Is a string.
+ const Register fpcnt = Z_R0_scratch;
+ const Register argcnt = Z_tmp_4;
+ const Register intSlot = Z_tmp_1;
+ const Register sig_end = Z_tmp_1; // Assumed end of signature (only used in do_object).
+ const Register target_sp = Z_tmp_1;
+ const FloatRegister floatSlot = Z_F1;
+
+ const int d_signature = _z_abi(gpr6); // Only spill space, register contents not affected.
+ const int d_fpcnt = _z_abi(gpr7); // Only spill space, register contents not affected.
+
+ unsigned int entry_offset = __ offset();
+
+ BLOCK_COMMENT("slow_signature_handler {");
+
+ // We use target_sp for storing arguments in the C frame.
+ __ save_return_pc();
+
+ __ z_stmg(Z_R10,Z_R13,-32,Z_SP);
+ __ push_frame_abi160(32);
+
+ __ z_lgr(arg_java, Z_ARG1);
+
+ Register method = Z_ARG2; // Directly load into correct argument register.
+
+ __ get_method(method);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_signature), Z_thread, method);
+
+ // Move signature to callee saved register.
+ // Don't directly write to stack. Frame is used by VM call.
+ __ z_lgr(Z_tmp_1, Z_RET);
+
+ // Reload method. Register may have been altered by VM call.
+ __ get_method(method);
+
+ // Get address of result handler.
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_result_handler), Z_thread, method);
+
+ // Save signature address to stack.
+ __ z_stg(Z_tmp_1, d_signature, Z_SP);
+
+ // Don't overwrite return value (Z_RET, Z_ARG1) in rest of the method !
+
+ {
+ Label isStatic;
+
+ // Test if static.
+ // We can test the bit directly.
+ // Path is Z_method->_access_flags._flags.
+ // We only support flag bits in the least significant byte (assert !).
+ // Therefore add 3 to address that byte within "_flags".
+ // Reload method. VM call above may have destroyed register contents
+ __ get_method(method);
+ __ testbit(method2_(method, access_flags), JVM_ACC_STATIC_BIT);
+ method = noreg; // end of life
+ __ z_btrue(isStatic);
+
+ // For non-static functions, pass "this" in Z_ARG2 and copy it to 2nd C-arg slot.
+ // Need to box the Java object here, so we use arg_java
+ // (address of current Java stack slot) as argument and
+ // don't dereference it as in case of ints, floats, etc..
+ __ z_lgr(Z_ARG2, arg_java);
+ __ add2reg(arg_java, -BytesPerWord);
+ __ bind(isStatic);
+ }
+
+ // argcnt == 0 corresponds to 3rd C argument.
+ // arg #1 (result handler) and
+ // arg #2 (this, for non-statics), unused else
+ // are reserved and pre-filled above.
+ // arg_java points to the corresponding Java argument here. It
+ // has been decremented by one argument (this) in case of non-static.
+ __ clear_reg(argcnt, true, false); // Don't set CC.
+ __ z_lg(target_sp, 0, Z_SP);
+ __ add2reg(arg_c, _z_abi(remaining_cargs), target_sp);
+ // No floating-point args parsed so far.
+ __ clear_mem(Address(Z_SP, d_fpcnt), 8);
+
+ NearLabel move_intSlot_to_ARG, move_floatSlot_to_FARG;
+ NearLabel loop_start, loop_start_restore, loop_end;
+ NearLabel do_int, do_long, do_float, do_double;
+ NearLabel do_dontreachhere, do_object, do_array, do_boxed;
+
+#ifdef ASSERT
+ // Signature needs to point to '(' (== 0x28) at entry.
+ __ z_lg(signature, d_signature, Z_SP);
+ __ z_cli(0, signature, (int) '(');
+ __ z_brne(do_dontreachhere);
+#endif
+
+ __ bind(loop_start_restore);
+ __ z_lg(signature, d_signature, Z_SP); // Restore signature ptr, destroyed by move_XX_to_ARG.
+
+ BIND(loop_start);
+ // Advance to next argument type token from the signature.
+ __ add2reg(signature, 1);
+
+ // Use CLI, works well on all CPU versions.
+ __ z_cli(0, signature, (int) ')');
+ __ z_bre(loop_end); // end of signature
+ __ z_cli(0, signature, (int) 'L');
+ __ z_bre(do_object); // object #9
+ __ z_cli(0, signature, (int) 'F');
+ __ z_bre(do_float); // float #7
+ __ z_cli(0, signature, (int) 'J');
+ __ z_bre(do_long); // long #6
+ __ z_cli(0, signature, (int) 'B');
+ __ z_bre(do_int); // byte #1
+ __ z_cli(0, signature, (int) 'Z');
+ __ z_bre(do_int); // boolean #2
+ __ z_cli(0, signature, (int) 'C');
+ __ z_bre(do_int); // char #3
+ __ z_cli(0, signature, (int) 'S');
+ __ z_bre(do_int); // short #4
+ __ z_cli(0, signature, (int) 'I');
+ __ z_bre(do_int); // int #5
+ __ z_cli(0, signature, (int) 'D');
+ __ z_bre(do_double); // double #8
+ __ z_cli(0, signature, (int) '[');
+ __ z_bre(do_array); // array #10
+
+ __ bind(do_dontreachhere);
+
+ __ unimplemented("ShouldNotReachHere in slow_signature_handler", 120);
+
+ // Array argument
+ BIND(do_array);
+
+ {
+ Label start_skip, end_skip;
+
+ __ bind(start_skip);
+
+ // Advance to next type tag from signature.
+ __ add2reg(signature, 1);
+
+ // Use CLI, works well on all CPU versions.
+ __ z_cli(0, signature, (int) '[');
+ __ z_bre(start_skip); // Skip further brackets.
+
+ __ z_cli(0, signature, (int) '9');
+ __ z_brh(end_skip); // no optional size
+
+ __ z_cli(0, signature, (int) '0');
+ __ z_brnl(start_skip); // Skip optional size.
+
+ __ bind(end_skip);
+
+ __ z_cli(0, signature, (int) 'L');
+ __ z_brne(do_boxed); // If not array of objects: go directly to do_boxed.
+ }
+
+ // OOP argument
+ BIND(do_object);
+ // Skip the object's type name (up to the terminating ';').
+ {
+ Label L;
+
+ __ add2reg(sig_end, 4095, signature); // Assume object type name is shorter than 4k.
+ __ load_const_optimized(Z_R0, (int) ';'); // Type name terminator (must be in Z_R0!).
+ __ MacroAssembler::search_string(sig_end, signature);
+ __ z_brl(L);
+ __ z_illtrap(); // No semicolon found: internal error or object name too long.
+ __ bind(L);
+ __ z_lgr(signature, sig_end);
+ // fallthru to do_boxed
+ }
+
+ // Need to box the Java object here, so we use arg_java
+ // (address of current Java stack slot) as argument and
+ // don't dereference it as in case of ints, floats, etc..
+
+ // BOXED argument
+ // Load reference and check for NULL: pass NULL as is, otherwise pass the address of the Java stack slot.
+ Label do_int_Entry4Boxed;
+ __ bind(do_boxed);
+ {
+ __ load_and_test_long(intSlot, Address(arg_java));
+ __ z_bre(do_int_Entry4Boxed);
+ __ z_lgr(intSlot, arg_java);
+ __ z_bru(do_int_Entry4Boxed);
+ }
+
+ // INT argument
+
+ // (also for byte, boolean, char, short)
+ // Use lgf for load (sign-extend) and stg for store.
+ BIND(do_int);
+ __ z_lgf(intSlot, 0, arg_java);
+
+ __ bind(do_int_Entry4Boxed);
+ __ add2reg(arg_java, -BytesPerWord);
+ // If argument fits into argument register, go and handle it, otherwise continue.
+ __ compare32_and_branch(argcnt, max_int_register_arguments,
+ Assembler::bcondLow, move_intSlot_to_ARG);
+ __ z_stg(intSlot, 0, arg_c);
+ __ add2reg(arg_c, BytesPerWord);
+ __ z_bru(loop_start);
+
+ // LONG argument
+
+ BIND(do_long);
+ __ add2reg(arg_java, -2*BytesPerWord); // Decrement first to have positive displacement for lg.
+ __ z_lg(intSlot, BytesPerWord, arg_java);
+ // If argument fits into argument register, go and handle it, otherwise continue.
+ __ compare32_and_branch(argcnt, max_int_register_arguments,
+ Assembler::bcondLow, move_intSlot_to_ARG);
+ __ z_stg(intSlot, 0, arg_c);
+ __ add2reg(arg_c, BytesPerWord);
+ __ z_bru(loop_start);
+
+ // FLOAT argument
+
+ BIND(do_float);
+ __ z_le(floatSlot, 0, arg_java);
+ __ add2reg(arg_java, -BytesPerWord);
+ assert(max_fp_register_arguments <= 255, "always true"); // safety net
+ __ z_cli(d_fpcnt+7, Z_SP, max_fp_register_arguments); // Compares the last byte of the 8-byte fp counter slot.
+ __ z_brl(move_floatSlot_to_FARG);
+ __ z_ste(floatSlot, 4, arg_c);
+ __ add2reg(arg_c, BytesPerWord);
+ __ z_bru(loop_start);
+
+ // DOUBLE argument
+
+ BIND(do_double);
+ __ add2reg(arg_java, -2*BytesPerWord); // Decrement first to have positive displacement for ld.
+ __ z_ld(floatSlot, BytesPerWord, arg_java);
+ assert(max_fp_register_arguments <= 255, "always true"); // safety net
+ __ z_cli(d_fpcnt+7, Z_SP, max_fp_register_arguments); // Compares the last byte of the 8-byte fp counter slot.
+ __ z_brl(move_floatSlot_to_FARG);
+ __ z_std(floatSlot, 0, arg_c);
+ __ add2reg(arg_c, BytesPerWord);
+ __ z_bru(loop_start);
+
+ // Method exit, all arguments processed.
+ __ bind(loop_end);
+ __ pop_frame();
+ __ restore_return_pc();
+ __ z_lmg(Z_R10,Z_R13,-32,Z_SP);
+ __ z_br(Z_R14);
+
+ // Copy int arguments.
+
+ Label iarg_caselist; // Distance between each case has to be a power of 2
+ // (= 1 << LogSizeOfCase).
+ __ align(16);
+ BIND(iarg_caselist);
+ __ z_lgr(Z_ARG3, intSlot); // 4 bytes
+ __ z_bru(loop_start_restore); // 4 bytes
+
+ __ z_lgr(Z_ARG4, intSlot);
+ __ z_bru(loop_start_restore);
+
+ __ z_lgr(Z_ARG5, intSlot);
+ __ z_bru(loop_start_restore);
+
+ __ align(16);
+ __ bind(move_intSlot_to_ARG);
+ __ z_stg(signature, d_signature, Z_SP); // Spill since signature == Z_R1_scratch.
+ __ z_larl(Z_R1_scratch, iarg_caselist);
+ __ z_sllg(Z_R0_scratch, argcnt, LogSizeOfCase);
+ __ add2reg(argcnt, 1);
+ __ z_agr(Z_R1_scratch, Z_R0_scratch);
+ __ z_bcr(Assembler::bcondAlways, Z_R1_scratch);
+
+ // Copy float arguments.
+
+ Label farg_caselist; // Distance between each case has to be a power of 2
+ // (= 1 << LogSizeOfCase), padded with nop.
+ __ align(16);
+ BIND(farg_caselist);
+ __ z_ldr(Z_FARG1, floatSlot); // 2 bytes
+ __ z_bru(loop_start_restore); // 4 bytes
+ __ z_nop(); // 2 bytes
+
+ __ z_ldr(Z_FARG2, floatSlot);
+ __ z_bru(loop_start_restore);
+ __ z_nop();
+
+ __ z_ldr(Z_FARG3, floatSlot);
+ __ z_bru(loop_start_restore);
+ __ z_nop();
+
+ __ z_ldr(Z_FARG4, floatSlot);
+ __ z_bru(loop_start_restore);
+ __ z_nop();
+
+ __ align(16);
+ __ bind(move_floatSlot_to_FARG);
+ __ z_stg(signature, d_signature, Z_SP); // Spill since signature == Z_R1_scratch.
+ __ z_lg(Z_R0_scratch, d_fpcnt, Z_SP); // Need old value for indexing.
+ __ add2mem_64(Address(Z_SP, d_fpcnt), 1, Z_R1_scratch); // Increment index.
+ __ z_larl(Z_R1_scratch, farg_caselist);
+ __ z_sllg(Z_R0_scratch, Z_R0_scratch, LogSizeOfCase);
+ __ z_agr(Z_R1_scratch, Z_R0_scratch);
+ __ z_bcr(Assembler::bcondAlways, Z_R1_scratch);
+
+ BLOCK_COMMENT("} slow_signature_handler");
+
+ return __ addr_at(entry_offset);
+}
+
+address TemplateInterpreterGenerator::generate_result_handler_for (BasicType type) { // Emits a per-type stub that adjusts the result in Z_tos and returns via Z_R14.
+ address entry = __ pc();
+
+ assert(Z_tos == Z_RET, "Result handler: must move result!");
+ assert(Z_ftos == Z_FRET, "Result handler: must move float result!");
+
+ switch (type) {
+ case T_BOOLEAN:
+ __ c2bool(Z_tos); // Presumably normalizes a C boolean to 0/1 -- TODO confirm in c2bool.
+ break;
+ case T_CHAR:
+ __ and_imm(Z_tos, 0xffff); // Keep only the low 16 bits.
+ break;
+ case T_BYTE:
+ __ z_lbr(Z_tos, Z_tos); // Sign-extend from the low byte.
+ break;
+ case T_SHORT:
+ __ z_lhr(Z_tos, Z_tos); // Sign-extend from the low halfword.
+ break;
+ case T_INT:
+ case T_LONG:
+ case T_VOID:
+ case T_FLOAT:
+ case T_DOUBLE:
+ break; // No code emitted for these types.
+ case T_OBJECT:
+ // Retrieve result from frame...
+ __ mem2reg_opt(Z_tos, Address(Z_fp, oop_tmp_offset));
+ // and verify it.
+ __ verify_oop(Z_tos);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ __ z_br(Z_R14); // Return from result handler.
+ return entry;
+}
+
+// Abstract method entry.
+// Entered on an attempt to execute an abstract method: calls throw_AbstractMethodError, then forwards the exception.
+address TemplateInterpreterGenerator::generate_abstract_entry(void) {
+ unsigned int entry_offset = __ offset();
+
+ // Caller could be the call_stub or a compiled method (x86 version is wrong!).
+
+ BLOCK_COMMENT("abstract_entry {");
+
+ // Implement call of InterpreterRuntime::throw_AbstractMethodError.
+ __ set_top_ijava_frame_at_SP_as_last_Java_frame(Z_SP, Z_R1);
+ __ save_return_pc(); // Save Z_R14.
+ __ push_frame_abi160(0); // Without new frame the RT call could overwrite the saved Z_R14.
+
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError), Z_thread);
+
+ __ pop_frame();
+ __ restore_return_pc(); // Restore Z_R14.
+ __ reset_last_Java_frame();
+
+ // Restore caller sp for c2i case.
+ __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+ // Branch to StubRoutines::forward_exception_entry(), which handles all possible callers,
+ // i.e. call stub, compiled method, interpreted method.
+ __ load_absolute_address(Z_tmp_1, StubRoutines::forward_exception_entry());
+ __ z_br(Z_tmp_1);
+
+ BLOCK_COMMENT("} abstract_entry");
+
+ return __ addr_at(entry_offset);
+}
+
+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { // Returns NULL unless INCLUDE_ALL_GCS is set and UseG1GC is true.
+#if INCLUDE_ALL_GCS
+ if (UseG1GC) {
+ // Inputs:
+ // Z_ARG1 - receiver
+ //
+ // What we do:
+ // - Load the referent field address.
+ // - Load the value in the referent field.
+ // - Pass that value to the pre-barrier.
+ //
+ // In the case of G1 this will record the value of the
+ // referent in an SATB buffer if marking is active.
+ // This will cause concurrent marking to mark the referent
+ // field as live.
+
+ Register scratch1 = Z_tmp_2;
+ Register scratch2 = Z_tmp_3;
+ Register pre_val = Z_RET; // return value
+ // Z_esp is callers operand stack pointer, i.e. it points to the parameters.
+ Register Rargp = Z_esp;
+
+ Label slow_path;
+ address entry = __ pc();
+
+ const int referent_offset = java_lang_ref_Reference::referent_offset;
+ guarantee(referent_offset > 0, "referent offset not initialized");
+
+ BLOCK_COMMENT("Reference_get {");
+
+ // If the receiver is null then it is OK to jump to the slow path.
+ __ load_and_test_long(pre_val, Address(Rargp, Interpreter::stackElementSize)); // Get receiver.
+ __ z_bre(slow_path);
+
+ // Load the value of the referent field.
+ __ load_heap_oop(pre_val, referent_offset, pre_val);
+
+ // Restore caller sp for c2i case.
+ __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+ // Generate the G1 pre-barrier code to log the value of
+ // the referent field in an SATB buffer.
+ // Note:
+ // With these parameters the write_barrier_pre does not
+ // generate instructions to load the previous value.
+ __ g1_write_barrier_pre(noreg, // obj
+ noreg, // offset
+ pre_val, // pre_val
+ noreg, // no new val to preserve
+ scratch1, // tmp
+ scratch2, // tmp
+ true); // pre_val_needed
+
+ __ z_br(Z_R14); // Return to caller; the referent is in pre_val (== Z_RET).
+
+ // Branch to previously generated regular method entry.
+ __ bind(slow_path);
+
+ address meth_entry = Interpreter::entry_for_kind(Interpreter::zerolocals);
+ __ jump_to_entry(meth_entry, Z_R1);
+
+ BLOCK_COMMENT("} Reference_get");
+
+ return entry;
+ }
+#endif // INCLUDE_ALL_GCS
+
+ return NULL;
+}
+
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { // Emits code that calls InterpreterRuntime::throw_StackOverflowError.
+ address entry = __ pc();
+
+ DEBUG_ONLY(__ verify_esp(Z_esp, Z_ARG5));
+
+ // Restore bcp under the assumption that the current frame is still
+ // interpreted.
+ __ restore_bcp();
+
+ // Expression stack must be empty before entering the VM if an
+ // exception happened.
+ __ empty_expression_stack();
+ // Throw exception.
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+ return entry;
+}
+
+//
+// Args:
+// Z_ARG3: aberrant index
+//
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char * name) {
+ address entry = __ pc();
+ address excp = CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException);
+
+ // Expression stack must be empty before entering the VM if an
+ // exception happened.
+ __ empty_expression_stack();
+
+ // Setup parameters: name goes into Z_ARG2, the aberrant index is already in Z_ARG3.
+ // Possible improvement: leave out the name and use register for array to create more detailed exceptions.
+ __ load_absolute_address(Z_ARG2, (address) name);
+ __ call_VM(noreg, excp, Z_ARG2, Z_ARG3);
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ClassCastException_handler() { // Emits code that calls InterpreterRuntime::throw_ClassCastException with the object from TOS.
+ address entry = __ pc();
+
+ // Object is at TOS.
+ __ pop_ptr(Z_ARG2);
+
+ // Expression stack must be empty before entering the VM if an
+ // exception happened.
+ __ empty_expression_stack();
+
+ __ call_VM(Z_ARG1,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException),
+ Z_ARG2);
+
+ DEBUG_ONLY(__ should_not_reach_here();) // Debug builds trap if the VM call returns.
+
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) {
+ assert(!pass_oop || message == NULL, "either oop or message but not both");
+ address entry = __ pc(); // Start of the emitted handler; this is the returned entry point.
+
+ BLOCK_COMMENT("exception_handler_common {");
+
+ // Expression stack must be empty before entering the VM if an
+ // exception happened.
+ __ empty_expression_stack();
+ if (name != NULL) {
+ __ load_absolute_address(Z_ARG2, (address)name);
+ } else {
+ __ clear_reg(Z_ARG2, true, false); // No name: pass zero.
+ }
+
+ if (pass_oop) {
+ __ call_VM(Z_tos,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception),
+ Z_ARG2, Z_tos /*object (see TT::aastore())*/);
+ } else {
+ if (message != NULL) {
+ __ load_absolute_address(Z_ARG3, (address)message);
+ } else {
+ __ clear_reg(Z_ARG3, true, false); // No message: pass zero.
+ }
+ __ call_VM(Z_tos,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception),
+ Z_ARG2, Z_ARG3);
+ }
+ // Throw exception: the call_VM result was requested in Z_tos; continue at the throw_exception entry.
+ __ load_absolute_address(Z_R1_scratch, Interpreter::throw_exception_entry());
+ __ z_br(Z_R1_scratch);
+
+ BLOCK_COMMENT("} exception_handler_common");
+
+ return entry;
+}
+
+// Unused, should never be executed: the emitted code consists only of should_not_reach_here().
+address TemplateInterpreterGenerator::generate_continuation_for (TosState state) {
+ address entry = __ pc();
+ __ should_not_reach_here();
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_return_entry_for (TosState state, int step, size_t index_size) { // Emits: restore interpreter state, pop arguments, dispatch next bytecode.
+ address entry = __ pc();
+
+ BLOCK_COMMENT("return_entry {");
+
+ // Pop i2c extension or revert top-2-parent-resize done by interpreted callees.
+ Register sp_before_i2c_extension = Z_bcp;
+ __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
+ __ z_lg(sp_before_i2c_extension, Address(Z_fp, _z_ijava_state_neg(top_frame_sp)));
+ __ resize_frame_absolute(sp_before_i2c_extension, Z_locals/*tmp*/, true/*load_fp*/);
+
+ // TODO(ZASM): necessary??
+ // // and NULL it as marker that esp is now tos until next java call
+ // __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_esp();
+
+ if (state == atos) {
+ __ profile_return_type(Z_tmp_1, Z_tos, Z_tmp_2);
+ }
+
+ Register cache = Z_tmp_1;
+ Register size = Z_tmp_1; // Same register as cache: cache is last read by the z_llgc below.
+ Register offset = Z_tmp_2;
+ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::flags_offset());
+ __ get_cache_and_index_at_bcp(cache, offset, 1, index_size);
+
+ // #args is in rightmost byte of the _flags field.
+ __ z_llgc(size, Address(cache, offset, flags_offset+(sizeof(size_t)-1)));
+ __ z_sllg(size, size, Interpreter::logStackElementSize); // Each argument size in bytes.
+ __ z_agr(Z_esp, size); // Pop arguments.
+ __ dispatch_next(state, step);
+
+ BLOCK_COMMENT("} return_entry");
+
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_deopt_entry_for (TosState state,
+ int step) {
+ address entry = __ pc(); // Start of the emitted deopt entry; this is the returned address.
+
+ BLOCK_COMMENT("deopt_entry {");
+
+ // TODO(ZASM): necessary? NULL last_sp until next java call
+ // __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+ __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_esp();
+
+ // Handle exceptions: if thread_(pending_exception) is non-zero, call throw_pending_exception.
+ {
+ Label L;
+ __ load_and_test_long(Z_R0/*pending_exception*/, thread_(pending_exception));
+ __ z_bre(L);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_pending_exception));
+ __ should_not_reach_here();
+ __ bind(L);
+ }
+ __ dispatch_next(state, step);
+
+ BLOCK_COMMENT("} deopt_entry");
+
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_safept_entry_for (TosState state,
+ address runtime_entry) {
+ address entry = __ pc();
+ __ push(state); // Push the cached TOS value for this state before the VM call.
+ __ call_VM(noreg, runtime_entry);
+ __ dispatch_via(vtos, Interpreter::_normal_table.table_for (vtos)); // Continue via the normal table for vtos.
+ return entry;
+}
+
+//
+// Helpers for commoning out cases in the various type of method entries.
+//
+
+// Increment invocation count & check for overflow; the emitted code branches to *overflow when the check fires.
+//
+// Note: checking for negative value instead of overflow
+// so we have a 'sticky' overflow test.
+//
+// Z_ARG2: method (see generate_fixed_frame())
+//
+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) {
+ Label done;
+ Register method = Z_ARG2; // Generate_fixed_frame() copies Z_method into Z_ARG2.
+ Register m_counters = Z_ARG4;
+
+ BLOCK_COMMENT("counter_incr {");
+
+ // Note: In tiered we increment either counters in method or in MDO depending
+ // if we are profiling or not.
+ if (TieredCompilation) {
+ int increment = InvocationCounter::count_increment;
+ if (ProfileInterpreter) {
+ NearLabel no_mdo;
+ Register mdo = m_counters;
+ // Are we profiling?
+ __ load_and_test_long(mdo, method2_(method, method_data));
+ __ branch_optimized(Assembler::bcondZero, no_mdo);
+ // Increment counter in the MDO.
+ const Address mdo_invocation_counter(mdo, MethodData::invocation_counter_offset() +
+ InvocationCounter::counter_offset());
+ const Address mask(mdo, MethodData::invoke_mask_offset());
+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask,
+ Z_R1_scratch, false, Assembler::bcondZero,
+ overflow);
+ __ z_bru(done);
+ __ bind(no_mdo);
+ }
+
+ // Increment counter in MethodCounters.
+ const Address invocation_counter(m_counters,
+ MethodCounters::invocation_counter_offset() +
+ InvocationCounter::counter_offset());
+ // Get address of MethodCounters object (presumably branches to done if none is available -- TODO confirm).
+ __ get_method_counters(method, m_counters, done);
+ const Address mask(m_counters, MethodCounters::invoke_mask_offset());
+ __ increment_mask_and_jump(invocation_counter,
+ increment, mask,
+ Z_R1_scratch, false, Assembler::bcondZero,
+ overflow);
+ } else {
+ Register counter_sum = Z_ARG3; // The result of this piece of code.
+ Register tmp = Z_R1_scratch;
+#ifdef ASSERT
+ {
+ NearLabel ok;
+ __ get_method(tmp);
+ __ compare64_and_branch(method, tmp, Assembler::bcondEqual, ok);
+ __ z_illtrap(0x66); // Trap: Z_ARG2 does not hold the current method.
+ __ bind(ok);
+ }
+#endif
+
+ // Get address of MethodCounters object (presumably branches to done if none is available -- TODO confirm).
+ __ get_method_counters(method, m_counters, done);
+ // Update standard invocation counters.
+ __ increment_invocation_counter(m_counters, counter_sum);
+ if (ProfileInterpreter) {
+ __ add2mem_32(Address(m_counters, MethodCounters::interpreter_invocation_counter_offset()), 1, tmp);
+ if (profile_method != NULL) {
+ const Address profile_limit(m_counters, MethodCounters::interpreter_profile_limit_offset());
+ __ z_cl(counter_sum, profile_limit);
+ __ branch_optimized(Assembler::bcondLow, *profile_method_continue);
+ // If no method data exists, go to profile_method.
+ __ test_method_data_pointer(tmp, *profile_method);
+ }
+ }
+
+ const Address invocation_limit(m_counters, MethodCounters::interpreter_invocation_limit_offset());
+ __ z_cl(counter_sum, invocation_limit);
+ __ branch_optimized(Assembler::bcondNotLow, *overflow);
+ }
+
+ __ bind(done);
+
+ BLOCK_COMMENT("} counter_incr");
+}
+
+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {
+ // InterpreterRuntime::frequency_counter_overflow takes two
+ // arguments, the first (thread) is passed by call_VM, the second
+ // indicates if the counter overflow occurs at a backwards branch
+ // (NULL bcp). We pass zero for it. The call returns the address
+ // of the verified entry point for the method or NULL if the
+ // compilation did not complete (either went background or bailed
+ // out).
+ __ clear_reg(Z_ARG2); // Second argument: zero.
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow),
+ Z_ARG2);
+ __ z_bru(do_continue); // The call_VM result register is noreg, so no result is kept; continue at do_continue.
+}
+
+void TemplateInterpreterGenerator::generate_stack_overflow_check(Register frame_size, Register tmp1) {
+  // Check that a frame of frame_size bytes fits on the stack; if not, jump to the shared
+  // StackOverflowError stub. frame_size is only read; tmp1 and Z_R1_scratch are clobbered.
+  Register tmp2 = Z_R1_scratch;
+  const int page_size = os::vm_page_size();
+  NearLabel after_frame_check;
+
+  BLOCK_COMMENT("stack_overflow_check {");
+
+  // tmp2 is written below while frame_size and tmp1 are still live, so it must not alias them.
+  assert_different_registers(frame_size, tmp1, tmp2);
+
+  // Stack banging is sufficient overflow check if frame_size < page_size.
+  if (Immediate::is_uimm(page_size, 15)) {
+    __ z_chi(frame_size, page_size);
+    __ z_brl(after_frame_check);
+  } else {
+    __ load_const_optimized(tmp1, page_size);
+    __ compareU32_and_branch(frame_size, tmp1, Assembler::bcondLow, after_frame_check);
+  }
+
+  // Get the stack base, and in debug, verify it is non-zero.
+  __ z_lg(tmp1, thread_(stack_base));
+#ifdef ASSERT
+  address reentry = NULL;
+  NearLabel base_not_zero;
+  __ compareU64_and_branch(tmp1, (intptr_t)0L, Assembler::bcondNotEqual, base_not_zero);
+  reentry = __ stop_chain_static(reentry, "stack base is zero in generate_stack_overflow_check");
+  __ bind(base_not_zero);
+#endif
+
+  // Get the stack size, and in debug, verify it is non-zero.
+  assert(sizeof(size_t) == sizeof(intptr_t), "wrong load size");
+  __ z_lg(tmp2, thread_(stack_size));
+#ifdef ASSERT
+  NearLabel size_not_zero;
+  __ compareU64_and_branch(tmp2, (intptr_t)0L, Assembler::bcondNotEqual, size_not_zero);
+  reentry = __ stop_chain_static(reentry, "stack size is zero in generate_stack_overflow_check");
+  __ bind(size_not_zero);
+#endif
+
+  // Compute the beginning of the protected zone plus the requested frame size:
+  //   tmp1 = stack_base - stack_size + stack_guard_zone_size() + frame_size.
+  // Adding frame_size here is the same as subtracting it from the SP, which
+  // would take another register.
+  __ z_sgr(tmp1, tmp2);
+  __ add2reg(tmp1, JavaThread::stack_guard_zone_size());
+  __ z_agr(tmp1, frame_size);
+
+  // The frame is not smaller than one page, so check against the bottom of the stack.
+  __ compareU64_and_branch(Z_SP, tmp1, Assembler::bcondHigh, after_frame_check);
+
+  // The stack will overflow, throw an exception.
+  // Restore SP to sender's sp. This is necessary if the sender's frame is an
+  // extended compiled frame (see gen_c2i_adapter()) and safer anyway in case of
+  // JSR292 adaptations.
+  __ resize_frame_absolute(Z_R10, tmp1, true/*load_fp*/);
+
+  // Note also that the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ load_absolute_address(tmp1, StubRoutines::throw_StackOverflowError_entry());
+  __ z_br(tmp1);
+
+  // If you get to here, then there is enough stack space.
+  __ bind(after_frame_check);
+
+  BLOCK_COMMENT("} stack_overflow_check");
+}
+
+// Allocate a monitor element and lock the method's synchronization object
+// (asm interpreter): the receiver, or the java mirror for a static method.
+// Args:
+//   Z_locals: locals (local 0 is read as the receiver)
+
+void TemplateInterpreterGenerator::lock_method(void) {
+
+  BLOCK_COMMENT("lock_method {");
+
+  // Fetch the method so that its access flags can be inspected.
+  const Register Rmethod = Z_tmp_2;
+  __ get_method(Rmethod);
+
+#ifdef ASSERT
+  address reentry = NULL;
+  {
+    Label is_synchronized;
+    __ testbit(method2_(Rmethod, access_flags), JVM_ACC_SYNCHRONIZED_BIT);
+    __ z_btrue(is_synchronized);
+    reentry = __ stop_chain_static(reentry, "method doesn't need synchronization");
+    __ bind(is_synchronized);
+  }
+#endif // ASSERT
+
+  // The object to lock ends up in the register that held the method.
+  const Register Rsync_obj = Z_tmp_2;
+
+  {
+    Label have_sync_obj;
+    Label is_static;
+
+    __ testbit(method2_(Rmethod, access_flags), JVM_ACC_STATIC_BIT);
+    __ z_btrue(is_static);
+
+    // Instance method: the receiver sits in local 0.
+    __ mem2reg_opt(Rsync_obj, Address(Z_locals, Interpreter::local_offset_in_bytes(0)));
+    __ z_bru(have_sync_obj);
+
+    __ bind(is_static);
+
+    // Static method: the java mirror serves as the lock.
+    __ load_mirror(Rsync_obj, Rmethod);
+#ifdef ASSERT
+    {
+      NearLabel mirror_not_null;
+      __ compare64_and_branch(Rsync_obj, (intptr_t) 0, Assembler::bcondNotEqual, mirror_not_null);
+      reentry = __ stop_chain_static(reentry, "synchronization object is NULL");
+      __ bind(mirror_not_null);
+    }
+#endif // ASSERT
+
+    __ bind(have_sync_obj);
+  }
+
+  __ add_monitor_to_stack(true, Z_ARG3, Z_ARG4, Z_ARG5); // Make room for one monitor element.
+  const Register Rmonitor = Z_tmp_1;                     // Record the object in it, then lock.
+  __ get_monitors(Rmonitor);
+  __ reg2mem_opt(Rsync_obj, Address(Rmonitor, BasicObjectLock::obj_offset_in_bytes()));
+  __ lock_object(Rmonitor, Rsync_obj);
+
+  BLOCK_COMMENT("} lock_method");
+}
+
+// Generate a fixed interpreter frame. The setup is identical for
+// interpreted methods and for native methods, hence the shared code.
+//
+// Registers alive
+// Z_thread - JavaThread*
+// Z_SP - old stack pointer
+// Z_method - callee's method
+// Z_esp - parameter list (slot 'above' last param)
+// Z_R14 - return pc, to be stored in caller's frame
+// Z_R10 - sender sp, note: Z_tmp_1 is Z_R10!
+//
+// Registers updated
+// Z_SP - new stack pointer
+// Z_esp - callee's operand stack pointer
+// points to the slot above the value on top
+// Z_locals - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord)
+// Z_bcp - the bytecode pointer
+// Z_fp - the frame pointer, thereby killing Z_method
+// Z_ARG2 - copy of Z_method
+// native_call - selects the native-method variant of the max_stack, mdx, bcp and locals handling below.
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+
+ // stack layout
+ //
+ // F1 [TOP_IJAVA_FRAME_ABI] <-- Z_SP, Z_R10 (see note below)
+ // [F1's operand stack (unused)]
+ // [F1's outgoing Java arguments] <-- Z_esp
+ // [F1's operand stack (non args)]
+ // [monitors] (optional)
+ // [IJAVA_STATE]
+ //
+ // F2 [PARENT_IJAVA_FRAME_ABI]
+ // ...
+ //
+ // 0x000
+ //
+ // Note: Z_R10, the sender sp, will be below Z_SP if F1 was extended by a c2i adapter.
+
+ //=============================================================================
+ // Allocate space for locals other than the parameters, the
+ // interpreter state, monitors, and the expression stack.
+
+ const Register local_count = Z_ARG5;
+ const Register fp = Z_tmp_2;
+
+ BLOCK_COMMENT("generate_fixed_frame {");
+
+ {
+ // local registers
+ const Register top_frame_size = Z_ARG2;
+ const Register sp_after_resize = Z_ARG3;
+ const Register max_stack = Z_ARG4;
+
+ // local_count = method->constMethod->max_locals();
+ __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+ __ z_llgh(local_count, Address(Z_R1_scratch, ConstMethod::size_of_locals_offset()));
+
+ if (native_call) {
+ // If we're calling a native method, we replace max_stack (which is
+ // zero) with space for the worst-case signature handler varargs
+ // vector, which is:
+ // max_stack = max(Argument::n_register_parameters, parameter_count+2);
+ //
+ // We add two slots to the parameter_count, one for the jni
+ // environment and one for a possible native mirror. We allocate
+ // space for at least the number of ABI registers, even though
+ // InterpreterRuntime::slow_signature_handler won't write more than
+ // parameter_count+2 words when it creates the varargs vector at the
+ // top of the stack. The generated slow signature handler will just
+ // load trash into registers beyond the necessary number. We're
+ // still going to cut the stack back by the ABI register parameter
+ // count so as to get SP+16 pointing at the ABI outgoing parameter
+ // area, so we need to allocate at least that much even though we're
+ // going to throw it away.
+ //
+
+ __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+ __ z_llgh(max_stack, Address(Z_R1_scratch, ConstMethod::size_of_parameters_offset()));
+ __ add2reg(max_stack, 2);
+
+ NearLabel passing_args_on_stack;
+
+ // max_stack in bytes
+ __ z_sllg(max_stack, max_stack, LogBytesPerWord);
+
+ int argument_registers_in_bytes = Argument::n_register_parameters << LogBytesPerWord;
+ __ compare64_and_branch(max_stack, argument_registers_in_bytes, Assembler::bcondNotLow, passing_args_on_stack);
+
+ __ load_const_optimized(max_stack, argument_registers_in_bytes);
+
+ __ bind(passing_args_on_stack);
+ } else {
+ // !native_call
+ __ z_lg(max_stack, method_(const)); // max_stack temporarily holds the ConstMethod*.
+
+ // Calculate number of non-parameter locals (in slots):
+ __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+ __ z_sh(local_count, Address(Z_R1_scratch, ConstMethod::size_of_parameters_offset()));
+
+ // max_stack = method->max_stack();
+ __ z_llgh(max_stack, Address(max_stack, ConstMethod::max_stack_offset()));
+ // max_stack in bytes
+ __ z_sllg(max_stack, max_stack, LogBytesPerWord);
+ }
+
+ // Resize (i.e. normally shrink) the top frame F1 ...
+ // F1 [TOP_IJAVA_FRAME_ABI] <-- Z_SP, Z_R10
+ // F1's operand stack (free)
+ // ...
+ // F1's operand stack (free) <-- Z_esp
+ // F1's outgoing Java arg m
+ // ...
+ // F1's outgoing Java arg 0
+ // ...
+ //
+ // ... into a parent frame (Z_R10 holds F1's SP before any modification, see also above)
+ //
+ // +......................+
+ // : : <-- Z_R10, saved below as F0's z_ijava_state.sender_sp
+ // : :
+ // F1 [PARENT_IJAVA_FRAME_ABI] <-- Z_SP \
+ // F0's non arg local | = delta
+ // ... |
+ // F0's non arg local <-- Z_esp /
+ // F1's outgoing Java arg m
+ // ...
+ // F1's outgoing Java arg 0
+ // ...
+ //
+ // then push the new top frame F0.
+ //
+ // F0 [TOP_IJAVA_FRAME_ABI] = frame::z_top_ijava_frame_abi_size \
+ // [operand stack] = max_stack | = top_frame_size
+ // [IJAVA_STATE] = frame::z_ijava_state_size /
+
+ // sp_after_resize = Z_esp - delta
+ //
+ // delta = PARENT_IJAVA_FRAME_ABI + (locals_count - params_count)
+
+ __ add2reg(sp_after_resize, (Interpreter::stackElementSize) - (frame::z_parent_ijava_frame_abi_size), Z_esp);
+ __ z_sllg(Z_R0_scratch, local_count, LogBytesPerWord); // Params have already been subtracted from local_count.
+ __ z_slgr(sp_after_resize, Z_R0_scratch);
+
+ // top_frame_size = TOP_IJAVA_FRAME_ABI + max_stack + size of interpreter state + interpreter_frame_monitor_size() words
+ __ add2reg(top_frame_size,
+ frame::z_top_ijava_frame_abi_size +
+ frame::z_ijava_state_size +
+ frame::interpreter_frame_monitor_size() * wordSize,
+ max_stack);
+
+ // Check if there's room for the new frame: frame_size = (Z_SP - sp_after_resize) + top_frame_size.
+ Register frame_size = max_stack; // Reuse the register for max_stack.
+ __ z_lgr(frame_size, Z_SP);
+ __ z_sgr(frame_size, sp_after_resize);
+ __ z_agr(frame_size, top_frame_size);
+ generate_stack_overflow_check(frame_size, fp/*tmp1*/);
+
+ DEBUG_ONLY(__ z_cg(Z_R14, _z_abi16(return_pc), Z_SP)); // Debug: compare Z_R14 with the return pc slot at Z_SP.
+ __ asm_assert_eq("killed Z_R14", 0);
+ __ resize_frame_absolute(sp_after_resize, fp, true);
+ __ save_return_pc(Z_R14);
+
+ // ... and push the new frame F0.
+ __ push_frame(top_frame_size, fp, true /*copy_sp*/, false);
+ }
+
+ //=============================================================================
+ // Initialize the new frame F0: initialize interpreter state.
+
+ {
+ // locals
+ const Register local_addr = Z_ARG4;
+
+ BLOCK_COMMENT("generate_fixed_frame: initialize interpreter state {");
+
+#ifdef ASSERT
+ // Set the magic number (using local_addr as tmp register).
+ __ load_const_optimized(local_addr, frame::z_istate_magic_number);
+ __ z_stg(local_addr, _z_ijava_state_neg(magic), fp);
+#endif
+
+ // Save sender SP from F1 (i.e. before it was potentially modified by an
+ // adapter) into F0's interpreter state. We use it as well to revert
+ // resizing the frame above.
+ __ z_stg(Z_R10, _z_ijava_state_neg(sender_sp), fp);
+
+ // Load cp cache and save it at the end of this block.
+ __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+ __ z_lg(Z_R1_scratch, Address(Z_R1_scratch, ConstMethod::constants_offset()));
+ __ z_lg(Z_R1_scratch, Address(Z_R1_scratch, ConstantPool::cache_offset_in_bytes()));
+
+ // z_ijava_state->method = method;
+ __ z_stg(Z_method, _z_ijava_state_neg(method), fp);
+
+ // Point locals at the first argument. Method's locals are the
+ // parameters on top of caller's expression stack.
+ // Tos points past last Java argument.
+
+ __ z_lg(Z_locals, Address(Z_method, Method::const_offset()));
+ __ z_llgh(Z_locals /*parameter_count words*/,
+ Address(Z_locals, ConstMethod::size_of_parameters_offset()));
+ __ z_sllg(Z_locals /*parameter_count bytes*/, Z_locals /*parameter_count*/, LogBytesPerWord);
+ __ z_agr(Z_locals, Z_esp);
+ // z_ijava_state->locals - i*BytesPerWord points to i-th Java local (i starts at 0)
+ // z_ijava_state->locals = Z_esp + parameter_count bytes
+ __ z_stg(Z_locals, _z_ijava_state_neg(locals), fp);
+
+ // z_ijava_state->oop_temp = NULL;
+ __ store_const(Address(fp, oop_tmp_offset), 0);
+
+ // Initialize z_ijava_state->mdx.
+ Register Rmdp = Z_bcp;
+ // native_call: assert that mdo == NULL
+ const bool check_for_mdo = !native_call DEBUG_ONLY(|| native_call); // Debug builds check native methods, too.
+ if (ProfileInterpreter && check_for_mdo) {
+#ifdef FAST_DISPATCH
+ // FAST_DISPATCH and ProfileInterpreter are mutually exclusive since
+ // they both use I2.
+ assert(0, "FAST_DISPATCH and +ProfileInterpreter are mutually exclusive");
+#endif // FAST_DISPATCH
+ Label get_continue;
+
+ __ load_and_test_long(Rmdp, method_(method_data));
+ __ z_brz(get_continue);
+ DEBUG_ONLY(if (native_call) __ stop("native methods don't have a mdo"));
+ __ add2reg(Rmdp, in_bytes(MethodData::data_offset()));
+ __ bind(get_continue);
+ }
+ __ z_stg(Rmdp, _z_ijava_state_neg(mdx), fp);
+
+ // Initialize z_ijava_state->bcp and Z_bcp.
+ if (native_call) {
+ __ clear_reg(Z_bcp); // Must initialize. Will get written into frame where GC reads it.
+ } else {
+ __ z_lg(Z_bcp, method_(const));
+ __ add2reg(Z_bcp, in_bytes(ConstMethod::codes_offset()));
+ }
+ __ z_stg(Z_bcp, _z_ijava_state_neg(bcp), fp);
+
+ // no monitors and empty operand stack
+ // => z_ijava_state->monitors points to the top slot in IJAVA_STATE.
+ // => Z_ijava_state->esp points one slot above into the operand stack.
+ // z_ijava_state->monitors = fp - frame::z_ijava_state_size;
+ // z_ijava_state->esp = Z_esp = z_ijava_state->monitors - Interpreter::stackElementSize;
+ __ add2reg(Z_esp, -frame::z_ijava_state_size, fp);
+ __ z_stg(Z_esp, _z_ijava_state_neg(monitors), fp);
+ __ add2reg(Z_esp, -Interpreter::stackElementSize);
+ __ z_stg(Z_esp, _z_ijava_state_neg(esp), fp);
+
+ // z_ijava_state->cpoolCache = Z_R1_scratch (see load above);
+ __ z_stg(Z_R1_scratch, _z_ijava_state_neg(cpoolCache), fp);
+
+ // Get mirror and store it in the frame as GC root for this Method*.
+ __ load_mirror(Z_R1_scratch, Z_method);
+ __ z_stg(Z_R1_scratch, _z_ijava_state_neg(mirror), fp);
+
+ BLOCK_COMMENT("} generate_fixed_frame: initialize interpreter state");
+
+ //=============================================================================
+ if (!native_call) {
+ // Fill locals with 0x0s.
+ NearLabel locals_zeroed;
+ NearLabel doXC;
+
+ // Local_count is already num_locals_slots - num_param_slots.
+ __ compare64_and_branch(local_count, (intptr_t)0L, Assembler::bcondNotHigh, locals_zeroed);
+
+ // Advance local_addr to point behind locals (creates positive incr. in loop).
+ __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
+ __ z_llgh(Z_R0_scratch,
+ Address(Z_R1_scratch, ConstMethod::size_of_locals_offset()));
+ if (Z_R0_scratch == Z_R0) {
+ __ z_aghi(Z_R0_scratch, -1);
+ } else {
+ __ add2reg(Z_R0_scratch, -1);
+ }
+ __ z_lgr(local_addr/*locals*/, Z_locals);
+ __ z_sllg(Z_R0_scratch, Z_R0_scratch, LogBytesPerWord);
+ __ z_sllg(local_count, local_count, LogBytesPerWord); // Local_count are non param locals.
+ __ z_sgr(local_addr, Z_R0_scratch); // local_addr = Z_locals - (size_of_locals - 1) * BytesPerWord.
+
+ if (VM_Version::has_Prefetch()) {
+ __ z_pfd(0x02, 0, Z_R0, local_addr);
+ __ z_pfd(0x02, 256, Z_R0, local_addr);
+ }
+
+ // Can't optimise for Z10 using "compare and branch" (immediate value is too big).
+ __ z_cghi(local_count, 256);
+ __ z_brnh(doXC);
+
+ // MVCLE: used when there are quite a lot of locals (more than 256 bytes to clear).
+ // __ bind(doMVCLE);
+ __ z_lgr(Z_R0_scratch, local_addr);
+ __ z_lgr(Z_R1_scratch, local_count);
+ __ clear_reg(Z_ARG2); // Src len of MVCLE is zero.
+
+ __ MacroAssembler::move_long_ext(Z_R0_scratch, Z_ARG1, 0);
+ __ z_bru(locals_zeroed);
+
+ Label XC_template;
+ __ bind(XC_template);
+ __ z_xc(0, 0, local_addr, 0, local_addr); // Not reached by fall-through; only the target of EXRL/EX below.
+
+ __ bind(doXC);
+ __ z_bctgr(local_count, Z_R0); // Get #bytes-1 for EXECUTE.
+ if (VM_Version::has_ExecuteExtensions()) {
+ __ z_exrl(local_count, XC_template); // Execute XC with variable length.
+ } else {
+ __ z_larl(Z_R1_scratch, XC_template);
+ __ z_ex(local_count, 0, Z_R0, Z_R1_scratch); // Execute XC with variable length.
+ }
+
+ __ bind(locals_zeroed);
+ }
+
+ }
+ // Finally set the frame pointer, destroying Z_method.
+ assert(Z_fp == Z_method, "maybe set Z_fp earlier if other register than Z_method");
+ // Oprofile analysis suggests to keep a copy in a register to be used by
+ // generate_counter_incr().
+ __ z_lgr(Z_ARG2, Z_method);
+ __ z_lgr(Z_fp, fp);
+
+ BLOCK_COMMENT("} generate_fixed_frame");
+}
+
+// Various method entries
+
+// Math intrinsic entry (abs, sqrt): the result is computed straight from the caller's operand stack.
+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
+  // Only absolute value and square root are supported, and only with
+  // InlineIntrinsics enabled. NULL means: generate a vanilla entry.
+  const bool is_abs  = (kind == Interpreter::java_lang_math_abs);
+  const bool is_sqrt = (kind == Interpreter::java_lang_math_sqrt);
+  if (!InlineIntrinsics || !(is_abs || is_sqrt)) {
+    return NULL;
+  }
+
+  BLOCK_COMMENT("math_entry {");
+
+  const address entry = __ pc();
+  const int operand_offset = Interpreter::stackElementSize; // Operand location relative to Z_esp.
+
+  if (is_sqrt) {
+    // The square root instruction can use the memory operand directly.
+    __ z_sqdb(Z_FRET, operand_offset, Z_esp);
+  } else {
+    // abs: load the operand from the stack, then take its absolute value.
+    __ mem2freg_opt(Z_FRET, Address(Z_esp, operand_offset));
+    __ z_lpdbr(Z_FRET);
+  }
+
+  // Restore caller sp for c2i case: cut the stack back to where the caller started.
+  __ resize_frame_absolute(Z_R10, Z_R0, true);
+
+  // Done, return to the caller.
+  __ z_br(Z_R14);
+
+  BLOCK_COMMENT("} math_entry");
+
+  return entry;
+}
+
+// Interpreter stub for calling a native method. (asm interpreter).
+// This sets up a somewhat different looking stack for calling the
+// native method than the typical interpreter frame setup.
+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
+ // Determine code generation flags.
+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+ // Interpreter entry for native methods.
+ //
+ // Registers alive
+ // Z_SP - stack pointer
+ // Z_thread - JavaThread*
+ // Z_method - callee's method (method to be invoked)
+ // Z_esp - operand (or expression) stack pointer of caller. one slot above last arg.
+ // Z_R10 - sender sp (before modifications, e.g. by c2i adapter
+ // and as well by generate_fixed_frame below)
+ // Z_R14 - return address to caller (call_stub or c2i_adapter)
+ //
+ // Registers updated
+ // Z_SP - stack pointer
+ // Z_fp - callee's framepointer
+ // Z_esp - callee's operand stack pointer
+ // points to the slot above the value on top
+ // Z_locals - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord)
+ // Z_tos - integer result, if any
+ // z_ftos - floating point result, if any
+ //
+ // Stack layout at this point:
+ //
+ // F1 [TOP_IJAVA_FRAME_ABI] <-- Z_SP, Z_R10 (Z_R10 will be below Z_SP if
+ // frame was extended by c2i adapter)
+ // [outgoing Java arguments] <-- Z_esp
+ // ...
+ // PARENT [PARENT_IJAVA_FRAME_ABI]
+ // ...
+ //
+
+ address entry_point = __ pc();
+
+ // Make sure registers are different!
+ assert_different_registers(Z_thread, Z_method, Z_esp);
+
+ BLOCK_COMMENT("native_entry {");
+
+ // Make sure method is native and not abstract.
+#ifdef ASSERT
+ address reentry = NULL;
+ { Label L;
+ __ testbit(method_(access_flags), JVM_ACC_NATIVE_BIT);
+ __ z_btrue(L);
+ reentry = __ stop_chain_static(reentry, "tried to execute non-native method as native");
+ __ bind(L);
+ }
+ { Label L;
+ __ testbit(method_(access_flags), JVM_ACC_ABSTRACT_BIT);
+ __ z_bfalse(L);
+ reentry = __ stop_chain_static(reentry, "tried to execute abstract method as non-abstract");
+ __ bind(L);
+ }
+#endif // ASSERT
+
+#ifdef ASSERT
+ // Save the return PC into the caller's frame for the assertion in generate_fixed_frame.
+ __ save_return_pc(Z_R14);
+#endif
+
+ // Generate the code to allocate the interpreter stack frame.
+ generate_fixed_frame(true);
+
+ const Address do_not_unlock_if_synchronized(Z_thread, JavaThread::do_not_unlock_if_synchronized_offset());
+ // Since at this point in the method invocation the exception handler
+ // would try to exit the monitor of synchronized methods which hasn't
+ // been entered yet, we set the thread local variable
+ // _do_not_unlock_if_synchronized to true. If any exception was thrown by
+ // runtime, exception handling i.e. unlock_if_synchronized_method will
+ // check this thread local flag.
+ __ z_mvi(do_not_unlock_if_synchronized, true);
+
+ // Increment invocation count and check for overflow.
+ NearLabel invocation_counter_overflow;
+ if (inc_counter) {
+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
+ }
+
+ Label continue_after_compile; // The overflow handler emitted at the end of this stub branches back here.
+ __ bind(continue_after_compile);
+
+ bang_stack_shadow_pages(true);
+
+ // Reset the _do_not_unlock_if_synchronized flag.
+ __ z_mvi(do_not_unlock_if_synchronized, false);
+
+ // Check for synchronized methods.
+ // This must happen AFTER invocation_counter check and stack overflow check,
+ // so method is not locked if overflows.
+ if (synchronized) {
+ lock_method();
+ } else {
+ // No synchronization necessary.
+#ifdef ASSERT
+ { Label L;
+ __ get_method(Z_R1_scratch);
+ __ testbit(method2_(Z_R1_scratch, access_flags), JVM_ACC_SYNCHRONIZED_BIT);
+ __ z_bfalse(L);
+ reentry = __ stop_chain_static(reentry, "method needs synchronization");
+ __ bind(L);
+ }
+#endif // ASSERT
+ }
+
+ // start execution
+
+ // jvmti support
+ __ notify_method_entry();
+
+ //=============================================================================
+ // Get and call the signature handler.
+ const Register Rmethod = Z_tmp_2;
+ const Register signature_handler_entry = Z_tmp_1;
+ const Register Rresult_handler = Z_tmp_3;
+ Label call_signature_handler;
+
+ assert_different_registers(Z_fp, Rmethod, signature_handler_entry, Rresult_handler);
+ assert(Rresult_handler->is_nonvolatile(), "Rresult_handler must be in a non-volatile register");
+
+ // Reload method.
+ __ get_method(Rmethod);
+
+ // Check for signature handler.
+ __ load_and_test_long(signature_handler_entry, method2_(Rmethod, signature_handler));
+ __ z_brne(call_signature_handler);
+
+ // Method has never been called. Either generate a specialized
+ // handler or point to the slow one.
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call),
+ Rmethod);
+
+ // Reload method.
+ __ get_method(Rmethod);
+
+ // Reload signature handler, it must have been created/assigned in the meantime.
+ __ z_lg(signature_handler_entry, method2_(Rmethod, signature_handler));
+
+ __ bind(call_signature_handler);
+
+ // We have a TOP_IJAVA_FRAME here, which belongs to us.
+ __ set_top_ijava_frame_at_SP_as_last_Java_frame(Z_SP, Z_R1/*tmp*/);
+
+ // Call signature handler and pass locals address in Z_ARG1.
+ __ z_lgr(Z_ARG1, Z_locals);
+ __ call_stub(signature_handler_entry);
+ // Save result handler returned by signature handler.
+ __ z_lgr(Rresult_handler, Z_RET);
+
+ // Reload method (the slow signature handler may block for GC).
+ __ get_method(Rmethod);
+
+ // Pass mirror handle if static call.
+ {
+ Label method_is_not_static;
+ __ testbit(method2_(Rmethod, access_flags), JVM_ACC_STATIC_BIT);
+ __ z_bfalse(method_is_not_static);
+ // Get mirror.
+ __ load_mirror(Z_R1, Rmethod);
+ // z_ijava_state.oop_temp = pool_holder->klass_part()->java_mirror();
+ __ z_stg(Z_R1, oop_tmp_offset, Z_fp);
+ // Pass handle to mirror as 2nd argument to JNI method.
+ __ add2reg(Z_ARG2, oop_tmp_offset, Z_fp);
+ __ bind(method_is_not_static);
+ }
+
+ // Pass JNIEnv address as first parameter.
+ __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
+
+ // Note: last java frame has been set above already. The pc from there
+ // is precise enough.
+
+ // Get native function entry point before we change the thread state.
+ __ z_lg(Z_R1/*native_method_entry*/, method2_(Rmethod, native_function));
+
+ //=============================================================================
+ // Transition from _thread_in_Java to _thread_in_native. As soon as
+ // we make this change the safepoint code needs to be certain that
+ // the last Java frame we established is good. The pc in that frame
+ // just needs to be near here, not an actual return address.
+#ifdef ASSERT
+ {
+ NearLabel L;
+ __ mem2reg_opt(Z_R14, Address(Z_thread, JavaThread::thread_state_offset()), false /*32 bits*/);
+ __ compareU32_and_branch(Z_R14, _thread_in_Java, Assembler::bcondEqual, L);
+ reentry = __ stop_chain_static(reentry, "Wrong thread state in native stub");
+ __ bind(L);
+ }
+#endif
+
+ // Memory ordering: Z does not reorder store/load with subsequent load. That's strong enough.
+ __ set_thread_state(_thread_in_native);
+
+ //=============================================================================
+ // Call the native method. Argument registers must not have been
+ // overwritten since "__ call_stub(signature_handler_entry);" (except for
+ // ARG1 and ARG2 for static methods).
+
+ __ call_c(Z_R1/*native_method_entry*/);
+
+ // NOTE: frame::interpreter_frame_result() depends on these stores.
+ __ z_stg(Z_RET, _z_ijava_state_neg(lresult), Z_fp);
+ __ freg2mem_opt(Z_FRET, Address(Z_fp, _z_ijava_state_neg(fresult)));
+ const Register Rlresult = signature_handler_entry; // Reuses Z_tmp_1; the handler entry is not needed past the call_stub above.
+ assert(Rlresult->is_nonvolatile(), "Rlresult must be in a non-volatile register");
+ __ z_lgr(Rlresult, Z_RET);
+
+ // Z_method may no longer be valid, because of GC.
+
+ // Block, if necessary, before resuming in _thread_in_Java state.
+ // In order for GC to work, don't clear the last_Java_sp until after
+ // blocking.
+
+ //=============================================================================
+ // Switch thread to "native transition" state before reading the
+ // synchronization state. This additional state is necessary
+ // because reading and testing the synchronization state is not
+ // atomic w.r.t. GC, as this scenario demonstrates: Java thread A,
+ // in _thread_in_native state, loads _not_synchronized and is
+ // preempted. VM thread changes sync state to synchronizing and
+ // suspends threads for GC. Thread A is resumed to finish this
+ // native method, but doesn't block here since it didn't see any
+ // synchronization in progress, and escapes.
+
+ __ set_thread_state(_thread_in_native_trans);
+ if (UseMembar) {
+ __ z_fence();
+ } else {
+ // Write serialization page so VM thread can do a pseudo remote
+ // membar. We use the current thread pointer to calculate a thread
+ // specific offset to write to within the page. This minimizes bus
+ // traffic due to cache line collision.
+ __ serialize_memory(Z_thread, Z_R1, Z_R0);
+ }
+ // Now before we return to java we must look for a current safepoint
+ // (a new safepoint can not start since we entered native_trans).
+ // We must check here because a current safepoint could be modifying
+ // the callers registers right this moment.
+
+ // Check for safepoint operation in progress and/or pending suspend requests.
+ {
+ Label Continue, do_safepoint;
+ __ generate_safepoint_check(do_safepoint, Z_R1, true);
+ // Check for suspend.
+ __ load_and_test_int(Z_R0/*suspend_flags*/, thread_(suspend_flags));
+ __ z_bre(Continue); // 0 -> no flag set -> not suspended
+ __ bind(do_safepoint);
+ __ z_lgr(Z_ARG1, Z_thread);
+ __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
+ __ bind(Continue);
+ }
+
+ //=============================================================================
+ // Back in Interpreter Frame.
+
+ // We are in thread_in_native_trans here and back in the normal
+ // interpreter frame. We don't have to do anything special about
+ // safepoints and we can switch to Java mode anytime we are ready.
+
+ // Note: frame::interpreter_frame_result has a dependency on how the
+ // method result is saved across the call to post_method_exit. For
+ // native methods it assumes that the non-FPU/non-void result is
+ // saved in z_ijava_state.lresult and a FPU result in z_ijava_state.fresult. If
+ // this changes then the interpreter_frame_result implementation
+ // will need to be updated too.
+
+ //=============================================================================
+ // Back in Java.
+
+ // Memory ordering: Z does not reorder store/load with subsequent
+ // load. That's strong enough.
+ __ set_thread_state(_thread_in_Java);
+
+ __ reset_last_Java_frame();
+
+ // We reset the JNI handle block only after unboxing the result; see below.
+
+ // The method register is junk from after the thread_in_native transition
+ // until here. Also can't call_VM until the bcp has been
+ // restored. Need bcp for throwing exception below so get it now.
+ __ get_method(Rmethod);
+
+ // Restore Z_bcp to have legal interpreter frame,
+ // i.e., bci == 0 <=> Z_bcp == code_base().
+ __ z_lg(Z_bcp, Address(Rmethod, Method::const_offset())); // get constMethod
+ __ add2reg(Z_bcp, in_bytes(ConstMethod::codes_offset())); // get codebase
+
+ if (CheckJNICalls) {
+ // clear_pending_jni_exception_check
+ __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
+ }
+
+ // Check if the native method returns an oop, and if so, move it
+ // from the jni handle to z_ijava_state.oop_temp. This is
+ // necessary, because we reset the jni handle block below.
+ // NOTE: frame::interpreter_frame_result() depends on this, too.
+ { NearLabel no_oop_result, store_oop_result;
+ __ load_absolute_address(Z_R1, AbstractInterpreter::result_handler(T_OBJECT));
+ __ compareU64_and_branch(Z_R1, Rresult_handler, Assembler::bcondNotEqual, no_oop_result);
+ __ compareU64_and_branch(Rlresult, (intptr_t)0L, Assembler::bcondEqual, store_oop_result); // NULL handle: store NULL as is.
+ __ z_lg(Rlresult, 0, Rlresult); // unbox
+ __ bind(store_oop_result);
+ __ z_stg(Rlresult, oop_tmp_offset, Z_fp);
+ __ verify_oop(Rlresult);
+ __ bind(no_oop_result);
+ }
+
+ // Reset handle block.
+ __ z_lg(Z_R1/*active_handles*/, thread_(active_handles));
+ __ clear_mem(Address(Z_R1, JNIHandleBlock::top_offset_in_bytes()), 4);
+
+ // Handle exceptions (exception handling will handle unlocking!).
+ {
+ Label L;
+ __ load_and_test_long(Z_R0/*pending_exception*/, thread_(pending_exception));
+ __ z_bre(L);
+ __ MacroAssembler::call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_pending_exception));
+ __ should_not_reach_here();
+ __ bind(L);
+ }
+
+ if (synchronized) {
+ Register Rfirst_monitor = Z_ARG2;
+ __ add2reg(Rfirst_monitor, -(frame::z_ijava_state_size + (int)sizeof(BasicObjectLock)), Z_fp);
+#ifdef ASSERT
+ NearLabel ok;
+ __ z_lg(Z_R1, _z_ijava_state_neg(monitors), Z_fp);
+ __ compareU64_and_branch(Rfirst_monitor, Z_R1, Assembler::bcondEqual, ok);
+ reentry = __ stop_chain_static(reentry, "native_entry:unlock: inconsistent z_ijava_state.monitors");
+ __ bind(ok);
+#endif
+ __ unlock_object(Rfirst_monitor);
+ }
+
+ // JVMTI support. Result has already been saved above to the frame.
+ __ notify_method_exit(true/*native_method*/, ilgl, InterpreterMacroAssembler::NotifyJVMTI);
+
+ // Move native method result back into proper registers and return.
+ // C++ interpreter does not use result handler. So do we need to here? TODO(ZASM): check if correct.
+ { NearLabel no_oop_or_null;
+ __ mem2freg_opt(Z_FRET, Address(Z_fp, _z_ijava_state_neg(fresult)));
+ __ load_and_test_long(Z_RET, Address(Z_fp, _z_ijava_state_neg(lresult)));
+ __ z_bre(no_oop_or_null); // No unboxing if the result is NULL.
+ __ load_absolute_address(Z_R1, AbstractInterpreter::result_handler(T_OBJECT));
+ __ compareU64_and_branch(Z_R1, Rresult_handler, Assembler::bcondNotEqual, no_oop_or_null);
+ __ z_lg(Z_RET, oop_tmp_offset, Z_fp); // Oop result: take the unboxed oop saved in oop_temp above.
+ __ verify_oop(Z_RET);
+ __ bind(no_oop_or_null);
+ }
+
+ // Pop the native method's interpreter frame.
+ __ pop_interpreter_frame(Z_R14 /*return_pc*/, Z_ARG2/*tmp1*/, Z_ARG3/*tmp2*/);
+
+ // Return to caller.
+ __ z_br(Z_R14);
+
+ if (inc_counter) {
+ // Handle overflow of counter and compile method.
+ __ bind(invocation_counter_overflow);
+ generate_counter_overflow(continue_after_compile);
+ }
+
+ BLOCK_COMMENT("} native_entry");
+
+ return entry_point;
+}
+
+// Generic interpreted method entry to template interpreter: emits the entry
+// code for ordinary (non-native, non-abstract) Java methods and returns its
+// start address. 'synchronized' selects whether lock_method() is emitted.
+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
+ address entry_point = __ pc();
+
+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; // Guards emission of the invocation counter code below.
+
+ // Interpreter entry for ordinary Java methods.
+ //
+ // Registers alive
+ // Z_SP - stack pointer
+ // Z_thread - JavaThread*
+ // Z_method - callee's method (method to be invoked)
+ // Z_esp - operand (or expression) stack pointer of caller; one slot above last arg.
+ // Z_R10 - sender sp (before modifications, e.g. by c2i adapter
+ // and as well by generate_fixed_frame below)
+ // Z_R14 - return address to caller (call_stub or c2i_adapter)
+ //
+ // Registers updated
+ // Z_SP - stack pointer
+ // Z_fp - callee's framepointer
+ // Z_esp - callee's operand stack pointer
+ // points to the slot above the value on top
+ // Z_locals - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord)
+ // Z_tos - integer result, if any
+ // Z_ftos - floating point result, if any
+ //
+ //
+ // Stack layout at this point:
+ //
+ // F1 [TOP_IJAVA_FRAME_ABI] <-- Z_SP, Z_R10 (Z_R10 will be below Z_SP if
+ // frame was extended by c2i adapter)
+ // [outgoing Java arguments] <-- Z_esp
+ // ...
+ // PARENT [PARENT_IJAVA_FRAME_ABI]
+ // ...
+ //
+ // Stack layout before dispatching the first bytecode:
+ //
+ // F0 [TOP_IJAVA_FRAME_ABI] <-- Z_SP
+ // [operand stack] <-- Z_esp
+ // monitor (optional, can grow)
+ // [IJAVA_STATE]
+ // F1 [PARENT_IJAVA_FRAME_ABI] <-- Z_fp (== *Z_SP)
+ // [F0's locals] <-- Z_locals
+ // [F1's operand stack]
+ // [F1's monitors] (optional)
+ // [IJAVA_STATE]
+
+ // Make sure registers are different!
+ assert_different_registers(Z_thread, Z_method, Z_esp);
+
+ BLOCK_COMMENT("normal_entry {");
+
+ // Make sure method is not native and not abstract.
+ // TODO: Rethink these assertions - they can be simplified and shared.
+#ifdef ASSERT
+ address reentry = NULL;
+ { Label L;
+ __ testbit(method_(access_flags), JVM_ACC_NATIVE_BIT);
+ __ z_bfalse(L);
+ reentry = __ stop_chain_static(reentry, "tried to execute native method as non-native");
+ __ bind(L);
+ }
+ { Label L;
+ __ testbit(method_(access_flags), JVM_ACC_ABSTRACT_BIT);
+ __ z_bfalse(L);
+ reentry = __ stop_chain_static(reentry, "tried to execute abstract method as non-abstract");
+ __ bind(L);
+ }
+#endif // ASSERT
+
+#ifdef ASSERT
+ // Save the return PC into the caller's frame for assertion in generate_fixed_frame.
+ __ save_return_pc(Z_R14);
+#endif
+
+ // Generate the code to allocate the interpreter stack frame.
+ generate_fixed_frame(false);
+
+ const Address do_not_unlock_if_synchronized(Z_thread, JavaThread::do_not_unlock_if_synchronized_offset());
+ // At this point in the method invocation the monitor of a synchronized
+ // method has not been entered yet, but the exception handler would try
+ // to exit it. Therefore we set the thread local variable
+ // _do_not_unlock_if_synchronized to true. If any exception is thrown by
+ // the runtime, exception handling (i.e. unlock_if_synchronized_method)
+ // will check this thread local flag.
+ __ z_mvi(do_not_unlock_if_synchronized, true);
+
+ __ profile_parameters_type(Z_tmp_2, Z_ARG3, Z_ARG4);
+
+ // Increment invocation counter and check for overflow.
+ //
+ // Note: checking for negative value instead of overflow so we have a 'sticky'
+ // overflow test (may be of importance as soon as we have true MT/MP).
+ NearLabel invocation_counter_overflow;
+ NearLabel profile_method;
+ NearLabel profile_method_continue;
+ NearLabel Lcontinue;
+ if (inc_counter) {
+ generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue);
+ if (ProfileInterpreter) {
+ __ bind(profile_method_continue);
+ }
+ }
+ __ bind(Lcontinue); // Label handed to generate_counter_overflow() in the out-of-line overflow code below.
+
+ bang_stack_shadow_pages(false); // false: not a native call.
+
+ // Reset the _do_not_unlock_if_synchronized flag.
+ __ z_mvi(do_not_unlock_if_synchronized, false);
+
+ // Check for synchronized methods.
+ // Must happen AFTER invocation_counter check and stack overflow check,
+ // so the method is not locked if one of them overflows.
+ if (synchronized) {
+ // Allocate monitor and lock method.
+ lock_method();
+ } else {
+#ifdef ASSERT
+ { Label L;
+ __ get_method(Z_R1_scratch);
+ __ testbit(method2_(Z_R1_scratch, access_flags), JVM_ACC_SYNCHRONIZED_BIT);
+ __ z_bfalse(L);
+ reentry = __ stop_chain_static(reentry, "method needs synchronization");
+ __ bind(L);
+ }
+#endif // ASSERT
+ }
+
+ // Start execution.
+
+#ifdef ASSERT
+ __ verify_esp(Z_esp, Z_R1_scratch);
+
+ __ verify_thread();
+#endif
+
+ // JVMTI support.
+ __ notify_method_entry();
+
+ // Start executing instructions.
+ __ dispatch_next(vtos);
+ // dispatch_next does not return.
+ DEBUG_ONLY(__ should_not_reach_here());
+
+ // Out-of-line code: interpreter profiling and invocation counter overflow.
+ if (inc_counter) {
+ if (ProfileInterpreter) {
+ // We have decided to profile this method in the interpreter.
+ __ bind(profile_method);
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+ __ set_method_data_pointer_for_bcp();
+ __ z_bru(profile_method_continue);
+ }
+
+ // Handle invocation counter overflow.
+ __ bind(invocation_counter_overflow);
+ generate_counter_overflow(Lcontinue);
+ }
+
+ BLOCK_COMMENT("} normal_entry");
+
+ return entry_point;
+}
+
+// Method entry for static native methods:
+// int java.util.zip.CRC32.update(int crc, int b)
+address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
+
+ if (UseCRC32Intrinsics) {
+ uint64_t entry_off = __ offset();
+ Label slow_path;
+
+ // If we need a safepoint check, generate full interpreter entry.
+ __ generate_safepoint_check(slow_path, Z_R1, false);
+
+ BLOCK_COMMENT("CRC32_update {");
+
+ // We don't generate a local frame and don't align the stack because
+ // we do not even call stub code (we generate the code inline)
+ // and there is no safepoint on this path.
+
+ // Load java parameters.
+ // Z_esp is the caller's operand stack pointer, i.e. it points to the parameters.
+ const Register argP = Z_esp;
+ const Register crc = Z_ARG1; // crc value
+ const Register data = Z_ARG2; // address of java byte value (kernel_crc32 needs address)
+ const Register dataLen = Z_ARG3; // source data len (1 byte). Not used because calling the single-byte emitter.
+ const Register table = Z_ARG4; // address of crc32 table
+
+ // Arguments are reversed on java expression stack.
+ __ z_la(data, 3+1*wordSize, argP); // byte value (stack address).
+ // Being passed as an int, the single byte is at offset +3.
+ __ z_llgf(crc, 2 * wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register.
+
+ StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
+ __ kernel_crc32_singleByte(crc, data, dataLen, table, Z_R1);
+
+ // Restore caller sp for c2i case.
+ __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+ __ z_br(Z_R14);
+
+ BLOCK_COMMENT("} CRC32_update");
+
+ // Use a previously generated vanilla native entry as the slow path.
+ BIND(slow_path);
+ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), Z_R1);
+ return __ addr_at(entry_off); // Address of the code offset recorded on entry.
+ }
+
+ return NULL; // UseCRC32Intrinsics is off: no special entry is generated.
+}
+
+
+// Method entry for static native methods:
+// int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+// int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+ if (UseCRC32Intrinsics) {
+ uint64_t entry_off = __ offset();
+ Label slow_path;
+
+ // If we need a safepoint check, generate full interpreter entry.
+ __ generate_safepoint_check(slow_path, Z_R1, false);
+
+ // We don't generate a local frame and don't align the stack because
+ // we call stub code and there is no safepoint on this path.
+
+ // Load parameters.
+ // Z_esp is the caller's operand stack pointer, i.e. it points to the parameters.
+ const Register argP = Z_esp;
+ const Register crc = Z_ARG1; // crc value
+ const Register data = Z_ARG2; // address of java byte array
+ const Register dataLen = Z_ARG3; // source data len
+ const Register table = Z_ARG4; // address of crc32 table
+ const Register t0 = Z_R10; // work reg for kernel* emitters
+ const Register t1 = Z_R11; // work reg for kernel* emitters
+ const Register t2 = Z_R12; // work reg for kernel* emitters
+ const Register t3 = Z_R13; // work reg for kernel* emitters
+
+ // Arguments are reversed on java expression stack.
+ // Calculate address of start element.
+ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { // Used for "updateByteBuffer direct".
+ // crc @ (SP + 5W) (32bit)
+ // buf @ (SP + 3W) (64bit ptr to long array)
+ // off @ (SP + 2W) (32bit)
+ // dataLen @ (SP + 1W) (32bit)
+ // data = buf + off
+ BLOCK_COMMENT("CRC32_updateByteBuffer {");
+ __ z_llgf(crc, 5*wordSize, argP); // current crc state
+ __ z_lg(data, 3*wordSize, argP); // start of byte buffer
+ __ z_agf(data, 2*wordSize, argP); // Add byte buffer offset.
+ __ z_lgf(dataLen, 1*wordSize, argP); // #bytes to process
+ } else { // Used for "updateBytes update".
+ // crc @ (SP + 4W) (32bit)
+ // buf @ (SP + 3W) (64bit ptr to byte array)
+ // off @ (SP + 2W) (32bit)
+ // dataLen @ (SP + 1W) (32bit)
+ // data = buf + off + base_offset
+ BLOCK_COMMENT("CRC32_updateBytes {");
+ __ z_llgf(crc, 4*wordSize, argP); // current crc state
+ __ z_lg(data, 3*wordSize, argP); // start of byte buffer
+ __ z_agf(data, 2*wordSize, argP); // Add byte buffer offset.
+ __ z_lgf(dataLen, 1*wordSize, argP); // #bytes to process
+ __ z_aghi(data, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // Skip the array header to reach the first element.
+ }
+
+ StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
+
+ __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l spill space (6*8 bytes; the 4 work registers are spilled below).
+ __ z_stmg(t0, t3, 1*8, Z_SP); // Spill regs 10..13 to make them available as work registers.
+ __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3);
+ __ z_lmg(t0, t3, 1*8, Z_SP); // Restore regs 10..13 from stack.
+
+ // Restore caller sp for c2i case.
+ __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
+
+ __ z_br(Z_R14);
+
+ BLOCK_COMMENT("} CRC32_update{Bytes|ByteBuffer}");
+
+ // Use a previously generated vanilla native entry as the slow path.
+ BIND(slow_path);
+ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), Z_R1);
+ return __ addr_at(entry_off); // Address of the code offset recorded on entry.
+ }
+
+ return NULL; // UseCRC32Intrinsics is off: no special entry is generated.
+}
+
+// CRC32C entry is not supported here: no code is generated, NULL is returned for every kind.
+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+ return NULL;
+}
+
+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
+ // Quick & dirty stack overflow checking: bang the stack & handle trap.
+ // Note that we do the banging after the frame is set up, since the exception
+ // handling code expects to find a valid interpreter frame on the stack.
+ // Doing the banging earlier fails if the caller frame is not an interpreter
+ // frame.
+ // (Also, the exception throwing code expects to unlock any synchronized
+ // method receiver, so do the banging after locking the receiver.) NOTE(review): generate_normal_entry bangs before lock_method() - confirm this remark.
+
+ // Bang each page in the shadow zone. We can't assume it's been done for
+ // an interpreter frame with greater than a page of locals, so each page
+ // needs to be checked. Only true for non-native. For native, we only bang the last page.
+ if (UseStackBanging) {
+ const int page_size = os::vm_page_size();
+ const int n_shadow_pages = (int)(JavaThread::stack_shadow_zone_size()/page_size);
+ const int start_page_num = native_call ? n_shadow_pages : 1; // Native: start (and end) at the last page.
+ for (int pages = start_page_num; pages <= n_shadow_pages; pages++) {
+ __ bang_stack_with_offset(pages*page_size);
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateInterpreterGenerator::generate_throw_exception() {
+
+ BLOCK_COMMENT("throw_exception {");
+
+ // Entry point in previous activation (i.e., if the caller was interpreted).
+ Interpreter::_rethrow_exception_entry = __ pc();
+ __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Frame accessors use Z_fp.
+ // Z_ARG1 (==Z_tos): exception
+ // Z_ARG2 : Return address/pc that threw exception.
+ __ restore_bcp(); // R13 points to call/send.
+ __ restore_locals();
+
+ // Fallthrough, no need to restore Z_esp.
+
+ // Entry point for exceptions thrown within interpreter code.
+ Interpreter::_throw_exception_entry = __ pc();
+ // Expression stack is undefined here.
+ // Z_ARG1 (==Z_tos): exception
+ // Z_bcp: exception bcp
+ __ verify_oop(Z_ARG1);
+ __ z_lgr(Z_ARG2, Z_ARG1);
+
+ // Expression stack must be empty before entering the VM in case of
+ // an exception.
+ __ empty_expression_stack();
+ // Find exception handler address and preserve exception oop.
+ const Register Rpreserved_exc_oop = Z_tmp_1;
+ __ call_VM(Rpreserved_exc_oop,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception),
+ Z_ARG2);
+ // Z_RET: exception handler entry point
+ // Z_bcp: bcp for exception handler
+ __ push_ptr(Rpreserved_exc_oop); // Push exception which is now the only value on the stack.
+ __ z_br(Z_RET); // Jump to exception handler (may be _remove_activation_entry!).
+
+ // If the exception is not handled in the current frame the frame is
+ // removed and the exception is rethrown (i.e. exception
+ // continuation is _rethrow_exception).
+ //
+ // Note: At this point the bci is still the bci for the instruction
+ // which caused the exception and the expression stack is
+ // empty. Thus, for any VM calls at this point, GC will find a legal
+ // oop map (with empty expression stack).
+
+ //
+ // JVMTI PopFrame support
+ //
+
+ Interpreter::_remove_activation_preserving_args_entry = __ pc();
+ __ z_lg(Z_fp, _z_parent_ijava_frame_abi(callers_sp), Z_SP);
+ __ empty_expression_stack();
+ // Set the popframe_processing bit in pending_popframe_condition
+ // indicating that we are currently handling popframe, so that
+ // call_VMs that may happen later do not trigger new popframe
+ // handling cycles.
+ __ load_sized_value(Z_tmp_1, Address(Z_thread, JavaThread::popframe_condition_offset()), 4, false /*signed*/);
+ __ z_oill(Z_tmp_1, JavaThread::popframe_processing_bit);
+ __ z_sty(Z_tmp_1, thread_(popframe_condition));
+
+ {
+ // Check to see whether we are returning to a deoptimized frame.
+ // (The PopFrame call ensures that the caller of the popped frame is
+ // either interpreted or compiled and deoptimizes it if compiled.)
+ // In this case, we can't call dispatch_next() after the frame is
+ // popped, but instead must save the incoming arguments and restore
+ // them after deoptimization has occurred.
+ //
+ // Note that we don't compare the return PC against the
+ // deoptimization blob's unpack entry because of the presence of
+ // adapter frames in C2.
+ NearLabel caller_not_deoptimized;
+ __ z_lg(Z_ARG1, _z_parent_ijava_frame_abi(return_pc), Z_fp);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), Z_ARG1);
+ __ compareU64_and_branch(Z_RET, (intptr_t)0, Assembler::bcondNotEqual, caller_not_deoptimized);
+
+ // Compute size of arguments for saving when returning to
+ // deoptimized caller.
+ __ get_method(Z_ARG2);
+ __ z_lg(Z_ARG2, Address(Z_ARG2, Method::const_offset()));
+ __ z_llgh(Z_ARG2, Address(Z_ARG2, ConstMethod::size_of_parameters_offset()));
+ __ z_sllg(Z_ARG2, Z_ARG2, Interpreter::logStackElementSize); // Convert slots to bytes.
+ __ restore_locals();
+ // Compute address of args to be saved.
+ __ z_lgr(Z_ARG3, Z_locals);
+ __ z_slgr(Z_ARG3, Z_ARG2);
+ __ add2reg(Z_ARG3, wordSize);
+ // Save these arguments.
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args),
+ Z_thread, Z_ARG2, Z_ARG3);
+
+ __ remove_activation(vtos, Z_R14,
+ /* throw_monitor_exception */ false,
+ /* install_monitor_exception */ false,
+ /* notify_jvmti */ false);
+
+ // Inform deoptimization that it is responsible for restoring
+ // these arguments.
+ __ store_const(thread_(popframe_condition),
+ JavaThread::popframe_force_deopt_reexecution_bit,
+ Z_tmp_1, false);
+
+ // Continue in deoptimization handler.
+ __ z_br(Z_R14);
+
+ __ bind(caller_not_deoptimized);
+ }
+
+ // Clear the popframe condition flag.
+ __ clear_mem(thread_(popframe_condition), sizeof(int));
+
+ __ remove_activation(vtos,
+ noreg, // Retaddr is not used.
+ false, // throw_monitor_exception
+ false, // install_monitor_exception
+ false); // notify_jvmti
+ __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_esp();
+ // The method data pointer was incremented already during
+ // call profiling. We have to restore the mdp for the current bcp.
+ if (ProfileInterpreter) {
+ __ set_method_data_pointer_for_bcp();
+ }
+#if INCLUDE_JVMTI
+ {
+ Label L_done;
+
+ __ z_cli(0, Z_bcp, Bytecodes::_invokestatic);
+ __ z_brc(Assembler::bcondNotEqual, L_done);
+
+ // The member name argument must be restored if _invokestatic is
+ // re-executed after a PopFrame call. Detect such a case in the
+ // InterpreterRuntime function and return the member name
+ // argument, or NULL.
+ __ z_lg(Z_ARG2, Address(Z_locals));
+ __ get_method(Z_ARG3);
+ __ call_VM(Z_tmp_1,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),
+ Z_ARG2, Z_ARG3, Z_bcp);
+
+ __ z_ltgr(Z_tmp_1, Z_tmp_1);
+ __ z_brc(Assembler::bcondEqual, L_done);
+
+ __ z_stg(Z_tmp_1, Address(Z_esp, wordSize));
+ __ bind(L_done);
+ }
+#endif // INCLUDE_JVMTI
+ __ dispatch_next(vtos);
+ // End of PopFrame support.
+ Interpreter::_remove_activation_entry = __ pc();
+
+ // In between activations: the previous activation type is not known yet.
+ // Compute the continuation point. The continuation point expects the
+ // following registers set up:
+ //
+ // Z_ARG1 (==Z_tos): exception
+ // Z_ARG2 : return address/pc that threw exception
+
+ Register return_pc = Z_tmp_1;
+ Register handler = Z_tmp_2;
+ assert(return_pc->is_nonvolatile(), "use non-volatile reg. to preserve exception pc");
+ assert(handler->is_nonvolatile(), "use non-volatile reg. to handler pc");
+ __ asm_assert_ijava_state_magic(return_pc/*tmp*/); // The top frame should be an interpreter frame.
+ __ z_lg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_fp);
+
+ // Moved removing the activation after VM call, because the new top
+ // frame does not necessarily have the z_abi_160 required for a VM
+ // call (e.g. if it is compiled).
+
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+ SharedRuntime::exception_handler_for_return_address),
+ Z_thread, return_pc);
+ __ z_lgr(handler, Z_RET); // Save exception handler.
+
+ // Preserve exception over this code sequence.
+ __ pop_ptr(Z_ARG1);
+ __ set_vm_result(Z_ARG1);
+ // Remove the activation (without doing throws on illegalMonitorExceptions).
+ __ remove_activation(vtos, noreg/*ret.pc already loaded*/, false/*throw exc*/, true/*install exc*/, false/*notify jvmti*/);
+ __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
+
+ __ get_vm_result(Z_ARG1); // Restore exception.
+ __ verify_oop(Z_ARG1);
+ __ z_lgr(Z_ARG2, return_pc); // Restore return address.
+
+#ifdef ASSERT
+ // The return_pc in the new top frame is dead... at least that's my
+ // current understanding. To assert this I overwrite it.
+ // Note: for compiled frames the handler is the deopt blob
+ // which writes Z_ARG2 into the return_pc slot.
+ __ load_const_optimized(return_pc, 0xb00b1);
+ __ z_stg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_SP);
+#endif
+
+ // Z_ARG1 (==Z_tos): exception
+ // Z_ARG2 : return address/pc that threw exception
+
+ // Note that an "issuing PC" is actually the next PC after the call.
+ __ z_br(handler); // Jump to exception handler of caller.
+
+ BLOCK_COMMENT("} throw_exception");
+}
+
+//
+// JVMTI ForceEarlyReturn support
+//
+address TemplateInterpreterGenerator::generate_earlyret_entry_for (TosState state) {
+ address entry = __ pc(); // Start of the generated code; returned to the caller.
+
+ BLOCK_COMMENT("earlyret_entry {");
+
+ __ z_lg(Z_fp, _z_parent_ijava_frame_abi(callers_sp), Z_SP);
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_esp();
+ __ empty_expression_stack();
+ __ load_earlyret_value(state);
+
+ Register RjvmtiState = Z_tmp_1;
+ __ z_lg(RjvmtiState, thread_(jvmti_thread_state));
+ __ store_const(Address(RjvmtiState, JvmtiThreadState::earlyret_state_offset()),
+ JvmtiThreadState::earlyret_inactive, 4, 4, Z_R0_scratch);
+
+ __ remove_activation(state,
+ Z_tmp_1, // retaddr
+ false, // throw_monitor_exception
+ false, // install_monitor_exception
+ true); // notify_jvmti
+ __ z_br(Z_tmp_1); // Branch to the address remove_activation was given as retaddr register.
+
+ BLOCK_COMMENT("} earlyret_entry");
+
+ return entry;
+}
+
+//-----------------------------------------------------------------------------
+// Helper for vtos entry point generation.
+
+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
+ address& bep,
+ address& cep,
+ address& sep,
+ address& aep,
+ address& iep,
+ address& lep,
+ address& fep,
+ address& dep,
+ address& vep) {
+ assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
+ Label L; // Common continuation: bound right before the template code is generated.
+ aep = __ pc(); __ push_ptr(); __ z_bru(L); // aep: push_ptr(), then branch to L.
+ fep = __ pc(); __ push_f(); __ z_bru(L); // fep: push_f(), then branch to L.
+ dep = __ pc(); __ push_d(); __ z_bru(L); // dep: push_d(), then branch to L.
+ lep = __ pc(); __ push_l(); __ z_bru(L); // lep: push_l(), then branch to L.
+ bep = cep = sep =
+ iep = __ pc(); __ push_i(); // bep/cep/sep/iep share one entry; push_i() falls through to vep.
+ vep = __ pc(); // vep: nothing is pushed.
+ __ bind(L);
+ generate_and_dispatch(t);
+}
+
+//-----------------------------------------------------------------------------
+
+#ifndef PRODUCT
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
+ address entry = __ pc(); // Start of the generated trace stub; returned to the caller.
+ NearLabel counter_below_trace_threshold;
+
+ if (TraceBytecodesAt > 0) {
+ // Skip runtime call, if the trace threshold is not yet reached.
+ __ load_absolute_address(Z_tmp_1, (address)&BytecodeCounter::_counter_value);
+ __ load_absolute_address(Z_tmp_2, (address)&TraceBytecodesAt);
+ __ load_sized_value(Z_tmp_1, Address(Z_tmp_1), 4, false /*signed*/);
+ __ load_sized_value(Z_tmp_2, Address(Z_tmp_2), 8, false /*signed*/);
+ __ compareU64_and_branch(Z_tmp_1, Z_tmp_2, Assembler::bcondLow, counter_below_trace_threshold);
+ }
+
+ int offset2 = state == ltos || state == dtos ? 2 : 1; // Expression stack offset of the second traced value (2 for ltos/dtos, else 1).
+
+ __ push(state);
+ // Preserved return pointer is in Z_R14.
+ // InterpreterRuntime::trace_bytecode() preserves and returns the value passed as second argument.
+ __ z_lgr(Z_ARG2, Z_R14);
+ __ z_lg(Z_ARG3, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)));
+ if (WizardMode) {
+ __ z_lgr(Z_ARG4, Z_esp); // Trace Z_esp in WizardMode.
+ } else {
+ __ z_lg(Z_ARG4, Address(Z_esp, Interpreter::expr_offset_in_bytes(offset2)));
+ }
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), Z_ARG2, Z_ARG3, Z_ARG4);
+ __ z_lgr(Z_R14, Z_RET); // Restore return address (see above).
+ __ pop(state);
+
+ __ bind(counter_below_trace_threshold);
+ __ z_br(Z_R14); // return
+
+ return entry;
+}
+
+// Emit code adding 1 to BytecodeCounter::_counter_value. TODO: Make feasible for old CPUs.
+void TemplateInterpreterGenerator::count_bytecode() {
+ __ load_absolute_address(Z_R1_scratch, (address) &BytecodeCounter::_counter_value);
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch); // Z_R0_scratch is passed as third argument (presumably a temp register - confirm).
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode(Template * t) {
+ __ load_absolute_address(Z_R1_scratch, (address)&BytecodeHistogram::_counters[ t->bytecode() ]); // Address of the counter for this template's bytecode.
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_tmp_1); // Add 1 to that counter; Z_tmp_1 is passed as third argument (presumably a temp - confirm).
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template * t) {
+ Address index_addr(Z_tmp_1, (intptr_t) 0);
+ Register index = Z_tmp_2;
+
+ // Load previous index.
+ __ load_absolute_address(Z_tmp_1, (address) &BytecodePairHistogram::_index);
+ __ mem2reg_opt(index, index_addr, false);
+
+ // Shift the previous index right, OR in the current bytecode (shifted left), and store the result as new previous index.
+ __ z_srl(index, BytecodePairHistogram::log2_number_of_codes);
+ __ load_const_optimized(Z_R0_scratch,
+ (int)t->bytecode() << BytecodePairHistogram::log2_number_of_codes);
+ __ z_or(index, Z_R0_scratch);
+ __ reg2mem_opt(index, index_addr, false);
+
+ // Load counter array's address.
+ __ z_lgfr(index, index); // Sign extend for addressing.
+ __ z_sllg(index, index, LogBytesPerInt); // index2bytes
+ __ load_absolute_address(Z_R1_scratch,
+ (address) &BytecodePairHistogram::_counters);
+ // Add index and increment counter.
+ __ z_agr(Z_R1_scratch, index);
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_tmp_1);
+}
+
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
+ // Call a little run-time stub to avoid blow-up for each bytecode.
+ // The run-time stub saves the right registers, depending on
+ // the tosca in-state for the given template.
+ address entry = Interpreter::trace_code(t->tos_in());
+ guarantee(entry != NULL, "entry must have been generated");
+ __ call_stub(entry);
+}
+
+void TemplateInterpreterGenerator::stop_interpreter_at() {
+ NearLabel L; // Target when the bytecode counter is still below StopInterpreterAt.
+
+ __ load_absolute_address(Z_tmp_1, (address)&BytecodeCounter::_counter_value);
+ __ load_absolute_address(Z_tmp_2, (address)&StopInterpreterAt);
+ __ load_sized_value(Z_tmp_1, Address(Z_tmp_1), 4, false /*signed*/);
+ __ load_sized_value(Z_tmp_2, Address(Z_tmp_2), 8, false /*signed*/);
+ __ compareU64_and_branch(Z_tmp_1, Z_tmp_2, Assembler::bcondLow, L);
+ assert(Z_tmp_1->is_nonvolatile(), "must be nonvolatile to preserve Z_tos");
+ assert(Z_F8->is_nonvolatile(), "must be nonvolatile to preserve Z_ftos");
+ __ z_lgr(Z_tmp_1, Z_tos); // Save tos.
+ __ z_lgr(Z_tmp_2, Z_bytecode); // Save Z_bytecode.
+ __ z_ldr(Z_F8, Z_ftos); // Save ftos.
+ // Use -XX:StopInterpreterAt=<num> to set the limit
+ // and break at breakpoint().
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, breakpoint), false);
+ __ z_lgr(Z_tos, Z_tmp_1); // Restore tos.
+ __ z_lgr(Z_bytecode, Z_tmp_2); // Restore Z_bytecode.
+ __ z_ldr(Z_ftos, Z_F8); // Restore ftos.
+ __ bind(L);
+}
+
+#endif // !PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/templateTable_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,4250 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/templateTable.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+
+// Assembler shorthands used throughout this file.
+//   __            : prefix for emitting through _masm. In non-PRODUCT builds
+//                   it first emits a FILE_AND_LINE block comment when Verbose
+//                   is set.
+//   BLOCK_COMMENT : emits a block comment; expands to nothing in PRODUCT.
+//   BIND          : binds a label; non-PRODUCT builds also emit the label
+//                   name as a block comment.
+// NOTE(review): PRODUCT_ONLY(false&&) sits in the !PRODUCT branch, where it
+// presumably expands to nothing - confirm against utilities/macros.hpp.
+#ifdef PRODUCT
+#define __ _masm->
+#define BLOCK_COMMENT(str)
+#define BIND(label) __ bind(label);
+#else
+#define __ (PRODUCT_ONLY(false&&)Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)->
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":")
+#endif
+
+// The assumed minimum size of a BranchTableBlock.
+// The actual size of each block heavily depends on the CPU capabilities and,
+// of course, on the logic implemented in each block.
+// ASSERT builds use the larger value (256 instead of 64).
+// NOTE(review): the unit is presumably bytes of generated code - confirm at
+// the use sites.
+#ifdef ASSERT
+ #define BTB_MINSIZE 256
+#else
+ #define BTB_MINSIZE 64
+#endif
+
+// BTB_BEGIN / BTB_END bracket one BranchTableBlock.
+//
+// BTB_BEGIN aligns the code position to `alignment`, binds `lbl`, opens a
+// C++ scope and records the start offset/address (b_off, b_addr). It then
+// guarantees that the start address is a multiple of `alignment`.
+// BTB_END records the end offset/address, computes the block length, prints
+// a diagnostic and fails a guarantee if the length exceeds `alignment`,
+// checks that offset and address deltas agree, and closes the scope.
+//
+// The ASSERT flavor of BTB_BEGIN additionally emits code that subtracts the
+// registers named `br_tab` and `flags` at the expansion site from the
+// current address and traps (illtrap 0x55) unless the result is zero.
+// Both names must therefore be in scope wherever the macro is used in an
+// ASSERT build.
+// NOTE(review): the two BTB_END definitions are textually identical.
+#ifdef ASSERT
+// Macro to open a BranchTableBlock (a piece of code that is branched to by a calculated branch).
+#define BTB_BEGIN(lbl, alignment, name) \
+ __ align_address(alignment); \
+ __ bind(lbl); \
+ { unsigned int b_off = __ offset(); \
+ uintptr_t b_addr = (uintptr_t)__ pc(); \
+ __ z_larl(Z_R0, (int64_t)0); /* Check current address alignment. */ \
+ __ z_slgr(Z_R0, br_tab); /* Current Address must be equal */ \
+ __ z_slgr(Z_R0, flags); /* to calculated branch target. */ \
+ __ z_brc(Assembler::bcondLogZero, 3); /* skip trap if ok. */ \
+ __ z_illtrap(0x55); \
+ guarantee(b_addr%alignment == 0, "bad alignment at begin of block" name);
+
+// Macro to close a BranchTableBlock (a piece of code that is branched to by a calculated branch).
+#define BTB_END(lbl, alignment, name) \
+ uintptr_t e_addr = (uintptr_t)__ pc(); \
+ unsigned int e_off = __ offset(); \
+ unsigned int len = e_off-b_off; \
+ if (len > alignment) { \
+ tty->print_cr("%4d of %4d @ " INTPTR_FORMAT ": Block len for %s", \
+ len, alignment, e_addr-len, name); \
+ guarantee(len <= alignment, "block too large"); \
+ } \
+ guarantee(len == e_addr-b_addr, "block len mismatch"); \
+ }
+#else
+// Macro to open a BranchTableBlock (a piece of code that is branched to by a calculated branch).
+#define BTB_BEGIN(lbl, alignment, name) \
+ __ align_address(alignment); \
+ __ bind(lbl); \
+ { unsigned int b_off = __ offset(); \
+ uintptr_t b_addr = (uintptr_t)__ pc(); \
+ guarantee(b_addr%alignment == 0, "bad alignment at begin of block" name);
+
+// Macro to close a BranchTableBlock (a piece of code that is branched to by a calculated branch).
+#define BTB_END(lbl, alignment, name) \
+ uintptr_t e_addr = (uintptr_t)__ pc(); \
+ unsigned int e_off = __ offset(); \
+ unsigned int len = e_off-b_off; \
+ if (len > alignment) { \
+ tty->print_cr("%4d of %4d @ " INTPTR_FORMAT ": Block len for %s", \
+ len, alignment, e_addr-len, name); \
+ guarantee(len <= alignment, "block too large"); \
+ } \
+ guarantee(len == e_addr-b_addr, "block len mismatch"); \
+ }
+#endif // ASSERT
+
+// Platform-dependent initialization hook. This implementation is
+// intentionally empty.
+
+void TemplateTable::pd_initialize() {
+ // No specific initialization.
+}
+
+// Address computation: local variables
+
+// Address of local variable slot n, expressed relative to Z_locals.
+static inline Address iaddress(int n) {
+  const int slot_disp = Interpreter::local_offset_in_bytes(n);
+  return Address(Z_locals, slot_disp);
+}
+
+// Address used for a long local n: the slot with index n + 1.
+static inline Address laddress(int n) {
+  const int addressed_slot = n + 1;
+  return iaddress(addressed_slot);
+}
+
+// Address of float local n; same slot addressing as an int local.
+static inline Address faddress(int n) {
+ return iaddress(n);
+}
+
+// Address of double local n; same slot addressing as a long local.
+static inline Address daddress(int n) {
+ return laddress(n);
+}
+
+// Address of reference local n; same slot addressing as an int local.
+static inline Address aaddress(int n) {
+ return iaddress(n);
+}
+
+// Address of the local slot selected by register r (relative to Z_locals).
+// When masm is non-NULL, a shift is emitted first that scales r in place by
+// the word size; pass NULL if r already holds a byte offset.
+static inline Address iaddress(InterpreterMacroAssembler *masm, Register r) {
+  const bool emit_scaling = (masm != NULL);
+  if (emit_scaling) {
+    masm->z_sllg(r, r, LogBytesPerWord);  // index2bytes
+  }
+  const int first_slot_disp = Interpreter::local_offset_in_bytes(0);
+  return Address(Z_locals, r, first_slot_disp);
+}
+
+// Like the register-indexed iaddress(), but the displacement is that of
+// slot 1 instead of slot 0 (long/double addressing).
+// When masm is non-NULL, a shift is emitted first that scales r in place by
+// the word size; pass NULL if r already holds a byte offset.
+static inline Address laddress(InterpreterMacroAssembler *masm, Register r) {
+  const bool emit_scaling = (masm != NULL);
+  if (emit_scaling) {
+    masm->z_sllg(r, r, LogBytesPerWord);  // index2bytes
+  }
+  const int second_slot_disp = Interpreter::local_offset_in_bytes(1);
+  return Address(Z_locals, r, second_slot_disp);
+}
+
+// Register-indexed address of a float local; forwards to iaddress(), which
+// emits the index-scaling shift when masm is non-NULL.
+static inline Address faddress(InterpreterMacroAssembler *masm, Register r) {
+ return iaddress(masm, r);
+}
+
+// Register-indexed address of a double local; forwards to laddress(), which
+// emits the index-scaling shift when masm is non-NULL.
+static inline Address daddress(InterpreterMacroAssembler *masm, Register r) {
+ return laddress(masm, r);
+}
+
+// Register-indexed address of a reference local; forwards to iaddress(),
+// which emits the index-scaling shift when masm is non-NULL.
+static inline Address aaddress(InterpreterMacroAssembler *masm, Register r) {
+ return iaddress(masm, r);
+}
+
+// Address of expression stack element `slot`, relative to Z_esp.
+// Slot 0 (the default) is the topmost element.
+static inline Address at_tos(int slot = 0) {
+  const int element_disp = Interpreter::expr_offset_in_bytes(slot);
+  return Address(Z_esp, element_disp);
+}
+
+// Condition conversion: maps a TemplateTable condition to the assembler
+// branch condition that holds exactly when the template condition does not.
+static Assembler::branch_condition j_not(TemplateTable::Condition cc) {
+  if (cc == TemplateTable::equal)         { return Assembler::bcondNotEqual; }
+  if (cc == TemplateTable::not_equal)     { return Assembler::bcondEqual;    }
+  if (cc == TemplateTable::less)          { return Assembler::bcondNotLow;   }
+  if (cc == TemplateTable::less_equal)    { return Assembler::bcondHigh;     }
+  if (cc == TemplateTable::greater)       { return Assembler::bcondNotHigh;  }
+  if (cc == TemplateTable::greater_equal) { return Assembler::bcondLow;      }
+
+  // Any other value is a programming error.
+  ShouldNotReachHere();
+  return Assembler::bcondZero;
+}
+
+// Do an oop store like *(base + offset) = val
+// offset can be a register or a constant.
+//
+// Emits the store together with the GC barrier code selected by `barrier`:
+//  - G1SATBCTLogging (only with INCLUDE_ALL_GCS): pre-barrier, store,
+//    post-barrier; a NULL value skips the post-barrier.
+//  - CardTableForRS / CardTableExtension: store followed by a card mark,
+//    unless val_is_null is set.
+//  - Anything else (including ModRef) hits ShouldNotReachHere().
+// `precise` requests that the barrier sees the exact store address
+// (base + offset) instead of just base; in that case base is overwritten
+// with the computed address.
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+ Register base,
+ RegisterOrConstant offset,
+ Register val,
+ bool val_is_null, // == false does not guarantee that val really is not equal NULL.
+ Register tmp1, // If tmp3 is volatile, either tmp1 or tmp2 must be
+ Register tmp2, // non-volatile to hold a copy of pre_val across runtime calls.
+ Register tmp3, // Ideally, this tmp register is non-volatile, as it is used to
+ // hold pre_val (must survive runtime calls).
+ BarrierSet::Name barrier,
+ bool precise) {
+ BLOCK_COMMENT("do_oop_store {");
+ assert(val != noreg, "val must always be valid, even if it is zero");
+ assert_different_registers(tmp1, tmp2, tmp3, val, base, offset.register_or_noreg());
+ __ verify_oop(val);
+ switch (barrier) {
+#if INCLUDE_ALL_GCS
+ case BarrierSet::G1SATBCTLogging:
+ {
+#ifdef ASSERT
+ if (val_is_null) { // Check if the flag setting reflects reality.
+ Label OK;
+ __ z_ltgr(val, val);
+ __ z_bre(OK);
+ __ z_illtrap(0x11);
+ __ bind(OK);
+ }
+#endif
+ Register pre_val = tmp3;
+ // Load and record the previous value.
+ __ g1_write_barrier_pre(base, offset, pre_val, val,
+ tmp1, tmp2,
+ false); // Needs to hold pre_val in non_volatile register?
+
+ if (val_is_null) {
+ __ store_heap_oop_null(val, offset, base);
+ } else {
+ Label Done;
+ // val_is_null == false does not guarantee that val really is not equal NULL.
+ // Checking for this case dynamically has some cost, but also some benefit (in GC).
+ // It's hard to say if cost or benefit is greater.
+ { Label OK;
+ __ z_ltgr(val, val);
+ __ z_brne(OK);
+ __ store_heap_oop_null(val, offset, base);
+ __ z_bru(Done);
+ __ bind(OK);
+ }
+ // G1 barrier needs uncompressed oop for region cross check.
+ // Store_heap_oop compresses the oop in the argument register.
+ // Hence the store works on a copy in tmp3 when oops are compressed,
+ // leaving val intact for the post-barrier.
+ Register val_work = val;
+ if (UseCompressedOops) {
+ val_work = tmp3;
+ __ z_lgr(val_work, val);
+ }
+ __ store_heap_oop_not_null(val_work, offset, base);
+
+ // We need precise card marks for oop array stores.
+ // Otherwise, cardmarking the object which contains the oop is sufficient.
+ if (precise && !(offset.is_constant() && offset.as_constant() == 0)) {
+ __ add2reg_with_index(base,
+ offset.constant_or_zero(),
+ offset.register_or_noreg(),
+ base);
+ }
+ __ g1_write_barrier_post(base /* store_adr */, val, tmp1, tmp2, tmp3);
+ __ bind(Done);
+ }
+ }
+ break;
+#endif // INCLUDE_ALL_GCS
+ case BarrierSet::CardTableForRS:
+ case BarrierSet::CardTableExtension:
+ {
+ if (val_is_null) {
+ // Storing NULL: no card mark is emitted.
+ __ store_heap_oop_null(val, offset, base);
+ } else {
+ __ store_heap_oop(val, offset, base);
+ // Flatten object address if needed.
+ if (precise && ((offset.register_or_noreg() != noreg) || (offset.constant_or_zero() != 0))) {
+ __ load_address(base, Address(base, offset.register_or_noreg(), offset.constant_or_zero()));
+ }
+ __ card_write_barrier_post(base, tmp1);
+ }
+ }
+ break;
+ case BarrierSet::ModRef:
+ // fall through
+ default:
+ ShouldNotReachHere();
+
+ }
+ BLOCK_COMMENT("} do_oop_store");
+}
+
+// Address of the byte at Z_bcp + offset. May only be used by templates whose
+// descriptor says they use the bcp (checked by the assert).
+Address TemplateTable::at_bcp(int offset) {
+ assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
+ return Address(Z_bcp, offset);
+}
+
+// Emits code that overwrites the bytecode at Z_bcp with a faster variant.
+// Does nothing if RewriteBytecodes is off.
+//   bc                  - replacement bytecode (also selects the putfield
+//                         special case below).
+//   bc_reg              - register that holds the replacement bytecode at the
+//                         time of the store.
+//   temp_reg            - scratch register.
+//   load_bc_into_bc_reg - if true, bc is loaded into bc_reg here; callers
+//                         that already loaded bc_reg pass false.
+//   byte_no             - f1_byte/f2_byte for the putfield variants, -1
+//                         otherwise (asserted).
+void TemplateTable::patch_bytecode(Bytecodes::Code bc,
+ Register bc_reg,
+ Register temp_reg,
+ bool load_bc_into_bc_reg, // = true
+ int byte_no) {
+ if (!RewriteBytecodes) { return; }
+
+ NearLabel L_patch_done;
+ BLOCK_COMMENT("patch_bytecode {");
+
+ switch (bc) {
+ case Bytecodes::_fast_aputfield:
+ case Bytecodes::_fast_bputfield:
+ case Bytecodes::_fast_zputfield:
+ case Bytecodes::_fast_cputfield:
+ case Bytecodes::_fast_dputfield:
+ case Bytecodes::_fast_fputfield:
+ case Bytecodes::_fast_iputfield:
+ case Bytecodes::_fast_lputfield:
+ case Bytecodes::_fast_sputfield:
+ {
+ // We skip bytecode quickening for putfield instructions when
+ // the put_code written to the constant pool cache is zero.
+ // This is required so that every execution of this instruction
+ // calls out to InterpreterRuntime::resolve_get_put to do
+ // additional, required work.
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+ assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+ __ get_cache_and_index_and_bytecode_at_bcp(Z_R1_scratch, bc_reg,
+ temp_reg, byte_no, 1);
+ __ load_const_optimized(bc_reg, bc);
+ // temp_reg now holds the code read from the cache entry; zero means
+ // "do not patch".
+ __ compareU32_and_branch(temp_reg, (intptr_t)0,
+ Assembler::bcondZero, L_patch_done);
+ }
+ break;
+ default:
+ assert(byte_no == -1, "sanity");
+ // The pair bytecodes have already done the load.
+ if (load_bc_into_bc_reg) {
+ __ load_const_optimized(bc_reg, bc);
+ }
+ break;
+ }
+
+ if (JvmtiExport::can_post_breakpoint()) {
+
+ Label L_fast_patch;
+
+ // If a breakpoint is present we can't rewrite the stream directly.
+ __ z_cli(at_bcp(0), Bytecodes::_breakpoint);
+ __ z_brne(L_fast_patch);
+ __ get_method(temp_reg);
+ // Let breakpoint table handling rewrite to quicker bytecode.
+ __ call_VM_static(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at),
+ temp_reg, Z_R13, bc_reg);
+ __ z_bru(L_patch_done);
+
+ __ bind(L_fast_patch);
+ }
+
+#ifdef ASSERT
+ NearLabel L_okay;
+
+ // The byte being replaced must be either the original Java bytecode for
+ // bc or already the replacement itself.
+ // We load into 64 bits, since this works on any CPU.
+ __ z_llgc(temp_reg, at_bcp(0));
+ __ compareU32_and_branch(temp_reg, Bytecodes::java_code(bc),
+ Assembler::bcondEqual, L_okay );
+ __ compareU32_and_branch(temp_reg, bc_reg, Assembler::bcondEqual, L_okay);
+ __ stop_static("patching the wrong bytecode");
+ __ bind(L_okay);
+#endif
+
+ // Patch bytecode.
+ __ z_stc(bc_reg, at_bcp(0));
+
+ __ bind(L_patch_done);
+ BLOCK_COMMENT("} patch_bytecode");
+}
+
+// Individual instructions
+
+// nop: declares the vtos -> vtos transition and emits no instructions.
+void TemplateTable::nop() {
+ transition(vtos, vtos);
+}
+
+// Template for bytecodes that must never be executed: emits a stop with a
+// diagnostic message.
+void TemplateTable::shouldnotreachhere() {
+ transition(vtos, vtos);
+ __ stop("shouldnotreachhere bytecode");
+}
+
+// aconst_null: leaves a cleared Z_tos as the atos result.
+void TemplateTable::aconst_null() {
+ transition(vtos, atos);
+ __ clear_reg(Z_tos, true, false);
+}
+
+// iconst_<value>: materializes the int constant in Z_tos.
+void TemplateTable::iconst(int value) {
+  transition(vtos, itos);
+  // Widening through unsigned int zero-extends the constant at code
+  // generation time, so no zero extension is needed at run time.
+  const unsigned long zero_extended = (unsigned long)(unsigned int)value;
+  __ load_const_optimized(Z_tos, zero_extended);
+}
+
+// lconst_<value>: materializes the long constant in Z_tos.
+void TemplateTable::lconst(int value) {
+ transition(vtos, ltos);
+ __ load_const_optimized(Z_tos, value);
+}
+
+// fconst_<value> for value in {0, 1, 2}.
+// There is no pc-relative load/store for floats, so the non-zero constants
+// are fetched from static storage through an absolute address.
+void TemplateTable::fconst(int value) {
+  transition(vtos, ftos);
+  static float one = 1.0f, two = 2.0f;
+
+  if (value == 0) {
+    // Zero has a dedicated load-zero instruction.
+    __ z_lzer(Z_ftos);
+    return;
+  }
+
+  if (value == 1 || value == 2) {
+    float* const constant = (value == 1) ? &one : &two;
+    __ load_absolute_address(Z_R1_scratch, (address) constant);
+    __ mem2freg_opt(Z_ftos, Address(Z_R1_scratch), false);
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+// dconst_<value> for value in {0, 1}. The constant 1.0 is fetched from
+// static storage through an absolute address.
+void TemplateTable::dconst(int value) {
+  transition(vtos, dtos);
+  static double one = 1.0;
+
+  if (value == 0) {
+    // Zero has a dedicated load-zero instruction.
+    __ z_lzdr(Z_ftos);
+  } else if (value == 1) {
+    __ load_absolute_address(Z_R1_scratch, (address) &one);
+    __ mem2freg_opt(Z_ftos, Address(Z_R1_scratch));
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// bipush: loads the operand byte at bcp + 1 into Z_tos.
+// NOTE(review): z_lb is presumably the sign-extending byte load - confirm.
+void TemplateTable::bipush() {
+ transition(vtos, itos);
+ __ z_lb(Z_tos, at_bcp(1));
+}
+
+// sipush: loads the signed 2-byte operand at bcp + 1 into Z_tos.
+void TemplateTable::sipush() {
+ transition(vtos, itos);
+ __ get_2_byte_integer_at_bcp(Z_tos, 1, InterpreterMacroAssembler::Signed);
+}
+
+
+// ldc / ldc_w: pushes the constant pool entry selected by the bytecode's
+// index operand (2 bytes if wide, else 1 byte).
+// Dispatch is on the entry's tag:
+//   UnresolvedClass, UnresolvedClassInError, Class -> call
+//       InterpreterRuntime::ldc and push the returned pointer;
+//   Float   -> push the float read from the pool;
+//   else    -> must be Integer (checked under ASSERT); push the int.
+// The template is vtos -> vtos: results are pushed explicitly.
+void TemplateTable::ldc(bool wide) {
+ transition(vtos, vtos);
+ Label call_ldc, notFloat, notClass, Done;
+ const Register RcpIndex = Z_tmp_1;
+ const Register Rtags = Z_ARG2;
+
+ if (wide) {
+ __ get_2_byte_integer_at_bcp(RcpIndex, 1, InterpreterMacroAssembler::Unsigned);
+ } else {
+ __ z_llgc(RcpIndex, at_bcp(1));
+ }
+
+ // Z_tmp_2 = constant pool, Rtags = tags array.
+ __ get_cpool_and_tags(Z_tmp_2, Rtags);
+
+ const int base_offset = ConstantPool::header_size() * wordSize;
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
+ // Rtags is reused: from here on it holds the address of the entry's tag.
+ const Register Raddr_type = Rtags;
+
+ // Get address of type.
+ __ add2reg_with_index(Raddr_type, tags_offset, RcpIndex, Rtags);
+
+ __ z_cli(0, Raddr_type, JVM_CONSTANT_UnresolvedClass);
+ __ z_bre(call_ldc); // Unresolved class - get the resolved class.
+
+ __ z_cli(0, Raddr_type, JVM_CONSTANT_UnresolvedClassInError);
+ __ z_bre(call_ldc); // Unresolved class in error state - call into runtime
+ // to throw the error from the first resolution attempt.
+
+ __ z_cli(0, Raddr_type, JVM_CONSTANT_Class);
+ __ z_brne(notClass); // Resolved class - need to call vm to get java
+ // mirror of the class.
+
+ // We deal with a class. Call vm to do the appropriate.
+ __ bind(call_ldc);
+ __ load_const_optimized(Z_ARG2, wide);
+ call_VM(Z_RET, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), Z_ARG2);
+ __ push_ptr(Z_RET);
+ __ z_bru(Done);
+
+ // Not a class.
+ __ bind(notClass);
+ // RcpIndex is scaled in place; RcpOffset is just a second name for it.
+ Register RcpOffset = RcpIndex;
+ __ z_sllg(RcpOffset, RcpIndex, LogBytesPerWord); // Convert index to offset.
+ __ z_cli(0, Raddr_type, JVM_CONSTANT_Float);
+ __ z_brne(notFloat);
+
+ // ftos
+ __ mem2freg_opt(Z_ftos, Address(Z_tmp_2, RcpOffset, base_offset), false);
+ __ push_f();
+ __ z_bru(Done);
+
+ __ bind(notFloat);
+#ifdef ASSERT
+ {
+ Label L;
+
+ __ z_cli(0, Raddr_type, JVM_CONSTANT_Integer);
+ __ z_bre(L);
+ // String and Object are rewritten to fast_aldc.
+ __ stop("unexpected tag type in ldc");
+
+ __ bind(L);
+ }
+#endif
+
+ // itos
+ __ mem2reg_opt(Z_tos, Address(Z_tmp_2, RcpOffset, base_offset), false);
+ __ push_i(Z_tos);
+
+ __ bind(Done);
+}
+
+// Fast path for caching oop constants.
+// %%% We should use this to handle Class and String constants also.
+// %%% It will simplify the ldc/primitive path considerably.
+//
+// Loads the resolved reference for the cache index found at bcp + 1
+// (2 bytes if wide, else 1 byte). If the entry is still NULL,
+// InterpreterRuntime::resolve_ldc is called and its result ends up in Z_tos.
+void TemplateTable::fast_aldc(bool wide) {
+ transition(vtos, atos);
+
+ const Register index = Z_tmp_2;
+ int index_size = wide ? sizeof(u2) : sizeof(u1);
+ Label L_resolved;
+
+ // We are resolved if the resolved reference cache entry contains a
+ // non-null object (CallSite, etc.).
+ __ get_cache_index_at_bcp(index, 1, index_size); // Load index.
+ __ load_resolved_reference_at_index(Z_tos, index);
+ __ z_ltgr(Z_tos, Z_tos);
+ __ z_brne(L_resolved);
+
+ // First time invocation - must resolve first.
+ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+ // The current bytecode is passed to the runtime as its argument.
+ __ load_const_optimized(Z_ARG1, (int)bytecode());
+ __ call_VM(Z_tos, entry, Z_ARG1);
+
+ __ bind(L_resolved);
+ __ verify_oop(Z_tos);
+}
+
+// ldc2_w: pushes the 8-byte constant pool entry selected by the 2-byte
+// index at bcp + 1. A Double tag is pushed as dtos; every other tag is
+// treated as a long and pushed as ltos.
+void TemplateTable::ldc2_w() {
+ transition(vtos, vtos);
+ Label Long, Done;
+
+ // Z_tmp_1 = index of cp entry
+ __ get_2_byte_integer_at_bcp(Z_tmp_1, 1, InterpreterMacroAssembler::Unsigned);
+
+ // Z_tmp_2 = constant pool, Z_tos = tags array.
+ __ get_cpool_and_tags(Z_tmp_2, Z_tos);
+
+ const int base_offset = ConstantPool::header_size() * wordSize;
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
+
+ // Get address of type.
+ __ add2reg_with_index(Z_tos, tags_offset, Z_tos, Z_tmp_1);
+
+ // Index needed in both branches, so calculate here.
+ __ z_sllg(Z_tmp_1, Z_tmp_1, LogBytesPerWord); // index2bytes
+
+ // Check type.
+ __ z_cli(0, Z_tos, JVM_CONSTANT_Double);
+ __ z_brne(Long);
+
+ // dtos
+ __ mem2freg_opt(Z_ftos, Address(Z_tmp_2, Z_tmp_1, base_offset));
+ __ push_d();
+ __ z_bru(Done);
+
+ __ bind(Long);
+ // ltos
+ __ mem2reg_opt(Z_tos, Address(Z_tmp_2, Z_tmp_1, base_offset));
+ __ push_l();
+
+ __ bind(Done);
+}
+
+// Loads the unsigned one-byte local index at bcp + offset into reg and then
+// complements it (z_lcgr).
+// NOTE(review): the negation presumably reflects locals being laid out at
+// descending addresses from Z_locals - confirm against the frame layout.
+void TemplateTable::locals_index(Register reg, int offset) {
+ __ z_llgc(reg, at_bcp(offset));
+ __ z_lcgr(reg);
+}
+
+// iload: forwards to iload_internal() with its default rewrite control.
+void TemplateTable::iload() {
+ iload_internal();
+}
+
+// nofast_iload: iload variant that never rewrites itself.
+void TemplateTable::nofast_iload() {
+ iload_internal(may_not_rewrite);
+}
+
+// Common implementation of iload. Loads the int local into Z_tos and, if
+// RewriteFrequentPairs is on and rc permits it, first patches the current
+// bytecode depending on the bytecode that follows:
+//   next == _iload      -> leave unchanged for now
+//   next == _fast_iload -> _fast_iload2
+//   next == _caload     -> _fast_icaload
+//   anything else       -> _fast_iload
+void TemplateTable::iload_internal(RewriteControl rc) {
+ transition(vtos, itos);
+
+ if (RewriteFrequentPairs && rc == may_rewrite) {
+ NearLabel rewrite, done;
+ const Register bc = Z_ARG4;
+
+ assert(Z_R1_scratch != bc, "register damaged");
+
+ // Get next byte.
+ __ z_llgc(Z_R1_scratch, at_bcp(Bytecodes::length_for (Bytecodes::_iload)));
+
+ // If the next bytecode is still a plain _iload, do not rewrite yet: only
+ // the last two iloads of a sequence should be fused into _fast_iload2.
+ // Once the next bytecode has been rewritten to _fast_iload, it is known
+ // to be followed by neither an iload nor a caload, so this one and the
+ // next form the pair to fuse.
+ __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_iload,
+ Assembler::bcondEqual, done);
+
+ __ load_const_optimized(bc, Bytecodes::_fast_iload2);
+ __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_iload,
+ Assembler::bcondEqual, rewrite);
+
+ // If _caload, rewrite to fast_icaload.
+ __ load_const_optimized(bc, Bytecodes::_fast_icaload);
+ __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_caload,
+ Assembler::bcondEqual, rewrite);
+
+ // Rewrite so iload doesn't check again.
+ __ load_const_optimized(bc, Bytecodes::_fast_iload);
+
+ // rewrite
+ // bc: fast bytecode
+ __ bind(rewrite);
+ // bc already holds the replacement, hence load_bc_into_bc_reg == false.
+ patch_bytecode(Bytecodes::_iload, bc, Z_R1_scratch, false);
+
+ __ bind(done);
+
+ }
+
+ // Get the local value into tos.
+ locals_index(Z_R1_scratch);
+ __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false);
+}
+
+// Fused pair of two consecutive iloads: the first value is pushed, the
+// second one is left in Z_tos.
+void TemplateTable::fast_iload2() {
+  transition(vtos, itos);
+
+  const Register Rslot = Z_R1_scratch;
+
+  // First iload: fetch the local and push it.
+  locals_index(Rslot);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Rslot), false);
+  __ push_i(Z_tos);
+
+  // Second iload: its index operand sits at bcp + 3.
+  locals_index(Rslot, 3);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Rslot), false);
+}
+
+// iload without any rewrite checks: loads the int local into Z_tos.
+void TemplateTable::fast_iload() {
+  transition(vtos, itos);
+
+  const Register Rslot = Z_R1_scratch;
+  locals_index(Rslot);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Rslot), false);
+}
+
+// lload: loads the long local selected by the bytecode operand into Z_tos.
+void TemplateTable::lload() {
+  transition(vtos, ltos);
+
+  const Register Rslot = Z_R1_scratch;
+  locals_index(Rslot);
+  __ mem2reg_opt(Z_tos, laddress(_masm, Rslot));
+}
+
+// fload: loads the float local selected by the bytecode operand into Z_ftos.
+void TemplateTable::fload() {
+  transition(vtos, ftos);
+
+  const Register Rslot = Z_R1_scratch;
+  locals_index(Rslot);
+  __ mem2freg_opt(Z_ftos, faddress(_masm, Rslot), false);
+}
+
+// dload: loads the double local selected by the bytecode operand into Z_ftos.
+void TemplateTable::dload() {
+  transition(vtos, dtos);
+
+  const Register Rslot = Z_R1_scratch;
+  locals_index(Rslot);
+  __ mem2freg_opt(Z_ftos, daddress(_masm, Rslot));
+}
+
+// aload: loads the reference local selected by the bytecode operand into
+// Z_tos.
+void TemplateTable::aload() {
+  transition(vtos, atos);
+
+  const Register Rslot = Z_R1_scratch;
+  locals_index(Rslot);
+  __ mem2reg_opt(Z_tos, aaddress(_masm, Rslot));
+}
+
+// Wide variant of locals_index(): loads the unsigned 2-byte local index at
+// bcp + 2 into reg and then complements it (z_lcgr).
+void TemplateTable::locals_index_wide(Register reg) {
+ __ get_2_byte_integer_at_bcp(reg, 2, InterpreterMacroAssembler::Unsigned);
+ __ z_lcgr(reg);
+}
+
+// wide iload: like iload, but with a 2-byte local index.
+void TemplateTable::wide_iload() {
+  transition(vtos, itos);
+
+  const Register Rslot = Z_tmp_1;
+  locals_index_wide(Rslot);
+  __ mem2reg_opt(Z_tos, iaddress(_masm, Rslot), false);
+}
+
+// wide lload: like lload, but with a 2-byte local index.
+void TemplateTable::wide_lload() {
+  transition(vtos, ltos);
+
+  const Register Rslot = Z_tmp_1;
+  locals_index_wide(Rslot);
+  __ mem2reg_opt(Z_tos, laddress(_masm, Rslot));
+}
+
+// wide fload: like fload, but with a 2-byte local index.
+void TemplateTable::wide_fload() {
+  transition(vtos, ftos);
+
+  const Register Rslot = Z_tmp_1;
+  locals_index_wide(Rslot);
+  __ mem2freg_opt(Z_ftos, faddress(_masm, Rslot), false);
+}
+
+// wide dload: like dload, but with a 2-byte local index.
+void TemplateTable::wide_dload() {
+  transition(vtos, dtos);
+
+  const Register Rslot = Z_tmp_1;
+  locals_index_wide(Rslot);
+  __ mem2freg_opt(Z_ftos, daddress(_masm, Rslot));
+}
+
+// wide aload: like aload, but with a 2-byte local index.
+void TemplateTable::wide_aload() {
+  transition(vtos, atos);
+
+  const Register Rslot = Z_tmp_1;
+  locals_index_wide(Rslot);
+  __ mem2reg_opt(Z_tos, aaddress(_masm, Rslot));
+}
+
+// Emits the null check and bounds check for an array access.
+//   array - register holding the array reference.
+//   index - register holding the int index; sign-extended in place and, if
+//           shift is non-zero, finally scaled in place by 2^shift.
+// On a bounds violation the generated code copies index to Z_ARG3 and array
+// to Z_ARG2 and branches to the ArrayIndexOutOfBoundsException entry.
+// Z_R1_scratch is used as branch target register and must differ from both
+// arguments.
+void TemplateTable::index_check(Register array, Register index, unsigned int shift) {
+  assert_different_registers(Z_R1_scratch, array, index);
+
+  const int length_disp = arrayOopDesc::length_offset_in_bytes();
+
+  // Check array.
+  __ null_check(array, Z_R0_scratch, length_disp);
+
+  // Sign extend index for use by indexed load.
+  __ z_lgfr(index, index);
+
+  // Check index: the compare is unsigned, so negative indices fail as well.
+  Label in_bounds;
+  __ z_cl(index, Address(array, length_disp));
+  __ z_brl(in_bounds);
+
+  // Out of bounds. See generate_ArrayIndexOutOfBounds_handler() for the
+  // register convention; the array is handed over to allow more detailed
+  // exceptions.
+  __ lgr_if_needed(Z_ARG3, index);
+  __ lgr_if_needed(Z_ARG2, array);
+  __ load_absolute_address(Z_R1_scratch,
+                           Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+  __ z_bcr(Assembler::bcondAlways, Z_R1_scratch);
+
+  __ bind(in_bounds);
+
+  if (shift != 0) {
+    __ z_sllg(index, index, shift);
+  }
+}
+
+// iaload: index arrives in Z_tos, array is popped; the int element is left
+// in Z_tos.
+void TemplateTable::iaload() {
+  transition(itos, itos);
+
+  const Register Rarray = Z_tmp_1;
+  const Register Rindex = Z_tos;
+
+  __ pop_ptr(Rarray);
+  // index_check() may copy the index into Z_ARG3 on its failing path.
+  index_check(Rarray, Rindex, LogBytesPerInt);
+
+  // Load the value.
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_INT);
+  __ mem2reg_opt(Z_tos, Address(Rarray, Rindex, elements_base), false);
+}
+
+// laload: index arrives in Z_tos, array is popped; the long element is left
+// in Z_tos.
+void TemplateTable::laload() {
+  transition(itos, ltos);
+
+  const Register Rarray = Z_tmp_2;
+  const Register Rindex = Z_tos;
+
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerLong);
+
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_LONG);
+  __ mem2reg_opt(Z_tos, Address(Rarray, Rindex, elements_base));
+}
+
+// faload: index arrives in Z_tos, array is popped; the float element is
+// left in Z_ftos.
+void TemplateTable::faload() {
+  transition(itos, ftos);
+
+  const Register Rarray = Z_tmp_2;
+  const Register Rindex = Z_tos;
+
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerInt);
+
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_FLOAT);
+  __ mem2freg_opt(Z_ftos, Address(Rarray, Rindex, elements_base), false);
+}
+
+// daload: index arrives in Z_tos, array is popped; the double element is
+// left in Z_ftos.
+void TemplateTable::daload() {
+  transition(itos, dtos);
+
+  const Register Rarray = Z_tmp_2;
+  const Register Rindex = Z_tos;
+
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerLong);
+
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_DOUBLE);
+  __ mem2freg_opt(Z_ftos, Address(Rarray, Rindex, elements_base));
+}
+
+// aaload: index arrives in Z_tos, array is popped; the element oop is left
+// in Z_tos and verified.
+void TemplateTable::aaload() {
+  transition(itos, atos);
+
+  const Register Rarray = Z_tmp_1;
+  const Register Rindex = Z_tos;
+  unsigned const int shift = LogBytesPerHeapOop;
+
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, shift);
+
+  // Now load array element.
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
+  __ load_heap_oop(Z_tos, Address(Rarray, Rindex, elements_base));
+  __ verify_oop(Z_tos);
+}
+
+// baload: index arrives in Z_tos, array is popped; the byte element is left
+// in Z_tos. Byte elements need no index scaling (shift 0).
+void TemplateTable::baload() {
+  transition(itos, itos);
+
+  const Register Rarray = Z_tmp_1;
+  const Register Rindex = Z_tos;
+
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, 0);
+
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_BYTE);
+  __ z_lb(Z_tos, Address(Rarray, Rindex, elements_base));
+}
+
+// caload: index arrives in Z_tos, array is popped; the char element is left
+// in Z_tos.
+void TemplateTable::caload() {
+  transition(itos, itos);
+
+  const Register Rarray = Z_tmp_2;
+  const Register Rindex = Z_tos;
+
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerShort);
+
+  // Load into 64 bits, works on all CPUs.
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+  __ z_llgh(Z_tos, Address(Rarray, Rindex, elements_base));
+}
+
+// Fused frequent pair "iload; caload": the index comes straight from a
+// local instead of the expression stack.
+void TemplateTable::fast_icaload() {
+  transition(vtos, itos);
+
+  const Register Rslot  = Z_R1_scratch;
+  const Register Rindex = Z_ARG3;
+  const Register Rarray = Z_tmp_2;
+
+  // Load index out of locals.
+  locals_index(Rslot);
+  __ mem2reg_opt(Rindex, iaddress(_masm, Rslot), false);
+
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerShort);
+
+  // Load into 64 bits, works on all CPUs.
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+  __ z_llgh(Z_tos, Address(Rarray, Rindex, elements_base));
+}
+
+// saload: index arrives in Z_tos, array is popped; the short element is
+// left in Z_tos.
+void TemplateTable::saload() {
+  transition(itos, itos);
+
+  const Register Rarray = Z_tmp_2;
+  const Register Rindex = Z_tos;
+
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerShort);
+
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_SHORT);
+  __ z_lh(Z_tos, Address(Rarray, Rindex, elements_base));
+}
+
+// iload_<n>: loads int local n (index known at code generation time) into
+// Z_tos.
+void TemplateTable::iload(int n) {
+ transition(vtos, itos);
+ __ z_ly(Z_tos, iaddress(n));
+}
+
+// lload_<n>: loads long local n (index known at code generation time) into
+// Z_tos.
+void TemplateTable::lload(int n) {
+ transition(vtos, ltos);
+ __ z_lg(Z_tos, laddress(n));
+}
+
+// fload_<n>: loads float local n (index known at code generation time) into
+// Z_ftos.
+void TemplateTable::fload(int n) {
+ transition(vtos, ftos);
+ __ mem2freg_opt(Z_ftos, faddress(n), false);
+}
+
+// dload_<n>: loads double local n (index known at code generation time)
+// into Z_ftos.
+void TemplateTable::dload(int n) {
+ transition(vtos, dtos);
+ __ mem2freg_opt(Z_ftos, daddress(n));
+}
+
+// aload_<n>: loads reference local n (index known at code generation time)
+// into Z_tos.
+void TemplateTable::aload(int n) {
+ transition(vtos, atos);
+ __ mem2reg_opt(Z_tos, aaddress(n));
+}
+
+// aload_0: forwards to aload_0_internal() with its default rewrite control.
+void TemplateTable::aload_0() {
+ aload_0_internal();
+}
+
+// nofast_aload_0: aload_0 variant that never rewrites itself.
+void TemplateTable::nofast_aload_0() {
+ aload_0_internal(may_not_rewrite);
+}
+
+// Common implementation of aload_0. Without rewriting (RewriteFrequentPairs
+// off or rc != may_rewrite) this is a plain aload(0). Otherwise the current
+// bytecode is patched depending on the bytecode that follows:
+//   next == _getfield       -> leave unchanged for now
+//   next == _fast_igetfield -> _fast_iaccess_0
+//   next == _fast_agetfield -> _fast_aaccess_0
+//   next == _fast_fgetfield -> _fast_faccess_0
+//   anything else           -> _fast_aload_0
+void TemplateTable::aload_0_internal(RewriteControl rc) {
+ transition(vtos, atos);
+
+ // According to bytecode histograms, the pairs:
+ //
+ // _aload_0, _fast_igetfield
+ // _aload_0, _fast_agetfield
+ // _aload_0, _fast_fgetfield
+ //
+ // occur frequently. If RewriteFrequentPairs is set, the (slow)
+ // _aload_0 bytecode checks if the next bytecode is either
+ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
+ // rewrites the current bytecode into a pair bytecode; otherwise it
+ // rewrites the current bytecode into _fast_aload_0 that doesn't do
+ // the pair check anymore.
+ //
+ // Note: If the next bytecode is _getfield, the rewrite must be
+ // delayed, otherwise we may miss an opportunity for a pair.
+ //
+ // Also rewrite frequent pairs
+ // aload_0, aload_1
+ // aload_0, iload_1
+ // These bytecodes with a small amount of code are most profitable
+ // to rewrite.
+ if (!(RewriteFrequentPairs && (rc == may_rewrite))) {
+ aload(0);
+ return;
+ }
+
+ NearLabel rewrite, done;
+ const Register bc = Z_ARG4;
+
+ assert(Z_R1_scratch != bc, "register damaged");
+ // Get next byte.
+ __ z_llgc(Z_R1_scratch, at_bcp(Bytecodes::length_for (Bytecodes::_aload_0)));
+
+ // Do actual aload_0.
+ aload(0);
+
+ // If _getfield then wait with rewrite.
+ __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_getfield,
+ Assembler::bcondEqual, done);
+
+ // If _fast_igetfield then rewrite to _fast_iaccess_0.
+ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0)
+ == Bytecodes::_aload_0, "fix bytecode definition");
+
+ __ load_const_optimized(bc, Bytecodes::_fast_iaccess_0);
+ __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_igetfield,
+ Assembler::bcondEqual, rewrite);
+
+ // If _fast_agetfield then rewrite to _fast_aaccess_0.
+ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0)
+ == Bytecodes::_aload_0, "fix bytecode definition");
+
+ __ load_const_optimized(bc, Bytecodes::_fast_aaccess_0);
+ __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_agetfield,
+ Assembler::bcondEqual, rewrite);
+
+ // If _fast_fgetfield then rewrite to _fast_faccess_0.
+ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0)
+ == Bytecodes::_aload_0, "fix bytecode definition");
+
+ __ load_const_optimized(bc, Bytecodes::_fast_faccess_0);
+ __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_fgetfield,
+ Assembler::bcondEqual, rewrite);
+
+ // Else rewrite to _fast_aload_0.
+ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0)
+ == Bytecodes::_aload_0, "fix bytecode definition");
+ __ load_const_optimized(bc, Bytecodes::_fast_aload_0);
+
+ // rewrite
+ // bc: fast bytecode
+ __ bind(rewrite);
+
+ // bc already holds the replacement, hence load_bc_into_bc_reg == false.
+ patch_bytecode(Bytecodes::_aload_0, bc, Z_R1_scratch, false);
+ // Reload local 0 because of the VM call inside patch_bytecode().
+ // This may trigger GC and thus change the oop.
+ aload(0);
+
+ __ bind(done);
+}
+
+// istore: stores the int in Z_tos into the local selected by the bytecode
+// operand.
+void TemplateTable::istore() {
+  transition(itos, vtos);
+
+  const Register Rslot = Z_R1_scratch;
+  locals_index(Rslot);
+  __ reg2mem_opt(Z_tos, iaddress(_masm, Rslot), false);
+}
+
+// lstore: stores the long in Z_tos into the local selected by the bytecode
+// operand.
+void TemplateTable::lstore() {
+  transition(ltos, vtos);
+
+  const Register Rslot = Z_R1_scratch;
+  locals_index(Rslot);
+  __ reg2mem_opt(Z_tos, laddress(_masm, Rslot));
+}
+
+// fstore: stores the float in Z_ftos into the local selected by the
+// bytecode operand.
+void TemplateTable::fstore() {
+  transition(ftos, vtos);
+  locals_index(Z_R1_scratch);
+  // Pass `false` to select the single-precision store, matching fload(),
+  // wide_fstore() and fastore(). Without it the default (double-width) form
+  // was used for a float slot.
+  __ freg2mem_opt(Z_ftos, faddress(_masm, Z_R1_scratch), false);
+}
+
+// dstore: stores the double in Z_ftos into the local selected by the
+// bytecode operand.
+void TemplateTable::dstore() {
+  transition(dtos, vtos);
+
+  const Register Rslot = Z_R1_scratch;
+  locals_index(Rslot);
+  __ freg2mem_opt(Z_ftos, daddress(_masm, Rslot));
+}
+
+// astore: pops a reference and stores it into the local selected by the
+// bytecode operand. The template is vtos -> vtos, so the value is popped
+// explicitly.
+void TemplateTable::astore() {
+  transition(vtos, vtos);
+
+  const Register Rslot = Z_R1_scratch;
+  __ pop_ptr(Z_tos);
+  locals_index(Rslot);
+  __ reg2mem_opt(Z_tos, aaddress(_masm, Rslot));
+}
+
+// wide istore: pops an int and stores it into the local selected by the
+// 2-byte index.
+void TemplateTable::wide_istore() {
+  transition(vtos, vtos);
+
+  const Register Rslot = Z_tmp_1;
+  __ pop_i(Z_tos);
+  locals_index_wide(Rslot);
+  __ reg2mem_opt(Z_tos, iaddress(_masm, Rslot), false);
+}
+
+// wide lstore: pops a long and stores it into the local selected by the
+// 2-byte index.
+void TemplateTable::wide_lstore() {
+  transition(vtos, vtos);
+
+  const Register Rslot = Z_tmp_1;
+  __ pop_l(Z_tos);
+  locals_index_wide(Rslot);
+  __ reg2mem_opt(Z_tos, laddress(_masm, Rslot));
+}
+
+// wide fstore: pops a float and stores it into the local selected by the
+// 2-byte index.
+void TemplateTable::wide_fstore() {
+  transition(vtos, vtos);
+
+  const Register Rslot = Z_tmp_1;
+  __ pop_f(Z_ftos);
+  locals_index_wide(Rslot);
+  __ freg2mem_opt(Z_ftos, faddress(_masm, Rslot), false);
+}
+
+// wide dstore: pops a double and stores it into the local selected by the
+// 2-byte index.
+void TemplateTable::wide_dstore() {
+  transition(vtos, vtos);
+
+  const Register Rslot = Z_tmp_1;
+  __ pop_d(Z_ftos);
+  locals_index_wide(Rslot);
+  __ freg2mem_opt(Z_ftos, daddress(_masm, Rslot));
+}
+
+// wide astore: pops a reference and stores it into the local selected by
+// the 2-byte index.
+void TemplateTable::wide_astore() {
+  transition(vtos, vtos);
+
+  const Register Rslot = Z_tmp_1;
+  __ pop_ptr(Z_tos);
+  locals_index_wide(Rslot);
+  __ reg2mem_opt(Z_tos, aaddress(_masm, Rslot));
+}
+
+// iastore: value arrives in Z_tos; index and array are popped, checked, and
+// the int is stored into the element.
+void TemplateTable::iastore() {
+  transition(itos, vtos);
+
+  const Register Rindex = Z_ARG3;  // Index_check expects index in Z_ARG3.
+  const Register Rarray = Z_tmp_1;
+
+  __ pop_i(Rindex);
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerInt);
+
+  // Store the value, which is still in Z_tos.
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_INT);
+  __ reg2mem_opt(Z_tos, Address(Rarray, Rindex, elements_base), false);
+}
+
+// lastore: value arrives in Z_tos; index and array are popped, checked, and
+// the long is stored into the element.
+void TemplateTable::lastore() {
+  transition(ltos, vtos);
+
+  const Register Rindex = Z_ARG3;  // Prefer index in Z_ARG3.
+  const Register Rarray = Z_tmp_2;
+
+  __ pop_i(Rindex);
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerLong);
+
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_LONG);
+  __ reg2mem_opt(Z_tos, Address(Rarray, Rindex, elements_base));
+}
+
+// fastore: value arrives in Z_ftos; index and array are popped, checked,
+// and the float is stored into the element.
+void TemplateTable::fastore() {
+  transition(ftos, vtos);
+
+  const Register Rindex = Z_ARG3;  // Prefer index in Z_ARG3.
+  const Register Rarray = Z_tmp_2;
+
+  __ pop_i(Rindex);
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerInt);
+
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_FLOAT);
+  __ freg2mem_opt(Z_ftos, Address(Rarray, Rindex, elements_base), false);
+}
+
+// dastore: value arrives in Z_ftos; index and array are popped, checked,
+// and the double is stored into the element.
+void TemplateTable::dastore() {
+  transition(dtos, vtos);
+
+  const Register Rindex = Z_ARG3;  // Prefer index in Z_ARG3.
+  const Register Rarray = Z_tmp_2;
+
+  __ pop_i(Rindex);
+  __ pop_ptr(Rarray);
+  index_check(Rarray, Rindex, LogBytesPerLong);
+
+  const int elements_base = arrayOopDesc::base_offset_in_bytes(T_DOUBLE);
+  __ freg2mem_opt(Z_ftos, Address(Rarray, Rindex, elements_base));
+}
+
+// aastore: store a reference into an object array.
+//
+// All three operands (array, index, value) are read from the expression stack
+// without popping; the stack is only adjusted at the very end ('done'). A
+// non-NULL value goes through a subtype check against the array's element
+// klass; on failure control is transferred to
+// Interpreter::_throw_ArrayStoreException_entry. A NULL value skips the check
+// and is profiled via profile_null_seen().
+void TemplateTable::aastore() {
+ NearLabel is_null, ok_is_subtype, done;
+ transition(vtos, vtos);
+
+ // stack: ..., array, index, value
+
+ Register Rvalue = Z_tos;
+ Register Rarray = Z_ARG2;
+ Register Rindex = Z_ARG3; // Convention for index_check().
+
+ // Peek at the operands: value is slot 0, index slot 1, array slot 2.
+ __ load_ptr(0, Rvalue);
+ __ z_l(Rindex, Address(Z_esp, Interpreter::expr_offset_in_bytes(1)));
+ __ load_ptr(2, Rarray);
+
+ unsigned const int shift = LogBytesPerHeapOop;
+ index_check(Rarray, Rindex, shift); // side effect: Rindex = Rindex << shift
+ // Rstore_addr aliases Rindex: the scaled index is overwritten by the
+ // element address computed below.
+ Register Rstore_addr = Rindex;
+ // Address where the store goes to, i.e. &(Rarray[index])
+ __ load_address(Rstore_addr, Address(Rarray, Rindex, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+
+ // do array store check - check for NULL value first.
+ __ compareU64_and_branch(Rvalue, (intptr_t)0, Assembler::bcondEqual, is_null);
+
+ Register Rsub_klass = Z_ARG4;
+ Register Rsuper_klass = Z_ARG5;
+ __ load_klass(Rsub_klass, Rvalue);
+ // Load superklass.
+ // (Klass of the array first, then its element klass.)
+ __ load_klass(Rsuper_klass, Rarray);
+ __ z_lg(Rsuper_klass, Address(Rsuper_klass, ObjArrayKlass::element_klass_offset()));
+
+ // Generate a fast subtype check. Branch to ok_is_subtype if no failure.
+ // Throw if failure.
+ Register tmp1 = Z_tmp_1;
+ Register tmp2 = Z_tmp_2;
+ __ gen_subtype_check(Rsub_klass, Rsuper_klass, tmp1, tmp2, ok_is_subtype);
+
+ // Fall through on failure.
+ // Object is in Rvalue == Z_tos.
+ assert(Rvalue == Z_tos, "that's the expected location");
+ __ load_absolute_address(tmp1, Interpreter::_throw_ArrayStoreException_entry);
+ __ z_br(tmp1);
+
+ // Come here on success.
+ __ bind(ok_is_subtype);
+
+ // Now store using the appropriate barrier.
+ // tmp3 reuses Rsub_klass (Z_ARG4); the sub klass is not referenced again
+ // after the subtype check.
+ Register tmp3 = Rsub_klass;
+ do_oop_store(_masm, Rstore_addr, (intptr_t)0/*offset*/, Rvalue, false/*val==null*/,
+ tmp3, tmp2, tmp1, _bs->kind(), true);
+ __ z_bru(done);
+
+ // Have a NULL in Rvalue.
+ __ bind(is_null);
+ __ profile_null_seen(tmp1);
+
+ // Store a NULL.
+ do_oop_store(_masm, Rstore_addr, (intptr_t)0/*offset*/, Rvalue, true/*val==null*/,
+ tmp3, tmp2, tmp1, _bs->kind(), true);
+
+ // Pop stack arguments.
+ // (array, index and value: three stack elements.)
+ __ bind(done);
+ __ add2reg(Z_esp, 3 * Interpreter::stackElementSize);
+}
+
+
+// bastore: store the low byte of Z_tos into array[index]; array and index are
+// popped from the expression stack.
+void TemplateTable::bastore() {
+  transition(itos, vtos);
+
+  // Z_tos   : value
+  // Z_ARG3  : index (preferred register for index_check())
+  // Z_tmp_2 : array
+  const Register array_reg = Z_tmp_2;
+  const Register idx_reg   = Z_ARG3;
+
+  __ pop_i(idx_reg);
+  __ pop_ptr(array_reg);
+  // Byte elements: the index needs no scaling, so the shift is 0.
+  index_check(array_reg, idx_reg, 0);
+
+  Address element(array_reg, idx_reg, arrayOopDesc::base_offset_in_bytes(T_BYTE));
+  __ z_stc(Z_tos, element);
+}
+
+// castore: store the low halfword of Z_tos into array[index]; array and index
+// are popped from the expression stack. sastore() reuses this template.
+void TemplateTable::castore() {
+  transition(itos, vtos);
+
+  // Z_tos   : value
+  // Z_ARG3  : index (preferred register for index_check())
+  // Z_tmp_2 : array
+  const Register array_reg = Z_tmp_2;
+  const Register idx_reg   = Z_ARG3;
+
+  __ pop_i(idx_reg);
+  __ pop_ptr(array_reg);
+  index_check(array_reg, idx_reg, LogBytesPerShort);
+
+  Address element(array_reg, idx_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR));
+  __ z_sth(Z_tos, element);
+}
+
+// sastore: generates exactly the code of castore() (including its
+// transition(itos, vtos)); no short-specific handling is added here.
+void TemplateTable::sastore() {
+ castore();
+}
+
+// istore_<n>: store the int in Z_tos into local n.
+void TemplateTable::istore(int n) {
+  transition(itos, vtos);
+
+  const Register value = Z_tos;
+  __ reg2mem_opt(value, iaddress(n), false);
+}
+
+// lstore_<n>: store the long in Z_tos into the local addressed by laddress(n).
+void TemplateTable::lstore(int n) {
+  transition(ltos, vtos);
+
+  const Register value = Z_tos;
+  __ reg2mem_opt(value, laddress(n));
+}
+
+// fstore_<n>: store the float in Z_ftos into the local addressed by
+// faddress(n).
+void TemplateTable::fstore(int n) {
+  transition(ftos, vtos);
+
+  const int local_no = n;
+  __ freg2mem_opt(Z_ftos, faddress(local_no), false);
+}
+
+// dstore_<n>: store the double in Z_ftos into the local addressed by
+// daddress(n).
+void TemplateTable::dstore(int n) {
+  transition(dtos, vtos);
+
+  const int local_no = n;
+  __ freg2mem_opt(Z_ftos, daddress(local_no));
+}
+
+// astore_<n>: pop a pointer-sized value and store it into the local addressed
+// by aaddress(n).
+void TemplateTable::astore(int n) {
+  transition(vtos, vtos);
+
+  const Register value = Z_tos;
+  __ pop_ptr(value);
+  __ reg2mem_opt(value, aaddress(n));
+}
+
+// pop: discard the top expression stack element by bumping Z_esp.
+void TemplateTable::pop() {
+  transition(vtos, vtos);
+
+  const int one_element = Interpreter::stackElementSize;
+  __ add2reg(Z_esp, one_element);
+}
+
+// pop2: discard the top two expression stack elements by bumping Z_esp.
+void TemplateTable::pop2() {
+  transition(vtos, vtos);
+
+  const int two_elements = 2 * Interpreter::stackElementSize;
+  __ add2reg(Z_esp, two_elements);
+}
+
+// dup:
+//   before: ..., a
+//   after : ..., a, a
+void TemplateTable::dup() {
+  transition(vtos, vtos);
+
+  const Register top = Z_tos;
+  __ load_ptr(0, top);   // read a without popping it
+  __ push_ptr(top);      // push the copy
+}
+
+// dup_x1:
+//   before: ..., a, b
+//   after : ..., b, a, b
+void TemplateTable::dup_x1() {
+  transition(vtos, vtos);
+
+  const Register val_b = Z_tos;
+  const Register val_a = Z_R0_scratch;
+
+  // Read both slots, ...
+  __ load_ptr(0, val_b);
+  __ load_ptr(1, val_a);
+  // ... swap them in place, ...
+  __ store_ptr(1, val_b);
+  __ store_ptr(0, val_a);
+  // ... and push b on top.
+  __ push_ptr(val_b);
+}
+
+// dup_x2:
+//   before: ..., a, b, c
+//   after : ..., c, a, b, c
+void TemplateTable::dup_x2() {
+  transition(vtos, vtos);
+
+  const Register t0 = Z_R0_scratch;   // holds c first, later b
+  const Register t1 = Z_R1_scratch;   // holds a
+
+  __ load_ptr(0, t0);     // t0 = c
+  __ load_ptr(2, t1);     // t1 = a
+  __ store_ptr(2, t0);    // a's slot := c
+  __ push_ptr(t0);        // push c
+  // now: ..., c, b, c, c
+  __ load_ptr(2, t0);     // t0 = b
+  __ store_ptr(2, t1);    // b's slot := a
+  // now: ..., c, a, c, c
+  __ store_ptr(1, t0);    // slot below the top := b
+  // now: ..., c, a, b, c
+}
+
+// dup2:
+//   before: ..., a, b
+//   after : ..., a, b, a, b
+void TemplateTable::dup2() {
+  transition(vtos, vtos);
+
+  const Register tmp = Z_R0_scratch;
+
+  // Slot 1 is a before the first push, and b after it.
+  __ load_ptr(1, tmp);
+  __ push_ptr(tmp);       // push a
+  __ load_ptr(1, tmp);
+  __ push_ptr(tmp);       // push b
+}
+
+// dup2_x1:
+//   before: ..., a, b, c
+//   after : ..., b, c, a, b, c
+void TemplateTable::dup2_x1() {
+  transition(vtos, vtos);
+
+  const Register t0 = Z_R0_scratch;   // holds c first, later a
+  const Register t1 = Z_R1_scratch;   // holds b
+
+  __ load_ptr(0, t0);     // t0 = c
+  __ load_ptr(1, t1);     // t1 = b
+  __ push_ptr(t1);        // push b
+  __ push_ptr(t0);        // push c
+  // now: ..., a, b, c, b, c
+  __ store_ptr(3, t0);    // old b slot := c
+  // now: ..., a, c, c, b, c
+  __ load_ptr( 4, t0);    // t0 = a
+  __ store_ptr(2, t0);    // second c slot := a
+  // now: ..., a, c, a, b, c
+  __ store_ptr(4, t1);    // old a slot := b
+  // now: ..., b, c, a, b, c
+}
+
+// dup2_x2:
+//   before: ..., a, b, c, d
+//   after : ..., c, d, a, b, c, d
+void TemplateTable::dup2_x2() {
+  transition(vtos, vtos);
+
+  const Register t0 = Z_R0_scratch;
+  const Register t1 = Z_R1_scratch;
+
+  __ load_ptr(0, t0);     // t0 = d
+  __ load_ptr(1, t1);     // t1 = c
+  __ push_ptr(t1);        // push c
+  __ push_ptr(t0);        // push d
+  // now: ..., a, b, c, d, c, d
+  __ load_ptr(4, t1);     // t1 = b
+  __ store_ptr(2, t1);    // old d slot := b
+  __ store_ptr(4, t0);    // old b slot := d
+  // now: ..., a, d, c, b, c, d
+  __ load_ptr(5, t0);     // t0 = a
+  __ load_ptr(3, t1);     // t1 = c
+  __ store_ptr(3, t0);    // old c slot := a
+  __ store_ptr(5, t1);    // old a slot := c
+  // now: ..., c, d, a, b, c, d
+}
+
+// swap:
+//   before: ..., a, b
+//   after : ..., b, a
+void TemplateTable::swap() {
+  transition(vtos, vtos);
+
+  const Register val_a = Z_R0_scratch;
+  const Register val_b = Z_R1_scratch;
+
+  // Read both slots, then write them back crosswise.
+  __ load_ptr(1, val_a);
+  __ load_ptr(0, val_b);
+  __ store_ptr(0, val_a);
+  __ store_ptr(1, val_b);
+}
+
+// Binary int operations. On entry Z_tos holds the second operand and the
+// first operand is the top stack slot; the result ends up in Z_tos.
+void TemplateTable::iop2(Operation op) {
+  transition(itos, itos);
+
+  // Shifts: Z_tos is the shift count, the value to shift comes off the stack.
+  if (op == shl || op == shr || op == ushr) {
+    const Register count = Z_tmp_1;
+
+    __ z_lr(count, Z_tos);
+    __ z_nill(count, 31);      // Only the lowest 5 bits form the shift amount.
+    __ pop_i(Z_tos);
+    if (op == shl) {
+      __ z_sll(Z_tos, 0, count);
+    } else if (op == shr) {
+      __ z_sra(Z_tos, 0, count);
+    } else {
+      __ z_srl(Z_tos, 0, count);
+    }
+    return;
+  }
+
+  // Everything else operates directly on the memory operand at the stack top
+  // and then drops that slot.
+  switch (op) {
+    case add:
+      __ z_ay(Z_tos, __ stackTop());
+      __ pop_i();
+      break;
+    case sub:
+      // Computes Z_tos - [stack top]; negating yields [stack top] - Z_tos.
+      __ z_sy(Z_tos, __ stackTop());
+      __ pop_i();
+      __ z_lcr(Z_tos, Z_tos);
+      break;
+    case mul:
+      __ z_msy(Z_tos, __ stackTop());
+      __ pop_i();
+      break;
+    case _and:
+      __ z_ny(Z_tos, __ stackTop());
+      __ pop_i();
+      break;
+    case _or:
+      __ z_oy(Z_tos, __ stackTop());
+      __ pop_i();
+      break;
+    case _xor:
+      __ z_xy(Z_tos, __ stackTop());
+      __ pop_i();
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+  }
+}
+
+// Binary long operations. On entry Z_tos holds the second operand and the
+// first operand is the top stack slot; the result ends up in Z_tos.
+void TemplateTable::lop2(Operation op) {
+  transition(ltos, ltos);
+
+  // Each case works on the memory operand at the stack top, then drops it.
+  switch (op) {
+    case add:
+      __ z_ag(Z_tos, __ stackTop());
+      __ pop_l();
+      break;
+    case sub:
+      // Computes Z_tos - [stack top]; negating yields [stack top] - Z_tos.
+      __ z_sg(Z_tos, __ stackTop());
+      __ pop_l();
+      __ z_lcgr(Z_tos, Z_tos);
+      break;
+    case mul:
+      __ z_msg(Z_tos, __ stackTop());
+      __ pop_l();
+      break;
+    case _and:
+      __ z_ng(Z_tos, __ stackTop());
+      __ pop_l();
+      break;
+    case _or:
+      __ z_og(Z_tos, __ stackTop());
+      __ pop_l();
+      break;
+    case _xor:
+      __ z_xg(Z_tos, __ stackTop());
+      __ pop_l();
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+  }
+}
+
+// Shared code of idiv and irem.
+//
+// Input:
+//   Z_tos := the divisor (dividend still on stack)
+//
+// A zero divisor transfers control to 'exception'. After the division the
+// callers in this file read the quotient from Z_tmp_2 (idiv) and the
+// remainder from Z_tmp_1 (irem).
+static void idiv_helper(InterpreterMacroAssembler * _masm, address exception) {
+  const Register pair_even = Z_tmp_1;
+  const Register pair_odd  = Z_tmp_2;
+  NearLabel divisor_nonzero;
+
+  // The divide instruction works on the register pair Z_tmp_1, Z_tmp_2.
+  assert(Z_tmp_1->successor() == Z_tmp_2, " need even/odd register pair for idiv/irem");
+
+  // The dividend comes off the expression stack.
+  __ pop_i(pair_odd);
+
+  // Divisor == 0: jump to the exception entry.
+  __ compare32_and_branch(Z_tos, (intptr_t) 0,
+                          Assembler::bcondNotEqual, divisor_nonzero);
+  __ load_absolute_address(Z_R1_scratch, exception);
+  __ z_br(Z_R1_scratch);
+
+  __ bind(divisor_nonzero);
+
+  __ z_lgfr(pair_odd, pair_odd);   // Sign extend the dividend to 64 bits.
+  __ z_dsgfr(pair_even, Z_tos);    // Divide.
+}
+
+// idiv: int division; a zero divisor goes to the ArithmeticException entry.
+void TemplateTable::idiv() {
+  transition(itos, itos);
+
+  const Register quotient = Z_tmp_2;   // where idiv_helper leaves the quotient
+
+  idiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry);
+  __ z_llgfr(Z_tos, quotient);
+}
+
+// irem: int remainder; a zero divisor goes to the ArithmeticException entry.
+void TemplateTable::irem() {
+  transition(itos, itos);
+
+  const Register remainder = Z_tmp_1;   // where idiv_helper leaves the remainder
+
+  idiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry);
+  __ z_llgfr(Z_tos, remainder);
+}
+
+// lmul: Z_tos := Z_tos * (long at the top of the expression stack).
+void TemplateTable::lmul() {
+  transition(ltos, ltos);
+
+  const Register product = Z_tos;
+
+  // The other factor is used straight from memory ...
+  __ z_msg(product, __ stackTop());
+  // ... and then removed from the stack.
+  __ pop_l();
+}
+
+// Common part of ldiv/lrem.
+//
+// Input:
+//   Z_tos := the divisor (dividend still on stack)
+//
+// Updated registers:
+//   Z_tmp_1 := pop_l() % Z_tos     ; if is_ldiv == false
+//   Z_tmp_2 := pop_l() / Z_tos     ; if is_ldiv == true
+//
+// A zero divisor transfers control to 'exception'. A divisor of -1 never
+// reaches the divide instruction: the result (-dividend for ldiv, 0 for lrem)
+// is set up beforehand and the divide is skipped.
+//
+static void ldiv_helper(InterpreterMacroAssembler * _masm, address exception, bool is_ldiv) {
+  NearLabel not_null, done;
+
+  // The divide below works on the register pair Z_tmp_1, Z_tmp_2.
+  assert(Z_tmp_1->successor() == Z_tmp_2,
+         " need even/odd register pair for ldiv/lrem");
+
+  // Get dividend.
+  __ pop_l(Z_tmp_2);
+
+  // If divisor == 0 throw exception.
+  __ compare64_and_branch(Z_tos, (intptr_t)0, Assembler::bcondNotEqual, not_null);
+  __ load_absolute_address(Z_R1_scratch, exception);
+  __ z_br(Z_R1_scratch);
+
+  __ bind(not_null);
+  // Special case divisor == -1. This covers the one overflowing combination
+  // (dividend == min_jlong, i.e. 0x8000000000000000, divisor == -1), whose
+  // quotient is not representable in 64 bits.
+  // Pre-compute the result for that case before testing the divisor.
+  if (is_ldiv) {
+    // result := Z_tmp_2 := - dividend
+    __ z_lcgr(Z_tmp_2, Z_tmp_2);
+  } else {
+    // result remainder := Z_tmp_1 := 0
+    __ clear_reg(Z_tmp_1, true, false);  // Don't set CC.
+  }
+
+  // if divisor == -1 goto done
+  __ compare64_and_branch(Z_tos, -1, Assembler::bcondEqual, done);
+  if (is_ldiv) {
+    // Restore sign, because divisor != -1.
+    __ z_lcgr(Z_tmp_2, Z_tmp_2);
+  }
+  __ z_dsgr(Z_tmp_1, Z_tos);    // Do it.
+  __ bind(done);
+}
+
+// ldiv: long division; a zero divisor goes to the ArithmeticException entry.
+void TemplateTable::ldiv() {
+  transition(ltos, ltos);
+
+  const Register quotient = Z_tmp_2;   // where ldiv_helper leaves the quotient
+
+  ldiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry, true /*is_ldiv*/);
+  __ z_lgr(Z_tos, quotient);
+}
+
+// lrem: long remainder; a zero divisor goes to the ArithmeticException entry.
+void TemplateTable::lrem() {
+  transition(ltos, ltos);
+
+  const Register remainder = Z_tmp_1;   // where ldiv_helper leaves the remainder
+
+  ldiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry, false /*is_ldiv*/);
+  __ z_lgr(Z_tos, remainder);
+}
+
+// lshl: shift the long popped from the stack left by the amount in Z_tos.
+void TemplateTable::lshl() {
+  transition(itos, ltos);
+
+  const Register operand = Z_tmp_1;   // the long to be shifted
+
+  // On entry Z_tos is the shift amount; it is replaced by the result.
+  __ pop_l(operand);
+  __ z_sllg(Z_tos, operand, 0, Z_tos);
+}
+
+// lshr: arithmetic right shift of the long popped from the stack by the
+// amount in Z_tos.
+void TemplateTable::lshr() {
+  transition(itos, ltos);
+
+  const Register operand = Z_tmp_1;   // the long to be shifted
+
+  // On entry Z_tos is the shift amount; it is replaced by the result.
+  __ pop_l(operand);
+  __ z_srag(Z_tos, operand, 0, Z_tos);
+}
+
+// lushr: logical right shift of the long popped from the stack by the amount
+// in Z_tos.
+void TemplateTable::lushr() {
+  transition(itos, ltos);
+
+  const Register operand = Z_tmp_1;   // the long to be shifted
+
+  // On entry Z_tos is the shift amount; it is replaced by the result.
+  __ pop_l(operand);
+  __ z_srlg(Z_tos, operand, 0, Z_tos);
+}
+
+// Binary float operations. On entry Z_ftos holds the second operand and the
+// first operand is the top stack slot; the result ends up in Z_ftos.
+void TemplateTable::fop2(Operation op) {
+  transition(ftos, ftos);
+
+  switch (op) {
+    case add: {
+      // Commutative: use the stack slot as memory operand, then drop it.
+      __ z_aeb(Z_ftos, __ stackTop());
+      __ pop_f();
+      break;
+    }
+    case sub: {
+      // Park the value from Z_ftos in Z_F1, fetch the stack operand into
+      // Z_ftos and subtract Z_F1 from it.
+      __ z_ler(Z_F1, Z_ftos);
+      __ pop_f(Z_ftos);
+      __ z_sebr(Z_ftos, Z_F1);
+      break;
+    }
+    case mul: {
+      // Commutative: use the stack slot as memory operand, then drop it.
+      __ z_meeb(Z_ftos, __ stackTop());
+      __ pop_f();
+      break;
+    }
+    case div: {
+      // Same operand shuffle as for sub.
+      __ z_ler(Z_F1, Z_ftos);
+      __ pop_f(Z_ftos);
+      __ z_debr(Z_ftos, Z_F1);
+      break;
+    }
+    case rem: {
+      // Handled by a runtime leaf call: divisor in Z_FARG2, dividend in Z_FARG1.
+      __ z_ler(Z_FARG2, Z_ftos);
+      __ pop_f(Z_FARG1);
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
+      // The result is expected in the right place already (Z_ftos == Z_FRET).
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+      break;
+    }
+  }
+}
+
+// Binary double operations. On entry Z_ftos holds the second operand and the
+// first operand is the top stack slot; the result ends up in Z_ftos.
+void TemplateTable::dop2(Operation op) {
+  transition(dtos, dtos);
+
+  switch (op) {
+    case add: {
+      // Commutative: use the stack slot as memory operand, then drop it.
+      __ z_adb(Z_ftos, __ stackTop());
+      __ pop_d();
+      break;
+    }
+    case sub: {
+      // Park the value from Z_ftos in Z_F1, fetch the stack operand into
+      // Z_ftos and subtract Z_F1 from it.
+      __ z_ldr(Z_F1, Z_ftos);
+      __ pop_d(Z_ftos);
+      __ z_sdbr(Z_ftos, Z_F1);
+      break;
+    }
+    case mul: {
+      // Commutative: use the stack slot as memory operand, then drop it.
+      __ z_mdb(Z_ftos, __ stackTop());
+      __ pop_d();
+      break;
+    }
+    case div: {
+      // Same operand shuffle as for sub.
+      __ z_ldr(Z_F1, Z_ftos);
+      __ pop_d(Z_ftos);
+      __ z_ddbr(Z_ftos, Z_F1);
+      break;
+    }
+    case rem: {
+      // Handled by a runtime leaf call: divisor in Z_FARG2, dividend in Z_FARG1.
+      __ z_ldr(Z_FARG2, Z_ftos);
+      __ pop_d(Z_FARG1);
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
+      // The result is expected in the right place already (Z_ftos == Z_FRET).
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+      break;
+    }
+  }
+}
+
+// ineg: negate the int in Z_tos in place.
+void TemplateTable::ineg() {
+  transition(itos, itos);
+
+  const Register value = Z_tos;
+  __ z_lcr(value);
+}
+
+// lneg: negate the long in Z_tos in place.
+void TemplateTable::lneg() {
+  transition(ltos, ltos);
+
+  const Register value = Z_tos;
+  __ z_lcgr(value);
+}
+
+// fneg: negate the float in Z_ftos in place.
+void TemplateTable::fneg() {
+  transition(ftos, ftos);
+
+  // Source and destination are the same register.
+  __ z_lcebr(Z_ftos, Z_ftos);
+}
+
+// dneg: negate the double in Z_ftos in place.
+void TemplateTable::dneg() {
+  transition(dtos, dtos);
+
+  // Source and destination are the same register.
+  __ z_lcdbr(Z_ftos, Z_ftos);
+}
+
+// iinc: add the signed byte constant at bcp + 2 to the int local selected by
+// locals_index().
+void TemplateTable::iinc() {
+  transition(vtos, vtos);
+
+  const Register sum       = Z_R0_scratch;
+  const Register local_idx = Z_R1_scratch;
+
+  // Increment constant from the bytecode stream.
+  __ z_lb(sum, at_bcp(2));
+  locals_index(local_idx);
+
+  // Add the current value of the local and write the sum back.
+  Address slot = iaddress(_masm, local_idx);
+  __ z_a(sum, slot);
+  __ reg2mem_opt(sum, slot, false);
+}
+
+// wide iinc: add the signed 2-byte constant at bcp + 4 to the int local
+// selected by locals_index_wide().
+void TemplateTable::wide_iinc() {
+  transition(vtos, vtos);
+
+  const Register delta     = Z_tmp_1;
+  const Register local_idx = Z_tmp_2;
+
+  // delta := increment
+  __ get_2_byte_integer_at_bcp(delta, 4, InterpreterMacroAssembler::Signed);
+  // local_idx := index of the local to increment
+  locals_index_wide(local_idx);
+
+  // Fetch the local into Z_tos and add the increment.
+  __ access_local_int(local_idx, Z_tos);
+  __ z_agr(Z_tos, delta);
+  // local_idx still holds the shifted index at this point, so it serves
+  // directly as the offset from Z_locals for the store.
+  __ reg2mem_opt(Z_tos, Address(Z_locals, local_idx), false);
+}
+
+
+// Generate the template for the primitive conversion bytecodes
+// (i2l, i2f, i2d, i2b, i2c, i2s, l2i, l2f, l2d, f2i, f2l, f2d, d2i, d2l, d2f).
+// The bytecode being generated is obtained from bytecode().
+//
+// Integral values live in Z_tos, floating point values in Z_ftos. For the
+// floating-point-to-integer conversions a NaN input yields 0: Z_tos is
+// cleared first and the convert instruction is skipped when the self-compare
+// reports "not ordered".
+void TemplateTable::convert() {
+ // Checking
+ // Debug builds only: derive the in/out tos states from the bytecode and
+ // verify them via transition().
+#ifdef ASSERT
+ TosState tos_in = ilgl;
+ TosState tos_out = ilgl;
+
+ // Input state, keyed by the source type of the conversion.
+ switch (bytecode()) {
+ case Bytecodes::_i2l:
+ case Bytecodes::_i2f:
+ case Bytecodes::_i2d:
+ case Bytecodes::_i2b:
+ case Bytecodes::_i2c:
+ case Bytecodes::_i2s:
+ tos_in = itos;
+ break;
+ case Bytecodes::_l2i:
+ case Bytecodes::_l2f:
+ case Bytecodes::_l2d:
+ tos_in = ltos;
+ break;
+ case Bytecodes::_f2i:
+ case Bytecodes::_f2l:
+ case Bytecodes::_f2d:
+ tos_in = ftos;
+ break;
+ case Bytecodes::_d2i:
+ case Bytecodes::_d2l:
+ case Bytecodes::_d2f:
+ tos_in = dtos;
+ break;
+ default :
+ ShouldNotReachHere();
+ }
+ // Output state, keyed by the destination type of the conversion.
+ switch (bytecode()) {
+ case Bytecodes::_l2i:
+ case Bytecodes::_f2i:
+ case Bytecodes::_d2i:
+ case Bytecodes::_i2b:
+ case Bytecodes::_i2c:
+ case Bytecodes::_i2s:
+ tos_out = itos;
+ break;
+ case Bytecodes::_i2l:
+ case Bytecodes::_f2l:
+ case Bytecodes::_d2l:
+ tos_out = ltos;
+ break;
+ case Bytecodes::_i2f:
+ case Bytecodes::_l2f:
+ case Bytecodes::_d2f:
+ tos_out = ftos;
+ break;
+ case Bytecodes::_i2d:
+ case Bytecodes::_l2d:
+ case Bytecodes::_f2d:
+ tos_out = dtos;
+ break;
+ default :
+ ShouldNotReachHere();
+ }
+
+ transition(tos_in, tos_out);
+#endif // ASSERT
+
+ // Conversion
+ // Most cases return directly. Only the f2i/f2l/d2i/d2l cases leave the
+ // switch via break, because they branch to 'done', which is bound after it.
+ Label done;
+ switch (bytecode()) {
+ case Bytecodes::_i2l:
+ __ z_lgfr(Z_tos, Z_tos);
+ return;
+ case Bytecodes::_i2f:
+ __ z_cefbr(Z_ftos, Z_tos);
+ return;
+ case Bytecodes::_i2d:
+ __ z_cdfbr(Z_ftos, Z_tos);
+ return;
+ case Bytecodes::_i2b:
+ // Sign extend least significant byte.
+ __ move_reg_if_needed(Z_tos, T_BYTE, Z_tos, T_INT);
+ return;
+ case Bytecodes::_i2c:
+ // Zero extend 2 least significant bytes.
+ __ move_reg_if_needed(Z_tos, T_CHAR, Z_tos, T_INT);
+ return;
+ case Bytecodes::_i2s:
+ // Sign extend 2 least significant bytes.
+ __ move_reg_if_needed(Z_tos, T_SHORT, Z_tos, T_INT);
+ return;
+ case Bytecodes::_l2i:
+ // Sign-extend not needed here, upper 4 bytes of int value in register are ignored.
+ return;
+ case Bytecodes::_l2f:
+ __ z_cegbr(Z_ftos, Z_tos);
+ return;
+ case Bytecodes::_l2d:
+ __ z_cdgbr(Z_ftos, Z_tos);
+ return;
+ case Bytecodes::_f2i:
+ case Bytecodes::_f2l:
+ // Preset the result to 0; it stays 0 if the self-compare below branches.
+ __ clear_reg(Z_tos, true, false); // Don't set CC.
+ __ z_cebr(Z_ftos, Z_ftos);
+ __ z_brno(done); // NaN -> 0
+ if (bytecode() == Bytecodes::_f2i)
+ __ z_cfebr(Z_tos, Z_ftos, Assembler::to_zero);
+ else // bytecode() == Bytecodes::_f2l
+ __ z_cgebr(Z_tos, Z_ftos, Assembler::to_zero);
+ break;
+ case Bytecodes::_f2d:
+ __ move_freg_if_needed(Z_ftos, T_DOUBLE, Z_ftos, T_FLOAT);
+ return;
+ case Bytecodes::_d2i:
+ case Bytecodes::_d2l:
+ // Preset the result to 0; it stays 0 if the self-compare below branches.
+ __ clear_reg(Z_tos, true, false); // Don't set CC.
+ __ z_cdbr(Z_ftos, Z_ftos);
+ __ z_brno(done); // NaN -> 0
+ if (bytecode() == Bytecodes::_d2i)
+ __ z_cfdbr(Z_tos, Z_ftos, Assembler::to_zero);
+ else // Bytecodes::_d2l
+ __ z_cgdbr(Z_tos, Z_ftos, Assembler::to_zero);
+ break;
+ case Bytecodes::_d2f:
+ __ move_freg_if_needed(Z_ftos, T_FLOAT, Z_ftos, T_DOUBLE);
+ return;
+ default:
+ ShouldNotReachHere();
+ }
+ __ bind(done);
+}
+
+// lcmp: compare value 1 (popped from the stack) with value 2 (Z_tos) and
+// leave -1, 0 or 1 in Z_tos for less, equal or greater.
+void TemplateTable::lcmp() {
+  transition(ltos, itos);
+
+  const Register lhs  = Z_R0_scratch;   // value 1; reused for the constant 1
+  const Register less = Z_R1_scratch;   // constant -1 (load-on-condition path)
+  Label finished;
+
+  if (!VM_Version::has_LoadStoreConditional()) {
+    // Branching variant.
+    __ pop_l(lhs);
+    __ z_cgr(lhs, Z_tos);      // The CC of this compare must survive below.
+
+    __ z_lghi(Z_tos, 0);       // equal
+    __ z_bre(finished);
+
+    __ z_lghi(Z_tos, 1);       // greater
+    __ z_brh(finished);
+
+    __ z_lghi(Z_tos, -1);      // less
+  } else {
+    // Branch-free variant: preset 0, then conditionally replace it.
+    __ pop_l(lhs);
+    __ z_lghi(less, -1);
+    __ z_cgr(lhs, Z_tos);      // The CC of this compare must survive below.
+    __ z_lghi(lhs, 1);
+    __ z_lghi(Z_tos, 0);
+
+    __ z_locgr(Z_tos, lhs,  Assembler::bcondHigh);
+    __ z_locgr(Z_tos, less, Assembler::bcondLow);
+  }
+
+  __ bind(finished);
+}
+
+
+// Shared code of the float/double compare bytecodes.
+//
+//   is_float         - true: operands are floats, false: doubles
+//   unordered_result - 1: an unordered compare yields 1,
+//                      anything else: an unordered compare yields -1
+//
+// Value 1 is popped into Z_FARG2 and compared with value 2 in Z_ftos; the
+// result (-1, 0 or 1) is left in Z_tos.
+void TemplateTable::float_cmp(bool is_float, int unordered_result) {
+  const bool unordered_is_one = (unordered_result == 1);
+  Label finished;
+
+  if (!is_float) {
+    __ pop_d(Z_FARG2);
+    __ z_cdbr(Z_FARG2, Z_ftos);
+  } else {
+    __ pop_f(Z_FARG2);
+    __ z_cebr(Z_FARG2, Z_ftos);
+  }
+
+  if (VM_Version::has_LoadStoreConditional()) {
+    // Branch-free variant: preset 0, then conditionally replace it.
+    const Register plus_one  = Z_R0_scratch;
+    const Register minus_one = Z_R1_scratch;
+
+    __ z_lghi(minus_one, -1);
+    __ z_lghi(plus_one, 1);
+    __ z_lghi(Z_tos, 0);
+    __ z_locgr(Z_tos, plus_one,
+               unordered_is_one ? Assembler::bcondHighOrNotOrdered : Assembler::bcondHigh);
+    __ z_locgr(Z_tos, minus_one,
+               unordered_is_one ? Assembler::bcondLow : Assembler::bcondLowOrNotOrdered);
+    return;
+  }
+
+  // Branching variant.
+  // Z_FARG2 == Z_ftos
+  __ clear_reg(Z_tos, false, false);
+  __ z_bre(finished);
+
+  // Z_FARG2 > Z_ftos (or unordered, if that maps to 1)
+  __ z_lhi(Z_tos, 1);
+  __ z_brc(unordered_is_one ? Assembler::bcondHighOrNotOrdered : Assembler::bcondHigh, finished);
+
+  // Z_FARG2 < Z_ftos (or unordered, if that maps to -1)
+  __ z_lhi(Z_tos, -1);
+
+  __ bind(finished);
+}
+
+// Emit the code for a taken branch. The conditional-branch templates in this
+// file use it as branch(false, false).
+//
+//   is_jsr  - true: the return bci is computed and pushed, then the target
+//             is dispatched; no backedge counting is done on that path.
+//   is_wide - true: the displacement is a 4-byte value at bcp + 1,
+//             false: a signed 2-byte value at bcp + 1.
+//
+// For normal branches with UseLoopCounter, backward branches (disp <= 0) bump
+// a backedge counter; on overflow with UseOnStackReplacement the out-of-line
+// code calls InterpreterRuntime::frequency_counter_overflow and, if an in-use
+// OSR nmethod is returned, migrates the frame and jumps to its OSR entry.
+void TemplateTable::branch(bool is_jsr, bool is_wide) {
+  const Register   bumped_count = Z_tmp_1;
+  const Register   method       = Z_tmp_2;
+  const Register   m_counters   = Z_R1_scratch;
+  const Register   mdo          = Z_tos;
+
+  BLOCK_COMMENT("TemplateTable::branch {");
+  __ get_method(method);
+  __ profile_taken_branch(mdo, bumped_count);
+
+  const ByteSize ctr_offset = InvocationCounter::counter_offset();
+  const ByteSize be_offset  = MethodCounters::backedge_counter_offset() + ctr_offset;
+
+  // Get (wide) offset to disp.
+  const Register disp = Z_ARG5;
+  if (is_wide) {
+    __ get_4_byte_integer_at_bcp(disp, 1);
+  } else {
+    __ get_2_byte_integer_at_bcp(disp, 1, InterpreterMacroAssembler::Signed);
+  }
+
+  // Handle all the JSR stuff here, then exit.
+  // It's much shorter and cleaner than intermingling with the
+  // non-JSR normal-branch stuff occurring below.
+  if (is_jsr) {
+    // Compute return address as bci in Z_tos.
+    // (bcp - ConstMethod* - codes_offset is the bci of this bytecode; the
+    // added 3 resp. 5 is the length of the jsr resp. wide jsr instruction.)
+    __ z_lgr(Z_R1_scratch, Z_bcp);
+    __ z_sg(Z_R1_scratch, Address(method, Method::const_offset()));
+    __ add2reg(Z_tos, (is_wide ? 5 : 3) - in_bytes(ConstMethod::codes_offset()), Z_R1_scratch);
+
+    // Bump bcp to target of JSR.
+    __ z_agr(Z_bcp, disp);
+    // Push return address for "ret" on stack.
+    __ push_ptr(Z_tos);
+    // And away we go!
+    __ dispatch_next(vtos);
+    return;
+  }
+
+  // Normal (non-jsr) branch handling.
+
+  // Bump bytecode pointer by displacement (take the branch).
+  __ z_agr(Z_bcp, disp);
+
+  assert(UseLoopCounter || !UseOnStackReplacement,
+         "on-stack-replacement requires loop counters");
+
+  NearLabel backedge_counter_overflow;
+  NearLabel profile_method;
+  NearLabel dispatch;
+  int       increment = InvocationCounter::count_increment;
+
+  if (UseLoopCounter) {
+    // Increment backedge counter for backward branches.
+    // disp: target offset
+    // Z_bcp: target bcp
+    // Z_locals: locals pointer
+    //
+    // Count only if backward branch.
+    __ compare32_and_branch(disp, (intptr_t)0, Assembler::bcondHigh, dispatch);
+
+    if (TieredCompilation) {
+      Label noCounters;
+
+      if (ProfileInterpreter) {
+        NearLabel no_mdo;
+
+        // Are we profiling?
+        __ load_and_test_long(mdo, Address(method, Method::method_data_offset()));
+        __ branch_optimized(Assembler::bcondZero, no_mdo);
+
+        // Increment the MDO backedge counter.
+        const Address mdo_backedge_counter(mdo, MethodData::backedge_counter_offset() + InvocationCounter::counter_offset());
+
+        const Address mask(mdo, MethodData::backedge_mask_offset());
+        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
+                                   Z_ARG2, false, Assembler::bcondZero,
+                                   UseOnStackReplacement ? &backedge_counter_overflow : NULL);
+        __ z_bru(dispatch);
+        __ bind(no_mdo);
+      }
+
+      // Increment backedge counter in MethodCounters*.
+      __ get_method_counters(method, m_counters, noCounters);
+      const Address mask(m_counters, MethodCounters::backedge_mask_offset());
+      __ increment_mask_and_jump(Address(m_counters, be_offset),
+                                 increment, mask,
+                                 Z_ARG2, false, Assembler::bcondZero,
+                                 UseOnStackReplacement ? &backedge_counter_overflow : NULL);
+      __ bind(noCounters);
+    } else {
+      Register counter = Z_tos;
+      Label    noCounters;
+      // Get address of MethodCounters object.
+      __ get_method_counters(method, m_counters, noCounters);
+      // Increment backedge counter.
+      __ increment_backedge_counter(m_counters, counter);
+
+      if (ProfileInterpreter) {
+        // Test to see if we should create a method data obj.
+        __ z_cl(counter, Address(m_counters, MethodCounters::interpreter_profile_limit_offset()));
+        __ z_brl(dispatch);
+
+        // If no method data exists, go to profile method.
+        __ test_method_data_pointer(Z_ARG4/*result unused*/, profile_method);
+
+        if (UseOnStackReplacement) {
+          // Check for overflow against 'bumped_count' which is the MDO taken count.
+          __ z_cl(bumped_count, Address(m_counters, MethodCounters::interpreter_backward_branch_limit_offset()));
+          __ z_brl(dispatch);
+
+          // When ProfileInterpreter is on, the backedge_count comes
+          // from the method data object, whose value does not get reset
+          // on the call to frequency_counter_overflow(). To avoid
+          // excessive calls to the overflow routine while the method is
+          // being compiled, add a second test to make sure the overflow
+          // function is called only once every overflow_frequency.
+          const int overflow_frequency = 1024;
+          __ and_imm(bumped_count, overflow_frequency - 1);
+          __ z_brz(backedge_counter_overflow);
+
+        }
+      } else {
+        if (UseOnStackReplacement) {
+          // Check for overflow against 'counter', which is the sum of the
+          // counters.
+          __ z_cl(counter, Address(m_counters, MethodCounters::interpreter_backward_branch_limit_offset()));
+          __ z_brh(backedge_counter_overflow);
+        }
+      }
+      __ bind(noCounters);
+    }
+
+    __ bind(dispatch);
+  }
+
+  // Pre-load the next target bytecode into Z_bytecode.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t) 0));
+
+  // Continue with the bytecode @ target.
+  // Z_tos:      Return bci for jsr's, unused otherwise.
+  // Z_bytecode: target bytecode
+  // Z_bcp:      target bcp
+  __ dispatch_only(vtos);
+
+  // Out-of-line code runtime calls.
+  if (UseLoopCounter) {
+    if (ProfileInterpreter) {
+      // Out-of-line code to allocate method data oop.
+      __ bind(profile_method);
+
+      __ call_VM(noreg,
+                 CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+      __ z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t) 0));  // Restore target bytecode.
+      __ set_method_data_pointer_for_bcp();
+      __ z_bru(dispatch);
+    }
+
+    if (UseOnStackReplacement) {
+
+      // backedge counter overflow
+      __ bind(backedge_counter_overflow);
+
+      __ z_lcgr(Z_ARG2, disp); // Z_ARG2 := -disp
+      __ z_agr(Z_ARG2, Z_bcp); // Z_ARG2 := branch target bcp - disp == branch bcp
+      __ call_VM(noreg,
+                 CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow),
+                 Z_ARG2);
+
+      // Z_RET: osr nmethod (osr ok) or NULL (osr not possible).
+      __ compare64_and_branch(Z_RET, (intptr_t) 0, Assembler::bcondEqual, dispatch);
+
+      // Nmethod may have been invalidated (VM may block upon call_VM return).
+      __ z_cliy(nmethod::state_offset(), Z_RET, nmethod::in_use);
+      __ z_brne(dispatch);
+
+      // Migrate the interpreter frame off of the stack.
+
+      __ z_lgr(Z_tmp_1, Z_RET); // Save the nmethod.
+
+      call_VM(noreg,
+              CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
+
+      // Z_RET is OSR buffer, move it to expected parameter location.
+      __ lgr_if_needed(Z_ARG1, Z_RET);
+
+      // Pop the interpreter frame ...
+      __ pop_interpreter_frame(Z_R14, Z_ARG2/*tmp1*/, Z_ARG3/*tmp2*/);
+
+      // ... and begin the OSR nmethod.
+      __ z_lg(Z_R1_scratch, Address(Z_tmp_1, nmethod::osr_entry_point_offset()));
+      __ z_br(Z_R1_scratch);
+    }
+  }
+  BLOCK_COMMENT("} TemplateTable::branch");
+}
+
+// if<cond>: compare the int in Z_tos with 0 and branch if 'cc' holds.
+void TemplateTable::if_0cmp(Condition cc) {
+  transition(itos, vtos);
+
+  // The taken path is emitted inline because it is assumed to be the more
+  // frequent one (loops use backward branches); the inverted condition skips
+  // over it.
+  NearLabel fall_through;
+  __ compare32_and_branch(Z_tos, (intptr_t) 0, j_not(cc), fall_through);
+  branch(false, false);
+
+  __ bind(fall_through);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+// if_icmp<cond>: compare the int popped from the stack with the int in Z_tos
+// and branch if 'cc' holds.
+void TemplateTable::if_icmp(Condition cc) {
+  transition(itos, vtos);
+
+  const Register lhs = Z_R0_scratch;
+
+  // The taken path is emitted inline because it is assumed to be the more
+  // frequent one (loops use backward branches); the inverted condition skips
+  // over it.
+  NearLabel fall_through;
+  __ pop_i(lhs);
+  __ compare32_and_branch(lhs, Z_tos, j_not(cc), fall_through);
+  branch(false, false);
+
+  __ bind(fall_through);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+// ifnull/ifnonnull: compare the reference in Z_tos with 0 and branch if 'cc'
+// holds.
+void TemplateTable::if_nullcmp(Condition cc) {
+  transition(atos, vtos);
+
+  // The taken path is emitted inline because it is assumed to be the more
+  // frequent one (loops use backward branches); the inverted condition skips
+  // over it.
+  NearLabel fall_through;
+  __ compare64_and_branch(Z_tos, (intptr_t) 0, j_not(cc), fall_through);
+  branch(false, false);
+
+  __ bind(fall_through);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+// if_acmp<cond>: compare the reference popped from the stack with the
+// reference in Z_tos and branch if 'cc' holds.
+void TemplateTable::if_acmp(Condition cc) {
+  transition(atos, vtos);
+
+  const Register other = Z_ARG2;
+
+  // The taken path is emitted inline because it is assumed to be the more
+  // frequent one (loops use backward branches); the inverted condition skips
+  // over it.
+  NearLabel fall_through;
+  __ pop_ptr(other);
+  __ verify_oop(other);
+  __ verify_oop(Z_tos);
+  __ compareU64_and_branch(Z_tos, other, j_not(cc), fall_through);
+  branch(false, false);
+
+  __ bind(fall_through);
+  // Unlike the other if_* templates this one hands Z_ARG3 to the profiling
+  // helper.
+  __ profile_not_taken_branch(Z_ARG3);
+}
+
+// ret: continue at the return bci stored in the local selected by
+// locals_index().
+void TemplateTable::ret() {
+  transition(vtos, vtos);
+
+  const Register bci          = Z_tmp_1;        // local index first, then the bci
+  const Register method       = Z_tos;
+  const Register const_method = Z_R1_scratch;
+
+  locals_index(bci);
+  // Fetch the return bci from the local. Must load 64 bits.
+  __ mem2reg_opt(bci, iaddress(_masm, bci));
+  __ profile_ret(bci, Z_tmp_2);
+
+  // bcp := ConstMethod* + codes_offset + bci
+  __ get_method(method);
+  __ mem2reg_opt(const_method, Address(method, Method::const_offset()));
+  __ load_address(Z_bcp, Address(const_method, bci, ConstMethod::codes_offset()));
+  __ dispatch_next(vtos);
+}
+
+// wide ret: continue at the return bci stored in the local selected by
+// locals_index_wide().
+void TemplateTable::wide_ret() {
+  transition(vtos, vtos);
+
+  const Register bci          = Z_tmp_1;        // local index first, then the bci
+  const Register method       = Z_tos;
+  const Register const_method = Z_R1_scratch;
+
+  locals_index_wide(bci);
+  // Fetch the return bci from the local.
+  __ mem2reg_opt(bci, aaddress(_masm, bci));
+  __ profile_ret(bci, Z_tmp_2);
+
+  // bcp := ConstMethod* + codes_offset + bci
+  __ get_method(method);
+  __ mem2reg_opt(const_method, Address(method, Method::const_offset()));
+  __ load_address(Z_bcp, Address(const_method, bci, ConstMethod::codes_offset()));
+  __ dispatch_next(vtos);
+}
+
+// tableswitch: dispatch on the int key in Z_tos.
+//
+// Operand layout relative to the aligned bcp, as read below:
+//   +0              default displacement
+//   +BytesPerInt    low
+//   +2*BytesPerInt  high
+//   +3*BytesPerInt  displacement table, one int per key in [low, high]
+void TemplateTable::tableswitch () {
+  transition(itos, vtos);
+
+  const Register aligned_bcp  = Z_ARG5;
+  const Register lo           = Z_tmp_1;
+  const Register hi           = Z_tmp_2;
+  const Register entry_offset = Z_ARG3;
+  const Register target_disp  = Z_tmp_2;   // shares the register with 'hi'
+  NearLabel use_default, do_dispatch;
+
+  // Round bcp up to the next int boundary.
+  __ load_address(aligned_bcp, at_bcp(BytesPerInt));
+  __ z_nill(aligned_bcp, (-BytesPerInt) & 0xffff);
+
+  // 'lo' is loaded sign extended to 64 bits because it takes part in the
+  // address calculation.
+  __ mem2reg_signed_opt(lo, Address(aligned_bcp, BytesPerInt));
+  __ mem2reg_opt(hi, Address(aligned_bcp, 2 * BytesPerInt), false);
+  // The key is sign extended for the same reason.
+  __ z_lgfr(Z_tos, Z_tos);
+
+  // Keys outside [lo, hi] take the default.
+  __ compare32_and_branch(Z_tos, lo, Assembler::bcondLow, use_default);
+  __ compare32_and_branch(Z_tos, hi, Assembler::bcondHigh, use_default);
+
+  // Table index := key - lo, scaled to a byte offset. The offset is computed
+  // before profiling because profile_switch_case kills the index in Z_tos.
+  __ z_sgr(Z_tos, lo);
+  __ z_sllg(entry_offset, Z_tos, LogBytesPerInt);
+  __ profile_switch_case(Z_tos, Z_ARG4 /*tmp for mdp*/, lo/*tmp*/, Z_bytecode/*tmp*/);
+
+  // Displacement of the matching entry, sign extended for addressing.
+  __ mem2reg_signed_opt(target_disp, Address(aligned_bcp, entry_offset, 3 * BytesPerInt));
+
+  // Common tail: load the target bytecode, advance bcp, dispatch.
+  __ bind(do_dispatch);
+
+  __ z_llgc(Z_bytecode, Address(Z_bcp, target_disp));
+  __ z_agr(Z_bcp, target_disp);
+  __ dispatch_only(vtos);
+
+  // Default: the displacement is the first int at the aligned bcp.
+  __ bind(use_default);
+
+  __ profile_switch_default(Z_tos);
+  __ mem2reg_signed_opt(target_disp, Address(aligned_bcp));
+  __ z_bru(do_dispatch);
+}
+
+// lookupswitch: no real template is generated here. The only code emitted is
+// a stop() whose message says this bytecode should have been rewritten.
+void TemplateTable::lookupswitch () {
+ transition(itos, itos);
+ // Executing this template is an error; halt with a diagnostic message.
+ __ stop("lookupswitch bytecode should have been rewritten");
+}
+
+// fast_linearswitch: dispatch on the int key in Z_tos by scanning the
+// (match, displacement) pairs linearly, starting with the last pair.
+//
+// Operand layout relative to the aligned bcp, as read below:
+//   +0              default displacement
+//   +BytesPerInt    number of pairs
+//   +2*BytesPerInt  pairs of (match, displacement), two ints each
+void TemplateTable::fast_linearswitch () {
+  transition(itos, vtos);
+
+  const Register aligned_bcp = Z_ARG5;
+  const Register pair_offset = Z_tmp_1;   // byte offset of the current pair
+  const Register target_disp = Z_tmp_2;
+  const Register pair_idx    = Z_ARG4;
+  Label scan_next, scan, match, do_dispatch;
+
+  // Round bcp up to the next int boundary.
+  __ load_address(aligned_bcp, at_bcp(BytesPerInt));
+  __ z_nill(aligned_bcp, (-BytesPerInt) & 0xffff);
+
+  // Start behind the last pair: pair count scaled to a byte offset.
+  __ mem2reg_signed_opt(pair_offset, Address(aligned_bcp, BytesPerInt));
+  __ z_sllg(pair_offset, pair_offset, LogBytesPerWord);   // index2bytes
+  __ z_bru(scan_next);
+
+  // Search loop: compare the key with the match value of the current pair.
+  __ bind(scan);
+
+  __ z_c(Z_tos, Address(aligned_bcp, pair_offset, 2 * BytesPerInt));
+  __ z_bre(match);
+
+  __ bind(scan_next);
+  __ z_aghi(pair_offset, -2 * BytesPerInt);   // Step back by one pair.
+  __ z_brnl(scan);
+
+  // Nothing matched: take the default displacement, sign extended for
+  // addressing.
+  __ profile_switch_default(Z_tos);
+  __ mem2reg_signed_opt(target_disp, Address(aligned_bcp));
+  __ z_bru(do_dispatch);
+
+  // Match: fetch the displacement stored next to the match value.
+  __ bind(match);
+  __ mem2reg_signed_opt(target_disp, Address(aligned_bcp, pair_offset, 3 * BytesPerInt));
+  // Record which case was taken.
+  __ z_srlg(pair_idx, pair_offset, LogBytesPerWord);   // bytes2index
+  __ profile_switch_case(pair_idx, Z_tos, aligned_bcp, Z_bytecode);
+
+  // Common tail: load the target bytecode, advance bcp, dispatch.
+  __ bind(do_dispatch);
+
+  __ z_llgc(Z_bytecode, Address(Z_bcp, target_disp, 0));
+  __ z_agr(Z_bcp, target_disp);
+  __ dispatch_only(vtos);
+}
+
+
+void TemplateTable::fast_binaryswitch() {
+  // Template for the binary-search switch bytecode: the int key arrives in
+  // tos, the (match, offset) pairs follow the bytecode in the code stream.
+  transition(itos, vtos);
+
+  // The emitted code implements this algorithm:
+  //
+  // int binary_search(int key, LookupswitchPair* array, int n) {
+  //   // Binary search according to "Methodik des Programmierens" by
+  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
+  //   int i = 0;
+  //   int j = n;
+  //   while (i+1 < j) {
+  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
+  //     // with      Q: for all i: 0 <= i < n: key < a[i]
+  //     // where a stands for the array and assuming that the (nonexistent)
+  //     // element a[n] is infinitely big.
+  //     int h = (i + j) >> 1;
+  //     // i < h < j
+  //     if (key < array[h].fast_match()) {
+  //       j = h;
+  //     } else {
+  //       i = h;
+  //     }
+  //   }
+  //   // R: a[i] <= key < a[i+1] or Q
+  //   // (i.e., if key is within array, i is the correct index)
+  //   return i;
+  // }
+
+  // Register assignment. The indices appear in address operands, hence
+  // all index arithmetic is done in 64 bits.
+  const Register key   = Z_tos;         // Switch key, already loaded (tosca).
+  const Register pairs = Z_tmp_1;       // 'array' of the algorithm above.
+  const Register lo    = Z_tmp_2;       // 'i'
+  const Register hi    = Z_ARG5;        // 'j'
+  const Register mid   = Z_ARG4;        // 'h'
+  const Register probe = Z_R1_scratch;  // Match value under test.
+
+  // Locate the start of the pair array and align the address.
+  __ load_address(pairs, at_bcp(3 * BytesPerInt));
+  __ z_nill(pairs, (-BytesPerInt) & 0xffff);
+
+  // lo = 0 (without setting the CC), hi = length(array).
+  __ clear_reg(lo, true, false);
+  __ mem2reg_signed_opt(hi, Address(pairs, -BytesPerInt));
+
+  // Enter the loop at its termination test.
+  Label loop_test;
+  __ z_bru(loop_test);
+
+  // Binary search loop.
+  {
+    NearLabel loop_body;
+
+    __ bind(loop_body);
+
+    // mid = (lo + hi) >> 1
+    __ add2reg_with_index(mid, 0, lo, hi);
+    __ z_srag(mid, mid, 1);
+
+    // probe = array[mid].match (index converted to a byte offset first).
+    __ z_sllg(probe, mid, LogBytesPerWord);
+    __ mem2reg_opt(probe, Address(pairs, probe), false);
+
+    NearLabel take_upper_half;
+
+    __ compare32_and_branch(key, probe, Assembler::bcondNotLow, take_upper_half);
+    // key < array[mid].match: hi = mid, then re-test.
+    __ z_lgr(hi, mid);
+    __ z_bru(loop_test);
+
+    __ bind(take_upper_half);
+
+    // key >= array[mid].match: lo = mid, fall through into the test.
+    __ z_lgr(lo, mid);
+
+    // while (lo + 1 < hi) continue searching. mid serves as temp here.
+    __ bind(loop_test);
+
+    __ add2reg(mid, 1, lo);
+    __ compare64_and_branch(mid, hi, Assembler::bcondLow, loop_body);
+  }
+
+  // The search is over; lo is the only candidate and must be checked again.
+  NearLabel no_match;
+
+  // mid is free now, so it holds the byte offset of array[lo].
+  __ z_sllg(mid, lo, LogBytesPerWord);
+  __ mem2reg_opt(probe, Address(pairs, mid), false);
+  __ compare32_and_branch(key, probe, Assembler::bcondNotEqual, no_match);
+
+  // Entry found: hi = branch offset of array[lo].
+  __ mem2reg_signed_opt(hi, Address(pairs, mid, BytesPerInt));
+  __ profile_switch_case(lo, key, pairs, Z_bytecode);
+  // Fetch the bytecode at the branch target, advance bcp, dispatch.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, hi));
+  __ z_agr(Z_bcp, hi);
+  __ dispatch_only(vtos);
+
+  // No entry matched: hi = default offset.
+  __ bind(no_match);
+
+  __ profile_switch_default(lo);
+  __ mem2reg_signed_opt(hi, Address(pairs, -2 * BytesPerInt));
+  // Fetch the bytecode at the branch target, advance bcp, dispatch.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, hi));
+  __ z_agr(Z_bcp, hi);
+  __ dispatch_only(vtos);
+}
+
+void TemplateTable::_return(TosState state) {
+  // Template for the return bytecodes. For _return_register_finalizer the
+  // receiver is registered with the runtime first if its class has a finalizer.
+  transition(state, state);
+  assert(_desc->calls_vm(),
+         "inconsistent calls_vm information"); // call in remove_activation
+
+  const bool registers_finalizer =
+    (_desc->bytecode() == Bytecodes::_return_register_finalizer);
+
+  if (registers_finalizer) {
+    assert(state == vtos, "only valid state");
+
+    const Register receiver = Z_ARG2;
+    const Register klass    = Z_ARG5;
+    Label no_finalizer;
+
+    // Load local 0 and test the JVM_ACC_HAS_FINALIZER bit of its klass.
+    __ z_lg(receiver, aaddress(0));
+    __ load_klass(klass, receiver);
+    __ testbit(Address(klass, Klass::access_flags_offset()), exact_log2(JVM_ACC_HAS_FINALIZER));
+    __ z_bfalse(no_finalizer);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), receiver);
+    __ bind(no_finalizer);
+  }
+
+  // Remove the activation, then branch to the address held in Z_R14.
+  __ remove_activation(state, Z_R14);
+  __ z_br(Z_R14);
+}
+
+// ----------------------------------------------------------------------------
+// Emits code that loads the constant pool cache and the cache entry offset
+// for the current bytecode and calls InterpreterRuntime::resolve_from_cache
+// if the bytecode recorded in the entry differs from the current one.
+//
+//   byte_no    - f1_byte or f2_byte; passed on to the bytecode lookup.
+//   Rcache     - receives the constant pool cache.
+//   cpe_offset - receives the offset of the cache entry.
+//   index_size - size of the index operand in the bytecode stream.
+//
+// Z_R1_scratch is used as temp. Z_ARG2 is overwritten on the resolve path.
+//
+// NOTE: cpe_offset is already computed as byte offset, so we must not
+//       shift it afterwards!
+void TemplateTable::resolve_cache_and_index(int byte_no,
+                                            Register Rcache,
+                                            Register cpe_offset,
+                                            size_t index_size) {
+  BLOCK_COMMENT("resolve_cache_and_index {");
+  NearLabel      resolved;
+  const Register bytecode_in_cpcache = Z_R1_scratch;
+  assert_different_registers(Rcache, cpe_offset, bytecode_in_cpcache);
+  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+
+  // The nofast variants are compared and resolved as their regular
+  // counterparts; every other bytecode is used unchanged.
+  Bytecodes::Code code = bytecode();
+  if (code == Bytecodes::_nofast_getfield) {
+    code = Bytecodes::_getfield;
+  } else if (code == Bytecodes::_nofast_putfield) {
+    code = Bytecodes::_putfield;
+  }
+
+  // Have we resolved this bytecode?
+  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, cpe_offset, bytecode_in_cpcache, byte_no, 1, index_size);
+  __ compare32_and_branch(bytecode_in_cpcache, (int)code, Assembler::bcondEqual, resolved);
+
+  // Resolve first time through.
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+  __ load_const_optimized(Z_ARG2, (int) code);
+  __ call_VM(noreg, entry, Z_ARG2);
+
+  // Reload cache and offset after the VM call.
+  __ get_cache_and_index_at_bcp(Rcache, cpe_offset, 1, index_size);
+  __ bind(resolved);
+  BLOCK_COMMENT("} resolve_cache_and_index");
+}
+
+// Emits loads of the field offset (f2) and the flags of a field's constant
+// pool cache entry. For static fields, obj additionally receives the java
+// mirror reached through the entry's f1 value.
+// The cache and index registers must be set before the call.
+// index is already a byte offset, don't shift!
+void TemplateTable::load_field_cp_cache_entry(Register obj,
+                                              Register cache,
+                                              Register index,
+                                              Register off,
+                                              Register flags,
+                                              bool is_static = false) {
+  assert_different_registers(cache, index, flags, off);
+
+  const ByteSize entries_base = ConstantPoolCache::base_offset();
+  const ByteSize f1_disp      = entries_base + ConstantPoolCacheEntry::f1_offset();
+  const ByteSize f2_disp      = entries_base + ConstantPoolCacheEntry::f2_offset();
+  const ByteSize flags_disp   = entries_base + ConstantPoolCacheEntry::flags_offset();
+
+  // Field offset.
+  __ mem2reg_opt(off, Address(cache, index, f2_disp));
+  // Flags. Must load 64 bits.
+  __ mem2reg_opt(flags, Address(cache, index, flags_disp));
+
+  if (!is_static) {
+    return;
+  }
+
+  // Static field: obj = f1, then obj = java mirror (obj is overwritten twice).
+  __ mem2reg_opt(obj, Address(cache, index, f1_disp));
+  __ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
+}
+
+// Emits code that loads, from the constant pool cache entry of the current
+// invoke bytecode, the method (f2 if byte_no == f2_byte, f1 otherwise), the
+// f2 value into itable_index (unless that is noreg), and the flags.
+// The entry is resolved first unless is_invokevfinal is set.
+// Z_ARG1 is used for the cache; flags doubles as the entry offset register
+// until the flags are loaded.
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
+                                               bool is_invokevirtual,
+                                               bool is_invokevfinal,
+                                               bool is_invokedynamic) {
+  BLOCK_COMMENT("load_invoke_cp_cache_entry {");
+
+  // Registers.
+  const Register cache      = Z_ARG1;
+  const Register cpe_offset = flags;
+
+  // Displacements of the accessed constant pool cache fields.
+  const ByteSize entries_base = ConstantPoolCache::base_offset();
+  const ByteSize method_field = (byte_no == f2_byte) ? ConstantPoolCacheEntry::f2_offset()
+                                                     : ConstantPoolCacheEntry::f1_offset();
+  const int method_disp = in_bytes(entries_base + method_field);
+  const int index_disp  = in_bytes(entries_base + ConstantPoolCacheEntry::f2_offset());
+  const int flags_disp  = in_bytes(entries_base + ConstantPoolCacheEntry::flags_offset());
+
+  assert_different_registers(method, itable_index, flags, cache);
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+
+  if (!is_invokevfinal) {
+    // Need to resolve.
+    resolve_cache_and_index(byte_no, cache, cpe_offset, is_invokedynamic ? sizeof(u4) : sizeof(u2));
+  } else {
+    // Already resolved.
+    assert(itable_index == noreg, "register not used");
+    __ get_cache_and_index_at_bcp(cache, cpe_offset, 1);
+  }
+  __ z_lg(method, Address(cache, cpe_offset, method_disp));
+
+  const bool wants_index = (itable_index != noreg);
+  if (wants_index) {
+    __ z_lg(itable_index, Address(cache, cpe_offset, index_disp));
+  }
+
+  // Load only the lower 4 bytes of the flags, zero-extended to 64 bits.
+  // Callers depend on this zero-extension!!!
+  // Attention: this overwrites cpe_offset (== flags), so it must come last.
+  __ z_llgf(flags, Address(cache, cpe_offset, flags_disp + (BytesPerLong-BytesPerInt)));
+
+  BLOCK_COMMENT("} load_invoke_cp_cache_entry");
+}
+
+// Emits the JVMTI field access notification for a field read.
+// The cache and index registers are expected to be set before the call;
+// they are reloaded after the VM call, so they hold correct values on exit.
+// Z_tos, Z_R0, Z_R1_scratch, Z_ARG2 and Z_ARG3 are written by the emitted code.
+// has_tos is not used by this implementation.
+void TemplateTable::jvmti_post_field_access(Register cache, Register index,
+                                            bool is_static, bool has_tos) {
+
+  // Nothing is generated unless field access events can be posted.
+  if (!JvmtiExport::can_post_field_access()) {
+    return;
+  }
+
+  // Check whether a field access watch has been set before taking
+  // the time to call into the VM.
+  Label no_watch;
+  assert_different_registers(cache, index, Z_tos);
+  __ load_absolute_address(Z_tos, (address)JvmtiExport::get_field_access_count_addr());
+  __ load_and_test_int(Z_R0, Address(Z_tos));
+  __ z_brz(no_watch);
+
+  // The arguments are built directly in the registers handed to the VM call.
+  const Register obj_arg   = Z_ARG2;
+  const Register entry_arg = Z_ARG3;
+
+  // Cache entry pointer = cache + base offset + entry offset.
+  // The entry offset is returned as byte offset, do not shift!
+  __ get_cache_and_index_at_bcp(entry_arg, Z_R1_scratch, 1);
+  __ add2reg_with_index(entry_arg,
+                        in_bytes(ConstantPoolCache::base_offset()),
+                        entry_arg, Z_R1_scratch);
+
+  if (!is_static) {
+    __ mem2reg_opt(obj_arg, at_tos());   // Get object pointer without popping it.
+    __ verify_oop(obj_arg);
+  } else {
+    __ clear_reg(obj_arg, true, false);  // NULL object reference. Don't set CC.
+  }
+
+  // obj_arg  : object pointer or NULL
+  // entry_arg: cache entry pointer
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
+             obj_arg, entry_arg);
+  __ get_cache_and_index_at_bcp(cache, index, 1);
+
+  __ bind(no_watch);
+}
+
+// Emits code that pops a reference from the expression stack into r,
+// null-checks it and then verifies it as an oop.
+void TemplateTable::pop_and_check_object(Register r) {
+ __ pop_ptr(r);
+ __ null_check(r); // for field access must check obj.
+ __ verify_oop(r);
+}
+
+// Common template for getfield, nofast_getfield and getstatic.
+// Resolves the constant pool cache entry, emits the JVMTI field access
+// notification, loads field offset and flags, and then dispatches through a
+// branch table that is indexed by the tos state taken from the flags. Each
+// table entry loads the field with the matching width and pushes it; for
+// non-static accesses with rc == may_rewrite it also patches the bytecode to
+// its fast variant.
+//   byte_no   - passed on to resolve_cache_and_index.
+//   is_static - true for getstatic: no object is popped, no rewriting.
+//   rc        - rewrite control (may_rewrite / may_not_rewrite).
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
+ transition(vtos, vtos);
+
+ // Note: cache, obj and bc all share Z_tmp_1.
+ const Register cache = Z_tmp_1;
+ const Register index = Z_tmp_2;
+ const Register obj = Z_tmp_1;
+ const Register off = Z_ARG2;
+ const Register flags = Z_ARG1;
+ const Register bc = Z_tmp_1; // Uses same reg as obj, so don't mix them.
+
+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
+ jvmti_post_field_access(cache, index, is_static, false);
+ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+
+ if (!is_static) {
+ // Obj is on the stack.
+ pop_and_check_object(obj);
+ }
+
+ // Displacement is 0, so any store instruction will be fine on any CPU.
+ const Address field(obj, off);
+
+ Label is_Byte, is_Bool, is_Int, is_Short, is_Char,
+ is_Long, is_Float, is_Object, is_Double;
+ Label is_badState8, is_badState9, is_badStateA, is_badStateB,
+ is_badStateC, is_badStateD, is_badStateE, is_badStateF,
+ is_badState;
+ Label branchTable, atosHandler, Done;
+ Register br_tab = Z_R1_scratch;
+ // Rewriting to the fast bytecodes is done for non-static accesses only,
+ // and only if the caller permits it.
+ bool do_rewrite = !is_static && (rc == may_rewrite);
+ bool dont_rewrite = (is_static || (rc == may_not_rewrite));
+
+ assert(do_rewrite == !dont_rewrite, "Oops, code is not fit for that");
+ // The branch table below starts with the btos entry, i.e. index 0.
+ assert(btos == 0, "change code, btos != 0");
+
+ // Calculate branch table size. Generated code size depends on ASSERT and on bytecode rewriting.
+ // NOTE(review): both preprocessor branches currently select the same size.
+#ifdef ASSERT
+ const unsigned int bsize = dont_rewrite ? BTB_MINSIZE*1 : BTB_MINSIZE*4;
+#else
+ const unsigned int bsize = dont_rewrite ? BTB_MINSIZE*1 : BTB_MINSIZE*4;
+#endif
+
+ // Calculate address of branch table entry and branch there.
+ // The tos state bits of flags are rotated into position so that flags
+ // becomes tos_state * bsize, the byte offset of the entry in the table.
+ {
+ const int bit_shift = exact_log2(bsize); // Size of each branch table entry.
+ const int r_bitpos = 63 - bit_shift;
+ const int l_bitpos = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1;
+ const int n_rotate = (bit_shift-ConstantPoolCacheEntry::tos_state_shift);
+ __ z_larl(br_tab, branchTable);
+ __ rotate_then_insert(flags, flags, l_bitpos, r_bitpos, n_rotate, true);
+ }
+ __ z_bc(Assembler::bcondAlways, 0, flags, br_tab);
+
+ __ align_address(bsize);
+ BIND(branchTable);
+
+ // The entries follow in tos state order; every entry occupies bsize bytes.
+
+ // btos
+ BTB_BEGIN(is_Byte, bsize, "getfield_or_static:is_Byte");
+ __ z_lb(Z_tos, field);
+ __ push(btos);
+ // Rewrite bytecode to be faster.
+ if (do_rewrite) {
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, Z_ARG5);
+ }
+ __ z_bru(Done);
+ BTB_END(is_Byte, bsize, "getfield_or_static:is_Byte");
+
+ // ztos
+ BTB_BEGIN(is_Bool, bsize, "getfield_or_static:is_Bool");
+ __ z_lb(Z_tos, field);
+ __ push(ztos);
+ // Rewrite bytecode to be faster.
+ if (do_rewrite) {
+ // Use btos rewriting, no truncating to t/f bit is needed for getfield.
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, Z_ARG5);
+ }
+ __ z_bru(Done);
+ BTB_END(is_Bool, bsize, "getfield_or_static:is_Bool");
+
+ // ctos
+ BTB_BEGIN(is_Char, bsize, "getfield_or_static:is_Char");
+ // Load into 64 bits, works on all CPUs.
+ __ z_llgh(Z_tos, field);
+ __ push(ctos);
+ // Rewrite bytecode to be faster.
+ if (do_rewrite) {
+ patch_bytecode(Bytecodes::_fast_cgetfield, bc, Z_ARG5);
+ }
+ __ z_bru(Done);
+ BTB_END(is_Char, bsize, "getfield_or_static:is_Char");
+
+ // stos
+ BTB_BEGIN(is_Short, bsize, "getfield_or_static:is_Short");
+ __ z_lh(Z_tos, field);
+ __ push(stos);
+ // Rewrite bytecode to be faster.
+ if (do_rewrite) {
+ patch_bytecode(Bytecodes::_fast_sgetfield, bc, Z_ARG5);
+ }
+ __ z_bru(Done);
+ BTB_END(is_Short, bsize, "getfield_or_static:is_Short");
+
+ // itos
+ BTB_BEGIN(is_Int, bsize, "getfield_or_static:is_Int");
+ __ mem2reg_opt(Z_tos, field, false);
+ __ push(itos);
+ // Rewrite bytecode to be faster.
+ if (do_rewrite) {
+ patch_bytecode(Bytecodes::_fast_igetfield, bc, Z_ARG5);
+ }
+ __ z_bru(Done);
+ BTB_END(is_Int, bsize, "getfield_or_static:is_Int");
+
+ // ltos
+ BTB_BEGIN(is_Long, bsize, "getfield_or_static:is_Long");
+ __ mem2reg_opt(Z_tos, field);
+ __ push(ltos);
+ // Rewrite bytecode to be faster.
+ if (do_rewrite) {
+ patch_bytecode(Bytecodes::_fast_lgetfield, bc, Z_ARG5);
+ }
+ __ z_bru(Done);
+ BTB_END(is_Long, bsize, "getfield_or_static:is_Long");
+
+ // ftos
+ BTB_BEGIN(is_Float, bsize, "getfield_or_static:is_Float");
+ __ mem2freg_opt(Z_ftos, field, false);
+ __ push(ftos);
+ // Rewrite bytecode to be faster.
+ if (do_rewrite) {
+ patch_bytecode(Bytecodes::_fast_fgetfield, bc, Z_ARG5);
+ }
+ __ z_bru(Done);
+ BTB_END(is_Float, bsize, "getfield_or_static:is_Float");
+
+ // dtos
+ BTB_BEGIN(is_Double, bsize, "getfield_or_static:is_Double");
+ __ mem2freg_opt(Z_ftos, field);
+ __ push(dtos);
+ // Rewrite bytecode to be faster.
+ if (do_rewrite) {
+ patch_bytecode(Bytecodes::_fast_dgetfield, bc, Z_ARG5);
+ }
+ __ z_bru(Done);
+ BTB_END(is_Double, bsize, "getfield_or_static:is_Double");
+
+ // atos
+ // Only a branch is placed in the table; the handler itself follows below.
+ BTB_BEGIN(is_Object, bsize, "getfield_or_static:is_Object");
+ __ z_bru(atosHandler);
+ BTB_END(is_Object, bsize, "getfield_or_static:is_Object");
+
+ // Bad state detection comes at no extra runtime cost.
+ // The remaining table slots trap on tos states that have no handler above.
+ BTB_BEGIN(is_badState8, bsize, "getfield_or_static:is_badState8");
+ __ z_illtrap();
+ __ z_bru(is_badState);
+ BTB_END( is_badState8, bsize, "getfield_or_static:is_badState8");
+ BTB_BEGIN(is_badState9, bsize, "getfield_or_static:is_badState9");
+ __ z_illtrap();
+ __ z_bru(is_badState);
+ BTB_END( is_badState9, bsize, "getfield_or_static:is_badState9");
+ BTB_BEGIN(is_badStateA, bsize, "getfield_or_static:is_badStateA");
+ __ z_illtrap();
+ __ z_bru(is_badState);
+ BTB_END( is_badStateA, bsize, "getfield_or_static:is_badStateA");
+ BTB_BEGIN(is_badStateB, bsize, "getfield_or_static:is_badStateB");
+ __ z_illtrap();
+ __ z_bru(is_badState);
+ BTB_END( is_badStateB, bsize, "getfield_or_static:is_badStateB");
+ BTB_BEGIN(is_badStateC, bsize, "getfield_or_static:is_badStateC");
+ __ z_illtrap();
+ __ z_bru(is_badState);
+ BTB_END( is_badStateC, bsize, "getfield_or_static:is_badStateC");
+ BTB_BEGIN(is_badStateD, bsize, "getfield_or_static:is_badStateD");
+ __ z_illtrap();
+ __ z_bru(is_badState);
+ BTB_END( is_badStateD, bsize, "getfield_or_static:is_badStateD");
+ BTB_BEGIN(is_badStateE, bsize, "getfield_or_static:is_badStateE");
+ __ z_illtrap();
+ __ z_bru(is_badState);
+ BTB_END( is_badStateE, bsize, "getfield_or_static:is_badStateE");
+ BTB_BEGIN(is_badStateF, bsize, "getfield_or_static:is_badStateF");
+ __ z_illtrap();
+ __ z_bru(is_badState);
+ BTB_END( is_badStateF, bsize, "getfield_or_static:is_badStateF");
+
+ __ align_address(64);
+ BIND(is_badState); // Do this outside branch table. Needs a lot of space.
+ {
+ // b_off/e_off record the code offsets around the stop; they are not
+ // used any further in this block.
+ unsigned int b_off = __ offset();
+ if (is_static) {
+ __ stop_static("Bad state in getstatic");
+ } else {
+ __ stop_static("Bad state in getfield");
+ }
+ unsigned int e_off = __ offset();
+ }
+
+ __ align_address(64);
+ BIND(atosHandler); // Oops are really complicated to handle.
+ // There is a lot of code generated.
+ // Therefore: generate the handler outside of branch table.
+ // There is no performance penalty. The additional branch
+ // to here is compensated for by the fallthru to "Done".
+ {
+ // b_off/e_off are not used any further in this block.
+ unsigned int b_off = __ offset();
+ __ load_heap_oop(Z_tos, field);
+ __ verify_oop(Z_tos);
+ __ push(atos);
+ if (do_rewrite) {
+ patch_bytecode(Bytecodes::_fast_agetfield, bc, Z_ARG5);
+ }
+ unsigned int e_off = __ offset();
+ }
+
+ BIND(Done);
+}
+
+void TemplateTable::getfield(int byte_no) {
+  // Instance field read; the rewrite control argument is left at its default.
+  const bool is_static = false;
+
+  BLOCK_COMMENT("getfield {");
+  getfield_or_static(byte_no, is_static);
+  BLOCK_COMMENT("} getfield");
+}
+
+void TemplateTable::nofast_getfield(int byte_no) {
+  // Instance field read that must not be rewritten to a fast bytecode.
+  const bool is_static = false;
+
+  getfield_or_static(byte_no, is_static, may_not_rewrite);
+}
+
+void TemplateTable::getstatic(int byte_no) {
+  // Static field read.
+  const bool is_static = true;
+
+  BLOCK_COMMENT("getstatic {");
+  getfield_or_static(byte_no, is_static);
+  BLOCK_COMMENT("} getstatic");
+}
+
+// Emits the JVMTI field modification notification for a field write.
+// The cache and index registers are expected to be set before the call. The
+// emitted code may destroy various registers, but cache and index are
+// reloaded after the VM call and hold correct values on exit.
+void TemplateTable::jvmti_post_field_mod(Register cache,
+                                         Register index, bool is_static) {
+  transition(vtos, vtos);
+
+  // Nothing is generated unless field modification events can be posted.
+  if (!JvmtiExport::can_post_field_modification()) {
+    return;
+  }
+
+  BLOCK_COMMENT("jvmti_post_field_mod {");
+
+  // Check whether a field modification watch has been set before
+  // taking the time to call into the VM.
+  Label no_watch;
+  const ByteSize entries_base = ConstantPoolCache::base_offset();
+  assert_different_registers(cache, index, Z_tos);
+
+  __ load_absolute_address(Z_tos, (address)JvmtiExport::get_field_modification_count_addr());
+  __ load_and_test_int(Z_R0, Address(Z_tos));
+  __ z_brz(no_watch);
+
+  // The arguments are built directly in the registers handed to the VM call.
+  const Register obj_arg   = Z_ARG2;
+  const Register entry_arg = Z_ARG3;
+  const Register value_arg = Z_ARG4;   // Serves as temp for the tos state first.
+
+  // The entry offset is returned as byte offset, do not shift!
+  __ get_cache_and_index_at_bcp(entry_arg, Z_R1_scratch, 1);
+
+  if (!is_static) {
+    // The stack holds the value on top, followed by the object. The value
+    // occupies one or two words depending on its type, so the type has to
+    // be inspected to find the object.
+    const int flags_low_word_disp =
+      in_bytes(entries_base + ConstantPoolCacheEntry::flags_offset()) +
+      (BytesPerLong - BytesPerInt);
+
+    __ mem2reg_opt(value_arg,
+                   Address(entry_arg, Z_R1_scratch, flags_low_word_disp),
+                   false);
+    __ z_srl(value_arg, ConstantPoolCacheEntry::tos_state_shift);
+    // Make sure no masking is needed for the tos state after the shift above.
+    ConstantPoolCacheEntry::verify_tos_state_shift();
+    __ mem2reg_opt(obj_arg, at_tos(1));   // Initially assume a one word jvalue.
+
+    NearLabel check_dtos, obj_loaded;
+
+    __ compareU32_and_branch(value_arg, (intptr_t) ltos,
+                             Assembler::bcondNotEqual, check_dtos);
+    __ mem2reg_opt(obj_arg, at_tos(2));   // ltos (two word jvalue)
+    __ z_bru(obj_loaded);
+
+    __ bind(check_dtos);
+    __ compareU32_and_branch(value_arg, (intptr_t)dtos, Assembler::bcondNotEqual, obj_loaded);
+    __ mem2reg_opt(obj_arg, at_tos(2));   // dtos (two word jvalue)
+
+    __ bind(obj_loaded);
+  } else {
+    // Static field: there is no object, pass NULL. Don't set CC.
+    __ clear_reg(obj_arg, true, false);
+  }
+
+  // Cache entry pointer = cache + base offset + entry offset.
+  __ add2reg_with_index(entry_arg, in_bytes(entries_base), entry_arg, Z_R1_scratch);
+
+  // Address of the value on top of the expression stack.
+  __ load_address(value_arg, Address(Z_esp, Interpreter::stackElementSize));
+
+  // obj_arg  : object pointer set up above (NULL if static)
+  // entry_arg: cache entry pointer
+  // value_arg: jvalue object on the stack
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
+             obj_arg, entry_arg, value_arg);
+  __ get_cache_and_index_at_bcp(cache, index, 1);
+
+  __ bind(no_watch);
+  BLOCK_COMMENT("} jvmti_post_field_mod");
+}
+
+
+// Common template for putfield, nofast_putfield and putstatic.
+// Resolves the constant pool cache entry, emits the JVMTI field modification
+// notification, loads field offset and flags, and then dispatches through a
+// branch table that is indexed by the tos state taken from the flags. Each
+// table entry pops the value (and, for non-static fields, the object), stores
+// the value with the matching width and, if rewriting is permitted, patches
+// the bytecode to its fast variant.
+//   byte_no   - passed on to resolve_cache_and_index and patch_bytecode.
+//   is_static - true for putstatic: no object is popped, no rewriting.
+//   rc        - rewrite control (may_rewrite / may_not_rewrite).
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
+  transition(vtos, vtos);
+
+  // Note the register sharing: cache/obj/bc, index/br_tab/oopStore_tmp2 and
+  // flags/oopStore_tmp1. The life ranges are documented below.
+  const Register cache         = Z_tmp_1;
+  const Register index         = Z_ARG5;
+  const Register obj           = Z_tmp_1;
+  const Register off           = Z_tmp_2;
+  const Register flags         = Z_R1_scratch;
+  const Register br_tab        = Z_ARG5;
+  const Register bc            = Z_tmp_1;
+  const Register oopStore_tmp1 = Z_R1_scratch;
+  const Register oopStore_tmp2 = Z_ARG5;
+  const Register oopStore_tmp3 = Z_R0_scratch;
+
+  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
+  jvmti_post_field_mod(cache, index, is_static);
+  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+  // begin of life for:
+  //   obj, off   long life range
+  //   flags      short life range, up to branch into branch table
+  // end of life for:
+  //   cache, index
+
+  const Address field(obj, off);
+  Label is_Byte, is_Bool, is_Int, is_Short, is_Char,
+        is_Long, is_Float, is_Object, is_Double;
+  Label is_badState8, is_badState9, is_badStateA, is_badStateB,
+        is_badStateC, is_badStateD, is_badStateE, is_badStateF,
+        is_badState;
+  Label branchTable, atosHandler, Done;
+  // Rewriting to the fast bytecodes is done for non-static accesses only,
+  // and only if the caller permits it.
+  bool  do_rewrite   = !is_static && (rc == may_rewrite);
+  bool  dont_rewrite = (is_static || (rc == may_not_rewrite));
+
+  assert(do_rewrite == !dont_rewrite, "Oops, code is not fit for that");
+
+  // The branch table below starts with the btos entry, i.e. index 0.
+  assert(btos == 0, "change code, btos != 0");
+
+  // Size of one branch table entry. It depends on ASSERT and on is_static.
+#ifdef ASSERT
+  const unsigned int bsize = is_static ? BTB_MINSIZE*1 : BTB_MINSIZE*4;
+#else
+  const unsigned int bsize = is_static ? BTB_MINSIZE*1 : BTB_MINSIZE*8;
+#endif
+
+  // Calculate address of branch table entry and branch there.
+  // The tos state bits of flags are rotated into position so that flags
+  // becomes tos_state * bsize, the byte offset of the entry in the table.
+  {
+    const int bit_shift = exact_log2(bsize); // Size of each branch table entry.
+    const int r_bitpos  = 63 - bit_shift;
+    const int l_bitpos  = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1;
+    const int n_rotate  = (bit_shift-ConstantPoolCacheEntry::tos_state_shift);
+    __ z_larl(br_tab, branchTable);
+    __ rotate_then_insert(flags, flags, l_bitpos, r_bitpos, n_rotate, true);
+    __ z_bc(Assembler::bcondAlways, 0, flags, br_tab);
+  }
+  // end of life for:
+  //   flags, br_tab
+
+  __ align_address(bsize);
+  BIND(branchTable);
+
+  // The entries follow in tos state order; every entry occupies bsize bytes.
+
+  // btos
+  BTB_BEGIN(is_Byte, bsize, "putfield_or_static:is_Byte");
+  __ pop(btos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ z_stc(Z_tos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_bputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Byte, bsize, "putfield_or_static:is_Byte");
+
+  // ztos
+  BTB_BEGIN(is_Bool, bsize, "putfield_or_static:is_Bool");
+  __ pop(ztos);
+  // The object must be popped for every non-static store, whether or not the
+  // bytecode gets rewritten; otherwise 'field' would still be based on the
+  // cache pointer that shares its register with obj.
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ z_nilf(Z_tos, 0x1);   // Reduce the value to its lowest bit.
+  __ z_stc(Z_tos, field);
+  // Patch only if rewriting is permitted, as in all other entries.
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_zputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END(is_Bool, bsize, "putfield_or_static:is_Bool");
+
+  // ctos
+  BTB_BEGIN(is_Char, bsize, "putfield_or_static:is_Char");
+  __ pop(ctos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ z_sth(Z_tos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_cputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Char, bsize, "putfield_or_static:is_Char");
+
+  // stos
+  BTB_BEGIN(is_Short, bsize, "putfield_or_static:is_Short");
+  __ pop(stos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ z_sth(Z_tos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_sputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Short, bsize, "putfield_or_static:is_Short");
+
+  // itos
+  BTB_BEGIN(is_Int, bsize, "putfield_or_static:is_Int");
+  __ pop(itos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ reg2mem_opt(Z_tos, field, false);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_iputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Int, bsize, "putfield_or_static:is_Int");
+
+  // ltos
+  BTB_BEGIN(is_Long, bsize, "putfield_or_static:is_Long");
+  __ pop(ltos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ reg2mem_opt(Z_tos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_lputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Long, bsize, "putfield_or_static:is_Long");
+
+  // ftos
+  BTB_BEGIN(is_Float, bsize, "putfield_or_static:is_Float");
+  __ pop(ftos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ freg2mem_opt(Z_ftos, field, false);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_fputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Float, bsize, "putfield_or_static:is_Float");
+
+  // dtos
+  BTB_BEGIN(is_Double, bsize, "putfield_or_static:is_Double");
+  __ pop(dtos);
+  if (!is_static) {
+    pop_and_check_object(obj);
+  }
+  __ freg2mem_opt(Z_ftos, field);
+  if (do_rewrite) {
+    patch_bytecode(Bytecodes::_fast_dputfield, bc, Z_ARG5, true, byte_no);
+  }
+  __ z_bru(Done);
+  BTB_END( is_Double, bsize, "putfield_or_static:is_Double");
+
+  // atos
+  // Only a branch is placed in the table; the handler itself follows below.
+  BTB_BEGIN(is_Object, bsize, "putfield_or_static:is_Object");
+  __ z_bru(atosHandler);
+  BTB_END( is_Object, bsize, "putfield_or_static:is_Object");
+
+  // Bad state detection comes at no extra runtime cost.
+  // The remaining table slots trap on tos states that have no handler above.
+  BTB_BEGIN(is_badState8, bsize, "putfield_or_static:is_badState8");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badState8, bsize, "putfield_or_static:is_badState8");
+  BTB_BEGIN(is_badState9, bsize, "putfield_or_static:is_badState9");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badState9, bsize, "putfield_or_static:is_badState9");
+  BTB_BEGIN(is_badStateA, bsize, "putfield_or_static:is_badStateA");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateA, bsize, "putfield_or_static:is_badStateA");
+  BTB_BEGIN(is_badStateB, bsize, "putfield_or_static:is_badStateB");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateB, bsize, "putfield_or_static:is_badStateB");
+  BTB_BEGIN(is_badStateC, bsize, "putfield_or_static:is_badStateC");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateC, bsize, "putfield_or_static:is_badStateC");
+  BTB_BEGIN(is_badStateD, bsize, "putfield_or_static:is_badStateD");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateD, bsize, "putfield_or_static:is_badStateD");
+  BTB_BEGIN(is_badStateE, bsize, "putfield_or_static:is_badStateE");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateE, bsize, "putfield_or_static:is_badStateE");
+  BTB_BEGIN(is_badStateF, bsize, "putfield_or_static:is_badStateF");
+  __ z_illtrap();
+  __ z_bru(is_badState);
+  BTB_END( is_badStateF, bsize, "putfield_or_static:is_badStateF");
+
+  __ align_address(64);
+  BIND(is_badState);  // Do this outside branch table. Needs a lot of space.
+  {
+    // b_off/e_off record the code offsets around the stop; they are not
+    // used any further in this block.
+    unsigned int b_off = __ offset();
+    if (is_static) __ stop_static("Bad state in putstatic");
+    else            __ stop_static("Bad state in putfield");
+    unsigned int e_off = __ offset();
+  }
+
+  __ align_address(64);
+  BIND(atosHandler);  // Oops are really complicated to handle.
+                      // There is a lot of code generated.
+                      // Therefore: generate the handler outside of branch table.
+                      // There is no performance penalty. The additional branch
+                      // to here is compensated for by the fallthru to "Done".
+  {
+    // b_off/e_off are not used any further in this block.
+    unsigned int b_off = __ offset();
+    __ pop(atos);
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    // Store into the field
+    do_oop_store(_masm, obj, off, Z_tos, false,
+                 oopStore_tmp1, oopStore_tmp2, oopStore_tmp3, _bs->kind(), false);
+    if (do_rewrite) {
+      patch_bytecode(Bytecodes::_fast_aputfield, bc, Z_ARG5, true, byte_no);
+    }
+    // __ z_bru(Done); // fallthru
+    unsigned int e_off = __ offset();
+  }
+
+  BIND(Done);
+
+  // Check for volatile store.
+  // NOTE(review): Z_ARG4 is tested here, but this function keeps the flags in
+  // Z_R1_scratch, and the branch table dispatch above overwrites them. Nothing
+  // in this function loads the flags into Z_ARG4 - confirm that the volatile
+  // check sees the intended value.
+  Label notVolatile;
+
+  __ testbit(Z_ARG4, ConstantPoolCacheEntry::is_volatile_shift);
+  __ z_brz(notVolatile);
+  __ z_fence();
+
+  BIND(notVolatile);
+}
+
+void TemplateTable::putfield(int byte_no) {
+  // Instance field write; the rewrite control argument is left at its default.
+  const bool is_static = false;
+
+  BLOCK_COMMENT("putfield {");
+  putfield_or_static(byte_no, is_static);
+  BLOCK_COMMENT("} putfield");
+}
+
+void TemplateTable::nofast_putfield(int byte_no) {
+  // Instance field write that must not be rewritten to a fast bytecode.
+  const bool is_static = false;
+
+  putfield_or_static(byte_no, is_static, may_not_rewrite);
+}
+
+void TemplateTable::putstatic(int byte_no) {
+  // Static field write.
+  const bool is_static = true;
+
+  BLOCK_COMMENT("putstatic {");
+  putfield_or_static(byte_no, is_static);
+  BLOCK_COMMENT("} putstatic");
+}
+
+// Emits the JVMTI field modification notification for the fast putfield
+// bytecodes. The tos value is pushed back onto the stack around the VM call
+// (gc will find oops there and update them) and doubles as the jvalue
+// argument; it is popped again afterwards.
+void TemplateTable::jvmti_post_fast_field_mod() {
+
+  // Nothing is generated unless field modification events can be posted.
+  if (!JvmtiExport::can_post_field_modification()) {
+    return;
+  }
+
+  // Check whether a field modification watch has been set before
+  // taking the time to call into the VM.
+  Label no_watch;
+
+  BLOCK_COMMENT("jvmti_post_fast_field_mod {");
+
+  __ load_absolute_address(Z_R1_scratch,
+                           (address) JvmtiExport::get_field_modification_count_addr());
+  __ load_and_test_int(Z_R0_scratch, Address(Z_R1_scratch));
+  __ z_brz(no_watch);
+
+  const Register receiver = Z_tmp_1;
+
+  // Copy the object pointer from the stack: pop it, verify it, push it back.
+  __ pop_ptr(receiver);
+  __ verify_oop(receiver);
+  __ push_ptr(receiver);
+
+  // Save the tos value before call_VM() clobbers it. This has to be done for
+  // every data type anyway, so the saved value serves as the jvalue object.
+  const Bytecodes::Code current = bytecode();
+  switch (current) {
+    case Bytecodes::_fast_bputfield:
+    case Bytecodes::_fast_zputfield:
+    case Bytecodes::_fast_cputfield:
+    case Bytecodes::_fast_sputfield:
+    case Bytecodes::_fast_iputfield:
+      __ push_i(Z_tos);
+      break;
+    case Bytecodes::_fast_lputfield:
+      __ push_l(Z_tos);
+      break;
+    case Bytecodes::_fast_fputfield:
+      __ push_f();
+      break;
+    case Bytecodes::_fast_dputfield:
+      __ push_d();
+      break;
+    case Bytecodes::_fast_aputfield:
+      __ push_ptr(Z_tos);
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+
+  // Address of the jvalue on the stack.
+  __ load_address(Z_ARG4, Address(Z_esp, Interpreter::stackElementSize));
+  // Access constant pool cache entry. Z_tos is passed as the second register.
+  __ get_cache_entry_pointer_at_bcp(Z_ARG3, Z_tos, 1);
+  __ verify_oop(receiver);
+
+  // receiver: object pointer copied above
+  // Z_ARG3  : cache entry pointer
+  // Z_ARG4  : jvalue object on the stack
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
+             receiver, Z_ARG3, Z_ARG4);
+
+  // Restore the tos value saved above.
+  switch (current) {
+    case Bytecodes::_fast_bputfield:
+    case Bytecodes::_fast_zputfield:
+    case Bytecodes::_fast_cputfield:
+    case Bytecodes::_fast_sputfield:
+    case Bytecodes::_fast_iputfield:
+      __ pop_i(Z_tos);
+      break;
+    case Bytecodes::_fast_lputfield:
+      __ pop_l(Z_tos);
+      break;
+    case Bytecodes::_fast_fputfield:
+      __ pop_f(Z_ftos);
+      break;
+    case Bytecodes::_fast_dputfield:
+      __ pop_d(Z_ftos);
+      break;
+    case Bytecodes::_fast_aputfield:
+      __ pop_ptr(Z_tos);
+      break;
+
+    default:
+      // Other bytecodes were already rejected by the first switch.
+      break;
+  }
+
+  __ bind(no_watch);
+  BLOCK_COMMENT("} jvmti_post_fast_field_mod");
+}
+
+void TemplateTable::fast_storefield(TosState state) {
+  // Template for the fast putfield bytecodes: the value arrives in tos, the
+  // object is popped from the stack, and the store width is selected at
+  // generation time from the bytecode.
+  transition(state, vtos);
+
+  const ByteSize entries_base = ConstantPoolCache::base_offset();
+  jvmti_post_fast_field_mod();
+
+  // Registers for the constant pool cache access.
+  const Register cache      = Z_tmp_1;
+  const Register cpe_offset = Z_tmp_2;
+  const Register flags      = Z_ARG5;
+
+  // The entry offset comes in bytes, don't shift afterwards!
+  __ get_cache_and_index_at_bcp(cache, cpe_offset, 1);
+
+  // The flags are needed for the volatile test after the store.
+  assert(!flags->is_volatile(), "do_oop_store could perform leaf RT call");
+  __ z_lg(flags, Address(cache, cpe_offset, entries_base + ConstantPoolCacheEntry::flags_offset()));
+
+  // The entry offset is replaced by the field offset from the cache entry.
+  const Register field_offset = cpe_offset;
+  __ z_lg(field_offset, Address(cache, cpe_offset, entries_base + ConstantPoolCacheEntry::f2_offset()));
+
+  // The object is popped into the register that held the cache so far.
+  const Register obj = cache;
+
+  pop_and_check_object(obj);
+
+  const Address field(obj, field_offset);
+
+  // Store the value.
+  switch (bytecode()) {
+    case Bytecodes::_fast_zputfield:
+      __ z_nilf(Z_tos, 0x1);
+      // fall through to bputfield
+    case Bytecodes::_fast_bputfield:
+      __ z_stc(Z_tos, field);
+      break;
+    case Bytecodes::_fast_cputfield:
+      // fall through
+    case Bytecodes::_fast_sputfield:
+      __ z_sth(Z_tos, field);
+      break;
+    case Bytecodes::_fast_iputfield:
+      __ reg2mem_opt(Z_tos, field, false);
+      break;
+    case Bytecodes::_fast_lputfield:
+      __ reg2mem_opt(Z_tos, field);
+      break;
+    case Bytecodes::_fast_fputfield:
+      __ freg2mem_opt(Z_ftos, field, false);
+      break;
+    case Bytecodes::_fast_dputfield:
+      __ freg2mem_opt(Z_ftos, field);
+      break;
+    case Bytecodes::_fast_aputfield:
+      do_oop_store(_masm, obj, field_offset, Z_tos, false,
+                   Z_ARG2, Z_ARG3, Z_ARG4, _bs->kind(), false);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  // Volatile store: emit a fence if the volatile bit of the flags is set.
+  Label not_volatile;
+
+  __ testbit(flags, ConstantPoolCacheEntry::is_volatile_shift);
+  __ z_brz(not_volatile);
+  __ z_fence();
+
+  __ bind(not_volatile);
+}
+
+// Template for the _fast_<x>getfield bytecodes.
+// On entry the object reference is in Z_tos (atos). The field described by
+// the constant pool cache entry referenced at bcp + 1 is loaded into the tos
+// register matching the bytecode (Z_tos or Z_ftos).
+// If JVMTI field access events can be posted, a guarded call to
+// InterpreterRuntime::post_field_access is emitted first.
+void TemplateTable::fast_accessfield(TosState state) {
+ transition(atos, state);
+
+ Register obj = Z_tos;
+
+ // Do the JVMTI work here to avoid disturbing the register state below
+ if (JvmtiExport::can_post_field_access()) {
+ // Check to see if a field access watch has been set before we
+ // take the time to call into the VM.
+ Label cont;
+
+ __ load_absolute_address(Z_R1_scratch,
+ (address)JvmtiExport::get_field_access_count_addr());
+ __ load_and_test_int(Z_R0_scratch, Address(Z_R1_scratch));
+ __ z_brz(cont);
+
+ // Access constant pool cache entry.
+
+ __ get_cache_entry_pointer_at_bcp(Z_ARG3, Z_tmp_1, 1);
+ __ verify_oop(obj);
+ __ push_ptr(obj); // Save object pointer before call_VM() clobbers it.
+ __ z_lgr(Z_ARG2, obj);
+
+ // Z_ARG2: object pointer copied above
+ // Z_ARG3: cache entry pointer
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
+ Z_ARG2, Z_ARG3);
+ __ pop_ptr(obj); // Restore object pointer.
+
+ __ bind(cont);
+ }
+
+ // Access constant pool cache.
+ Register cache = Z_tmp_1;
+ Register index = Z_tmp_2;
+
+ // Index comes in bytes, don't shift afterwards!
+ __ get_cache_and_index_at_bcp(cache, index, 1);
+ // Replace index with field offset from cache entry.
+ __ mem2reg_opt(index,
+ Address(cache, index,
+ ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
+
+ __ verify_oop(obj);
+ __ null_check(obj);
+
+ Address field(obj, index);
+
+ // access field
+ // Each case returns directly from the generator; nothing is emitted after
+ // the load.
+ switch (bytecode()) {
+ case Bytecodes::_fast_agetfield:
+ __ load_heap_oop(Z_tos, field);
+ __ verify_oop(Z_tos);
+ return;
+ case Bytecodes::_fast_lgetfield:
+ __ mem2reg_opt(Z_tos, field);
+ return;
+ case Bytecodes::_fast_igetfield:
+ __ mem2reg_opt(Z_tos, field, false);
+ return;
+ case Bytecodes::_fast_bgetfield:
+ __ z_lb(Z_tos, field);
+ return;
+ case Bytecodes::_fast_sgetfield:
+ __ z_lh(Z_tos, field);
+ return;
+ case Bytecodes::_fast_cgetfield:
+ __ z_llgh(Z_tos, field); // Load into 64 bits, works on all CPUs.
+ return;
+ case Bytecodes::_fast_fgetfield:
+ __ mem2freg_opt(Z_ftos, field, false);
+ return;
+ case Bytecodes::_fast_dgetfield:
+ __ mem2freg_opt(Z_ftos, field);
+ return;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+// Template for the fused "load local 0, then getfield" fast bytecodes.
+// Loads local variable 0 as the receiver and reads the field described by
+// the constant pool cache entry referenced at bcp + 2 into the tos register
+// selected by 'state' (itos, atos or ftos).
+void TemplateTable::fast_xaccess(TosState state) {
+  transition(vtos, state);
+
+  Register receiver = Z_tos;
+  // Get receiver.
+  __ mem2reg_opt(Z_tos, aaddress(0));
+
+  // Access constant pool cache.
+  Register cache = Z_tmp_1;
+  Register index = Z_tmp_2;
+
+  // Index comes in bytes, don't shift afterwards!
+  __ get_cache_and_index_at_bcp(cache, index, 2);
+  // Replace index with field offset from cache entry.
+  __ mem2reg_opt(index,
+                 Address(cache, index,
+                         ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
+
+  // Make sure exception is reported in correct bcp range (getfield is
+  // next instruction).
+  __ add2reg(Z_bcp, 1);
+  __ null_check(receiver);
+  switch (state) {
+    case itos:
+      __ mem2reg_opt(Z_tos, Address(receiver, index), false);
+      break;
+    case atos:
+      __ load_heap_oop(Z_tos, Address(receiver, index));
+      __ verify_oop(Z_tos);
+      break;
+    case ftos:
+      // A float field is 4 bytes wide. Request the single-precision load,
+      // as the float cases of fast_accessfield/fast_storefield do, so that
+      // no bytes beyond the field are read.
+      __ mem2freg_opt(Z_ftos, Address(receiver, index), false);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  // Reset bcp to original position.
+  __ add2reg(Z_bcp, -1);
+}
+
+//-----------------------------------------------------------------------------
+// Calls
+
+// Common set-up code for all invoke bytecodes.
+// Loads the constant pool cache entry for the call site into 'method',
+// 'index' and 'flags', pushes the appendix for invokedynamic/invokehandle
+// when the has_appendix flag bit is set, optionally loads the receiver from
+// the expression stack into 'recv', and leaves the return entry address for
+// the call's result type in Z_R14.
+// 'recv' and 'flags' may be noreg; Z_ARG1 and Z_ARG2 are then used instead.
+void TemplateTable::prepare_invoke(int byte_no,
+ Register method, // linked method (or i-klass)
+ Register index, // itable index, MethodType, etc.
+ Register recv, // If caller wants to see it.
+ Register flags) { // If caller wants to test it.
+ // Determine flags.
+ const Bytecodes::Code code = bytecode();
+ const bool is_invokeinterface = code == Bytecodes::_invokeinterface;
+ const bool is_invokedynamic = code == Bytecodes::_invokedynamic;
+ const bool is_invokehandle = code == Bytecodes::_invokehandle;
+ const bool is_invokevirtual = code == Bytecodes::_invokevirtual;
+ const bool is_invokespecial = code == Bytecodes::_invokespecial;
+ // The receiver is loaded exactly when the caller passed a register for it.
+ const bool load_receiver = (recv != noreg);
+ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+
+ // Setup registers & access constant pool cache.
+ if (recv == noreg) { recv = Z_ARG1; }
+ if (flags == noreg) { flags = Z_ARG2; }
+ assert_different_registers(method, Z_R14, index, recv, flags);
+
+ BLOCK_COMMENT("prepare_invoke {");
+
+ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
+
+ // Maybe push appendix to arguments.
+ if (is_invokedynamic || is_invokehandle) {
+ Label L_no_push;
+ Register resolved_reference = Z_R1_scratch;
+ __ testbit(flags, ConstantPoolCacheEntry::has_appendix_shift);
+ __ z_bfalse(L_no_push);
+ // Push the appendix as a trailing parameter.
+ // This must be done before we get the receiver,
+ // since the parameter_size includes it.
+ __ load_resolved_reference_at_index(resolved_reference, index);
+ __ verify_oop(resolved_reference);
+ __ push_ptr(resolved_reference); // Push appendix (MethodType, CallSite, etc.).
+ __ bind(L_no_push);
+ }
+
+ // Load receiver if needed (after appendix is pushed so parameter size is correct).
+ if (load_receiver) {
+ assert(!is_invokedynamic, "");
+ // recv := int2long(flags & ConstantPoolCacheEntry::parameter_size_mask) << 3
+ // Flags is zero-extended int2long when loaded during load_invoke_cp_cache_entry().
+ // Only the least significant byte (psize) of flags is used.
+ {
+ // Extract the parameter size bits and scale them by the stack element
+ // size in a single rotate-then-insert instruction.
+ const unsigned int logSES = Interpreter::logStackElementSize;
+ const int bit_shift = logSES;
+ const int r_bitpos = 63 - bit_shift;
+ const int l_bitpos = r_bitpos - ConstantPoolCacheEntry::parameter_size_bits + 1;
+ const int n_rotate = bit_shift;
+ assert(ConstantPoolCacheEntry::parameter_size_mask == 255, "adapt bitpositions");
+ __ rotate_then_insert(recv, flags, l_bitpos, r_bitpos, n_rotate, true);
+ }
+ // Recv now contains #arguments * StackElementSize.
+
+ // Replace the byte offset in 'recv' by the receiver found at that offset
+ // from Z_esp.
+ Address recv_addr(Z_esp, recv);
+ __ z_lg(recv, recv_addr);
+ __ verify_oop(recv);
+ }
+
+ // Compute return type.
+ // ret_type is used by callers (invokespecial, invokestatic) at least.
+ Register ret_type = Z_R1_scratch;
+ assert_different_registers(ret_type, method);
+
+ const address table_addr = (address)Interpreter::invoke_return_entry_table_for(code);
+ __ load_absolute_address(Z_R14, table_addr);
+
+ {
+ // Extract the tos state bits from flags and scale them to a byte offset
+ // into the return entry table.
+ const int bit_shift = LogBytesPerWord; // Size of each table entry.
+ const int r_bitpos = 63 - bit_shift;
+ const int l_bitpos = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1;
+ const int n_rotate = bit_shift-ConstantPoolCacheEntry::tos_state_shift;
+ __ rotate_then_insert(ret_type, flags, l_bitpos, r_bitpos, n_rotate, true);
+ // Make sure we don't need to mask flags for tos_state after the above shift.
+ ConstantPoolCacheEntry::verify_tos_state_shift();
+ }
+
+ __ z_lg(Z_R14, Address(Z_R14, ret_type)); // Load return address.
+ BLOCK_COMMENT("} prepare_invoke");
+}
+
+
+// Emits the dispatch code shared by invokevirtual and the forced-virtual
+// case of invokeinterface.
+//   index: f2 of the cache entry; the method itself if the is_vfinal flag
+//          bit is set, otherwise a vtable index. Must be Z_ARG3.
+//   recv:  receiver object; null checked on both paths.
+//   flags: cache entry flags, tested for is_vfinal.
+// Both paths end in jump_from_interpreted().
+void TemplateTable::invokevirtual_helper(Register index,
+ Register recv,
+ Register flags) {
+ // Uses temporary registers Z_tmp_2, Z_ARG4.
+ assert_different_registers(index, recv, Z_tmp_2, Z_ARG4);
+
+ // Test for an invoke of a final method.
+ Label notFinal;
+
+ BLOCK_COMMENT("invokevirtual_helper {");
+
+ __ testbit(flags, ConstantPoolCacheEntry::is_vfinal_shift);
+ __ z_brz(notFinal);
+
+ const Register method = index; // Method must be Z_ARG3.
+ assert(method == Z_ARG3, "method must be second argument for interpreter calling convention");
+
+ // Do the call - the index is actually the method to call.
+ // That is, f2 is a vtable index if !is_vfinal, else f2 is a method.
+
+ // It's final, need a null check here!
+ __ null_check(recv);
+
+ // Profile this call.
+ __ profile_final_call(Z_tmp_2);
+ __ profile_arguments_type(Z_tmp_2, method, Z_ARG5, true); // Argument type profiling.
+ __ jump_from_interpreted(method, Z_tmp_2);
+
+ __ bind(notFinal);
+
+ // Get receiver klass.
+ __ null_check(recv, Z_R0_scratch, oopDesc::klass_offset_in_bytes());
+ __ load_klass(Z_tmp_2, recv);
+
+ // Profile this call.
+ __ profile_virtual_call(Z_tmp_2, Z_ARG4, Z_ARG5);
+
+ // Get target method & entry point.
+ // Scale the vtable index to a byte offset, then load the method from the
+ // vtable of the receiver klass held in Z_tmp_2.
+ __ z_sllg(index, index, exact_log2(vtableEntry::size_in_bytes()));
+ __ mem2reg_opt(method,
+ Address(Z_tmp_2, index,
+ InstanceKlass::vtable_start_offset() + in_ByteSize(vtableEntry::method_offset_in_bytes())));
+ __ profile_arguments_type(Z_ARG4, method, Z_ARG5, true);
+ __ jump_from_interpreted(method, Z_ARG4);
+ BLOCK_COMMENT("} invokevirtual_helper");
+}
+
+// Template for invokevirtual: resolves the call site through the f2 cache
+// entry and dispatches via invokevirtual_helper().
+void TemplateTable::invokevirtual(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f2_byte, "use this argument");
+
+  const Register Rindex    = Z_ARG3; // method or vtable index
+  const Register Rreceiver = Z_ARG1;
+  const Register Rflags    = Z_ARG2;
+
+  // The itable index is not needed for invokevirtual, hence noreg.
+  prepare_invoke(byte_no, Rindex, noreg, Rreceiver, Rflags);
+
+  invokevirtual_helper(Rindex, Rreceiver, Rflags);
+}
+
+// Template for invokespecial: loads the f1 method and the receiver, null
+// checks the receiver, profiles the call and jumps to the method.
+void TemplateTable::invokespecial(int byte_no) {
+ transition(vtos, vtos);
+
+ assert(byte_no == f1_byte, "use this argument");
+ Register Rmethod = Z_tmp_2;
+ prepare_invoke(byte_no, Rmethod, noreg, // Get f1 method.
+ Z_ARG3); // Get receiver also for null check.
+ __ verify_oop(Z_ARG3);
+ __ null_check(Z_ARG3);
+ // Do the call.
+ __ profile_call(Z_ARG2);
+ __ profile_arguments_type(Z_ARG2, Rmethod, Z_ARG5, false);
+ __ jump_from_interpreted(Rmethod, Z_R1_scratch);
+}
+
+// Template for invokestatic: loads the f1 method (no receiver), profiles
+// the call and jumps to the method.
+void TemplateTable::invokestatic(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this argument");
+
+  const Register Rtarget      = Z_tmp_2;
+  const Register Rprofile_tmp = Z_ARG2;
+
+  // Get f1 method.
+  prepare_invoke(byte_no, Rtarget);
+
+  // Profile, then do the call.
+  __ profile_call(Rprofile_tmp);
+  __ profile_arguments_type(Rprofile_tmp, Rtarget, Z_ARG5, false);
+  __ jump_from_interpreted(Rtarget, Z_R1_scratch);
+}
+
+// Outdated feature, and we don't support it.
+// The generated template only consists of a stop() with the message below,
+// so executing this bytecode halts with that message.
+void TemplateTable::fast_invokevfinal(int byte_no) {
+ transition(vtos, vtos);
+ assert(byte_no == f2_byte, "use this argument");
+ __ stop("fast_invokevfinal not used on linuxs390x");
+}
+
+// Template for invokeinterface.
+// Loads the f1 interface klass, the f2 itable index, the receiver and the
+// flags. If the is_forced_virtual flag bit is set, the call is dispatched
+// through invokevirtual_helper(). Otherwise the target is looked up with
+// lookup_interface_method(); a failed lookup or a null method branches to
+// code that calls the corresponding InterpreterRuntime throw routine.
+void TemplateTable::invokeinterface(int byte_no) {
+ transition(vtos, vtos);
+
+ assert(byte_no == f1_byte, "use this argument");
+ Register interface = Z_tos;
+ Register index = Z_ARG3;
+ Register receiver = Z_tmp_1;
+ Register flags = Z_ARG5;
+
+ BLOCK_COMMENT("invokeinterface {");
+
+ // Destroys Z_ARG1 and Z_ARG2, thus use Z_ARG4 and copy afterwards.
+ prepare_invoke(byte_no, Z_ARG4, index, // Get f1 klassOop, f2 itable index.
+ receiver, flags);
+
+ // Z_R14 (== Z_bytecode) : return entry
+
+ __ z_lgr(interface, Z_ARG4);
+
+ // Special case of invokeinterface called for virtual method of
+ // java.lang.Object. See cpCacheOop.cpp for details.
+ // This code isn't produced by javac, but could be produced by
+ // another compliant java compiler.
+ Label notMethod;
+ __ testbit(flags, ConstantPoolCacheEntry::is_forced_virtual_shift);
+ __ z_brz(notMethod);
+ invokevirtual_helper(index, receiver, flags);
+ __ bind(notMethod);
+
+ // Get receiver klass into klass - also a null check.
+ // 'flags' is not needed past this point, so its register is reused.
+ Register klass = flags;
+
+ __ restore_locals();
+ __ load_klass(klass, receiver);
+
+ // Profile this call.
+ __ profile_virtual_call(klass, Z_ARG2/*mdp*/, Z_ARG4/*scratch*/);
+
+ NearLabel no_such_interface, no_such_method;
+ Register method = Z_tmp_2;
+
+ // TK 2010-08-24: save the index to Z_ARG4. needed in case of an error
+ // in throw_AbstractMethodErrorByTemplateTable
+ __ z_lgr(Z_ARG4, index);
+ // TK 2011-03-24: copy also klass because it could be changed in
+ // lookup_interface_method
+ __ z_lgr(Z_ARG2, klass);
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index
+ klass, interface, index,
+ // outputs: method, scan temp. reg
+ method, Z_tmp_2, Z_R1_scratch,
+ no_such_interface);
+
+ // Check for abstract method error.
+ // Note: This should be done more efficiently via a throw_abstract_method_error
+ // interpreter entry point and a conditional jump to it in case of a null
+ // method.
+ __ compareU64_and_branch(method, (intptr_t) 0,
+ Assembler::bcondZero, no_such_method);
+
+ __ profile_arguments_type(Z_ARG3, method, Z_ARG5, true);
+
+ // Do the call.
+ __ jump_from_interpreted(method, Z_ARG5);
+ __ should_not_reach_here();
+
+ // exception handling code follows...
+ // Note: Must restore interpreter registers to canonical
+ // state for exception handling to work correctly!
+
+ __ bind(no_such_method);
+
+ // Throw exception.
+ __ restore_bcp(); // Bcp must be correct for exception handler (was destroyed).
+ __ restore_locals(); // Make sure locals pointer is correct as well (was destroyed).
+ // TK 2010-08-24: Call throw_AbstractMethodErrorByTemplateTable now with the
+ // relevant information for generating a better error message
+ // Arguments: Z_ARG2 = receiver klass copy, interface, Z_ARG4 = saved index.
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_AbstractMethodError),
+ Z_ARG2, interface, Z_ARG4);
+ // The call_VM checks for exception, so we should never return here.
+ __ should_not_reach_here();
+
+ __ bind(no_such_interface);
+
+ // Throw exception.
+ __ restore_bcp(); // Bcp must be correct for exception handler (was destroyed).
+ __ restore_locals(); // Make sure locals pointer is correct as well (was destroyed).
+ // TK 2010-08-24: Call throw_IncompatibleClassChangeErrorByTemplateTable now with the
+ // relevant information for generating a better error message
+ // Arguments: Z_ARG2 = receiver klass copy, interface.
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_IncompatibleClassChangeError),
+ Z_ARG2, interface);
+ // The call_VM checks for exception, so we should never return here.
+ __ should_not_reach_here();
+
+ BLOCK_COMMENT("} invokeinterface");
+ return;
+}
+
+// Template for invokehandle: loads the f2 method, the f1 MethodType and the
+// receiver, null checks the receiver, profiles the call and jumps to the
+// method.
+void TemplateTable::invokehandle(int byte_no) {
+ transition(vtos, vtos);
+
+ const Register method = Z_tmp_2;
+ const Register recv = Z_ARG5;
+ const Register mtype = Z_tmp_1;
+ prepare_invoke(byte_no,
+ method, mtype, // Get f2 method, f1 MethodType.
+ recv);
+ __ verify_method_ptr(method);
+ __ verify_oop(recv);
+ __ null_check(recv);
+
+ // Note: Mtype is already pushed (if necessary) by prepare_invoke.
+
+ // FIXME: profile the LambdaForm also.
+ __ profile_final_call(Z_ARG2);
+ __ profile_arguments_type(Z_ARG3, method, Z_ARG5, true);
+
+ __ jump_from_interpreted(method, Z_ARG3);
+}
+
+// Template for invokedynamic: lets prepare_invoke() load the linked method
+// and push the appendix, then profiles the call and jumps to the method.
+void TemplateTable::invokedynamic(int byte_no) {
+ transition(vtos, vtos);
+
+ const Register Rmethod = Z_tmp_2;
+ const Register Rcallsite = Z_tmp_1;
+
+ prepare_invoke(byte_no, Rmethod, Rcallsite);
+
+ // Rmethod: MH.linkToCallSite method (from f2); this is the jump target below.
+ // Rcallsite: CallSite object (from f1)
+
+ // Note: Callsite is already pushed by prepare_invoke.
+
+ // TODO: should make a type profile for any invokedynamic that takes a ref argument.
+ // Profile this call.
+ __ profile_call(Z_ARG2);
+ __ profile_arguments_type(Z_ARG2, Rmethod, Z_ARG5, false);
+ __ jump_from_interpreted(Rmethod, Z_ARG2);
+}
+
+//-----------------------------------------------------------------------------
+// Allocation
+
+// Original comment on "allow_shared_alloc":
+// Always go the slow path.
+// + Eliminated optimization within the template-based interpreter:
+// If an allocation is done within the interpreter without using
+// tlabs, the interpreter tries to do the allocation directly
+// on the heap.
+// + That means the profiling hooks are not considered and allocations
+// get lost for the profiling framework.
+// + However, we do not think that this optimization is really needed,
+// so we always go now the slow path through the VM in this case --
+// spec jbb2005 shows no measurable performance degradation.
+// Template for the 'new' bytecode.
+// Fast path (only if UseTLAB): if the constant pool tag is JVM_CONSTANT_Class,
+// the klass is fully initialized and its layout helper does not request the
+// slow path, the instance is carved out of the thread's TLAB, its fields are
+// cleared (unless ZeroTLAB) and its header is initialized.
+// In all other cases InterpreterRuntime::_new is called.
+// The new object is returned in Z_tos.
+void TemplateTable::_new() {
+  transition(vtos, atos);
+  address prev_instr_address = NULL;
+  Register tags = Z_tmp_1;
+  Register RallocatedObject = Z_tos;
+  Register cpool = Z_ARG2;
+  Register tmp = Z_ARG3; // RobjectFields==tmp and Rsize==offset must be a register pair.
+  Register offset = Z_ARG4;
+  Label slow_case;
+  Label done;
+  Label initialize_header;
+  Label initialize_object;  // Including clearing the fields.
+  Label allocate_shared;
+
+  BLOCK_COMMENT("TemplateTable::_new {");
+  __ get_2_byte_integer_at_bcp(offset/*dest*/, 1, InterpreterMacroAssembler::Unsigned);
+  __ get_cpool_and_tags(cpool, tags);
+  // Make sure the class we're about to instantiate has been resolved.
+  // This is done before loading InstanceKlass to be consistent with the order
+  // how Constant Pool is updated (see ConstantPool::klass_at_put).
+  const int tags_offset = Array<u1>::base_offset_in_bytes();
+  __ load_address(tmp, Address(tags, offset, tags_offset));
+  __ z_cli(0, tmp, JVM_CONSTANT_Class);
+  __ z_brne(slow_case);
+
+  __ z_sllg(offset, offset, LogBytesPerWord); // Convert index to byte offset.
+  // Get InstanceKlass.
+  Register iklass = cpool;
+  __ z_lg(iklass, Address(cpool, offset, sizeof(ConstantPool)));
+
+  // Make sure klass is initialized & doesn't have finalizer.
+  // Make sure klass is fully initialized.
+  const int state_offset = in_bytes(InstanceKlass::init_state_offset());
+  if (Immediate::is_uimm12(state_offset)) {
+    __ z_cli(state_offset, iklass, InstanceKlass::fully_initialized);
+  } else {
+    __ z_cliy(state_offset, iklass, InstanceKlass::fully_initialized);
+  }
+  __ z_brne(slow_case);
+
+  // Get instance_size in InstanceKlass (scaled to a count of bytes).
+  Register Rsize = offset;
+  const int mask = 1 << Klass::_lh_instance_slow_path_bit;
+  __ z_llgf(Rsize, Address(iklass, Klass::layout_helper_offset()));
+  __ z_tmll(Rsize, mask);
+  __ z_btrue(slow_case);
+
+  // Allocate the instance
+  // 1) Try to allocate in the TLAB.
+  // 2) If fail and the object is large allocate in the shared Eden.
+  // 3) If the above fails (or is not applicable), go to a slow case
+  //    (creates a new TLAB, etc.).
+
+  // Always go the slow path. See comment above this template.
+  const bool allow_shared_alloc = false;
+
+  if (UseTLAB) {
+    Register RoldTopValue = RallocatedObject;
+    Register RnewTopValue = tmp;
+    __ z_lg(RoldTopValue, Address(Z_thread, JavaThread::tlab_top_offset()));
+    __ load_address(RnewTopValue, Address(RoldTopValue, Rsize));
+    __ z_cg(RnewTopValue, Address(Z_thread, JavaThread::tlab_end_offset()));
+    __ z_brh(allow_shared_alloc ? allocate_shared : slow_case);
+    __ z_stg(RnewTopValue, Address(Z_thread, JavaThread::tlab_top_offset()));
+    if (ZeroTLAB) {
+      // The fields have been already cleared.
+      // The jump below skips the clear_reg() emitted ahead of
+      // initialize_object, but the header code stores Z_R1_scratch (Rzero)
+      // into the klass gap. Zero it here so that this store does not write
+      // a stale register value.
+      __ clear_reg(Z_R1_scratch, true /*whole reg*/, false); // Don't set CC.
+      __ z_bru(initialize_header);
+    } else {
+      // Initialize both the header and fields.
+      if (allow_shared_alloc) {
+        __ z_bru(initialize_object);
+      } else {
+        // Fallthrough to initialize_object, but assert that it is on fall through path.
+        prev_instr_address = __ pc();
+      }
+    }
+  }
+
+  if (allow_shared_alloc) {
+    // Allocation in shared Eden not implemented, because sapjvm allocation trace does not allow it.
+    Unimplemented();
+  }
+
+  if (UseTLAB) {
+    Register RobjectFields = tmp;
+    Register Rzero = Z_R1_scratch;
+
+    assert(ZeroTLAB || prev_instr_address == __ pc(),
+           "must not omit jump to initialize_object above, as it is not on the fall through path");
+    __ clear_reg(Rzero, true /*whole reg*/, false); // Load 0L into Rzero. Don't set CC.
+
+    // The object is initialized before the header. If the object size is
+    // zero, go directly to the header initialization.
+    __ bind(initialize_object);
+    __ z_aghi(Rsize, (int)-sizeof(oopDesc)); // Subtract header size, set CC.
+    __ z_bre(initialize_header);             // Jump if size of fields is zero.
+
+    // Initialize object fields.
+    // See documentation for MVCLE instruction!!!
+    assert(RobjectFields->encoding() % 2 == 0, "RobjectFields must be an even register");
+    assert(Rsize->encoding() == (RobjectFields->encoding()+1),
+           "RobjectFields and Rsize must be a register pair");
+    assert(Rzero->encoding() % 2 == 1, "Rzero must be an odd register");
+
+    // Set Rzero to 0 and use it as src length, then mvcle will copy nothing
+    // and fill the object with the padding value 0.
+    __ add2reg(RobjectFields, sizeof(oopDesc), RallocatedObject);
+    __ move_long_ext(RobjectFields, as_Register(Rzero->encoding() - 1), 0);
+
+    // Initialize object header only.
+    __ bind(initialize_header);
+    if (UseBiasedLocking) {
+      Register prototype = RobjectFields;
+      __ z_lg(prototype, Address(iklass, Klass::prototype_header_offset()));
+      __ z_stg(prototype, Address(RallocatedObject, oopDesc::mark_offset_in_bytes()));
+    } else {
+      __ store_const(Address(RallocatedObject, oopDesc::mark_offset_in_bytes()),
+                     (long)markOopDesc::prototype());
+    }
+
+    __ store_klass_gap(Rzero, RallocatedObject);  // Zero klass gap for compressed oops.
+    __ store_klass(iklass, RallocatedObject);     // Store klass last.
+
+    {
+      SkipIfEqual skip(_masm, &DTraceAllocProbes, false, Z_ARG5 /*scratch*/);
+      // Trigger dtrace event for fastpath.
+      __ push(atos); // Save the return value.
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), RallocatedObject);
+      __ pop(atos); // Restore the return value.
+    }
+    __ z_bru(done);
+  }
+
+  // slow case
+  __ bind(slow_case);
+  __ get_constant_pool(Z_ARG2);
+  __ get_2_byte_integer_at_bcp(Z_ARG3/*dest*/, 1, InterpreterMacroAssembler::Unsigned);
+  call_VM(Z_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), Z_ARG2, Z_ARG3);
+  __ verify_oop(Z_tos);
+
+  // continue
+  __ bind(done);
+
+  BLOCK_COMMENT("} TemplateTable::_new");
+}
+
+// Template for newarray: passes the element type byte found at bcp + 1 and
+// the length from Z_tos to InterpreterRuntime::newarray.
+void TemplateTable::newarray() {
+  transition(itos, atos);
+
+  const Register Rtype   = Z_ARG2;
+  const Register Rlength = Z_tos;   // size in Z_tos
+
+  // Call runtime.
+  __ z_llgc(Rtype, at_bcp(1));      // type
+  call_VM(Z_RET, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), Rtype, Rlength);
+}
+
+// Template for anewarray: passes the constant pool, the 2-byte constant pool
+// index found at bcp + 1 and the length from Z_tos to
+// InterpreterRuntime::anewarray. The result is returned in Z_tos.
+void TemplateTable::anewarray() {
+ transition(itos, atos);
+ __ get_2_byte_integer_at_bcp(Z_ARG3, 1, InterpreterMacroAssembler::Unsigned);
+ __ get_constant_pool(Z_ARG2);
+ // NOTE(review): the length is zero-extended here although it is a signed
+ // int; confirm the runtime entry copes with negative lengths passed this way.
+ __ z_llgfr(Z_ARG4, Z_tos);
+ call_VM(Z_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
+ Z_ARG2, Z_ARG3, Z_ARG4);
+}
+
+// Template for arraylength: null checks the array in Z_tos and replaces it
+// by its length field.
+void TemplateTable::arraylength() {
+  transition(atos, itos);
+
+  const int      length_offset = arrayOopDesc::length_offset_in_bytes();
+  const Register Rarray        = Z_tos;
+
+  __ null_check(Rarray, Z_R0_scratch, length_offset);
+  // 32-bit load of the length into the tos register.
+  __ mem2reg_opt(Z_tos, Address(Rarray, length_offset), false);
+}
+
+// Template for checkcast.
+// A null reference passes unchanged. Otherwise the target klass is taken
+// from the constant pool if its tag is JVM_CONSTANT_Class, or obtained via
+// InterpreterRuntime::quicken_io_cc. If the subtype check fails, control is
+// transferred to the ClassCastException entry; on success the object is
+// left in Z_tos.
+void TemplateTable::checkcast() {
+ transition(atos, atos);
+
+ NearLabel done, is_null, ok_is_subtype, quicked, resolved;
+
+ BLOCK_COMMENT("checkcast {");
+ // If object is NULL, we are almost done.
+ __ compareU64_and_branch(Z_tos, (intptr_t) 0, Assembler::bcondZero, is_null);
+
+ // Get cpool & tags index.
+ Register cpool = Z_tmp_1;
+ Register tags = Z_tmp_2;
+ Register index = Z_ARG5;
+
+ __ get_cpool_and_tags(cpool, tags);
+ __ get_2_byte_integer_at_bcp(index, 1, InterpreterMacroAssembler::Unsigned);
+ // See if bytecode has already been quicked.
+ // Note: For CLI, we would have to add the index to the tags pointer first,
+ // thus load and compare in a "classic" manner.
+ __ z_llgc(Z_R0_scratch,
+ Address(tags, index, Array<u1>::base_offset_in_bytes()));
+ __ compareU64_and_branch(Z_R0_scratch, JVM_CONSTANT_Class,
+ Assembler::bcondEqual, quicked);
+
+ __ push(atos); // Save receiver for result, and for GC.
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+ __ get_vm_result_2(Z_tos);
+
+ Register receiver = Z_ARG4;
+ Register klass = Z_tos;
+ Register subklass = Z_ARG5;
+
+ __ pop_ptr(receiver); // restore receiver
+ __ z_bru(resolved);
+
+ // Get superklass in klass and subklass in subklass.
+ __ bind(quicked);
+
+ __ z_lgr(Z_ARG4, Z_tos); // Save receiver.
+ __ z_sllg(index, index, LogBytesPerWord); // index2bytes for addressing
+ __ mem2reg_opt(klass, Address(cpool, index, sizeof(ConstantPool)));
+
+ __ bind(resolved);
+
+ __ load_klass(subklass, receiver);
+
+ // Generate subtype check. Object in receiver.
+ // Superklass in klass. Subklass in subklass.
+ __ gen_subtype_check(subklass, klass, Z_ARG3, Z_tmp_1, ok_is_subtype);
+
+ // Come here on failure.
+ __ push_ptr(receiver);
+ // Object is at TOS, target klass oop expected in rax by convention.
+ // NOTE(review): "rax" is x86 wording; on this path the target klass is in
+ // Z_tos. Confirm which register the exception entry expects.
+ __ z_brul((address) Interpreter::_throw_ClassCastException_entry);
+
+ // Come here on success.
+ __ bind(ok_is_subtype);
+
+ __ z_lgr(Z_tos, receiver); // Restore object.
+
+ // Collect counts on whether this test sees NULLs a lot or not.
+ if (ProfileInterpreter) {
+ __ z_bru(done);
+ __ bind(is_null);
+ __ profile_null_seen(Z_tmp_1);
+ } else {
+ __ bind(is_null); // Same as 'done'.
+ }
+
+ __ bind(done);
+ BLOCK_COMMENT("} checkcast");
+}
+
+// Template for instanceof.
+// Leaves 1 in Z_tos if the object is non-null and its klass passes the
+// subtype check against the klass referenced by the bytecode, otherwise 0.
+// The target klass is taken from the constant pool if its tag is
+// JVM_CONSTANT_Class, or obtained via InterpreterRuntime::quicken_io_cc.
+void TemplateTable::instanceof() {
+ transition(atos, itos);
+
+ NearLabel done, is_null, ok_is_subtype, quicked, resolved;
+
+ BLOCK_COMMENT("instanceof {");
+ // If object is NULL, we are almost done.
+ // Z_tos then already holds 0, which is also the result value.
+ __ compareU64_and_branch(Z_tos, (intptr_t) 0, Assembler::bcondZero, is_null);
+
+ // Get cpool & tags index.
+ Register cpool = Z_tmp_1;
+ Register tags = Z_tmp_2;
+ Register index = Z_ARG5;
+
+ __ get_cpool_and_tags(cpool, tags);
+ __ get_2_byte_integer_at_bcp(index, 1, InterpreterMacroAssembler::Unsigned);
+ // See if bytecode has already been quicked.
+ // Note: For CLI, we would have to add the index to the tags pointer first,
+ // thus load and compare in a "classic" manner.
+ __ z_llgc(Z_R0_scratch,
+ Address(tags, index, Array<u1>::base_offset_in_bytes()));
+ __ compareU64_and_branch(Z_R0_scratch, JVM_CONSTANT_Class, Assembler::bcondEqual, quicked);
+
+ __ push(atos); // Save receiver for result, and for GC.
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+ __ get_vm_result_2(Z_tos);
+
+ // receiver and subklass deliberately share one register: the receiver is
+ // only needed to load its klass.
+ Register receiver = Z_tmp_2;
+ Register klass = Z_tos;
+ Register subklass = Z_tmp_2;
+
+ __ pop_ptr(receiver); // Restore receiver.
+ __ verify_oop(receiver);
+ __ load_klass(subklass, subklass);
+ __ z_bru(resolved);
+
+ // Get superklass in klass and subklass in subklass.
+ __ bind(quicked);
+
+ __ load_klass(subklass, Z_tos);
+ __ z_sllg(index, index, LogBytesPerWord); // index2bytes for addressing
+ __ mem2reg_opt(klass,
+ Address(cpool, index, sizeof(ConstantPool)));
+
+ __ bind(resolved);
+
+ // Generate subtype check.
+ // Superklass in klass. Subklass in subklass.
+ __ gen_subtype_check(subklass, klass, Z_ARG4, Z_ARG5, ok_is_subtype);
+
+ // Come here on failure.
+ __ clear_reg(Z_tos, true, false);
+ __ z_bru(done);
+
+ // Come here on success.
+ __ bind(ok_is_subtype);
+ __ load_const_optimized(Z_tos, 1);
+
+ // Collect counts on whether this test sees NULLs a lot or not.
+ if (ProfileInterpreter) {
+ __ z_bru(done);
+ __ bind(is_null);
+ __ profile_null_seen(Z_tmp_1);
+ } else {
+ __ bind(is_null); // same as 'done'
+ }
+
+ __ bind(done);
+ // tos = 0: obj == NULL or obj is not an instanceof the specified klass
+ // tos = 1: obj != NULL and obj is an instanceof the specified klass
+ BLOCK_COMMENT("} instanceof");
+}
+
+//-----------------------------------------------------------------------------
+// Breakpoints
+// Template for the breakpoint bytecode.
+// Fetches the original (unpatched) bytecode from the runtime, posts the
+// breakpoint event, and then dispatches to the original bytecode.
+void TemplateTable::_breakpoint() {
+
+ // Note: We get here even if we are single stepping.
+ // Jbug insists on setting breakpoints at every bytecode
+ // even if we are in single step mode.
+
+ transition(vtos, vtos);
+
+ // Get the unpatched byte code.
+ __ get_method(Z_ARG2);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at),
+ Z_ARG2, Z_bcp);
+ // Save the result to a register that is preserved over C-function calls.
+ __ z_lgr(Z_tmp_1, Z_RET);
+
+ // Post the breakpoint event.
+ __ get_method(Z_ARG2);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
+ Z_ARG2, Z_bcp);
+
+ // Must restore the bytecode, because call_VM destroys Z_bytecode.
+ __ z_lgr(Z_bytecode, Z_tmp_1);
+
+ // Complete the execution of original bytecode.
+ __ dispatch_only_normal(vtos);
+}
+
+
+// Exceptions
+
+// Template for athrow: null checks the exception object in Z_tos and
+// branches to the interpreter's throw_exception entry.
+void TemplateTable::athrow() {
+  transition(atos, vtos);
+
+  const Register Rentry = Z_ARG2;
+
+  __ null_check(Z_tos);
+  __ load_absolute_address(Rentry, Interpreter::throw_exception_entry());
+  __ z_br(Rentry);
+}
+
+// Synchronization
+//
+// Note: monitorenter & exit are symmetric routines; which is reflected
+// in the assembly code structure as well
+//
+// Stack layout:
+//
+// callers_sp <- Z_SP (callers_sp == Z_fp (own fp))
+// return_pc
+// [rest of ABI_160]
+// /slot o: free
+// / ... free
+// oper. | slot n+1: free <- Z_esp points to first free slot
+// stack | slot n: val caches IJAVA_STATE.esp
+// | ...
+// \slot 0: val
+// /slot m <- IJAVA_STATE.monitors = monitor block top
+// | ...
+// monitors| slot 2
+// | slot 1
+// \slot 0
+// /slot l <- monitor block bot
+// ijava_state | ...
+// | slot 2
+// \slot 0
+// <- Z_fp
+// Template for monitorenter.
+// Searches the frame's monitor block for a free entry (stopping early at an
+// entry that already holds the same object), allocates a new entry on the
+// stack if none is free, stores the object into the entry and locks it.
+// Dispatches to the next bytecode itself.
+void TemplateTable::monitorenter() {
+ transition(atos, vtos);
+
+ BLOCK_COMMENT("monitorenter {");
+
+ // Check for NULL object.
+ __ null_check(Z_tos);
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+ NearLabel allocated;
+ // Initialize entry pointer.
+ const Register Rfree_slot = Z_tmp_1;
+ __ clear_reg(Rfree_slot, true, false); // Points to free slot or NULL. Don't set CC.
+
+ // Find a free slot in the monitor block from top to bot (result in Rfree_slot).
+ {
+ const Register Rcurr_monitor = Z_ARG2;
+ const Register Rbot = Z_ARG3; // Points to word under bottom of monitor block.
+ const Register Rlocked_obj = Z_ARG4;
+ NearLabel loop, exit, not_free;
+ // Starting with top-most entry.
+ __ get_monitors(Rcurr_monitor); // Rcur_monitor = IJAVA_STATE.monitors
+ __ add2reg(Rbot, -frame::z_ijava_state_size, Z_fp);
+
+#ifdef ASSERT
+ // Debug builds only: stop if the monitor pointer lies outside the range
+ // bounded by Z_esp and the monitor block bottom.
+ address reentry = NULL;
+ { NearLabel ok;
+ __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotHigh, ok);
+ reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors points below monitor block bottom");
+ __ bind(ok);
+ }
+ { NearLabel ok;
+ __ compareU64_and_branch(Rcurr_monitor, Z_esp, Assembler::bcondHigh, ok);
+ reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors above Z_esp");
+ __ bind(ok);
+ }
+#endif
+
+ // Check if bottom reached, i.e. if there is at least one monitor.
+ __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondEqual, exit);
+
+ __ bind(loop);
+ // Check if current entry is used.
+ __ load_and_test_long(Rlocked_obj, Address(Rcurr_monitor, BasicObjectLock::obj_offset_in_bytes()));
+ __ z_brne(not_free);
+ // If not used then remember entry in Rfree_slot.
+ __ z_lgr(Rfree_slot, Rcurr_monitor);
+ __ bind(not_free);
+ // Exit if current entry is for same object; this guarantees, that new monitor
+ // used for recursive lock is above the older one.
+ __ compareU64_and_branch(Rlocked_obj, Z_tos, Assembler::bcondEqual, exit);
+ // otherwise advance to next entry
+ __ add2reg(Rcurr_monitor, entry_size);
+ // Check if bottom reached, if not at bottom then check this entry.
+ __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotEqual, loop);
+ __ bind(exit);
+ }
+
+ // Rfree_slot != NULL -> found one
+ __ compareU64_and_branch(Rfree_slot, (intptr_t)0L, Assembler::bcondNotEqual, allocated);
+
+ // Allocate one if there's no free slot.
+ __ add_monitor_to_stack(false, Z_ARG3, Z_ARG4, Z_ARG5);
+ // The new entry is the one the monitors pointer refers to now.
+ __ get_monitors(Rfree_slot);
+
+ // Rfree_slot: points to monitor entry.
+ __ bind(allocated);
+
+ // Increment bcp to point to the next bytecode, so exception
+ // handling for async. exceptions work correctly.
+ // The object has already been popped from the stack, so the
+ // expression stack looks correct.
+ __ add2reg(Z_bcp, 1, Z_bcp);
+
+ // Store object.
+ __ z_stg(Z_tos, BasicObjectLock::obj_offset_in_bytes(), Rfree_slot);
+ __ lock_object(Rfree_slot, Z_tos);
+
+ // Check to make sure this monitor doesn't cause stack overflow after locking.
+ __ save_bcp(); // in case of exception
+ __ generate_stack_overflow_check(0);
+
+ // The bcp has already been incremented. Just need to dispatch to
+ // next instruction.
+ __ dispatch_next(vtos);
+
+ BLOCK_COMMENT("} monitorenter");
+}
+
+
+// Template for monitorexit.
+// Searches the frame's monitor block from the top for the entry holding the
+// object in Z_tos and unlocks it. If no entry matches,
+// InterpreterRuntime::throw_illegal_monitor_state_exception is called.
+void TemplateTable::monitorexit() {
+ transition(atos, vtos);
+
+ BLOCK_COMMENT("monitorexit {");
+
+ // Check for NULL object.
+ __ null_check(Z_tos);
+
+ NearLabel found, not_found;
+ const Register Rcurr_monitor = Z_ARG2;
+
+ // Find matching slot.
+ {
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+ NearLabel entry, loop;
+
+ const Register Rbot = Z_ARG3; // Points to word under bottom of monitor block.
+ const Register Rlocked_obj = Z_ARG4;
+ // Starting with top-most entry.
+ __ get_monitors(Rcurr_monitor); // Rcur_monitor = IJAVA_STATE.monitors
+ __ add2reg(Rbot, -frame::z_ijava_state_size, Z_fp);
+
+#ifdef ASSERT
+ // Debug builds only: stop if the monitor pointer lies outside the range
+ // bounded by Z_esp and the monitor block bottom.
+ address reentry = NULL;
+ { NearLabel ok;
+ __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotHigh, ok);
+ reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors points below monitor block bottom");
+ __ bind(ok);
+ }
+ { NearLabel ok;
+ __ compareU64_and_branch(Rcurr_monitor, Z_esp, Assembler::bcondHigh, ok);
+ reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors above Z_esp");
+ __ bind(ok);
+ }
+#endif
+
+ // Check if bottom reached, i.e. if there is at least one monitor.
+ __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondEqual, not_found);
+
+ __ bind(loop);
+ // Check if current entry is for same object.
+ __ z_lg(Rlocked_obj, Address(Rcurr_monitor, BasicObjectLock::obj_offset_in_bytes()));
+ // If same object then stop searching.
+ __ compareU64_and_branch(Rlocked_obj, Z_tos, Assembler::bcondEqual, found);
+ // Otherwise advance to next entry.
+ __ add2reg(Rcurr_monitor, entry_size);
+ // Check if bottom reached, if not at bottom then check this entry.
+ __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotEqual, loop);
+ }
+
+ // Falling out of the loop means no entry matched.
+ __ bind(not_found);
+ // Error handling. Unlocking was not block-structured.
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_illegal_monitor_state_exception));
+ __ should_not_reach_here();
+
+ __ bind(found);
+ __ push_ptr(Z_tos); // Make sure object is on stack (contract with oopMaps).
+ __ unlock_object(Rcurr_monitor, Z_tos);
+ __ pop_ptr(Z_tos); // Discard object.
+ BLOCK_COMMENT("} monitorexit");
+}
+
+// Wide instructions
+// Template for the wide prefix: uses the bytecode at bcp + 1 as an index
+// into Interpreter::_wentry_point and branches to the entry found there.
+void TemplateTable::wide() {
+  transition(vtos, vtos);
+
+  const Register Rtable_offset = Z_R1_scratch;
+  const Register Rentry        = Z_tmp_1;
+
+  // Scale the bytecode following the prefix to a table offset.
+  __ z_llgc(Rtable_offset, at_bcp(1));
+  __ z_sllg(Rtable_offset, Rtable_offset, LogBytesPerWord);
+
+  // Load the entry point from the table and branch to it.
+  __ load_absolute_address(Rentry, (address) Interpreter::_wentry_point);
+  __ mem2reg_opt(Rentry, Address(Rentry, Rtable_offset));
+  __ z_br(Rentry);
+  // Note: the bcp increment step is part of the individual wide
+  // bytecode implementations.
+}
+
+// Multi arrays
+// Template for multianewarray: passes the address of the first dimension on
+// the expression stack to InterpreterRuntime::multianewarray and pops the
+// dimensions afterwards.
+void TemplateTable::multianewarray() {
+  transition(vtos, atos);
+
+  const Register Rdims_size = Z_tmp_1;
+  const Register Rfirst_dim = Z_ARG2;
+
+  // Get number of dimensions and convert the slot count to a byte offset.
+  __ z_llgc(Rdims_size, at_bcp(3));
+  __ z_sllg(Rdims_size, Rdims_size, Interpreter::logStackElementSize);
+
+  // Z_esp points past last_dim, so set Rfirst_dim to the first_dim address.
+  __ load_address(Rfirst_dim, Address(Z_esp, Rdims_size));
+  call_VM(Z_RET, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), Rfirst_dim);
+
+  // Pop dimensions from expression stack.
+  __ z_agr(Z_esp, Rdims_size);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/templateTable_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_TEMPLATETABLE_S390_HPP
+#define CPU_S390_VM_TEMPLATETABLE_S390_HPP
+
+ static void prepare_invoke(int byte_no, // Optional register arguments default to noreg.
+ Register method, // linked method (or i-klass)
+ Register index = noreg, // itable index, MethodType, etc.
+ Register recv = noreg, // If caller wants to see it.
+ Register flags = noreg); // If caller wants to test it.
+ static void invokevirtual_helper(Register index, Register recv,
+ Register flags);
+
+ // Helpers for array index checking. NOTE(review): presumably used by the array access templates - confirm.
+ static void index_check(Register array, Register index, unsigned int shift);
+ static void index_check_without_pop(Register array, Register index);
+
+#endif // CPU_S390_VM_TEMPLATETABLE_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vmStructs_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VMSTRUCTS_S390_HPP
+#define CPU_S390_VM_VMSTRUCTS_S390_HPP
+
+// These are the CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+// All four macros below expand to nothing, so no CPU-specific entries are contributed here.
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // CPU_S390_VM_VMSTRUCTS_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vm_version_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,1182 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "code/compiledIC.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/java.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "vm_version_s390.hpp"
+
+# include <sys/sysinfo.h>
+
+bool VM_Version::_is_determine_features_test_running = false; // Set to true only while call_getFeatures() runs the detection code.
+// Feature bit buffers (all zero initially) and their associated counters.
+unsigned long VM_Version::_features[_features_buffer_len] = {0, 0, 0, 0};
+unsigned long VM_Version::_cipher_features[_features_buffer_len] = {0, 0, 0, 0};
+unsigned long VM_Version::_msgdigest_features[_features_buffer_len] = {0, 0, 0, 0};
+unsigned int VM_Version::_nfeatures = 0; // Number of _features words printed by print_features_internal().
+unsigned int VM_Version::_ncipher_features = 0;
+unsigned int VM_Version::_nmsgdigest_features = 0;
+unsigned int VM_Version::_Dcache_lineSize = 256; // Initial value, in bytes.
+unsigned int VM_Version::_Icache_lineSize = 256; // Initial value, in bytes.
+// Parallel tables: entries at the same index describe the same machine generation (index 0 is a blank placeholder).
+static const char* z_gen[] = {" ", "G1", "G2", "G3", "G4", "G5", "G6", "G7" };
+static const char* z_machine[] = {" ", "2064", "2084", "2094", "2097", "2817", " ", "2964" };
+static const char* z_name[] = {" ", "z900", "z990", "z9 EC", "z10 EC", "z196 EC", "ec12", "z13" };
+
+void VM_Version::initialize() { // Detect the CPU's features, then derive the platform's VM flag settings from them.
+ determine_features(); // Get processor capabilities.
+ set_features_string(); // Set a descriptive feature indication.
+
+ if (Verbose) {
+ print_features();
+ }
+ // Data cache line size: used below for AllocatePrefetchStepSize and ContendedPaddingWidth.
+ intx cache_line_size = Dcache_lineSize(0);
+
+ MaxVectorSize = 8;
+
+ if (has_PrefetchRaw()) {
+ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { // not preset
+ // 0 = no prefetch.
+ // 1 = Prefetch instructions for each allocation.
+ // 2 = Use TLAB watermark to gate allocation prefetch.
+ AllocatePrefetchStyle = 1;
+ }
+
+ if (AllocatePrefetchStyle > 0) { // Prefetching turned on at all?
+ // Distance to prefetch ahead of allocation pointer.
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance) || (AllocatePrefetchDistance < 0)) { // not preset
+ AllocatePrefetchDistance = 0;
+ }
+
+ // Number of lines to prefetch ahead of allocation pointer.
+ if (FLAG_IS_DEFAULT(AllocatePrefetchLines) || (AllocatePrefetchLines <= 0)) { // not preset
+ AllocatePrefetchLines = 3;
+ }
+
+ // Step size in bytes of sequential prefetch instructions. NOTE(review): all three arms below assign cache_line_size, so the result does not depend on the flag's prior value.
+ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) || (AllocatePrefetchStepSize <= 0)) { // not preset
+ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+ } else if (AllocatePrefetchStepSize < cache_line_size) {
+ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+ } else {
+ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+ }
+ } else {
+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+ AllocatePrefetchDistance = 0;
+ AllocatePrefetchLines = 0;
+ // Can't be zero. Will SIGFPE during constraints checking.
+ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+ }
+
+ } else {
+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+ AllocatePrefetchDistance = 0;
+ AllocatePrefetchLines = 0;
+ // Can't be zero. Will SIGFPE during constraints checking.
+ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+ }
+
+ // TODO:
+ // On z/Architecture, cache line size is significantly large (256 bytes). Do we really need
+ // to keep contended members that far apart? Performance tests are required.
+ if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) {
+ ContendedPaddingWidth = cache_line_size;
+ }
+
+ // On z/Architecture, the CRC32 intrinsics had to be implemented "by hand".
+ // They cannot be based on the CHECKSUM instruction which has been there
+ // since the very beginning (of z/Architecture). It computes "some kind of" a checksum
+ // which has nothing to do with the CRC32 algorithm.
+ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
+ }
+
+ // On z/Architecture, we take UseAES as the general switch to enable/disable the AES intrinsics.
+ // The specific, and yet to be defined, switches UseAESxxxIntrinsics will then be set
+ // depending on the actual machine capabilities.
+ // Explicitly setting them via CmdLine option takes precedence, of course.
+ // TODO: UseAESIntrinsics must be made keylength specific.
+ // As of March 2015 and Java8, only AES128 is supported by the Java Cryptographic Extensions.
+ // Therefore, UseAESIntrinsics is of minimal use at the moment.
+ if (FLAG_IS_DEFAULT(UseAES) && has_Crypto_AES()) {
+ FLAG_SET_DEFAULT(UseAES, true);
+ }
+ if (UseAES && !has_Crypto_AES()) {
+ warning("AES instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAES, false);
+ }
+ if (UseAES) {
+ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+ }
+ }
+ if (UseAESIntrinsics && !has_Crypto_AES()) {
+ warning("AES intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+
+ // TODO: implement AES/CTR intrinsics
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+
+ // TODO: implement GHASH intrinsics
+ if (UseGHASHIntrinsics) {
+ warning("GHASH intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+ }
+
+ if (UseFMA) {
+ warning("FMA instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseFMA, false);
+ }
+
+ // On z/Architecture, we take UseSHA as the general switch to enable/disable the SHA intrinsics.
+ // The specific switches UseSHAxxxIntrinsics will then be set depending on the actual
+ // machine capabilities.
+ // Explicitly setting them via CmdLine option takes precedence, of course.
+ if (FLAG_IS_DEFAULT(UseSHA) && has_Crypto_SHA()) {
+ FLAG_SET_DEFAULT(UseSHA, true);
+ }
+ if (UseSHA && !has_Crypto_SHA()) {
+ warning("SHA instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseSHA, false);
+ }
+ if (UseSHA && has_Crypto_SHA1()) {
+ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+ }
+ } else if (UseSHA1Intrinsics) {
+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+ }
+ if (UseSHA && has_Crypto_SHA256()) {
+ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+ }
+ } else if (UseSHA256Intrinsics) {
+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+ }
+ if (UseSHA && has_Crypto_SHA512()) {
+ if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+ }
+ } else if (UseSHA512Intrinsics) {
+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+ }
+
+ if (UseAdler32Intrinsics) {
+ warning("Adler32Intrinsics not available on this CPU.");
+ FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+ }
+
+ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+ }
+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
+ }
+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
+ }
+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+ FLAG_SET_DEFAULT(UsePopCountInstruction, true);
+ }
+
+ // z/Architecture supports 8-byte compare-exchange operations
+ // (see Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
+ // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
+ _supports_cx8 = true;
+
+ _supports_atomic_getadd4 = VM_Version::has_LoadAndALUAtomicV1();
+ _supports_atomic_getadd8 = VM_Version::has_LoadAndALUAtomicV1();
+
+ // z/Architecture supports unaligned memory accesses.
+ // Performance penalty is negligible. An additional tick or so
+ // is lost if the accessed data spans a cache line boundary.
+ // Unaligned accesses are not atomic, of course.
+ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+ FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+ }
+}
+
+
+void VM_Version::set_features_string() {
+  // Sets _features_string for the detected generation; checks run newest to oldest, each match overwriting the last.
+  unsigned int ambiguity = 0;
+  if (is_z13()) {
+    _features_string = "System z G7-z13 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update, TxM, VectorInstr)";
+    ambiguity++;
+  }
+  if (is_ec12()) {
+    _features_string = "System z G6-EC12 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update, TxM)";
+    ambiguity++;
+  }
+  if (is_z196()) {
+    _features_string = "System z G5-z196 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update)";
+    ambiguity++;
+  }
+  if (is_z10()) {
+    _features_string = "System z G4-z10 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB)";
+    ambiguity++;
+  }
+  if (is_z9()) {
+    _features_string = "System z G3-z9 (LDISP_fast, ExtImm), out-of-support as of 2016-04-01";
+    ambiguity++;
+  }
+  if (is_z990()) {
+    _features_string = "System z G2-z990 (LDISP_fast), out-of-support as of 2014-07-01";
+    ambiguity++;
+  }
+  if (is_z900()) {
+    _features_string = "System z G1-z900 (LDISP), out-of-support as of 2014-07-01";
+    ambiguity++;
+  }
+  // Exactly one generation is expected to match; zero or several matches get a generic description instead.
+  if (ambiguity == 0) {
+    _features_string = "z/Architecture (unknown generation)";
+  } else if (ambiguity > 1) {
+    tty->print_cr("*** WARNING *** Ambiguous z/Architecture detection, ambiguity = %u", ambiguity); // %u: ambiguity is unsigned.
+    tty->print_cr("                oldest detected generation is %s", _features_string);
+    _features_string = "z/Architecture (ambiguous detection)";
+  }
+}
+
+// Tests bits in the feature bit array featureBuffer, which is bufLen bits long.
+//   featureNum >= 0: returns true iff that bit is set; bit 0 is the leftmost bit of the first byte.
+//   featureNum <  0: returns true iff any bit in the buffer is set.
+// bufLen must be a nonzero multiple of 8 and featureBuffer must be 8-byte aligned (both asserted).
+bool VM_Version::test_feature_bit(unsigned long* featureBuffer, int featureNum, unsigned int bufLen) {
+  assert(bufLen > 0, "buffer len must be positive");
+  assert((bufLen & 0x0007) == 0, "unaligned buffer len");
+  assert(((intptr_t)featureBuffer&0x0007) == 0, "unaligned feature buffer");
+  if (featureNum >= 0) {
+    assert((unsigned int)featureNum < bufLen, "feature index out of range");
+    // Bits are numbered from the leftmost (most significant) bit of each byte.
+    const unsigned char* bytes = (const unsigned char*)featureBuffer;
+    const int byteIdx = featureNum / 8;
+    const unsigned int mask = 0x80U >> (featureNum % 8);
+    return (bytes[byteIdx] & mask) != 0;
+  }
+
+  // Negative index: report whether any word of the buffer is nonzero.
+  const size_t nWords = bufLen / (8 * sizeof(long));
+  for (size_t w = 0; w < nWords; w++) {
+    if (featureBuffer[w] != 0) return true;
+  }
+  return false;
+}
+
+void VM_Version::print_features_internal(const char* text, bool print_anyway) { // Prints the feature string, the raw feature words and, optionally, one line per available facility.
+ tty->print_cr("%s %s", text, features_string());
+ tty->print("%s", text);
+ for (unsigned int i = 0; i < _nfeatures; i++) {
+ tty->print(" 0x%16.16lx", _features[i]);
+ }
+ tty->cr();
+ // The per-facility listing is printed only if Verbose is set or the caller passes print_anyway.
+ if (Verbose || print_anyway) {
+ // z900
+ if (has_long_displacement() ) tty->print_cr("available: %s", "LongDispFacility");
+ // z990
+ if (has_long_displacement_fast() ) tty->print_cr("available: %s", "LongDispFacilityHighPerf");
+ if (has_ETF2() && has_ETF3() ) tty->print_cr("available: %s", "ETF2 and ETF3");
+ if (has_Crypto() ) tty->print_cr("available: %s", "CryptoFacility");
+ // z9
+ if (has_extended_immediate() ) tty->print_cr("available: %s", "ExtImmedFacility");
+ if (has_StoreFacilityListExtended()) tty->print_cr("available: %s", "StoreFacilityListExtended");
+ if (has_StoreClockFast() ) tty->print_cr("available: %s", "StoreClockFast");
+ if (has_ETF2Enhancements() ) tty->print_cr("available: %s", "ETF2 Enhancements");
+ if (has_ETF3Enhancements() ) tty->print_cr("available: %s", "ETF3 Enhancements");
+ if (has_HFPUnnormalized() ) tty->print_cr("available: %s", "HFPUnnormalizedFacility");
+ if (has_HFPMultiplyAndAdd() ) tty->print_cr("available: %s", "HFPMultiplyAndAddFacility");
+ // z10
+ if (has_ParsingEnhancements() ) tty->print_cr("available: %s", "Parsing Enhancements");
+ if (has_ExtractCPUtime() ) tty->print_cr("available: %s", "ExtractCPUTime");
+ if (has_CompareSwapStore() ) tty->print_cr("available: %s", "CompareSwapStore");
+ if (has_GnrlInstrExtensions() ) tty->print_cr("available: %s", "General Instruction Extensions");
+ if (has_CompareBranch() ) tty->print_cr(" available: %s", "Compare and Branch");
+ if (has_CompareTrap() ) tty->print_cr(" available: %s", "Compare and Trap");
+ if (has_RelativeLoadStore() ) tty->print_cr(" available: %s", "Relative Load/Store");
+ if (has_MultiplySingleImm32() ) tty->print_cr(" available: %s", "MultiplySingleImm32");
+ if (has_Prefetch() ) tty->print_cr(" available: %s", "Prefetch");
+ if (has_MoveImmToMem() ) tty->print_cr(" available: %s", "Direct Moves Immediate to Memory");
+ if (has_MemWithImmALUOps() ) tty->print_cr(" available: %s", "Direct ALU Ops Memory .op. Immediate");
+ if (has_ExtractCPUAttributes() ) tty->print_cr(" available: %s", "Extract CPU Atributes");
+ if (has_ExecuteExtensions() ) tty->print_cr("available: %s", "ExecuteExtensions");
+ if (has_FPSupportEnhancements() ) tty->print_cr("available: %s", "FPSupportEnhancements");
+ if (has_DecimalFloatingPoint() ) tty->print_cr("available: %s", "DecimalFloatingPoint");
+ // z196
+ if (has_DistinctOpnds() ) tty->print_cr("available: %s", "Distinct Operands");
+ if (has_InterlockedAccessV1() ) tty->print_cr(" available: %s", "InterlockedAccess V1 (fast)");
+ if (has_PopCount() ) tty->print_cr(" available: %s", "PopCount");
+ if (has_LoadStoreConditional() ) tty->print_cr(" available: %s", "LoadStoreConditional");
+ if (has_HighWordInstr() ) tty->print_cr(" available: %s", "HighWord Instructions");
+ if (has_FastSync() ) tty->print_cr(" available: %s", "FastSync (bcr 14,0)");
+ if (has_AtomicMemWithImmALUOps() ) tty->print_cr("available: %s", "Atomic Direct ALU Ops Memory .op. Immediate");
+ if (has_FPExtensions() ) tty->print_cr("available: %s", "Floatingpoint Extensions");
+ if (has_CryptoExt3() ) tty->print_cr("available: %s", "Crypto Extensions 3");
+ if (has_CryptoExt4() ) tty->print_cr("available: %s", "Crypto Extensions 4");
+ // EC12
+ if (has_MiscInstrExt() ) tty->print_cr("available: %s", "Miscelaneous Instruction Extensions");
+ if (has_ExecutionHint() ) tty->print_cr(" available: %s", "Execution Hints (branch prediction)");
+ if (has_ProcessorAssist() ) tty->print_cr(" available: %s", "Processor Assists");
+ if (has_LoadAndTrap() ) tty->print_cr(" available: %s", "Load and Trap");
+ if (has_TxMem() ) tty->print_cr("available: %s", "Transactional Memory");
+ if (has_InterlockedAccessV2() ) tty->print_cr(" available: %s", "InterlockedAccess V2 (fast)");
+ if (has_DFPZonedConversion() ) tty->print_cr(" available: %s", "DFP Zoned Conversions");
+ // z13
+ if (has_LoadStoreConditional2() ) tty->print_cr("available: %s", "Load/Store Conditional 2");
+ if (has_CryptoExt5() ) tty->print_cr("available: %s", "Crypto Extensions 5");
+ if (has_DFPPackedConversion() ) tty->print_cr("available: %s", "DFP Packed Conversions");
+ if (has_VectorFacility() ) tty->print_cr("available: %s", "Vector Facility");
+ // test switches
+ if (has_TestFeature1Impl() ) tty->print_cr("available: %s", "TestFeature1Impl");
+ if (has_TestFeature2Impl() ) tty->print_cr("available: %s", "TestFeature2Impl");
+ if (has_TestFeature4Impl() ) tty->print_cr("available: %s", "TestFeature4Impl");
+ if (has_TestFeature8Impl() ) tty->print_cr("available: %s", "TestFeature8Impl");
+ // Crypto details: KM bits are tested from _cipher_features[0], KMC bits from _cipher_features[2].
+ if (has_Crypto()) {
+ tty->cr();
+ tty->print_cr("detailled availability of %s capabilities:", "CryptoFacility");
+ if (test_feature_bit(&_cipher_features[0], -1, 2*Cipher::_featureBits)) {
+ tty->cr();
+ tty->print_cr(" available: %s", "Message Cipher Functions");
+ }
+ if (test_feature_bit(&_cipher_features[0], -1, (int)Cipher::_featureBits)) {
+ tty->print_cr(" available Crypto Features of KM (Cipher Message):");
+ for (unsigned int i = 0; i < Cipher::_featureBits; i++) {
+ if (test_feature_bit(&_cipher_features[0], i, (int)Cipher::_featureBits)) {
+ switch (i) {
+ case Cipher::_Query: tty->print_cr(" available: KM Query"); break;
+ case Cipher::_DEA: tty->print_cr(" available: KM DEA"); break;
+ case Cipher::_TDEA128: tty->print_cr(" available: KM TDEA-128"); break;
+ case Cipher::_TDEA192: tty->print_cr(" available: KM TDEA-192"); break;
+ case Cipher::_EncryptedDEA: tty->print_cr(" available: KM Encrypted DEA"); break;
+ case Cipher::_EncryptedDEA128: tty->print_cr(" available: KM Encrypted DEA-128"); break;
+ case Cipher::_EncryptedDEA192: tty->print_cr(" available: KM Encrypted DEA-192"); break;
+ case Cipher::_AES128: tty->print_cr(" available: KM AES-128"); break;
+ case Cipher::_AES192: tty->print_cr(" available: KM AES-192"); break;
+ case Cipher::_AES256: tty->print_cr(" available: KM AES-256"); break;
+ case Cipher::_EnccryptedAES128: tty->print_cr(" available: KM Encrypted-AES-128"); break;
+ case Cipher::_EnccryptedAES192: tty->print_cr(" available: KM Encrypted-AES-192"); break;
+ case Cipher::_EnccryptedAES256: tty->print_cr(" available: KM Encrypted-AES-256"); break;
+ case Cipher::_XTSAES128: tty->print_cr(" available: KM XTS-AES-128"); break;
+ case Cipher::_XTSAES256: tty->print_cr(" available: KM XTS-AES-256"); break;
+ case Cipher::_EncryptedXTSAES128: tty->print_cr(" available: KM XTS-Encrypted-AES-128"); break;
+ case Cipher::_EncryptedXTSAES256: tty->print_cr(" available: KM XTS-Encrypted-AES-256"); break;
+ default: tty->print_cr(" available: unknown KM code %d", i); break;
+ }
+ }
+ }
+ }
+ if (test_feature_bit(&_cipher_features[2], -1, (int)Cipher::_featureBits)) {
+ tty->print_cr(" available Crypto Features of KMC (Cipher Message with Chaining):");
+ for (unsigned int i = 0; i < Cipher::_featureBits; i++) {
+ if (test_feature_bit(&_cipher_features[2], i, (int)Cipher::_featureBits)) {
+ switch (i) {
+ case Cipher::_Query: tty->print_cr(" available: KMC Query"); break;
+ case Cipher::_DEA: tty->print_cr(" available: KMC DEA"); break;
+ case Cipher::_TDEA128: tty->print_cr(" available: KMC TDEA-128"); break;
+ case Cipher::_TDEA192: tty->print_cr(" available: KMC TDEA-192"); break;
+ case Cipher::_EncryptedDEA: tty->print_cr(" available: KMC Encrypted DEA"); break;
+ case Cipher::_EncryptedDEA128: tty->print_cr(" available: KMC Encrypted DEA-128"); break;
+ case Cipher::_EncryptedDEA192: tty->print_cr(" available: KMC Encrypted DEA-192"); break;
+ case Cipher::_AES128: tty->print_cr(" available: KMC AES-128"); break;
+ case Cipher::_AES192: tty->print_cr(" available: KMC AES-192"); break;
+ case Cipher::_AES256: tty->print_cr(" available: KMC AES-256"); break;
+ case Cipher::_EnccryptedAES128: tty->print_cr(" available: KMC Encrypted-AES-128"); break;
+ case Cipher::_EnccryptedAES192: tty->print_cr(" available: KMC Encrypted-AES-192"); break;
+ case Cipher::_EnccryptedAES256: tty->print_cr(" available: KMC Encrypted-AES-256"); break;
+ case Cipher::_PRNG: tty->print_cr(" available: KMC PRNG"); break;
+ default: tty->print_cr(" available: unknown KMC code %d", i); break;
+ }
+ }
+ }
+ }
+ // Message digest details: KIMD bits are tested from _msgdigest_features[0], KLMD bits from _msgdigest_features[2].
+ if (test_feature_bit(&_msgdigest_features[0], -1, 2*MsgDigest::_featureBits)) {
+ tty->cr();
+ tty->print_cr(" available: %s", "Message Digest Functions for SHA");
+ }
+ if (test_feature_bit(&_msgdigest_features[0], -1, (int)MsgDigest::_featureBits)) {
+ tty->print_cr(" available Features of KIMD (Msg Digest):");
+ for (unsigned int i = 0; i < MsgDigest::_featureBits; i++) {
+ if (test_feature_bit(&_msgdigest_features[0], i, (int)MsgDigest::_featureBits)) {
+ switch (i) {
+ case MsgDigest::_Query: tty->print_cr(" available: KIMD Query"); break;
+ case MsgDigest::_SHA1: tty->print_cr(" available: KIMD SHA-1"); break;
+ case MsgDigest::_SHA256: tty->print_cr(" available: KIMD SHA-256"); break;
+ case MsgDigest::_SHA512: tty->print_cr(" available: KIMD SHA-512"); break;
+ case MsgDigest::_GHASH: tty->print_cr(" available: KIMD GHASH"); break;
+ default: tty->print_cr(" available: unknown code %d", i); break;
+ }
+ }
+ }
+ }
+ if (test_feature_bit(&_msgdigest_features[2], -1, (int)MsgDigest::_featureBits)) {
+ tty->print_cr(" available Features of KLMD (Msg Digest):");
+ for (unsigned int i = 0; i < MsgDigest::_featureBits; i++) {
+ if (test_feature_bit(&_msgdigest_features[2], i, (int)MsgDigest::_featureBits)) {
+ switch (i) {
+ case MsgDigest::_Query: tty->print_cr(" available: KLMD Query"); break;
+ case MsgDigest::_SHA1: tty->print_cr(" available: KLMD SHA-1"); break;
+ case MsgDigest::_SHA256: tty->print_cr(" available: KLMD SHA-256"); break;
+ case MsgDigest::_SHA512: tty->print_cr(" available: KLMD SHA-512"); break;
+ default: tty->print_cr(" available: unknown code %d", i); break;
+ }
+ }
+ }
+ }
+ }
+ if (ContendedPaddingWidth > 0) {
+ tty->cr();
+ tty->print_cr("ContendedPaddingWidth " INTX_FORMAT, ContendedPaddingWidth);
+ }
+ }
+}
+
+void VM_Version::print_features() { // Prints the feature summary with the line prefix "Version:".
+ print_features_internal("Version:"); // print_anyway takes its declared default (the declaration is not visible here).
+}
+
+void VM_Version::reset_features(bool reset) {
+  // No-op unless the caller requests a reset; otherwise zero every word of the feature buffer.
+  if (!reset) return;
+  for (unsigned int word = 0; word < _features_buffer_len; word++) {
+    _features[word] = 0;
+  }
+}
+
+
+void VM_Version::set_features_z900(bool reset) {
+ reset_features(reset); // Clears all feature bits first when reset is true.
+ // Facilities this function assigns to the z900 generation.
+ set_has_long_displacement();
+ set_has_ETF2();
+}
+
+void VM_Version::set_features_z990(bool reset) {
+ reset_features(reset); // Clears all feature bits first when reset is true.
+ // Start from the z900 set (reset=false leaves existing bits alone), then add the z990 facilities.
+ set_features_z900(false);
+ set_has_ETF3();
+ set_has_long_displacement_fast();
+ set_has_HFPMultiplyAndAdd();
+}
+
+void VM_Version::set_features_z9(bool reset) {
+ reset_features(reset); // Clears all feature bits first when reset is true.
+ // Start from the z990 set (reset=false leaves existing bits alone), then add the z9 facilities.
+ set_features_z990(false);
+ set_has_StoreFacilityListExtended();
+ // set_has_Crypto(); // Do not set, crypto features must be retrieved separately.
+ set_has_ETF2Enhancements();
+ set_has_ETF3Enhancements();
+ set_has_extended_immediate();
+ set_has_StoreClockFast();
+ set_has_HFPUnnormalized();
+}
+
+void VM_Version::set_features_z10(bool reset) {
+ reset_features(reset); // Clears all feature bits first when reset is true.
+ // Start from the z9 set (reset=false leaves existing bits alone), then add the z10 facilities.
+ set_features_z9(false);
+ set_has_CompareSwapStore();
+ set_has_RelativeLoadStore();
+ set_has_CompareBranch();
+ set_has_CompareTrap();
+ set_has_MultiplySingleImm32();
+ set_has_Prefetch();
+ set_has_MoveImmToMem();
+ set_has_MemWithImmALUOps();
+ set_has_ExecuteExtensions();
+ set_has_FPSupportEnhancements();
+ set_has_DecimalFloatingPoint();
+ set_has_ExtractCPUtime();
+ set_has_CryptoExt3();
+}
+
+void VM_Version::set_features_z196(bool reset) {
+ reset_features(reset); // Clears all feature bits first when reset is true.
+ // Start from the z10 set (reset=false leaves existing bits alone), then add the z196 facilities.
+ set_features_z10(false);
+ set_has_InterlockedAccessV1();
+ set_has_PopCount();
+ set_has_LoadStoreConditional();
+ set_has_HighWordInstr();
+ set_has_FastSync();
+ set_has_FPExtensions();
+ set_has_DistinctOpnds();
+ set_has_CryptoExt4();
+}
+
+void VM_Version::set_features_ec12(bool reset) {
+ reset_features(reset); // Clears all feature bits first when reset is true.
+ // Start from the z196 set (reset=false leaves existing bits alone), then add the EC12 facilities.
+ set_features_z196(false);
+ set_has_MiscInstrExt();
+ set_has_InterlockedAccessV2();
+ set_has_LoadAndALUAtomicV2();
+ set_has_TxMem();
+}
+
+void VM_Version::set_features_z13(bool reset) {
+ reset_features(reset); // Clears all feature bits first when reset is true.
+ // Start from the EC12 set (reset=false leaves existing bits alone), then add the z13 facilities.
+ set_features_ec12(false);
+ set_has_LoadStoreConditional2();
+ set_has_CryptoExt5();
+ set_has_VectorFacility();
+}
+
+void VM_Version::set_features_from(const char* march) {
+  // Applies the feature set selected by the architecture name march:
+  //   "z900", "z990", "z9", "z10", "z196", "ec12", "z13": the feature set of that generation.
+  //   "ztest":    all four test features.
+  //   "ztest<n>": the test features selected by the decimal bit mask <n>, 0 <= n <= 15.
+  // A NULL or empty name is a no-op; any other name only produces a warning.
+  if ((march == NULL) || (march[0] == '\0')) {
+    return;
+  }
+
+  const size_t prefix_len = 5;    // Length of the "ztest" prefix.
+  const size_t digits_max = 16;   // A mask suffix of this many characters or more is rejected.
+  const size_t march_len = strlen(march);
+  bool rejected = false;          // march could not be interpreted.
+  bool test_mode = false;         // Test features were selected; forces the feature printout.
+
+  // Named machine generations.
+  if (strcmp(march, "z900") == 0) {
+    set_features_z900();
+  } else if (strcmp(march, "z990") == 0) {
+    set_features_z990();
+  } else if (strcmp(march, "z9") == 0) {
+    set_features_z9();
+  } else if (strcmp(march, "z10") == 0) {
+    set_features_z10();
+  } else if (strcmp(march, "z196") == 0) {
+    set_features_z196();
+  } else if (strcmp(march, "ec12") == 0) {
+    set_features_ec12();
+  } else if (strcmp(march, "z13") == 0) {
+    set_features_z13();
+  } else if (strncmp(march, "ztest", prefix_len) == 0) {
+    // Test feature selection.
+    assert(!has_TestFeaturesImpl(), "possible facility list flag conflict");
+    int mask = 0x0f;  // Plain "ztest" selects all four test features.
+    if (march_len > prefix_len) {
+      // Parse the decimal suffix. Each character is validated right after it has been folded in;
+      // parsing stops at the first bad character or once the value exceeds 15, so it cannot overflow.
+      mask = 0;
+      rejected = (march_len - prefix_len) >= digits_max;
+      for (const char* p = march + prefix_len; !rejected && (*p != '\0'); p++) {
+        const int digit = *p - '0';
+        mask = mask*10 + digit;
+        rejected = (digit < 0) || (digit > 9) || (mask > 15);
+      }
+    }
+    // Each mask bit enables the test feature carrying the same number in its name.
+    if (!rejected) {
+      test_mode = true;
+      if (mask & 0x01) { set_has_TestFeature1Impl(); }
+      if (mask & 0x02) { set_has_TestFeature2Impl(); }
+      if (mask & 0x04) { set_has_TestFeature4Impl(); }
+      if (mask & 0x08) { set_has_TestFeature8Impl(); }
+    }
+  } else {
+    // Unknown name.
+    rejected = true;
+  }
+
+  // Report the outcome.
+  if (rejected) {
+    tty->print_cr("***Warning: Unsupported ProcessorArchitecture: %s, internal settings left undisturbed.", march);
+    return;
+  }
+  set_features_string();
+  if (test_mode || PrintAssembly) {
+    print_features_internal("CPU Version as set by cmdline option:", test_mode);
+  }
+
+}
+
+static long (*getFeatures)(unsigned long*, int, int) = NULL; // Entry point stored by set_getFeatures() and invoked by call_getFeatures().
+
+void VM_Version::set_getFeatures(address entryPoint) {
+  // Only the first registration takes effect; once set, the entry point is never replaced.
+  if (getFeatures != NULL) return;
+  getFeatures = (long(*)(unsigned long*, int, int))entryPoint;
+}
+
+long VM_Version::call_getFeatures(unsigned long* buffer, int buflen, int functionCode) { // Calls the registered detection routine and returns its result.
+ VM_Version::_is_determine_features_test_running = true; // Flag is set for the duration of the call only.
+ long functionResult = (*getFeatures)(buffer, buflen, functionCode); // NOTE(review): no NULL check; set_getFeatures() must have run before.
+ VM_Version::_is_determine_features_test_running = false;
+ return functionResult;
+}
+
+// Helper for the "extract cache attribute" instruction: combines attribute (shifted left by 4),
+// level (shifted left by 1) and type indication into a single function code by bitwise OR.
+int VM_Version::calculate_ECAG_functionCode(unsigned int attributeIndication, unsigned int levelIndication, unsigned int typeIndication) {
+  const unsigned int code = (attributeIndication << 4) | (levelIndication << 1) | typeIndication;
+  return code;
+}
+
+// Detects the CPU's facilities: emits a small detection stub (STFLE, crypto queries, ECAG, long displacement
+// timing test), runs it via call_getFeatures(), and fills _features, _cipher_features, and _msgdigest_features.
+void VM_Version::determine_features() {
+ const int cbuf_size = _code_buffer_len;
+ const int buf_len = _features_buffer_len;
+
+ // Allocate code buffer space for the detection code.
+ ResourceMark rm;
+ CodeBuffer cbuf("determine CPU features", cbuf_size, 0);
+ MacroAssembler* a = new MacroAssembler(&cbuf);
+
+ // Emit code.
+ set_getFeatures(a->pc());
+ address code = a->pc();
+
+ // Try STFLE. Possible INVOP will cause defaults to be used. The stub dispatches on the sign of Z_ARG2 (buf len).
+ Label getFEATURES;
+ Label getCPUFEATURES; // fcode = -1 (cache)
+ Label getCIPHERFEATURES; // fcode = -2 (cipher)
+ Label getMSGDIGESTFEATURES; // fcode = -3 (SHA)
+ Label checkLongDispFast;
+ Label noLongDisp;
+ Label posDisp, negDisp;
+ Label errRTN;
+ a->z_ltgfr(Z_R0, Z_ARG2); // Buf len to r0 and test.
+ a->z_brl(getFEATURES); // negative -> Get machine features.
+ a->z_brz(checkLongDispFast); // zero -> Check for high-speed Long Displacement Facility.
+ a->z_aghi(Z_R0, -1); // r0 = buf len - 1.
+ a->z_stfle(0, Z_ARG1);
+ a->z_lg(Z_R1, 0, Z_ARG1); // Get first DW of facility list.
+ a->z_lgr(Z_RET, Z_R0); // Calculate rtn value for success.
+ a->z_la(Z_RET, 1, Z_RET);
+ a->z_brnz(errRTN); // Instr failed if non-zero CC.
+ a->z_ltgr(Z_R1, Z_R1); // Instr failed if first DW == 0.
+ a->z_bcr(Assembler::bcondNotZero, Z_R14); // Successful return.
+
+ a->bind(errRTN);
+ a->z_lngr(Z_RET, Z_RET);
+ a->z_ltgr(Z_R1, Z_R1);
+ a->z_bcr(Assembler::bcondNotZero, Z_R14); // Return "buffer too small".
+ a->z_xgr(Z_RET, Z_RET);
+ a->z_br(Z_R14); // Return "operation aborted".
+
+ a->bind(getFEATURES);
+ a->z_cghi(Z_R0, -1); // -1: Extract CPU attributes, currently: cache layout only.
+ a->z_bre(getCPUFEATURES);
+ a->z_cghi(Z_R0, -2); // -2: Extract detailed crypto capabilities (cipher instructions).
+ a->z_bre(getCIPHERFEATURES);
+ a->z_cghi(Z_R0, -3); // -3: Extract detailed crypto capabilities (msg digest instructions).
+ a->z_bre(getMSGDIGESTFEATURES);
+
+ a->z_xgr(Z_RET, Z_RET); // Not a valid function code.
+ a->z_br(Z_R14); // Return "operation aborted".
+
+ // Try KIMD/KLMD query function to get details about msg digest (secure hash, SHA) instructions.
+ a->bind(getMSGDIGESTFEATURES);
+ a->z_lghi(Z_R0,(int)MsgDigest::_Query); // query function code
+ a->z_lgr(Z_R1,Z_R2); // param block addr, 2*16 bytes min size
+ a->z_kimd(Z_R2,Z_R2); // Get available KIMD functions (bit pattern in param blk).
+ a->z_la(Z_R1,16,Z_R1); // next param block addr
+ a->z_klmd(Z_R2,Z_R2); // Get available KLMD functions (bit pattern in param blk).
+ a->z_lghi(Z_RET,4); // Return value 4; the caller stores it in _nmsgdigest_features.
+ a->z_br(Z_R14);
+
+ // Try KM/KMC query function to get details about crypto instructions.
+ a->bind(getCIPHERFEATURES);
+ a->z_lghi(Z_R0,(int)Cipher::_Query); // query function code
+ a->z_lgr(Z_R1,Z_R2); // param block addr, 2*16 bytes min size (KM/KMC output)
+ a->z_km(Z_R2,Z_R2); // get available KM functions
+ a->z_la(Z_R1,16,Z_R1); // next param block addr
+ a->z_kmc(Z_R2,Z_R2); // get available KMC functions
+ a->z_lghi(Z_RET,4); // Return value 4; the caller stores it in _ncipher_features.
+ a->z_br(Z_R14);
+
+ // Use EXTRACT CPU ATTRIBUTE instruction to get information about cache layout.
+ a->bind(getCPUFEATURES);
+ a->z_xgr(Z_R0,Z_R0); // as recommended in instruction documentation
+ a->z_ecag(Z_RET,Z_R0,0,Z_ARG3); // Extract information as requested by Z_ARG3 contents (the functionCode argument).
+ a->z_br(Z_R14);
+
+ // Check the performance of the Long Displacement Facility, i.e. find out if we are running on z900 or newer.
+ a->bind(checkLongDispFast);
+ a->z_llill(Z_R0, 0xffff); // preset #iterations
+ a->z_larl(Z_R1, posDisp);
+ a->z_stck(0, Z_ARG1); // Get begin timestamp.
+
+ a->bind(posDisp); // Positive disp loop.
+ a->z_lg(Z_ARG2, 0, Z_ARG1);
+ a->z_bctgr(Z_R0, Z_R1);
+
+ a->z_stck(0, Z_ARG1); // Get end timestamp.
+ a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1); // Calculate elapsed time.
+ a->z_lcgr(Z_ARG2, Z_ARG2);
+ a->z_srlg(Z_ARG2, Z_ARG2, 12); // LSB: now microseconds
+ a->z_stg(Z_ARG2, 8, Z_ARG1); // Store difference in buffer[1].
+
+ a->z_llill(Z_R0, 0xffff); // preset #iterations
+ a->z_larl(Z_R1, negDisp);
+ a->z_xgr(Z_ARG2, Z_ARG2); // Clear to detect absence of LongDisp facility.
+ a->z_stck(0, Z_ARG1); // Get begin timestamp.
+ a->z_la(Z_ARG1, 8, Z_ARG1);
+
+ a->bind(negDisp); // Negative disp loop.
+ a->z_lg(Z_ARG2, -8, Z_ARG1);
+ a->z_bctgr(Z_R0, Z_R1);
+
+ a->z_aghi(Z_ARG1, -8);
+ a->z_stck(0, Z_ARG1); // Get end timestamp.
+ a->z_ltgr(Z_ARG2, Z_ARG2); // Check for absence of LongDisp facility.
+ a->z_brz(noLongDisp);
+ a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1); // Calc elapsed time.
+ a->z_lcgr(Z_ARG2, Z_ARG2);
+ a->z_srlg(Z_ARG2, Z_ARG2, 12); // LSB: now microseconds
+ a->z_stg(Z_ARG2, 0, Z_ARG1); // store difference in buffer[0]
+
+ a->z_llill(Z_RET,0xffff);
+ a->z_br(Z_R14);
+
+ a->bind(noLongDisp);
+ a->z_lghi(Z_RET,-1);
+ a->z_br(Z_R14);
+
+ address code_end = a->pc();
+ a->flush();
+
+ // Print the detection code.
+ bool printVerbose = Verbose || PrintAssembly || PrintStubCode;
+ if (printVerbose) {
+ ttyLocker ttyl;
+ tty->print_cr("Decoding CPU feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
+ tty->print_cr("Stub length is %ld bytes, codebuffer reserves %d bytes, %ld bytes spare.",
+ code_end-code, cbuf_size, cbuf_size-(code_end-code));
+
+ // Use existing decode function. This enables the [Code] format which is needed to DecodeErrorFile.
+ Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
+ }
+
+ // Prepare for detection code execution and clear work buffer.
+ _nfeatures = 0;
+ _ncipher_features = 0;
+ unsigned long buffer[buf_len];
+
+ for (int i = 0; i < buf_len; i++) {
+ buffer[i] = 0L;
+ }
+
+ // execute code
+ // Illegal instructions will be replaced by 0 in signal handler.
+ // In case of problems, call_getFeatures will return a not-positive result.
+ long used_len = call_getFeatures(buffer, buf_len, 0);
+
+ bool ok;
+ if (used_len == 1) {
+ ok = true;
+ } else if (used_len > 1) {
+ unsigned int used_lenU = (unsigned int)used_len;
+ ok = true;
+ for (unsigned int i = 1; i < used_lenU; i++) {
+ ok = ok && (buffer[i] == 0L);
+ }
+ if (printVerbose && !ok) {
+ bool compact = false;
+ tty->print_cr("Note: feature list has %d (i.e. more than one) array elements.", used_lenU);
+ if (compact) {
+ tty->print("non-zero feature list elements:");
+ for (unsigned int i = 0; i < used_lenU; i++) {
+ tty->print(" [%d]: 0x%16.16lx", i, buffer[i]);
+ }
+ tty->cr();
+ } else {
+ for (unsigned int i = 0; i < used_lenU; i++) {
+ tty->print_cr("non-zero feature list[%d]: 0x%16.16lx", i, buffer[i]);
+ }
+ }
+
+ if (compact) {
+ tty->print_cr("Active features (compact view):");
+ for (unsigned int k = 0; k < used_lenU; k++) {
+ tty->print_cr(" buffer[%d]:", k);
+ for (unsigned int j = k*sizeof(long); j < (k+1)*sizeof(long); j++) {
+ bool line = false;
+ for (unsigned int i = j*8; i < (j+1)*8; i++) {
+ bool bit = test_feature_bit(buffer, i, used_lenU*sizeof(long)*8);
+ if (bit) {
+ if (!line) {
+ tty->print(" byte[%d]:", j);
+ line = true;
+ }
+ tty->print(" [%3.3d]", i);
+ }
+ }
+ if (line) {
+ tty->cr();
+ }
+ }
+ }
+ } else {
+ tty->print_cr("Active features (full view):");
+ for (unsigned int k = 0; k < used_lenU; k++) {
+ tty->print_cr(" buffer[%d]:", k);
+ for (unsigned int j = k*sizeof(long); j < (k+1)*sizeof(long); j++) {
+ tty->print(" byte[%d]:", j);
+ for (unsigned int i = j*8; i < (j+1)*8; i++) {
+ bool bit = test_feature_bit(buffer, i, used_lenU*sizeof(long)*8);
+ if (bit) {
+ tty->print(" [%3.3d]", i);
+ } else {
+ tty->print(" ");
+ }
+ }
+ tty->cr();
+ }
+ }
+ }
+ }
+ ok = true; // The feature list is accepted even if elements beyond the first are non-zero.
+ } else { // No features retrieved if we reach here. Buffer too short or instr not available.
+ if (used_len < 0) {
+ ok = false;
+ if (printVerbose) {
+ tty->print_cr("feature list buffer[%d] too short, required: buffer[%ld]", buf_len, -used_len);
+ }
+ } else {
+ if (printVerbose) {
+ tty->print_cr("feature list could not be retrieved. Running on z900 or z990? Trying to find out...");
+ }
+ used_len = call_getFeatures(buffer, 0, 0); // Must provide at least two DW buffer elements!!!!
+
+ ok = used_len > 0;
+ if (ok) {
+ if (buffer[1]*10 < buffer[0]) { // buffer[1]: positive disp timing, buffer[0]: negative disp timing.
+ set_features_z900();
+ } else {
+ set_features_z990();
+ }
+
+ if (printVerbose) {
+ tty->print_cr("Note: high-speed long displacement test used %ld iterations.", used_len);
+ tty->print_cr(" Positive displacement loads took %8.8lu microseconds.", buffer[1]);
+ tty->print_cr(" Negative displacement loads took %8.8lu microseconds.", buffer[0]);
+ if (has_long_displacement_fast()) {
+ tty->print_cr(" assuming high-speed long displacement IS available.");
+ } else {
+ tty->print_cr(" assuming high-speed long displacement is NOT available.");
+ }
+ }
+ } else {
+ if (printVerbose) {
+ tty->print_cr("Note: high-speed long displacement test was not successful.");
+ tty->print_cr(" assuming long displacement is NOT available.");
+ }
+ }
+ return; // Do not copy buffer to _features, no test for cipher features.
+ }
+ }
+
+ if (ok) {
+ // Fill features buffer.
+ // Clear work buffer.
+ for (int i = 0; i < buf_len; i++) {
+ _features[i] = buffer[i];
+ _cipher_features[i] = 0;
+ _msgdigest_features[i] = 0;
+ buffer[i] = 0L;
+ }
+ _nfeatures = used_len;
+ } else {
+ for (int i = 0; i < buf_len; i++) {
+ _features[i] = 0;
+ _cipher_features[i] = 0;
+ _msgdigest_features[i] = 0;
+ buffer[i] = 0L;
+ }
+ _nfeatures = 0;
+ }
+
+ // Extract Crypto Facility details.
+ if (has_Crypto()) {
+ // Get cipher features.
+ used_len = call_getFeatures(buffer, -2, 0); // buflen -2 selects the cipher query in the stub.
+ for (int i = 0; i < buf_len; i++) {
+ _cipher_features[i] = buffer[i];
+ }
+ _ncipher_features = used_len;
+
+ // Get msg digest features.
+ used_len = call_getFeatures(buffer, -3, 0); // buflen -3 selects the msg digest query in the stub.
+ for (int i = 0; i < buf_len; i++) {
+ _msgdigest_features[i] = buffer[i];
+ }
+ _nmsgdigest_features = used_len;
+ }
+
+ static int levelProperties[_max_cache_levels]; // All property indications per level.
+ static int levelScope[_max_cache_levels]; // private/shared
+ static const char* levelScopeText[4] = {"No cache ",
+ "CPU private",
+ "shared ",
+ "reserved "};
+
+ static int levelType[_max_cache_levels]; // D/I/mixed
+ static const char* levelTypeText[4] = {"separate D and I caches",
+ "I cache only ",
+ "D-cache only ",
+ "combined D/I cache "};
+
+ static unsigned int levelReserved[_max_cache_levels]; // reserved property bits
+ static unsigned int levelLineSize[_max_cache_levels];
+ static unsigned int levelTotalSize[_max_cache_levels];
+ static unsigned int levelAssociativity[_max_cache_levels];
+
+ // Extract Cache Layout details.
+ if (has_ExtractCPUAttributes() && printVerbose) { // For information only, as of now.
+ bool lineSize_mismatch;
+ bool print_something;
+ long functionResult;
+ unsigned int attributeIndication = 0; // 0..15
+ unsigned int levelIndication = 0; // 0..8
+ unsigned int typeIndication = 0; // 0..1 (D-Cache, I-Cache)
+ int functionCode = calculate_ECAG_functionCode(attributeIndication, levelIndication, typeIndication);
+
+ // Get cache topology.
+ functionResult = call_getFeatures(buffer, -1, functionCode); // buflen -1 selects the ECAG path in the stub.
+
+ for (unsigned int i = 0; i < _max_cache_levels; i++) {
+ if (functionResult > 0) {
+ int shiftVal = 8*(_max_cache_levels-(i+1));
+ levelProperties[i] = (functionResult & (0xffUL<<shiftVal)) >> shiftVal;
+ levelReserved[i] = (levelProperties[i] & 0xf0) >> 4;
+ levelScope[i] = (levelProperties[i] & 0x0c) >> 2;
+ levelType[i] = (levelProperties[i] & 0x03);
+ } else {
+ levelProperties[i] = 0;
+ levelReserved[i] = 0;
+ levelScope[i] = 0;
+ levelType[i] = 0;
+ }
+ levelLineSize[i] = 0;
+ levelTotalSize[i] = 0;
+ levelAssociativity[i] = 0;
+ }
+
+ tty->cr();
+ tty->print_cr("------------------------------------");
+ tty->print_cr("--- Cache Topology Information ---");
+ tty->print_cr("------------------------------------");
+ for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+ tty->print_cr(" Cache Level %d: <scope> %s | <type> %s",
+ i+1, levelScopeText[levelScope[i]], levelTypeText[levelType[i]]);
+ }
+
+ // Get D-cache details per level.
+ _Dcache_lineSize = 0;
+ lineSize_mismatch = false;
+ print_something = false;
+ typeIndication = 0; // 0..1 (D-Cache, I-Cache)
+ for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+ if ((levelType[i] == 0) || (levelType[i] == 2)) {
+ print_something = true;
+
+ // Get cache line size of level i.
+ attributeIndication = 1;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelLineSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ // Get cache total size of level i.
+ attributeIndication = 2;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelTotalSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ // Get cache associativity of level i.
+ attributeIndication = 3;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ _Dcache_lineSize = _Dcache_lineSize == 0 ? levelLineSize[i] : _Dcache_lineSize;
+ lineSize_mismatch = lineSize_mismatch || (_Dcache_lineSize != levelLineSize[i]);
+ } else {
+ levelLineSize[i] = 0;
+ }
+ }
+
+ if (print_something) {
+ tty->cr();
+ tty->print_cr("------------------------------------");
+ tty->print_cr("--- D-Cache Detail Information ---");
+ tty->print_cr("------------------------------------");
+ if (lineSize_mismatch) {
+ tty->print_cr("WARNING: D-Cache line size mismatch!");
+ }
+ for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+ if (levelLineSize[i] > 0) {
+ tty->print_cr(" D-Cache Level %d: line size = %4d, total size = %6dKB, associativity = %2d",
+ i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]);
+ }
+ }
+ }
+
+ // Get I-cache details per level.
+ _Icache_lineSize = 0;
+ lineSize_mismatch = false;
+ print_something = false;
+ typeIndication = 1; // 0..1 (D-Cache, I-Cache)
+ for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+ if ((levelType[i] == 0) || (levelType[i] == 1)) {
+ print_something = true;
+
+ // Get cache line size of level i.
+ attributeIndication = 1;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelLineSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ // Get cache total size of level i.
+ attributeIndication = 2;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelTotalSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ // Get cache associativity of level i.
+ attributeIndication = 3;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ _Icache_lineSize = _Icache_lineSize == 0 ? levelLineSize[i] : _Icache_lineSize;
+ lineSize_mismatch = lineSize_mismatch || (_Icache_lineSize != levelLineSize[i]);
+ } else {
+ levelLineSize[i] = 0;
+ }
+ }
+
+ if (print_something) {
+ tty->cr();
+ tty->print_cr("------------------------------------");
+ tty->print_cr("--- I-Cache Detail Information ---");
+ tty->print_cr("------------------------------------");
+ if (lineSize_mismatch) {
+ tty->print_cr("WARNING: I-Cache line size mismatch!");
+ }
+ for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+ if (levelLineSize[i] > 0) {
+ tty->print_cr(" I-Cache Level %d: line size = %4d, total size = %6dKB, associativity = %2d",
+ i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]);
+ }
+ }
+ }
+
+ // Get D/I-cache details per level.
+ lineSize_mismatch = false;
+ print_something = false;
+ typeIndication = 0; // 0..1 (D-Cache, I-Cache)
+ for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+ if (levelType[i] == 3) {
+ print_something = true;
+
+ // Get cache line size of level i.
+ attributeIndication = 1;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelLineSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ // Get cache total size of level i.
+ attributeIndication = 2;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelTotalSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ // Get cache associativity of level i.
+ attributeIndication = 3;
+ functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication);
+ levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode);
+
+ _Dcache_lineSize = _Dcache_lineSize == 0 ? levelLineSize[i] : _Dcache_lineSize;
+ _Icache_lineSize = _Icache_lineSize == 0 ? levelLineSize[i] : _Icache_lineSize;
+ lineSize_mismatch = lineSize_mismatch || (_Dcache_lineSize != levelLineSize[i])
+ || (_Icache_lineSize != levelLineSize[i]);
+ } else {
+ levelLineSize[i] = 0;
+ }
+ }
+
+ if (print_something) {
+ tty->cr();
+ tty->print_cr("--------------------------------------");
+ tty->print_cr("--- D/I-Cache Detail Information ---");
+ tty->print_cr("--------------------------------------");
+ if (lineSize_mismatch) {
+ tty->print_cr("WARNING: D/I-Cache line size mismatch!");
+ }
+ for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) {
+ if (levelLineSize[i] > 0) {
+ tty->print_cr(" D/I-Cache Level %d: line size = %4d, total size = %6dKB, associativity = %2d",
+ i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]);
+ }
+ }
+ }
+ tty->cr();
+ }
+ return;
+}
+
+unsigned long VM_Version::z_SIGILL() { // Branches into a zero-filled local buffer on purpose; judging by the name, to provoke a SIGILL.
+ unsigned long ZeroBuffer = 0;
+ unsigned long work;
+ asm(
+ " LA %[work],%[buffer] \n\t" // Load address of buffer.
+ " LARL 14,+6 \n\t" // Load address of faulting instruction.
+ " BCR 15,%[work] \n\t" // Branch into buffer, execute whatever is in there.
+ : [buffer] "+Q" (ZeroBuffer) /* outputs */
+ , [work] "=&a" (work) /* outputs */
+ : /* inputs */
+ : "cc" /* clobbered */ // NOTE(review): LARL above writes r14, which is not listed here - confirm this is intended.
+ );
+ return ZeroBuffer; // Returns the buffer contents; presumably reached only if execution is resumed after the fault.
+}
+
+unsigned long VM_Version::z_SIGSEGV() { // Stores to address zero on purpose; judging by the name, to provoke a SIGSEGV.
+ unsigned long ZeroBuffer = 0;
+ unsigned long work;
+ asm(
+ " LG %[work],%[buffer] \n\t" // Load zero address (the contents of ZeroBuffer).
+ " STG %[work],0(,%[work])\n\t" // Store to address zero.
+ : [buffer] "+Q" (ZeroBuffer) /* outputs */
+ , [work] "=&a" (work) /* outputs */
+ : /* inputs */
+ : "cc" /* clobbered */
+ );
+ return ZeroBuffer; // Returns the buffer contents; presumably reached only if execution is resumed after the fault.
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vm_version_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VM_VERSION_S390_HPP
+#define CPU_S390_VM_VM_VERSION_S390_HPP
+
+
+#include "runtime/globals_extension.hpp"
+#include "runtime/vm_version.hpp"
+
+class VM_Version: public Abstract_VM_Version {
+
+ protected:
+// The following list contains the (approximate) announcement/availability
+// dates of the many System z generations in existence as of now which
+// implement the z/Architecture.
+// z900: 2000-10
+// z990: 2003-06
+// z9: 2005-09
+// z10: 2007-04
+// z10: 2008-02
+// z196: 2010-08
+// ec12: 2012-09
+// z13: 2015-03
+//
+// z/Architecture is the name of the 64-bit extension of the 31-bit s390
+// architecture.
+//
+// ----------------------------------------------
+// --- FeatureBitString Bits 0.. 63 (DW[0]) ---
+// ----------------------------------------------
+// 11222334445566
+// 04826048260482604
+#define StoreFacilityListExtendedMask 0x0100000000000000UL // z9
+#define ETF2Mask 0x0000800000000000UL // z900
+#define CryptoFacilityMask 0x0000400000000000UL // z990
+#define LongDispFacilityMask 0x0000200000000000UL // z900 with microcode update
+#define LongDispFacilityHighPerfMask 0x0000300000000000UL // z990
+#define HFPMultiplyAndAddMask 0x0000080000000000UL // z990
+#define ExtImmedFacilityMask 0x0000040000000000UL // z9
+#define ETF3Mask 0x0000020000000000UL // z990/z9 (?)
+#define HFPUnnormalizedMask 0x0000010000000000UL // z9
+#define ETF2EnhancementMask 0x0000008000000000UL // z9
+#define StoreClockFastMask 0x0000004000000000UL // z9
+#define ParsingEnhancementsMask 0x0000002000000000UL // z10(?)
+#define ETF3EnhancementMask 0x0000000200000000UL // z9
+#define ExtractCPUTimeMask 0x0000000100000000UL // z10
+#define CompareSwapStoreMask 0x00000000c0000000UL // z10
+#define GnrlInstrExtFacilityMask 0x0000000020000000UL // z10
+#define ExecuteExtensionsMask 0x0000000010000000UL // z10
+#define FPExtensionsMask 0x0000000004000000UL // z196
+#define FPSupportEnhancementsMask 0x0000000000400000UL // z10
+#define DecimalFloatingPointMask 0x0000000000300000UL // z10
+// z196 begin
+#define DistinctOpndsMask 0x0000000000040000UL // z196
+#define FastBCRSerializationMask DistinctOpndsMask
+#define HighWordMask DistinctOpndsMask
+#define LoadStoreConditionalMask DistinctOpndsMask
+#define PopulationCountMask DistinctOpndsMask
+#define InterlockedAccess1Mask DistinctOpndsMask
+// z196 end
+// EC12 begin
+#define DFPZonedConversionMask 0x0000000000008000UL // ec12
+#define MiscInstrExtMask 0x0000000000004000UL // ec12
+#define ExecutionHintMask MiscInstrExtMask
+#define LoadAndTrapMask MiscInstrExtMask
+#define ProcessorAssistMask MiscInstrExtMask
+#define ConstrainedTxExecutionMask 0x0000000000002000UL // ec12
+#define InterlockedAccess2Mask 0x0000000000000800UL // ec12
+// EC12 end
+// z13 begin
+#define LoadStoreConditional2Mask 0x0000000000000400UL // z13
+#define CryptoExtension5Mask 0x0000000000000040UL // z13
+// z13 end
+// Feature-DW[0] starts to fill up. Use of these masks is risky.
+#define TestFeature1ImplMask 0x0000000000000001UL
+#define TestFeature2ImplMask 0x0000000000000002UL
+#define TestFeature4ImplMask 0x0000000000000004UL
+#define TestFeature8ImplMask 0x0000000000000008UL
+// ----------------------------------------------
+// --- FeatureBitString Bits 64..127 (DW[1]) ---
+// ----------------------------------------------
+// 11111111
+// 66778889900011222
+// 48260482604826048
+#define TransactionalExecutionMask 0x0040000000000000UL // ec12
+#define CryptoExtension3Mask 0x0008000000000000UL // z196
+#define CryptoExtension4Mask 0x0004000000000000UL // z196
+#define DFPPackedConversionMask 0x0000800000000000UL // z13
+// ----------------------------------------------
+// --- FeatureBitString Bits 128..191 (DW[2]) ---
+// ----------------------------------------------
+// 11111111111111111
+// 23344455666778889
+// 82604826048260482
+#define VectorFacilityMask 0x4000000000000000UL // z13, not avail in VM guest mode!
+
+ enum {
+ _max_cache_levels = 8, // As limited by ECAG instruction.
+ _features_buffer_len = 4, // in DW
+ _code_buffer_len = 2*256 // For feature detection code.
+ };
+ static unsigned long _features[_features_buffer_len];
+ static unsigned long _cipher_features[_features_buffer_len];
+ static unsigned long _msgdigest_features[_features_buffer_len];
+ static unsigned int _nfeatures;
+ static unsigned int _ncipher_features;
+ static unsigned int _nmsgdigest_features;
+ static unsigned int _Dcache_lineSize;
+ static unsigned int _Icache_lineSize;
+ static bool _is_determine_features_test_running;
+
+ static bool test_feature_bit(unsigned long* featureBuffer, int featureNum, unsigned int bufLen);
+ static void set_features_string();
+ static void print_features_internal(const char* text, bool print_anyway=false);
+ static void determine_features();
+ static long call_getFeatures(unsigned long* buffer, int buflen, int functionCode);
+ static void set_getFeatures(address entryPoint);
+ static int calculate_ECAG_functionCode(unsigned int attributeIndication,
+ unsigned int levelIndication,
+ unsigned int typeIndication);
+
+ // Setting features via march=z900|z990|z9|z10|z196|ec12|z13|ztest commandline option.
+ static void reset_features(bool reset);
+ static void set_features_z900(bool reset = true);
+ static void set_features_z990(bool reset = true);
+ static void set_features_z9(bool reset = true);
+ static void set_features_z10(bool reset = true);
+ static void set_features_z196(bool reset = true);
+ static void set_features_ec12(bool reset = true);
+ static void set_features_z13(bool reset = true);
+ static void set_features_from(const char* march);
+
+ // Get the CPU type from feature bit settings.
+ static bool is_z900() { return has_long_displacement() && !has_long_displacement_fast(); }
+ static bool is_z990() { return has_long_displacement_fast() && !has_extended_immediate(); }
+ static bool is_z9() { return has_extended_immediate() && !has_GnrlInstrExtensions(); }
+ static bool is_z10() { return has_GnrlInstrExtensions() && !has_DistinctOpnds(); }
+ static bool is_z196() { return has_DistinctOpnds() && !has_MiscInstrExt(); }
+ static bool is_ec12() { return has_MiscInstrExt() && !has_CryptoExt5(); }
+ static bool is_z13() { return has_CryptoExt5();}
+
+ // Get information about cache line sizes.
+ // As of now and the foreseeable future, line size of all levels will be the same and 256.
+ static unsigned int Dcache_lineSize(unsigned int level = 0) { return _Dcache_lineSize; }
+ static unsigned int Icache_lineSize(unsigned int level = 0) { return _Icache_lineSize; }
+
+ public:
+
+ // Need to use nested class with unscoped enum.
+ // C++11 declaration "enum class Cipher { ... }" is not supported.
+ class CipherMode {
+ public:
+ enum {
+ cipher = 0x00,
+ decipher = 0x80
+ };
+ };
+ class Cipher {
+ public:
+ enum { // KM only!!! KMC uses different parmBlk sizes.
+ _Query = 0,
+ _DEA = 1,
+ _TDEA128 = 2,
+ _TDEA192 = 3,
+ _EncryptedDEA = 9,
+ _EncryptedDEA128 = 10,
+ _EncryptedDEA192 = 11,
+ _AES128 = 18,
+ _AES192 = 19,
+ _AES256 = 20,
+ _EnccryptedAES128 = 26,
+ _EnccryptedAES192 = 27,
+ _EnccryptedAES256 = 28,
+ _XTSAES128 = 50,
+ _XTSAES256 = 52,
+ _EncryptedXTSAES128 = 58,
+ _EncryptedXTSAES256 = 60,
+ _PRNG = 67,
+ _featureBits = 128,
+
+ // Parameter block sizes (in bytes) for KM instruction.
+ _Query_parmBlk = 16,
+ _DEA_parmBlk = 8,
+ _TDEA128_parmBlk = 16,
+ _TDEA192_parmBlk = 24,
+ _EncryptedDEA_parmBlk = 32,
+ _EncryptedDEA128_parmBlk = 40,
+ _EncryptedDEA192_parmBlk = 48,
+ _AES128_parmBlk = 16,
+ _AES192_parmBlk = 24,
+ _AES256_parmBlk = 32,
+ _EnccryptedAES128_parmBlk = 48,
+ _EnccryptedAES192_parmBlk = 56,
+ _EnccryptedAES256_parmBlk = 64,
+ _XTSAES128_parmBlk = 32,
+ _XTSAES256_parmBlk = 48,
+ _EncryptedXTSAES128_parmBlk = 64,
+ _EncryptedXTSAES256_parmBlk = 80,
+
+ // Parameter block sizes (in bytes) for KMC instruction.
+ _Query_parmBlk_C = 16,
+ _DEA_parmBlk_C = 16,
+ _TDEA128_parmBlk_C = 24,
+ _TDEA192_parmBlk_C = 32,
+ _EncryptedDEA_parmBlk_C = 40,
+ _EncryptedDEA128_parmBlk_C = 48,
+ _EncryptedDEA192_parmBlk_C = 56,
+ _AES128_parmBlk_C = 32,
+ _AES192_parmBlk_C = 40,
+ _AES256_parmBlk_C = 48,
+ _EnccryptedAES128_parmBlk_C = 64,
+ _EnccryptedAES192_parmBlk_C = 72,
+ _EnccryptedAES256_parmBlk_C = 80,
+ _XTSAES128_parmBlk_C = 32,
+ _XTSAES256_parmBlk_C = 48,
+ _EncryptedXTSAES128_parmBlk_C = 64,
+ _EncryptedXTSAES256_parmBlk_C = 80,
+ _PRNG_parmBlk_C = 32,
+
+ // Data block sizes (in bytes).
+ _Query_dataBlk = 0,
+ _DEA_dataBlk = 8,
+ _TDEA128_dataBlk = 8,
+ _TDEA192_dataBlk = 8,
+ _EncryptedDEA_dataBlk = 8,
+ _EncryptedDEA128_dataBlk = 8,
+ _EncryptedDEA192_dataBlk = 8,
+ _AES128_dataBlk = 16,
+ _AES192_dataBlk = 16,
+ _AES256_dataBlk = 16,
+ _EnccryptedAES128_dataBlk = 16,
+ _EnccryptedAES192_dataBlk = 16,
+ _EnccryptedAES256_dataBlk = 16,
+ _XTSAES128_dataBlk = 16,
+ _XTSAES256_dataBlk = 16,
+ _EncryptedXTSAES128_dataBlk = 16,
+ _EncryptedXTSAES256_dataBlk = 16,
+ _PRNG_dataBlk = 8,
+ };
+ };
+ class MsgDigest {
+ public:
+ enum {
+ _Query = 0,
+ _SHA1 = 1,
+ _SHA256 = 2,
+ _SHA512 = 3,
+ _GHASH = 65,
+ _featureBits = 128,
+
+ // Parameter block sizes (in bytes) for KIMD.
+ _Query_parmBlk_I = 16,
+ _SHA1_parmBlk_I = 20,
+ _SHA256_parmBlk_I = 32,
+ _SHA512_parmBlk_I = 64,
+ _GHASH_parmBlk_I = 32,
+
+ // Parameter block sizes (in bytes) for KLMD.
+ _Query_parmBlk_L = 16,
+ _SHA1_parmBlk_L = 28,
+ _SHA256_parmBlk_L = 40,
+ _SHA512_parmBlk_L = 80,
+
+ // Data block sizes (in bytes).
+ _Query_dataBlk = 0,
+ _SHA1_dataBlk = 64,
+ _SHA256_dataBlk = 64,
+ _SHA512_dataBlk = 128,
+ _GHASH_dataBlk = 16
+ };
+ };
+ class MsgAuthent {
+ public:
+ enum {
+ _Query = 0,
+ _DEA = 1,
+ _TDEA128 = 2,
+ _TDEA192 = 3,
+ _EncryptedDEA = 9,
+ _EncryptedDEA128 = 10,
+ _EncryptedDEA192 = 11,
+ _AES128 = 18,
+ _AES192 = 19,
+ _AES256 = 20,
+ _EnccryptedAES128 = 26,
+ _EnccryptedAES192 = 27,
+ _EnccryptedAES256 = 28,
+ _featureBits = 128,
+
+ _Query_parmBlk = 16,
+ _DEA_parmBlk = 16,
+ _TDEA128_parmBlk = 24,
+ _TDEA192_parmBlk = 32,
+ _EncryptedDEA_parmBlk = 40,
+ _EncryptedDEA128_parmBlk = 48,
+ _EncryptedDEA192_parmBlk = 56,
+ _AES128_parmBlk = 32,
+ _AES192_parmBlk = 40,
+ _AES256_parmBlk = 48,
+ _EnccryptedAES128_parmBlk = 64,
+ _EnccryptedAES192_parmBlk = 72,
+ _EnccryptedAES256_parmBlk = 80,
+
+ _Query_dataBlk = 0,
+ _DEA_dataBlk = 8,
+ _TDEA128_dataBlk = 8,
+ _TDEA192_dataBlk = 8,
+ _EncryptedDEA_dataBlk = 8,
+ _EncryptedDEA128_dataBlk = 8,
+ _EncryptedDEA192_dataBlk = 8,
+ _AES128_dataBlk = 16,
+ _AES192_dataBlk = 16,
+ _AES256_dataBlk = 16,
+ _EnccryptedAES128_dataBlk = 16,
+ _EnccryptedAES192_dataBlk = 16,
+ _EnccryptedAES256_dataBlk = 16
+ };
+ };
+
+ // Initialization
+ static void initialize();
+ static void print_features();
+ static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
+
+ // CPU feature query functions
+ static bool has_StoreFacilityListExtended() { return (_features[0] & StoreFacilityListExtendedMask) == StoreFacilityListExtendedMask; }
+ static bool has_Crypto() { return (_features[0] & CryptoFacilityMask) == CryptoFacilityMask; }
+ static bool has_ETF2() { return (_features[0] & ETF2Mask) == ETF2Mask; }
+ static bool has_ETF3() { return (_features[0] & ETF3Mask) == ETF3Mask; }
+ static bool has_ETF2Enhancements() { return (_features[0] & ETF2EnhancementMask) == ETF2EnhancementMask; }
+ static bool has_ETF3Enhancements() { return (_features[0] & ETF3EnhancementMask) == ETF3EnhancementMask; }
+ static bool has_ParsingEnhancements() { return (_features[0] & ParsingEnhancementsMask) == ParsingEnhancementsMask; }
+ static bool has_long_displacement() { return (_features[0] & LongDispFacilityMask) == LongDispFacilityMask; }
+ static bool has_long_displacement_fast() { return (_features[0] & LongDispFacilityHighPerfMask) == LongDispFacilityHighPerfMask; }
+ static bool has_extended_immediate() { return (_features[0] & ExtImmedFacilityMask) == ExtImmedFacilityMask; }
+ static bool has_StoreClockFast() { return (_features[0] & StoreClockFastMask) == StoreClockFastMask; }
+ static bool has_ExtractCPUtime() { return (_features[0] & ExtractCPUTimeMask) == ExtractCPUTimeMask; }
+ static bool has_CompareSwapStore() { return (_features[0] & CompareSwapStoreMask) == CompareSwapStoreMask; }
+
+ static bool has_HFPMultiplyAndAdd() { return (_features[0] & HFPMultiplyAndAddMask) == HFPMultiplyAndAddMask; }
+ static bool has_HFPUnnormalized() { return (_features[0] & HFPUnnormalizedMask) == HFPUnnormalizedMask; }
+
+ // Make sure we don't run on older ...
+ static bool has_GnrlInstrExtensions() { guarantee((_features[0] & GnrlInstrExtFacilityMask) == GnrlInstrExtFacilityMask, "We no more support older than z10."); return true; }
+ static bool has_CompareBranch() { return has_GnrlInstrExtensions() && is_z10(); } // Only z10 benefits from these.
+ static bool has_CompareTrap() { return has_GnrlInstrExtensions(); }
+ static bool has_RelativeLoadStore() { return has_GnrlInstrExtensions(); }
+ static bool has_MultiplySingleImm32() { return has_GnrlInstrExtensions(); }
+ static bool has_Prefetch() { return has_GnrlInstrExtensions() && (AllocatePrefetchStyle > 0); }
+ static bool has_PrefetchRaw() { return has_GnrlInstrExtensions(); }
+ static bool has_MoveImmToMem() { return has_GnrlInstrExtensions(); }
+ static bool has_ExtractCPUAttributes() { return has_GnrlInstrExtensions(); }
+ static bool has_ExecuteExtensions() { return (_features[0] & ExecuteExtensionsMask) == ExecuteExtensionsMask; }
+ // Memory-immediate arithmetic instructions. There is no performance penalty in using them.
+ // Moreover, these memory-immediate instructions are quasi-atomic (>99.99%) on z10
+ // and 100% atomic from z196 onwards, thanks to the specific operand serialization that is new with z196.
+ static bool has_MemWithImmALUOps() { return has_GnrlInstrExtensions(); } // Tied to the general-instruction-extension facility (guarantee inside).
+ static bool has_AtomicMemWithImmALUOps() { return has_MemWithImmALUOps() && has_InterlockedAccessV1(); } // Atomic variant: additionally requires the InterlockedAccess1Mask bit.
+ static bool has_FPExtensions() { return (_features[0] & FPExtensionsMask) == FPExtensionsMask; } // Getters in this run: true iff every bit of the mask is set in the indexed _features word.
+ static bool has_FPSupportEnhancements() { return (_features[0] & FPSupportEnhancementsMask) == FPSupportEnhancementsMask; }
+ static bool has_DecimalFloatingPoint() { return (_features[0] & DecimalFloatingPointMask) == DecimalFloatingPointMask; }
+ static bool has_InterlockedAccessV1() { return (_features[0] & InterlockedAccess1Mask) == InterlockedAccess1Mask; }
+ static bool has_LoadAndALUAtomicV1() { return (_features[0] & InterlockedAccess1Mask) == InterlockedAccess1Mask; } // Same mask as has_InterlockedAccessV1().
+ static bool has_PopCount() { return (_features[0] & PopulationCountMask) == PopulationCountMask; }
+ static bool has_LoadStoreConditional() { return (_features[0] & LoadStoreConditionalMask) == LoadStoreConditionalMask; }
+ static bool has_HighWordInstr() { return (_features[0] & HighWordMask) == HighWordMask; }
+ static bool has_FastSync() { return (_features[0] & FastBCRSerializationMask) == FastBCRSerializationMask; }
+ static bool has_DistinctOpnds() { return (_features[0] & DistinctOpndsMask) == DistinctOpndsMask; }
+ static bool has_CryptoExt3() { return (_features[1] & CryptoExtension3Mask) == CryptoExtension3Mask; } // Note: bit lives in _features[1].
+ static bool has_CryptoExt4() { return (_features[1] & CryptoExtension4Mask) == CryptoExtension4Mask; } // Note: bit lives in _features[1].
+ static bool has_DFPZonedConversion() { return (_features[0] & DFPZonedConversionMask) == DFPZonedConversionMask; }
+ static bool has_DFPPackedConversion() { return (_features[1] & DFPPackedConversionMask) == DFPPackedConversionMask; } // Note: bit lives in _features[1], unlike the zoned conversion bit.
+ static bool has_MiscInstrExt() { return (_features[0] & MiscInstrExtMask) == MiscInstrExtMask; }
+ static bool has_ExecutionHint() { return (_features[0] & ExecutionHintMask) == ExecutionHintMask; }
+ static bool has_LoadAndTrap() { return (_features[0] & LoadAndTrapMask) == LoadAndTrapMask; }
+ static bool has_ProcessorAssist() { return (_features[0] & ProcessorAssistMask) == ProcessorAssistMask; }
+ static bool has_InterlockedAccessV2() { return (_features[0] & InterlockedAccess2Mask) == InterlockedAccess2Mask; }
+ static bool has_LoadAndALUAtomicV2() { return (_features[0] & InterlockedAccess2Mask) == InterlockedAccess2Mask; } // Same mask as has_InterlockedAccessV2().
+ static bool has_TxMem() { return ((_features[1] & TransactionalExecutionMask) == TransactionalExecutionMask) &&
+ ((_features[0] & ConstrainedTxExecutionMask) == ConstrainedTxExecutionMask); } // Needs both: transactional execution (_features[1]) and constrained TX execution (_features[0]).
+ static bool has_CryptoExt5() { return (_features[0] & CryptoExtension5Mask) == CryptoExtension5Mask; } // In _features[0], unlike CryptoExt3/4.
+ static bool has_LoadStoreConditional2() { return (_features[0] & LoadStoreConditional2Mask) == LoadStoreConditional2Mask; }
+ static bool has_VectorFacility() { return (_features[2] & VectorFacilityMask) == VectorFacilityMask; } // Note: bit lives in _features[2].
+
+ static bool has_TestFeatureImpl() { return (_features[0] & TestFeature1ImplMask) == TestFeature1ImplMask; } // NOTE(review): tests TestFeature1ImplMask, i.e. identical to has_TestFeature1Impl() - confirm the alias is intended.
+ static bool has_TestFeature1Impl() { return (_features[0] & TestFeature1ImplMask) == TestFeature1ImplMask; }
+ static bool has_TestFeature2Impl() { return (_features[0] & TestFeature2ImplMask) == TestFeature2ImplMask; }
+ static bool has_TestFeature4Impl() { return (_features[0] & TestFeature4ImplMask) == TestFeature4ImplMask; }
+ static bool has_TestFeature8Impl() { return (_features[0] & TestFeature8ImplMask) == TestFeature8ImplMask; }
+ static bool has_TestFeaturesImpl() { return has_TestFeature1Impl() || has_TestFeature2Impl() || has_TestFeature4Impl() || has_TestFeature8Impl(); } // True if any of the four test feature bits is set.
+
+ // Crypto features query functions. Each single-function query requires has_Crypto() and then tests one bit via test_feature_bit().
+ static bool has_Crypto_AES128() { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES128, Cipher::_featureBits); }
+ static bool has_Crypto_AES192() { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES192, Cipher::_featureBits); }
+ static bool has_Crypto_AES256() { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES256, Cipher::_featureBits); }
+ static bool has_Crypto_AES() { return has_Crypto_AES128() || has_Crypto_AES192() || has_Crypto_AES256(); } // True if at least one AES key length is available.
+
+ static bool has_Crypto_SHA1() { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA1, MsgDigest::_featureBits); }
+ static bool has_Crypto_SHA256() { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA256, MsgDigest::_featureBits); }
+ static bool has_Crypto_SHA512() { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA512, MsgDigest::_featureBits); }
+ static bool has_Crypto_GHASH() { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_GHASH, MsgDigest::_featureBits); }
+ static bool has_Crypto_SHA() { return has_Crypto_SHA1() || has_Crypto_SHA256() || has_Crypto_SHA512() || has_Crypto_GHASH(); } // Note: also true when only GHASH is available.
+
+ // CPU feature setters (to force model-specific behaviour). Test/debugging only. Each setter ORs its mask into the _features word; none clears bits.
+ static void set_has_TestFeature1Impl() { _features[0] |= TestFeature1ImplMask; }
+ static void set_has_TestFeature2Impl() { _features[0] |= TestFeature2ImplMask; }
+ static void set_has_TestFeature4Impl() { _features[0] |= TestFeature4ImplMask; }
+ static void set_has_TestFeature8Impl() { _features[0] |= TestFeature8ImplMask; }
+ static void set_has_DecimalFloatingPoint() { _features[0] |= DecimalFloatingPointMask; }
+ static void set_has_FPSupportEnhancements() { _features[0] |= FPSupportEnhancementsMask; }
+ static void set_has_ExecuteExtensions() { _features[0] |= ExecuteExtensionsMask; }
+ static void set_has_MemWithImmALUOps() { _features[0] |= GnrlInstrExtFacilityMask; } // This and the next six setters all set the same bit as set_has_GnrlInstrExtensions().
+ static void set_has_MoveImmToMem() { _features[0] |= GnrlInstrExtFacilityMask; }
+ static void set_has_Prefetch() { _features[0] |= GnrlInstrExtFacilityMask; } // has_Prefetch() still depends on AllocatePrefetchStyle > 0.
+ static void set_has_MultiplySingleImm32() { _features[0] |= GnrlInstrExtFacilityMask; }
+ static void set_has_CompareBranch() { _features[0] |= GnrlInstrExtFacilityMask; } // has_CompareBranch() still depends on is_z10().
+ static void set_has_CompareTrap() { _features[0] |= GnrlInstrExtFacilityMask; }
+ static void set_has_RelativeLoadStore() { _features[0] |= GnrlInstrExtFacilityMask; }
+ static void set_has_GnrlInstrExtensions() { _features[0] |= GnrlInstrExtFacilityMask; }
+ static void set_has_CompareSwapStore() { _features[0] |= CompareSwapStoreMask; }
+ static void set_has_HFPMultiplyAndAdd() { _features[0] |= HFPMultiplyAndAddMask; }
+ static void set_has_HFPUnnormalized() { _features[0] |= HFPUnnormalizedMask; }
+ static void set_has_ExtractCPUtime() { _features[0] |= ExtractCPUTimeMask; }
+ static void set_has_StoreClockFast() { _features[0] |= StoreClockFastMask; }
+ static void set_has_extended_immediate() { _features[0] |= ExtImmedFacilityMask; }
+ static void set_has_long_displacement_fast() { _features[0] |= LongDispFacilityHighPerfMask; }
+ static void set_has_long_displacement() { _features[0] |= LongDispFacilityMask; }
+ static void set_has_ETF2() { _features[0] |= ETF2Mask; }
+ static void set_has_ETF3() { _features[0] |= ETF3Mask; }
+ static void set_has_ETF2Enhancements() { _features[0] |= ETF2EnhancementMask; }
+ static void set_has_ETF3Enhancements() { _features[0] |= ETF3EnhancementMask; }
+ static void set_has_Crypto() { _features[0] |= CryptoFacilityMask; }
+ static void set_has_StoreFacilityListExtended() { _features[0] |= StoreFacilityListExtendedMask; }
+
+ static void set_has_InterlockedAccessV1() { _features[0] |= InterlockedAccess1Mask; } // Also makes has_LoadAndALUAtomicV1() true (same mask).
+ static void set_has_PopCount() { _features[0] |= PopulationCountMask; }
+ static void set_has_LoadStoreConditional() { _features[0] |= LoadStoreConditionalMask; }
+ static void set_has_HighWordInstr() { _features[0] |= HighWordMask; }
+ static void set_has_FastSync() { _features[0] |= FastBCRSerializationMask; }
+ static void set_has_DistinctOpnds() { _features[0] |= DistinctOpndsMask; }
+ static void set_has_FPExtensions() { _features[0] |= FPExtensionsMask; }
+ static void set_has_MiscInstrExt() { _features[0] |= MiscInstrExtMask; }
+ static void set_has_ProcessorAssist() { _features[0] |= ProcessorAssistMask; }
+ static void set_has_InterlockedAccessV2() { _features[0] |= InterlockedAccess2Mask; }
+ static void set_has_LoadAndALUAtomicV2() { _features[0] |= InterlockedAccess2Mask; } // Same bit as set_has_InterlockedAccessV2().
+ static void set_has_TxMem() { _features[0] |= ConstrainedTxExecutionMask; _features[1] |= TransactionalExecutionMask; } // Sets both bits that has_TxMem() tests.
+ static void set_has_CryptoExt3() { _features[1] |= CryptoExtension3Mask; } // Bit lives in _features[1].
+ static void set_has_CryptoExt4() { _features[1] |= CryptoExtension4Mask; } // Bit lives in _features[1].
+ static void set_has_LoadStoreConditional2() { _features[0] |= LoadStoreConditional2Mask; }
+ static void set_has_CryptoExt5() { _features[0] |= CryptoExtension5Mask; }
+ static void set_has_VectorFacility() { _features[2] |= VectorFacilityMask; } // Bit lives in _features[2].
+
+ // Assembler testing. Declarations only; presumably allow_all() enables all features and revert() restores the detected set - confirm against the definitions.
+ static void allow_all();
+ static void revert();
+
+ // Generate trapping instructions into C-code.
+ // Sometimes helpful for debugging. Declarations only; the meaning of the returned unsigned long is not visible here.
+ static unsigned long z_SIGILL();
+ static unsigned long z_SIGSEGV();
+};
+
+#endif // CPU_S390_VM_VM_VERSION_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vmreg_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "code/vmreg.hpp"
+
+void VMRegImpl::set_regName() {
+  // Every GPR and FPR fills two consecutive regName slots; both slots carry the same name.
+  int slot = 0;
+  Register gpr = ::as_Register(0);
+  while (slot < ConcreteRegisterImpl::max_gpr) {
+    regName[slot]     = gpr->name();
+    regName[slot + 1] = gpr->name();
+    slot += 2;
+    gpr = gpr->successor();
+  }
+  FloatRegister fpr = ::as_FloatRegister(0);  // Float register slots continue where the GPR slots ended.
+  while (slot < ConcreteRegisterImpl::max_fpr) {
+    regName[slot]     = fpr->name();
+    regName[slot + 1] = fpr->name();
+    slot += 2;
+    fpr = fpr->successor();
+  }
+  // All remaining slots up to number_of_registers share one placeholder name.
+  while (slot < ConcreteRegisterImpl::number_of_registers) { regName[slot++] = "NON-GPR-XMM"; }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vmreg_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VMREG_S390_HPP
+#define CPU_S390_VM_VMREG_S390_HPP
+
+inline bool is_Register() { // True iff value() lies in the GPR slot range [0, max_gpr).
+ return (unsigned int)value() < (unsigned int)ConcreteRegisterImpl::max_gpr; // Unsigned compare; presumably so that a negative value() fails the test as well.
+}
+
+inline bool is_FloatRegister() {
+  if (value() < ConcreteRegisterImpl::max_gpr) { return false; }  // Below the float register slot range.
+  return value() < ConcreteRegisterImpl::max_fpr;                 // Float slots are [max_gpr, max_fpr).
+}
+
+inline Register as_Register() { // Converts a GPR slot number back to the Register it belongs to.
+ assert(is_Register() && is_even(value()), "even-aligned GPR name"); // Only even slots may be converted.
+ return ::as_Register(value() >> 1); // Two slots per register: register number = slot / 2.
+}
+
+inline FloatRegister as_FloatRegister() { // Converts a float register slot number back to the FloatRegister it belongs to.
+ assert(is_FloatRegister() && is_even(value()), "must be"); // Only even slots in [max_gpr, max_fpr) may be converted.
+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); // Rebase to the first float slot, then two slots per register.
+}
+
+inline bool is_concrete() { // True iff this is an even slot number, i.e. a slot that as_Register()/as_FloatRegister() accept.
+ assert(is_reg(), "must be");
+ return is_even(value());
+}
+
+#endif // CPU_S390_VM_VMREG_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vmreg_s390.inline.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VMREG_S390_INLINE_HPP
+#define CPU_S390_VM_VMREG_S390_INLINE_HPP
+
+inline VMReg RegisterImpl::as_VMReg() {
+  // noreg maps to the Bad VMReg; any other register maps to the slot
+  // at twice its encoding.
+  return (this == noreg) ? VMRegImpl::Bad()
+                         : VMRegImpl::as_VMReg(encoding() << 1);
+}
+
+inline VMReg FloatRegisterImpl::as_VMReg() { // Maps a float register to its VMReg slot.
+ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); // Slot = 2 * encoding, offset by max_gpr (float slots follow the GPR slots).
+}
+
+inline VMReg ConditionRegisterImpl::as_VMReg() { // Maps a condition register to its VMReg slot.
+ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr); // Slot = 2 * encoding, offset by max_fpr (i.e. placed after the float register slots).
+}
+
+#endif // CPU_S390_VM_VMREG_S390_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/s390/vm/vtableStubs_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/vtableStubs.hpp"
+#include "interp_masm_s390.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/instanceKlass.hpp"
+#include "oops/klassVtable.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_s390.inline.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+// Machine-dependent part of VtableStubs: create vtableStub of correct
+// size and initialize its code.
+
+#define __ masm->
+
+#ifndef PRODUCT
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
+#endif
+
+// Used by compiler only; may use only caller saved, non-argument registers.
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { // Builds the stub for vtable slot vtable_index: load the receiver's klass, load the method from that slot, branch to the method's from_compiled entry. Returns NULL if the stub could not be allocated.
+
+ const int code_length = VtableStub::pd_code_size_limit(true); // Size budget for a vtable stub.
+ VtableStub *s = new(code_length) VtableStub(true, vtable_index);
+ if (s == NULL) { // Indicates OOM in the code cache.
+ return NULL;
+ }
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), code_length); // Code is emitted at the stub's entry point, limited to code_length bytes.
+ MacroAssembler *masm = new MacroAssembler(&cb);
+ address start_pc; // Never assigned or read in this function.
+ int padding_bytes = 0; // Running total of budgeted-but-unused bytes; only accumulated, not otherwise used in this function.
+
+#if (!defined(PRODUCT) && defined(COMPILER2))
+ if (CountCompiledCalls) {
+ // Count unused bytes:
+ // padding += worst case size (load_const_size) - actual size (load_const_optimized_rtn_len).
+ padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
+
+ // Use generic emitter for direct memory increment.
+ // Abuse Z_method as scratch register for generic emitter.
+ // It is loaded further down anyway before it is first used.
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_method);
+ }
+#endif
+
+ assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1");
+
+ // Get receiver klass.
+ // Must do an explicit check if implicit checks are disabled.
+ address npe_addr = __ pc(); // npe == NULL ptr exception
+ __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
+ const Register rcvr_klass = Z_R1_scratch;
+ __ load_klass(rcvr_klass, Z_ARG1);
+
+ // Set method (in case of interpreted method), and destination address.
+ int entry_offset = in_bytes(InstanceKlass::vtable_start_offset()) +
+ vtable_index * vtableEntry::size_in_bytes(); // Byte offset of the selected vtable entry, relative to the klass.
+
+#ifndef PRODUCT
+ if (DebugVtables) {
+ Label L;
+ // Check offset vs vtable length.
+ const Register vtable_idx = Z_R0_scratch;
+
+ // Count unused bytes:
+ // padding += worst case size (load_const_size) - actual size (load_const_optimized_rtn_len).
+ padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size_in_bytes(), true);
+
+ assert(Immediate::is_uimm12(in_bytes(InstanceKlass::vtable_length_offset())), "disp to large");
+ __ z_cl(vtable_idx, in_bytes(InstanceKlass::vtable_length_offset()), rcvr_klass);
+ __ z_brl(L); // Skip the error call if the compare yields "low".
+ __ z_lghi(Z_ARG3, vtable_index); // Debug code, don't optimize.
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), Z_ARG1, Z_ARG3, false);
+ // Count unused bytes (assume worst case here).
+ padding_bytes += 12;
+ __ bind(L);
+ }
+#endif
+
+ int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); // Offset of the method field within the selected vtable entry.
+
+ // Duplicate safety code from enc_class Java_Dynamic_Call_dynTOC.
+ if (Displacement::is_validDisp(v_off)) {
+ __ z_lg(Z_method/*method oop*/, v_off, rcvr_klass/*class oop*/);
+ // Account for the load_const in the else path.
+ padding_bytes += __ load_const_size();
+ } else {
+ // Worst case: offset does not fit in the displacement field.
+ __ load_const(Z_method, v_off); // Z_method temporarily holds the offset value.
+ __ z_lg(Z_method/*method oop*/, 0, Z_method/*method offset*/, rcvr_klass/*class oop*/);
+ }
+
+#ifndef PRODUCT
+ if (DebugVtables) {
+ Label L;
+ __ z_ltgr(Z_method, Z_method); // Load-and-test: sets the condition code for the zero check below.
+ __ z_brne(L);
+ __ stop("Vtable entry is ZERO",102);
+ __ bind(L);
+ }
+#endif
+
+ address ame_addr = __ pc(); // ame = abstract method error
+
+ // Must do an explicit check if implicit checks are disabled.
+ __ null_check(Z_method, Z_R1_scratch, in_bytes(Method::from_compiled_offset()));
+ __ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method);
+ __ z_br(Z_R1_scratch); // Branch to the loaded from_compiled entry.
+
+ masm->flush();
+
+ s->set_exception_points(npe_addr, ame_addr); // Record the pcs captured above for the receiver and method checks.
+
+ return s;
+}
+
+VtableStub* VtableStubs::create_itable_stub(int vtable_index) { // Builds the itable stub: search the receiver klass' itable for the interface in Z_method, load method number vtable_index of that interface, branch to its from_compiled entry. Returns NULL if the stub could not be allocated.
+ const int code_length = VtableStub::pd_code_size_limit(false); // Size budget for an itable stub.
+ VtableStub *s = new(code_length) VtableStub(false, vtable_index);
+ if (s == NULL) { // Indicates OOM in the code cache.
+ return NULL;
+ }
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), code_length); // Code is emitted at the stub's entry point, limited to code_length bytes.
+ MacroAssembler *masm = new MacroAssembler(&cb);
+ address start_pc; // Marks the start of a variable-length sequence when measuring padding.
+ int padding_bytes = 0; // Running total of budgeted-but-unused bytes; only accumulated, not otherwise used in this function.
+
+#if (!defined(PRODUCT) && defined(COMPILER2))
+ if (CountCompiledCalls) {
+ // Count unused bytes:
+ // padding += worst case size (load_const_size) - actual size (load_const_optimized_rtn_len).
+ padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
+
+ // Use generic emitter for direct memory increment.
+ // Use Z_tmp_1 as scratch register for generic emitter.
+ __ add2mem_32((Z_R1_scratch), 1, Z_tmp_1); // NOTE(review): the vtable stub passes Address(Z_R1_scratch) here; presumably this relies on an implicit conversion - confirm.
+ }
+#endif
+
+ assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1");
+
+ // Entry arguments:
+ // Z_method: Interface
+ // Z_ARG1: Receiver
+ const Register rcvr_klass = Z_tmp_1; // Used to compute itable_entry_addr.
+ // Use extra reg to avoid re-load.
+ const Register vtable_len = Z_tmp_2; // Used to compute itable_entry_addr.
+ const Register itable_entry_addr = Z_R1_scratch;
+ const Register itable_interface = Z_R0_scratch;
+
+ // Get receiver klass.
+ // Must do an explicit check if implicit checks are disabled.
+ address npe_addr = __ pc(); // npe == NULL ptr exception
+ __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
+ __ load_klass(rcvr_klass, Z_ARG1);
+
+ // Load the vtable length and scale it to a byte count (needed to locate the itable entries).
+ __ z_llgf(vtable_len, Address(rcvr_klass, InstanceKlass::vtable_length_offset()));
+ __ z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes()));
+
+ // Loop over all itable entries until the desired interface (passed in Z_method) is found.
+ const int vtable_base_offset = in_bytes(InstanceKlass::vtable_start_offset());
+ // Count unused bytes.
+ start_pc = __ pc();
+ __ add2reg_with_index(itable_entry_addr, vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(), rcvr_klass, vtable_len); // itable_entry_addr = rcvr_klass + vtable_len (bytes) + vtable start + interface field offset.
+ padding_bytes += 20 - (__ pc() - start_pc); // 20: presumably the worst case size of the sequence above - confirm.
+
+ const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; // Byte step from one itableOffsetEntry to the next.
+ Label search;
+ __ bind(search);
+
+ // Handle IncompatibleClassChangeError in itable stubs.
+ // If the entry is NULL then we've reached the end of the table
+ // without finding the expected interface, so throw an exception.
+ NearLabel throw_icce;
+ __ load_and_test_long(itable_interface, Address(itable_entry_addr));
+ __ z_bre(throw_icce); // Throw the exception out-of-line.
+ // Count unused bytes.
+ start_pc = __ pc();
+ __ add2reg(itable_entry_addr, itable_offset_search_inc); // Advance before comparing; the found entry is therefore one step behind.
+ padding_bytes += 20 - (__ pc() - start_pc); // 20: presumably the worst case size of add2reg - confirm.
+ __ z_cgr(itable_interface, Z_method);
+ __ z_brne(search);
+
+ // Entry found. Itable_entry_addr points to the subsequent entry (itable_offset_search_inc too far).
+ // Get offset of vtable for interface.
+
+ const Register vtable_offset = Z_R1_scratch; // Same register as itable_entry_addr; it is overwritten by the load below.
+ const Register itable_method = rcvr_klass; // Calculated before.
+
+ const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() -
+ itableOffsetEntry::interface_offset_in_bytes()) -
+ itable_offset_search_inc; // Distance from the (already advanced) interface field pointer back to the matching entry's offset field.
+ __ z_llgf(vtable_offset, vtable_offset_offset, itable_entry_addr);
+
+ // Compute itableMethodEntry and get method and entry point for compiler.
+ const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) +
+ itableMethodEntry::method_offset_in_bytes();
+
+ __ z_lg(Z_method, method_offset, vtable_offset, itable_method); // Load from itable_method (= rcvr_klass) + vtable_offset + method_offset.
+
+#ifndef PRODUCT
+ if (DebugVtables) {
+ Label ok1;
+ __ z_ltgr(Z_method, Z_method); // Load-and-test: sets the condition code for the zero check below.
+ __ z_brne(ok1);
+ __ stop("method is null",103);
+ __ bind(ok1);
+ }
+#endif
+
+ address ame_addr = __ pc(); // Recorded below via set_exception_points(); cf. "ame = abstract method error" in the vtable stub.
+ // Must do an explicit check if implicit checks are disabled.
+ if (!ImplicitNullChecks) {
+ __ compare64_and_branch(Z_method, (intptr_t) 0, Assembler::bcondEqual, throw_icce); // A zero method goes to the throw_icce path.
+ }
+ __ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method);
+ __ z_br(Z_R1_scratch); // Branch to the loaded from_compiled entry.
+
+ // Handle IncompatibleClassChangeError in itable stubs.
+ __ bind(throw_icce);
+ // Count unused bytes:
+ // padding += worst case size (load_const_size) - actual size (load_const_optimized_rtn_len).
+ // We force resolving of the call site by jumping to
+ // the "handle wrong method" stub, and so let the
+ // interpreter runtime do all the dirty work.
+ padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true);
+ __ z_br(Z_R1_scratch);
+
+ masm->flush();
+
+ s->set_exception_points(npe_addr, ame_addr); // Record the pcs captured above for the receiver and method checks.
+ return s;
+}
+
+// In order to tune these parameters, run the JVM with VM options
+// +PrintMiscellaneous and +WizardMode to see information about
+// actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops.
+int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
+  // Base budget in bytes: 52 for a vtable stub, 104 for an itable stub.
+  int limit = is_vtable_stub ? 52 : 104;
+  // Additional budget when DebugVtables is on.
+  if (DebugVtables) {
+    limit += 216;
+  }
+  // Additional budget when CountCompiledCalls is on.
+  if (CountCompiledCalls) {
+    limit += 6 * 4;
+  }
+  // Additional budget when a narrow klass base is set. A guess.
+  if (Universe::narrow_klass_base() != NULL) { limit += 16; }
+  return limit;
+}
+
+int VtableStub::pd_code_alignment() {
+  // Stub code alignment in bytes; the value stands for the icache line size (32).
+  return 32;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/atomic_linux_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
+
+#include "runtime/atomic.hpp"
+#include "runtime/os.hpp"
+#include "vm_version_s390.hpp"
+
+// Note that the compare-and-swap instructions on System z perform
+// a serialization function before the storage operand is fetched
+// and again after the operation is completed.
+//
+// Used constraint modifiers:
+// = write-only access: Value on entry to inline-assembler code irrelevant.
+// + read/write access: Value on entry is used; on exit value is changed.
+// (none) read-only access: Value on entry is used and never changed.
+// & early-clobber access: Might be modified before all read-only operands
+// have been used.
+// a address register operand (not GR0).
+// d general register operand (including GR0)
+// Q memory operand w/o index register.
+// 0..9 operand reference (by operand position).
+// Used for operands that fill multiple roles. One example would be a
+// write-only operand receiving its initial value from a read-only operand.
+// Refer to cmpxchg(..) operand #0 and variable cmp_val for a real-life example.
+//
+
+// On System z, a store operation is atomic if the address the data is stored to
+// is an integer multiple of the data length. Furthermore, all stores are ordered:
+// a store which occurs conceptually before another store becomes visible to other CPUs
+// before the other store becomes visible. Hence all stores below are plain assignments.
+inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
+inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
+inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
+inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
+// Same set of stores for volatile destinations.
+inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
+inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
+inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
+inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
+
+
+//------------
+// Atomic::add
+//------------
+// These methods force the value in memory to be augmented by the passed increment.
+// Both, memory value and increment, are treated as 32bit signed binary integers.
+// No overflow exceptions are recognized, and the condition code does not hold
+// information about the value in memory.
+//
+// The value in memory is updated by a load-and-add instruction (LAA/LAAG) if available.
+// Otherwise, a compare-and-swap instruction is used and retried as often as required.
+//
+// The return value of the method is the value that was successfully stored. At the
+// time the caller receives back control, the value in memory may have changed already.
+
+inline jint Atomic::add(jint inc, volatile jint*dest) { // Atomically adds inc to *dest; returns the updated value.
+ unsigned int old, upd; // old: only used by the compare-and-swap fallback; upd: updated value.
+
+ if (VM_Version::has_LoadAndALUAtomicV1()) {
+ __asm__ __volatile__ (
+ " LGFR 0,%[inc] \n\t" // save increment in GR0
+ " LA 3,%[mem] \n\t" // force data address into ARG2 (GR3)
+// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
+// " LAA 2,0,0(3) \n\t" // actually coded instruction (hand-encoded as bytes below)
+ " .byte 0xeb \n\t" // LAA main opcode
+ " .byte 0x20 \n\t" // R1,R3
+ " .byte 0x30 \n\t" // B2,disp1
+ " .byte 0x00 \n\t" // disp2,disp3
+ " .byte 0x00 \n\t" // disp4,disp5
+ " .byte 0xf8 \n\t" // LAA minor opcode
+ " AR 2,0 \n\t" // calc new value in register (old value in GR2 + increment in GR0)
+ " LR %[upd],2 \n\t" // move to result register
+ //---< outputs >---
+ : [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ : [inc] "a" (inc) // read-only.
+ //---< clobbered >---
+ : "cc", "r0", "r2", "r3" // GR0, GR2 and GR3 are used explicitly above
+ );
+ } else {
+ __asm__ __volatile__ (
+ " LLGF %[old],%[mem] \n\t" // get old value
+ "0: LA %[upd],0(%[inc],%[old]) \n\t" // calc result: upd = old + inc
+ " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
+ " JNE 0b \n\t" // no success? -> retry (old is not reloaded explicitly)
+ //---< outputs >---
+ : [old] "=&a" (old) // write-only, old counter value (address register: used as base in LA)
+ , [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ : [inc] "a" (inc) // read-only (address register: used as index in LA).
+ //---< clobbered >---
+ : "cc"
+ );
+ }
+
+ return (jint)upd; // value computed by this call; memory may have changed again already
+}
+
+
+inline intptr_t Atomic::add_ptr(intptr_t inc, volatile intptr_t* dest) { // 64-bit variant of Atomic::add.
+ unsigned long old, upd; // old: only used by the compare-and-swap fallback; upd: updated value.
+
+ if (VM_Version::has_LoadAndALUAtomicV1()) {
+ __asm__ __volatile__ (
+ " LGR 0,%[inc] \n\t" // save increment in GR0
+ " LA 3,%[mem] \n\t" // force data address into ARG2 (GR3)
+// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
+// " LAAG 2,0,0(3) \n\t" // actually coded instruction (hand-encoded as bytes below)
+ " .byte 0xeb \n\t" // LAAG main opcode
+ " .byte 0x20 \n\t" // R1,R3
+ " .byte 0x30 \n\t" // B2,disp1
+ " .byte 0x00 \n\t" // disp2,disp3
+ " .byte 0x00 \n\t" // disp4,disp5
+ " .byte 0xe8 \n\t" // LAAG minor opcode
+ " AGR 2,0 \n\t" // calc new value in register (old value in GR2 + increment in GR0)
+ " LGR %[upd],2 \n\t" // move to result register
+ //---< outputs >---
+ : [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ : [inc] "a" (inc) // read-only.
+ //---< clobbered >---
+ : "cc", "r0", "r2", "r3" // GR0, GR2 and GR3 are used explicitly above
+ );
+ } else {
+ __asm__ __volatile__ (
+ " LG %[old],%[mem] \n\t" // get old value
+ "0: LA %[upd],0(%[inc],%[old]) \n\t" // calc result: upd = old + inc
+ " CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
+ " JNE 0b \n\t" // no success? -> retry (old is not reloaded explicitly)
+ //---< outputs >---
+ : [old] "=&a" (old) // write-only, old counter value (address register: used as base in LA)
+ , [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ : [inc] "a" (inc) // read-only (address register: used as index in LA).
+ //---< clobbered >---
+ : "cc"
+ );
+ }
+
+ return (intptr_t)upd; // value computed by this call; memory may have changed again already
+}
+
+inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest) {
+  return reinterpret_cast<void*>(add_ptr(add_value, static_cast<volatile intptr_t*>(dest))); // forwards to the intptr_t overload
+}
+
+
+//------------
+// Atomic::inc
+//------------
+// These methods force the value in memory to be incremented (augmented by 1).
+// Both, memory value and increment, are treated as 32bit signed binary integers.
+// No overflow exceptions are recognized, and the condition code does not hold
+// information about the value in memory.
+//
+// The value in memory is updated by a load-and-add instruction (LAA/LAAG) if available.
+// Otherwise, a compare-and-swap instruction is used and retried as often as required.
+
+inline void Atomic::inc(volatile jint* dest) { // Atomically increments the 32-bit value at *dest by 1.
+ unsigned int old, upd; // asm output operands only; nothing is returned to the caller.
+
+ if (VM_Version::has_LoadAndALUAtomicV1()) {
+// tty->print_cr("Atomic::inc called... dest @%p", dest);
+ __asm__ __volatile__ (
+ " LGHI 2,1 \n\t" // load increment
+ " LA 3,%[mem] \n\t" // force data address into ARG2 (GR3)
+// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
+// " LAA 2,2,0(3) \n\t" // actually coded instruction (hand-encoded as bytes below)
+ " .byte 0xeb \n\t" // LAA main opcode
+ " .byte 0x22 \n\t" // R1,R3
+ " .byte 0x30 \n\t" // B2,disp1
+ " .byte 0x00 \n\t" // disp2,disp3
+ " .byte 0x00 \n\t" // disp4,disp5
+ " .byte 0xf8 \n\t" // LAA minor opcode
+ " AGHI 2,1 \n\t" // calc new value in register
+ " LR %[upd],2 \n\t" // move to result register
+ //---< outputs >---
+ : [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ :
+// : [inc] "a" (inc) // read-only.
+ //---< clobbered >---
+ : "cc", "r2", "r3" // GR2 and GR3 are used explicitly above
+ );
+ } else {
+ __asm__ __volatile__ (
+ " LLGF %[old],%[mem] \n\t" // get old value
+ "0: LA %[upd],1(,%[old]) \n\t" // calc result: upd = old + 1
+ " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
+ " JNE 0b \n\t" // no success? -> retry (old is not reloaded explicitly)
+ //---< outputs >---
+ : [old] "=&a" (old) // write-only, old counter value (address register: used as base in LA)
+ , [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ :
+ //---< clobbered >---
+ : "cc"
+ );
+ }
+}
+
+inline void Atomic::inc_ptr(volatile intptr_t* dest) { // Atomically increments the 64-bit value at *dest by 1.
+ unsigned long old, upd; // asm output operands only; nothing is returned to the caller.
+
+ if (VM_Version::has_LoadAndALUAtomicV1()) {
+ __asm__ __volatile__ (
+ " LGHI 2,1 \n\t" // load increment
+ " LA 3,%[mem] \n\t" // force data address into ARG2 (GR3)
+// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
+// " LAAG 2,2,0(3) \n\t" // actually coded instruction (hand-encoded as bytes below)
+ " .byte 0xeb \n\t" // LAAG main opcode
+ " .byte 0x22 \n\t" // R1,R3
+ " .byte 0x30 \n\t" // B2,disp1
+ " .byte 0x00 \n\t" // disp2,disp3
+ " .byte 0x00 \n\t" // disp4,disp5
+ " .byte 0xe8 \n\t" // LAAG minor opcode
+ " AGHI 2,1 \n\t" // calc new value in register
+ " LGR %[upd],2 \n\t" // move to result register (64-bit copy, as in add_ptr)
+ //---< outputs >---
+ : [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ :
+// : [inc] "a" (inc) // read-only.
+ //---< clobbered >---
+ : "cc", "r2", "r3" // GR2 and GR3 are used explicitly above
+ );
+ } else {
+ __asm__ __volatile__ (
+ " LG %[old],%[mem] \n\t" // get old value
+ "0: LA %[upd],1(,%[old]) \n\t" // calc result: upd = old + 1
+ " CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
+ " JNE 0b \n\t" // no success? -> retry (old is not reloaded explicitly)
+ //---< outputs >---
+ : [old] "=&a" (old) // write-only, old counter value (address register: used as base in LA)
+ , [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ :
+ //---< clobbered >---
+ : "cc"
+ );
+ }
+}
+
+inline void Atomic::inc_ptr(volatile void* dest) {
+  inc_ptr(static_cast<volatile intptr_t*>(dest)); // forwards to the intptr_t overload
+}
+
+//------------
+// Atomic::dec
+//------------
+// These methods force the value in memory to be decremented (augmented by -1).
+// Both, memory value and decrement, are treated as 32bit signed binary integers.
+// No overflow exceptions are recognized, and the condition code does not hold
+// information about the value in memory.
+//
+// The value in memory is updated by a load-and-add instruction (LAA/LAAG) if available.
+// Otherwise, a compare-and-swap instruction is used and retried as often as required.
+
+inline void Atomic::dec(volatile jint* dest) { // Atomically decrements the 32-bit value at *dest by 1.
+ unsigned int old, upd; // asm output operands only; nothing is returned to the caller.
+
+ if (VM_Version::has_LoadAndALUAtomicV1()) {
+ __asm__ __volatile__ (
+ " LGHI 2,-1 \n\t" // load increment (-1)
+ " LA 3,%[mem] \n\t" // force data address into ARG2 (GR3)
+// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
+// " LAA 2,2,0(3) \n\t" // actually coded instruction (hand-encoded as bytes below)
+ " .byte 0xeb \n\t" // LAA main opcode
+ " .byte 0x22 \n\t" // R1,R3
+ " .byte 0x30 \n\t" // B2,disp1
+ " .byte 0x00 \n\t" // disp2,disp3
+ " .byte 0x00 \n\t" // disp4,disp5
+ " .byte 0xf8 \n\t" // LAA minor opcode
+ " AGHI 2,-1 \n\t" // calc new value in register
+ " LR %[upd],2 \n\t" // move to result register
+ //---< outputs >---
+ : [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ :
+// : [inc] "a" (inc) // read-only.
+ //---< clobbered >---
+ : "cc", "r2", "r3" // GR2 and GR3 are used explicitly above
+ );
+ } else {
+ __asm__ __volatile__ (
+ " LLGF %[old],%[mem] \n\t" // get old value
+ // LAY not supported by inline assembler
+ // "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
+ "0: LR %[upd],%[old] \n\t" // calc result: upd = old ...
+ " AHI %[upd],-1 \n\t" // ... - 1
+ " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
+ " JNE 0b \n\t" // no success? -> retry (old is not reloaded explicitly)
+ //---< outputs >---
+ : [old] "=&a" (old) // write-only, old counter value
+ , [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ :
+ //---< clobbered >---
+ : "cc"
+ );
+ }
+}
+
+inline void Atomic::dec_ptr(volatile intptr_t* dest) { // Atomically decrements the 64-bit value at *dest by 1.
+ unsigned long old, upd; // asm output operands only; nothing is returned to the caller.
+
+ if (VM_Version::has_LoadAndALUAtomicV1()) {
+ __asm__ __volatile__ (
+ " LGHI 2,-1 \n\t" // load increment (-1)
+ " LA 3,%[mem] \n\t" // force data address into ARG2 (GR3)
+// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
+// " LAAG 2,2,0(3) \n\t" // actually coded instruction (hand-encoded as bytes below)
+ " .byte 0xeb \n\t" // LAAG main opcode
+ " .byte 0x22 \n\t" // R1,R3
+ " .byte 0x30 \n\t" // B2,disp1
+ " .byte 0x00 \n\t" // disp2,disp3
+ " .byte 0x00 \n\t" // disp4,disp5
+ " .byte 0xe8 \n\t" // LAAG minor opcode
+ " AGHI 2,-1 \n\t" // calc new value in register
+ " LGR %[upd],2 \n\t" // move to result register (64-bit copy, as in add_ptr)
+ //---< outputs >---
+ : [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ :
+// : [inc] "a" (inc) // read-only.
+ //---< clobbered >---
+ : "cc", "r2", "r3" // GR2 and GR3 are used explicitly above
+ );
+ } else {
+ __asm__ __volatile__ (
+ " LG %[old],%[mem] \n\t" // get old value
+// LAY not supported by inline assembler
+// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
+ "0: LGR %[upd],%[old] \n\t" // calc result: upd = old ...
+ " AGHI %[upd],-1 \n\t" // ... - 1
+ " CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
+ " JNE 0b \n\t" // no success? -> retry (old is not reloaded explicitly)
+ //---< outputs >---
+ : [old] "=&a" (old) // write-only, old counter value
+ , [upd] "=&d" (upd) // write-only, updated counter value
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ :
+ //---< clobbered >---
+ : "cc"
+ );
+ }
+}
+
+inline void Atomic::dec_ptr(volatile void* dest) {
+  dec_ptr(static_cast<volatile intptr_t*>(dest)); // forwards to the intptr_t overload
+}
+
+//-------------
+// Atomic::xchg
+//-------------
+// These methods force the value in memory to be replaced by the new value passed
+// in as argument.
+//
+// The value in memory is replaced by using a compare-and-swap instruction. The
+// instruction is retried as often as required. This makes sure that the new
+// value can be seen, at least for a very short period of time, by other CPUs.
+//
+// If we would use a normal "load(old value) store(new value)" sequence,
+// the new value could be lost unnoticed, due to a store(new value) from
+// another thread.
+//
+// The return value is the (unchanged) value from memory as it was when the
+// replacement succeeded.
+inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) { // Stores xchg_val to *dest; returns the replaced value.
+ unsigned int old; // value found in memory; compare operand of CS
+
+ __asm__ __volatile__ (
+ " LLGF %[old],%[mem] \n\t" // get old value
+ "0: CS %[old],%[upd],%[mem] \n\t" // try to xchg upd with mem
+ " JNE 0b \n\t" // no success? -> retry (old is not reloaded explicitly)
+ //---< outputs >---
+ : [old] "=&d" (old) // write-only, prev value irrelevant
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ : [upd] "d" (xchg_val) // read-only, value to be written to memory
+ //---< clobbered >---
+ : "cc"
+ );
+
+ return (jint)old; // memory contents at the time the swap succeeded
+}
+
+inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) { // 64-bit variant of Atomic::xchg.
+ unsigned long old; // value found in memory; compare operand of CSG
+
+ __asm__ __volatile__ (
+ " LG %[old],%[mem] \n\t" // get old value
+ "0: CSG %[old],%[upd],%[mem] \n\t" // try to xchg upd with mem
+ " JNE 0b \n\t" // no success? -> retry (old is not reloaded explicitly)
+ //---< outputs >---
+ : [old] "=&d" (old) // write-only, init from memory
+ , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
+ //---< inputs >---
+ : [upd] "d" (xchg_val) // read-only, value to be written to memory
+ //---< clobbered >---
+ : "cc"
+ );
+
+ return (intptr_t)old; // memory contents at the time the swap succeeded
+}
+
+inline void *Atomic::xchg_ptr(void *exchange_value, volatile void *dest) {
+  return reinterpret_cast<void*>(xchg_ptr(reinterpret_cast<intptr_t>(exchange_value), static_cast<volatile intptr_t*>(dest))); // forwards to the intptr_t overload
+}
+
+//----------------
+// Atomic::cmpxchg
+//----------------
+// These methods compare the value in memory with a given compare value.
+// If both values compare equal, the value in memory is replaced with
+// the exchange value.
+//
+// The value in memory is compared and replaced by using a compare-and-swap
+// instruction. The instruction is NOT retried (one shot only).
+//
+// The return value is the (unchanged) value from memory as it was when the
+// compare-and-swap instruction completed. A successful exchange operation
+// is indicated by (return value == compare_value). If unsuccessful, a new
+// exchange value can be calculated based on the return value which is the
+// latest contents of the memory location.
+//
+// Inspecting the return value is the only way for the caller to determine
+// if the compare-and-swap instruction was successful:
+// - If return value and compare value compare equal, the compare-and-swap
+// instruction was successful and the value in memory was replaced by the
+// exchange value.
+// - If return value and compare value compare unequal, the compare-and-swap
+// instruction was not successful. The value in memory was left unchanged.
+//
+// The s390 processors always fence before and after the csg instructions.
+// Thus we ignore the memory ordering argument. The docu says: "A serialization
+// function is performed before the operand is fetched and again after the
+// operation is completed."
+
+jint Atomic::cmpxchg(jint xchg_val, volatile jint* dest, jint cmp_val, cmpxchg_memory_order unused) { // One-shot 32-bit compare-and-swap.
+ unsigned long old; // NOTE(review): 64-bit temp tied to the 32-bit cmp_val via "0" - confirm this is intended.
+
+ __asm__ __volatile__ (
+ " CS %[old],%[upd],%[mem] \n\t" // Try to xchg upd with mem.
+ // outputs
+ : [old] "=&d" (old) // Write-only, prev value irrelevant.
+ , [mem] "+Q" (*dest) // Read/write, memory to be updated atomically.
+ // inputs
+ : [upd] "d" (xchg_val) // Read-only, value to be written to memory on success.
+ , "0" (cmp_val) // Read-only, initial value for [old] (operand #0).
+ // clobbered
+ : "cc"
+ );
+
+ return (jint)old; // Equals cmp_val iff the exchange took place (see comment above).
+}
+
+jlong Atomic::cmpxchg(jlong xchg_val, volatile jlong* dest, jlong cmp_val, cmpxchg_memory_order unused) { // One-shot 64-bit compare-and-swap.
+ unsigned long old; // Starts as cmp_val (tied via "0"); result of CSG afterwards.
+
+ __asm__ __volatile__ (
+ " CSG %[old],%[upd],%[mem] \n\t" // Try to xchg upd with mem.
+ // outputs
+ : [old] "=&d" (old) // Write-only, prev value irrelevant.
+ , [mem] "+Q" (*dest) // Read/write, memory to be updated atomically.
+ // inputs
+ : [upd] "d" (xchg_val) // Read-only, value to be written to memory on success.
+ , "0" (cmp_val) // Read-only, initial value for [old] (operand #0).
+ // clobbered
+ : "cc"
+ );
+
+ return (jlong)old; // Equals cmp_val iff the exchange took place (see comment above).
+}
+
+void* Atomic::cmpxchg_ptr(void *xchg_val, volatile void* dest, void* cmp_val, cmpxchg_memory_order unused) {
+  return reinterpret_cast<void*>(cmpxchg(reinterpret_cast<jlong>(xchg_val), static_cast<volatile jlong*>(dest), reinterpret_cast<jlong>(cmp_val), unused)); // forwards to the jlong overload
+}
+
+intptr_t Atomic::cmpxchg_ptr(intptr_t xchg_val, volatile intptr_t* dest, intptr_t cmp_val, cmpxchg_memory_order unused) {
+  return static_cast<intptr_t>(cmpxchg(static_cast<jlong>(xchg_val), reinterpret_cast<volatile jlong*>(dest), static_cast<jlong>(cmp_val), unused)); // forwards to the jlong overload
+}
+
+inline jlong Atomic::load(volatile jlong* src) { return *src; } // Plain dereference; no inline assembly is used for the 64-bit load.
+
+#endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/bytes_linux_s390.inline.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+
+#include <byteswap.h>
+
+inline u2 swap_u2(u2 x) { // Returns x with its two bytes swapped.
+ return bswap_16(x); // provided by <byteswap.h>
+}
+
+inline u4 swap_u4(u4 x) { // Returns x with the order of its four bytes reversed.
+ return bswap_32(x); // provided by <byteswap.h>
+}
+
+inline u8 swap_u8(u8 x) { // Returns x with the order of its eight bytes reversed.
+ return bswap_64(x); // provided by <byteswap.h>
+}
+
+#endif // OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/globals_linux_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Sorted according to linux_x86.
+
+#ifndef OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP
+
+// Sets the default values for platform dependent flags used by the
+// runtime system (see globals.hpp).
+
+define_pd_global(bool, DontYieldALot, false);
+define_pd_global(intx, ThreadStackSize, 1024); // 0 => Use system default.
+define_pd_global(intx, VMThreadStackSize, 1024); // NOTE(review): stack sizes are presumably in KB - confirm in globals.hpp.
+// Some jck tests in lang/fp/fpl038 run out of compile thread stack.
+// Observed in pure dbg build, running with -Xcomp -Xbatch on z990.
+// We also increase the stack size for opt builds to be on the safe side.
+#ifdef ASSERT // Build with asserts enabled: largest compiler thread stack.
+define_pd_global(intx, CompilerThreadStackSize, 4096);
+#else // Build without asserts: half of the ASSERT value.
+define_pd_global(intx, CompilerThreadStackSize, 2048);
+#endif // ASSERT
+
+// Allow extra space in DEBUG builds for asserts (the same value is used for all build types).
+define_pd_global(size_t, JVMInvokeMethodSlack, 8192);
+
+// Only used on 64 bit platforms.
+define_pd_global(size_t, HeapBaseMinAddress, 2*G);
+
+#endif // OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/orderAccess_linux_s390.inline.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
+
+#include "runtime/orderAccess.hpp"
+#include "vm_version_s390.hpp"
+
+// Implementation of class OrderAccess.
+
+//
+// machine barrier instructions:
+//
+// - z_sync two-way memory barrier, aka fence
+//
+// semantic barrier instructions:
+// (as defined in orderAccess.hpp)
+//
+// - z_release orders Store|Store, (maps to compiler barrier)
+// Load|Store
+// - z_acquire orders Load|Store, (maps to compiler barrier)
+// Load|Load
+// - z_fence orders Store|Store, (maps to z_sync)
+// Load|Store,
+// Load|Load,
+// Store|Load
+//
+
+
+// Only load-after-store-order is not guaranteed on z/Architecture, i.e. only 'fence'
+// is needed.
+
+// A compiler barrier, forcing the C++ compiler to invalidate all memory assumptions (no trailing ';': supplied at the use site).
+#define inlasm_compiler_barrier() __asm__ __volatile__ ("" : : : "memory")
+// "bcr 15, 0" is used as two way memory barrier (no trailing ';': supplied at the use site).
+#define inlasm_zarch_sync() __asm__ __volatile__ ("bcr 15, 0" : : : "memory")
+
+// Release and acquire are empty on z/Architecture, but potential
+// optimizations of gcc must be forbidden by OrderAccess::release and
+// OrderAccess::acquire.
+#define inlasm_zarch_release() inlasm_compiler_barrier()
+#define inlasm_zarch_acquire() inlasm_compiler_barrier()
+#define inlasm_zarch_fence() inlasm_zarch_sync()
+
+inline void OrderAccess::loadload() { inlasm_zarch_acquire(); } // compiler barrier only
+inline void OrderAccess::storestore() { inlasm_zarch_release(); } // compiler barrier only
+inline void OrderAccess::loadstore() { inlasm_zarch_acquire(); } // compiler barrier only
+inline void OrderAccess::storeload() { inlasm_zarch_fence(); } // the only ordering that needs a machine barrier
+
+inline void OrderAccess::acquire() { inlasm_compiler_barrier(); }
+inline void OrderAccess::release() { inlasm_compiler_barrier(); }
+inline void OrderAccess::fence() { inlasm_zarch_fence(); } // two-way machine barrier
+
+template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte> (volatile jbyte* p) { jbyte t = *p; inlasm_zarch_acquire(); return t; } // load, then acquire barrier
+template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(volatile jshort* p) { jshort t = *p; inlasm_zarch_acquire(); return t; } // load, then acquire barrier
+template<> inline jint OrderAccess::specialized_load_acquire<jint> (volatile jint* p) { jint t = *p; inlasm_zarch_acquire(); return t; } // load, then acquire barrier
+template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (volatile jlong* p) { jlong t = *p; inlasm_zarch_acquire(); return t; } // load, then acquire barrier
+
+#undef inlasm_compiler_barrier
+#undef inlasm_zarch_sync
+#undef inlasm_zarch_release
+#undef inlasm_zarch_acquire
+#undef inlasm_zarch_fence
+
+#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
+
+#endif // OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,640 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// This file is organized as os_linux_x86.cpp.
+
+// no precompiled headers
+#include "asm/assembler.inline.hpp"
+#include "classfile/classLoader.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "classfile/vmSymbols.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nativeInst.hpp"
+#include "code/vtableStubs.hpp"
+#include "compiler/disassembler.hpp"
+#include "interpreter/interpreter.hpp"
+#include "jvm_linux.h"
+#include "memory/allocation.inline.hpp"
+#include "nativeInst_s390.hpp"
+#include "os_share_linux.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm.h"
+#include "prims/jvm_misc.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/extendedPC.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/java.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/osThread.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+#include "runtime/timer.hpp"
+#include "utilities/events.hpp"
+#include "utilities/vmError.hpp"
+
+// put OS-includes here
+# include <sys/types.h>
+# include <sys/mman.h>
+# include <pthread.h>
+# include <signal.h>
+# include <errno.h>
+# include <dlfcn.h>
+# include <stdlib.h>
+# include <stdio.h>
+# include <unistd.h>
+# include <sys/resource.h>
+# include <pthread.h>
+# include <sys/stat.h>
+# include <sys/time.h>
+# include <sys/utsname.h>
+# include <sys/socket.h>
+# include <sys/wait.h>
+# include <pwd.h>
+# include <poll.h>
+# include <ucontext.h>
+
+address os::current_stack_pointer() {
+  // Copy the stack pointer register (Z_SP = Z_R15) into a local via inline
+  // assembly, i.e. the equivalent of `z_lgr regno(sp_now), Z_SP'.
+  intptr_t* sp_now;
+  __asm__ __volatile__ ("lgr %0, 15":"=r"(sp_now):);
+
+  assert(((uint64_t)sp_now & (frame::alignment_in_bytes-1)) == 0, "SP must be aligned");
+  return (address) sp_now;
+}
+
+char* os::non_memory_address_word() {
+  // The returned pattern (-1 converted to a pointer) must never look like
+  // an address returned by reserve_memory, even in its subfields (as defined
+  // by the CPU immediate fields, if the CPU splits constants across instructions).
+  return (char*)(intptr_t) -1;
+}
+
+// OS specific thread initialization. The body is empty: nothing is done with `thread` here.
+void os::initialize_thread(Thread* thread) { }
+
+// Frame information (pc, sp, fp) retrieved via ucontext always looks
+// like a C-frame according to the frame conventions in frame_s390.hpp.
+// The pc is read from the address part of the PSW stored in the context.
+address os::Linux::ucontext_get_pc(const ucontext_t * uc) {
+  return reinterpret_cast<address>(uc->uc_mcontext.psw.addr);
+}
+
+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) {
+  uc->uc_mcontext.psw.addr = reinterpret_cast<unsigned long>(pc);  // overwrite the PSW address stored in the context
+}
+
+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) {
+  return reinterpret_cast<intptr_t*>(uc->uc_mcontext.gregs[15/*REG_SP*/]);  // general register 15 holds the sp
+}
+
+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) {
+ return NULL; // no frame pointer is read from the context; uc is ignored
+}
+
+ExtendedPC os::fetch_frame_from_context(const void* ucVoid,
+                    intptr_t** ret_sp, intptr_t** ret_fp) {
+  // Extracts the pc and, for each non-NULL out parameter, sp and fp from a
+  // signal context passed as an untyped pointer.
+  const ucontext_t* const context = (const ucontext_t*)ucVoid;
+
+  if (context == NULL) {
+    // Construct empty ExtendedPC for return value checking.
+    if (ret_sp != NULL) { *ret_sp = (intptr_t *)NULL; }
+    if (ret_fp != NULL) { *ret_fp = (intptr_t *)NULL; }
+    return ExtendedPC(NULL);
+  }
+
+  if (ret_sp != NULL) { *ret_sp = os::Linux::ucontext_get_sp(context); }
+  if (ret_fp != NULL) { *ret_fp = os::Linux::ucontext_get_fp(context); }
+
+  const address context_pc = os::Linux::ucontext_get_pc(context);
+  return ExtendedPC(context_pc);
+}
+
+frame os::fetch_frame_from_context(const void* ucVoid) {
+  // Only sp and pc are needed to build the frame; fp is fetched but unused.
+  intptr_t *ctx_sp, *ctx_fp;
+  ExtendedPC ctx_pc = fetch_frame_from_context(ucVoid, &ctx_sp, &ctx_fp);
+  return frame(ctx_sp, ctx_pc.pc());
+}
+
+frame os::get_sender_for_C_frame(frame* fr) {
+  if (*fr->sp() == 0) {
+    return frame();  // fr is the last C frame.
+  }
+
+  // If it's not one of our frames, the return pc is saved at gpr14
+  // stack slot. The call_stub stores the return_pc to the stack slot
+  // of gpr10.
+  address const pc = fr->pc();
+  const bool in_interpreter_or_nonstub_code =
+      (Interpreter::code() != NULL && Interpreter::contains(pc)) ||
+      (CodeCache::contains(pc) && !StubRoutines::contains(pc));
+  if (in_interpreter_or_nonstub_code) {
+    return frame(fr->sender_sp(), fr->sender_pc());
+  }
+
+  if (!StubRoutines::contains(pc)) {
+    return frame(fr->sender_sp(), fr->native_sender_pc());
+  }
+
+  // Inside a stub routine: only the stub named "call_stub" uses callstub_sender_pc().
+  StubCodeDesc* const stub_desc = StubCodeDesc::desc_for(pc);
+  const bool is_call_stub = (stub_desc != NULL) && (strcmp(stub_desc->name(), "call_stub") == 0);
+  return frame(fr->sender_sp(), is_call_stub ? fr->callstub_sender_pc() : fr->sender_pc());
+}
+
+frame os::current_frame() {
+  // Load the word stored at the current stack pointer and use it as the sp of the starting frame.
+  intptr_t* csp = (intptr_t*) *((intptr_t*) os::current_stack_pointer());
+  assert (csp != NULL, "sp should not be NULL");
+  // Pass a dummy pc. This way we don't have to load it from the
+  // stack, since we don't know in which slot we can find it.
+  frame topframe(csp, (address)0x8);
+  if (os::is_first_C_frame(&topframe)) {
+    // Stack is not walkable.
+    return frame();
+  } else {
+    frame senderFrame = os::get_sender_for_C_frame(&topframe);
+    assert(senderFrame.pc() != NULL, "Sender pc should not be NULL");
+    // Return sender of sender of current topframe which hopefully
+    // both have pc != NULL. senderFrame is reused rather than recomputed.
+    return os::get_sender_for_C_frame(&senderFrame);
+  }
+}
+
+// Utility functions
+
+extern "C" JNIEXPORT int
+JVM_handle_linux_signal(int sig, // signal number being handled
+ siginfo_t* info, // may be junk or NULL, see the note below
+ void* ucVoid, // ucontext_t* of the interrupted code; may be junk or NULL
+ int abort_if_unrecognized) { // if zero, an unrecognized signal returns false instead of reporting a fatal error
+ ucontext_t* uc = (ucontext_t*) ucVoid;
+
+ Thread* t = Thread::current_or_null_safe();
+
+ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+ // (no destructors can be run).
+ os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
+ SignalHandlerMark shm(t);
+
+ // Note: it's not uncommon that JNI code uses signal/sigset to install
+ // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
+ // or have a SIGILL handler when detecting CPU type). When that happens,
+ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
+ // avoid unnecessary crash when libjsig is not preloaded, try handle signals
+ // that do not require siginfo/ucontext first.
+
+ if (sig == SIGPIPE) {
+ if (os::Linux::chained_handler(sig, info, ucVoid)) {
+ return true;
+ } else {
+ if (PrintMiscellaneous && (WizardMode || Verbose)) {
+ warning("Ignoring SIGPIPE - see bug 4229104");
+ }
+ return true;
+ }
+ }
+
+ JavaThread* thread = NULL;
+ VMThread* vmthread = NULL;
+ if (os::Linux::signal_handlers_are_installed) {
+ if (t != NULL) {
+ if(t->is_Java_thread()) {
+ thread = (JavaThread*)t;
+ } else if(t->is_VM_thread()) {
+ vmthread = (VMThread *)t;
+ }
+ }
+ }
+
+ // Moved SafeFetch32 handling outside thread!=NULL conditional block to make
+ // it work if no associated JavaThread object exists.
+ if (uc) {
+ address const pc = os::Linux::ucontext_get_pc(uc);
+ if (pc && StubRoutines::is_safefetch_fault(pc)) {
+ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
+ return true;
+ }
+ }
+
+ // Decide if this trap can be handled by a stub.
+ address stub = NULL;
+ address pc = NULL; // Pc as retrieved from PSW. Usually points past failing instruction.
+ address trap_pc = NULL; // Pc of the instruction causing the trap.
+
+ //%note os_trap_1
+ if (info != NULL && uc != NULL && thread != NULL) {
+ pc = os::Linux::ucontext_get_pc(uc);
+ if (TraceTraps) {
+ tty->print_cr(" pc at " INTPTR_FORMAT, p2i(pc));
+ }
+ if ((unsigned long)(pc - (address)info->si_addr) <= (unsigned long)Assembler::instr_maxlen() ) { // si_addr lies at most one maximal instruction length before pc
+ trap_pc = (address)info->si_addr;
+ if (TraceTraps) {
+ tty->print_cr("trap_pc at " INTPTR_FORMAT, p2i(trap_pc));
+ }
+ }
+
+ // Handle ALL stack overflow variations here
+ if (sig == SIGSEGV) {
+ address addr = (address)info->si_addr; // Address causing SIGSEGV, usually mem ref target.
+
+ // Check if fault address is within thread stack.
+ if (thread->on_local_stack(addr)) {
+ // stack overflow
+ if (thread->in_stack_yellow_reserved_zone(addr)) {
+ thread->disable_stack_yellow_reserved_zone();
+ if (thread->thread_state() == _thread_in_Java) {
+ // Throw a stack overflow exception.
+ // Guard pages will be reenabled while unwinding the stack.
+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
+ } else {
+ // Thread was in the vm or native code. Return and try to finish.
+ return 1;
+ }
+ } else if (thread->in_stack_red_zone(addr)) {
+ // Fatal red zone violation. Disable the guard pages and fall through
+ // to handle_unexpected_exception way down below.
+ thread->disable_stack_red_zone();
+ tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
+
+ // This is a likely cause, but hard to verify. Let's just print
+ // it as a hint.
+ tty->print_raw_cr("Please check if any of your loaded .so files has "
+ "enabled executable stack (see man page execstack(8))");
+ } else {
+ // Accessing stack address below sp may cause SEGV if current
+ // thread has MAP_GROWSDOWN stack. This should only happen when
+ // current thread was created by user code with MAP_GROWSDOWN flag
+ // and then attached to VM. See notes in os_linux.cpp.
+ if (thread->osthread()->expanding_stack() == 0) {
+ thread->osthread()->set_expanding_stack();
+ if (os::Linux::manually_expand_stack(thread, addr)) {
+ thread->osthread()->clear_expanding_stack();
+ return 1;
+ }
+ thread->osthread()->clear_expanding_stack();
+ } else {
+ fatal("recursive segv. expanding stack.");
+ }
+ }
+ }
+ }
+
+ if (thread->thread_state() == _thread_in_Java) {
+ // Java thread running in Java code => find exception handler if any
+ // a fault inside compiled code, the interpreter, or a stub
+
+ // Handle signal from NativeJump::patch_verified_entry().
+ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
+ if (TraceTraps) {
+ tty->print_cr("trap: zombie_not_entrant (SIGILL)");
+ }
+ stub = SharedRuntime::get_handle_wrong_method_stub();
+ }
+
+ else if (sig == SIGSEGV &&
+ os::is_poll_address((address)info->si_addr)) {
+ if (TraceTraps) {
+ tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc));
+ }
+ stub = SharedRuntime::get_poll_stub(pc);
+
+ // Info->si_addr only points to the page base address, so we
+ // must extract the real si_addr from the instruction and the
+ // ucontext.
+ assert(((NativeInstruction*)pc)->is_safepoint_poll(), "must be safepoint poll");
+ const address real_si_addr = ((NativeInstruction*)pc)->get_poll_address(uc); // NOTE(review): computed but not used afterwards
+ }
+
+ // Trap-based implicit null check in compiled code (checked for as SIGFPE here).
+ else if ((sig == SIGFPE) &&
+ TrapBasedNullChecks &&
+ (trap_pc != NULL) &&
+ Assembler::is_sigtrap_zero_check(trap_pc)) {
+ if (TraceTraps) {
+ tty->print_cr("trap: NULL_CHECK at " INTPTR_FORMAT " (SIGFPE)", p2i(trap_pc));
+ }
+ stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_NULL);
+ }
+
+ else if (sig == SIGSEGV && ImplicitNullChecks &&
+ CodeCache::contains((void*) pc) &&
+ !MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) {
+ if (TraceTraps) {
+ tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc));
+ }
+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
+ }
+
+ // Trap-based implicit range check in compiled code (checked for as SIGFPE here).
+ else if (sig == SIGFPE && TrapBasedRangeChecks &&
+ (trap_pc != NULL) &&
+ Assembler::is_sigtrap_range_check(trap_pc)) {
+ if (TraceTraps) {
+ tty->print_cr("trap: RANGE_CHECK at " INTPTR_FORMAT " (SIGFPE)", p2i(trap_pc));
+ }
+ stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_NULL); // NOTE(review): range check passes IMPLICIT_NULL - confirm intended
+ }
+
+ else if (sig == SIGFPE && info->si_code == FPE_INTDIV) {
+ stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); // NOTE(review): trap_pc is not checked against NULL in this branch
+ }
+
+ else if (sig == SIGBUS) {
+ // BugId 4454115: A read from a MappedByteBuffer can fault here if the
+ // underlying file has been truncated. Do not crash the VM in such a case.
+ CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
+ nmethod* nm = (cb != NULL && cb->is_nmethod()) ? (nmethod*)cb : NULL;
+ if (nm != NULL && nm->has_unsafe_access()) {
+ // We don't really need a stub here! Just set the pending exception and
+ // continue at the next instruction after the faulting read. Returning
+ // garbage from this read is ok.
+ thread->set_pending_unsafe_access_error();
+ uc->uc_mcontext.psw.addr = ((unsigned long)pc) + Assembler::instr_len(pc);
+ return true;
+ }
+ }
+ }
+
+ else { // thread->thread_state() != _thread_in_Java
+ if (sig == SIGILL && VM_Version::is_determine_features_test_running()) {
+ // SIGILL must be caused by VM_Version::determine_features().
+ //*(int *) (pc-6)=0; // Patch instruction to 0 to indicate that it causes a SIGILL.
+ // Flushing of icache is not necessary.
+ stub = pc; // Continue with next instruction.
+ } else if (thread->thread_state() == _thread_in_vm &&
+ sig == SIGBUS && thread->doing_unsafe_access()) {
+ // We don't really need a stub here! Just set the pending exception and
+ // continue at the next instruction after the faulting read. Returning
+ // garbage from this read is ok.
+ thread->set_pending_unsafe_access_error();
+ os::Linux::ucontext_set_pc(uc, pc + Assembler::instr_len(pc));
+ return true;
+ }
+ }
+
+ // Check to see if we caught the safepoint code in the
+ // process of write protecting the memory serialization page.
+ // It write enables the page immediately after protecting it
+ // so we can just return to retry the write.
+ // Info->si_addr need not be the exact address, it is only
+ // guaranteed to be on the same page as the address that caused
+ // the SIGSEGV.
+ if ((sig == SIGSEGV) &&
+ (os::get_memory_serialize_page() ==
+ (address)((uintptr_t)info->si_addr & ~(os::vm_page_size()-1)))) {
+ return true;
+ }
+ }
+
+ if (stub != NULL) {
+ // Save all thread context in case we need to restore it.
+ if (thread != NULL) thread->set_saved_exception_pc(pc);
+ os::Linux::ucontext_set_pc(uc, stub); // resume execution at the chosen stub
+ return true;
+ }
+
+ // signal-chaining
+ if (os::Linux::chained_handler(sig, info, ucVoid)) {
+ return true;
+ }
+
+ if (!abort_if_unrecognized) {
+ // caller wants another chance, so give it to him
+ return false;
+ }
+
+ if (pc == NULL && uc != NULL) {
+ pc = os::Linux::ucontext_get_pc(uc);
+ }
+
+ // unmask current signal
+ sigset_t newset;
+ sigemptyset(&newset);
+ sigaddset(&newset, sig);
+ sigprocmask(SIG_UNBLOCK, &newset, NULL);
+
+ VMError::report_and_die(t, sig, pc, info, ucVoid);
+
+ ShouldNotReachHere();
+ return false;
+}
+
+void os::Linux::init_thread_fpu_state(void) {
+ // Nothing to do on z/Architecture: no per-thread FPU setup is performed.
+}
+
+int os::Linux::get_fpu_control_word(void) {
+ // Nothing to do on z/Architecture: the result is always 0.
+ return 0;
+}
+
+void os::Linux::set_fpu_control_word(int fpu_control) {
+ // Nothing to do on z/Architecture: fpu_control is ignored.
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// thread stack
+
+size_t os::Posix::_compiler_thread_min_stack_allowed = 128 * K; // all three minimums are set to the same value
+size_t os::Posix::_java_thread_min_stack_allowed = 128 * K;
+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 128 * K;
+
+// Returns the default stack size for thr_type: 4M for compiler threads
+// (they need a larger stack), 1024K for every other thread type.
+size_t os::Posix::default_stack_size(os::ThreadType thr_type) {
+  if (thr_type == os::compiler_thread) { return 4 * M; }
+  return 1024 * K;
+}
+
+size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
+  // z/Architecture: 2 guard pages, independent of thr_type. Keep consistent with the register stack handling code.
+  const int guard_page_count = 2;
+  return guard_page_count * page_size();
+}
+
+// Java thread:
+//
+// Low memory addresses
+// +------------------------+
+// | |\
+// | glibc guard page | - Right in the middle of stack, 2 pages
+// | |/
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+// | |\
+// | HotSpot Guard Pages | - red and yellow pages
+// | |/
+// +------------------------+ JavaThread::stack_yellow_zone_base()
+// | |\
+// | Normal Stack | -
+// | |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// Non-Java thread:
+//
+// Low memory addresses
+// +------------------------+
+// | |\
+// | glibc guard page | - Right in the middle of stack, 2 pages
+// | |/
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+// | |\
+// | Normal Stack | -
+// | |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// ** P2 is the address returned from pthread_attr_getstackaddr(), P2 - P1
+// is the stack size returned by pthread_attr_getstacksize().
+
+
+static void current_stack_region(address * bottom, size_t * size) {
+  // Reports, through the out parameters, the lowest address and the size in
+  // bytes of the calling thread's stack.
+  if (!os::Linux::is_initial_thread()) {
+    pthread_attr_t thread_attr;
+    const int status = pthread_getattr_np(pthread_self(), &thread_attr);
+
+    // JVM needs to know exact stack location, abort if it fails
+    if (status == ENOMEM) {
+      vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
+    } else if (status != 0) {
+      fatal("pthread_getattr_np failed with errno = %d", status);
+    }
+
+    if (pthread_attr_getstack(&thread_attr, (void **)bottom, size) != 0) {
+      fatal("Can not locate current stack attributes!");
+    }
+
+    pthread_attr_destroy(&thread_attr);
+  } else {
+    // Initial thread needs special handling because pthread_getattr_np()
+    // may return bogus value.
+    *bottom = os::Linux::initial_thread_stack_bottom();
+    *size = os::Linux::initial_thread_stack_size();
+  }
+
+  // Sanity check: the current stack pointer must lie inside the reported region.
+  assert(os::current_stack_pointer() >= *bottom &&
+         os::current_stack_pointer() < *bottom + *size, "just checking");
+}
+
+address os::current_stack_base() {
+  address low_end;
+  size_t region_bytes;
+  current_stack_region(&low_end, &region_bytes);
+  return low_end + region_bytes;  // the base is the high end of the region
+}
+
+size_t os::current_stack_size() {
+  // The reported size covers the normal stack plus the HotSpot guard pages.
+  address low_end;
+  size_t region_bytes;
+  current_stack_region(&low_end, &region_bytes);
+  return region_bytes;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// helper functions for fatal error handler
+
+void os::print_context(outputStream *st, const void *context) {  // prints the CPU state held in a ucontext_t to st; no-op for NULL
+  if (context == NULL) return;
+  const ucontext_t* uc = (const ucontext_t*)context;
+
+  st->print_cr("Processor state:");
+  st->print_cr("----------------");
+  st->print_cr(" ip = " INTPTR_FORMAT " ", uc->uc_mcontext.psw.addr);
+  st->print_cr(" proc mask = " INTPTR_FORMAT " ", uc->uc_mcontext.psw.mask);
+  st->print_cr(" fpc reg = 0x%8.8x " , uc->uc_mcontext.fpregs.fpc);
+  st->cr();
+
+  st->print_cr("General Purpose Registers:");
+  st->print_cr("--------------------------");
+  for (int i = 0; i < 16; i += 2) {
+    st->print(" r%-2d = " INTPTR_FORMAT " " , i, uc->uc_mcontext.gregs[i]);
+    st->print(" r%-2d = " INTPTR_FORMAT " |", i+1, uc->uc_mcontext.gregs[i+1]);
+    st->print(" r%-2d = %23.1ld " , i, uc->uc_mcontext.gregs[i]);
+    st->print(" r%-2d = %23.1ld " , i+1, uc->uc_mcontext.gregs[i+1]);
+    st->cr();
+  }
+  st->cr();
+
+  st->print_cr("Access Registers:");
+  st->print_cr("-----------------");
+  for (int i = 0; i < 16; i += 2) {
+    st->print(" ar%-2d = 0x%8.8x ", i, uc->uc_mcontext.aregs[i]);
+    st->print(" ar%-2d = 0x%8.8x ", i+1, uc->uc_mcontext.aregs[i+1]);
+    st->cr();
+  }
+  st->cr();
+
+  st->print_cr("Float Registers:");
+  st->print_cr("----------------");
+  for (int i = 0; i < 16; i += 2) {
+    int64_t raw[2];  // bit images for the hex column; casting the double value to int64_t would convert (truncate) it
+    memcpy(&raw[0], &uc->uc_mcontext.fpregs.fprs[i].d, sizeof(double));
+    memcpy(&raw[1], &uc->uc_mcontext.fpregs.fprs[i+1].d, sizeof(double));
+    st->print(" fr%-2d = " INTPTR_FORMAT " " , i, raw[0]);
+    st->print(" fr%-2d = " INTPTR_FORMAT " |", i+1, raw[1]);
+    st->print(" fr%-2d = %23.15e " , i, (uc->uc_mcontext.fpregs.fprs[i].d));
+    st->print(" fr%-2d = %23.15e " , i+1, (uc->uc_mcontext.fpregs.fprs[i+1].d));
+    st->cr();
+  }
+  st->cr();
+  st->cr();
+
+  intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
+  st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp));
+  print_hex_dump(st, (address)sp, (address)(sp + 128), sizeof(intptr_t));
+  st->cr();
+  // Note: it may be unsafe to inspect memory near pc. For example, pc may point to garbage
+  // if entry point in an nmethod is corrupted. Leave this at the end, and hope for the best.
+  address pc = os::Linux::ucontext_get_pc(uc);
+  if (Verbose) { st->print_cr("pc at " PTR_FORMAT, p2i(pc)); }
+  st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc));
+  print_hex_dump(st, pc-64, pc+64, /*intrsize=*/4);
+  st->cr();
+}
+
+void os::print_register_info(outputStream *st, const void *context) {
+ st->print("Not ported\n"); // placeholder output only; context is not inspected
+}
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() { // non-PRODUCT builds only; performs no check here
+}
+#endif
+
+int os::extra_bang_size_in_bytes() {
+ // z/Architecture does not require the additional stack bang, so no extra bytes are requested.
+ return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP
+
+ static void setup_fpu() {} // empty: no FPU setup is performed
+
+ // Used to register dynamic code cache area with the OS. Nothing is registered here; always reports success.
+ static bool register_code_area(char *low, char *high) { return true; }
+
+#endif // OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/prefetch_linux_s390.inline.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+inline void Prefetch::read(void* loc, intx interval) {
+ // Intentionally a no-op; both arguments are ignored. (Port note: no prefetch instructions on z/Architecture - NOTE(review): verify for current models.)
+}
+
+inline void Prefetch::write(void* loc, intx interval) {
+ // Intentionally a no-op; both arguments are ignored. (Port note: no prefetch instructions on z/Architecture - NOTE(review): verify for current models.)
+}
+
+#endif // OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.cpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/thread.hpp"
+
+// Forte Analyzer AsyncGetCallTrace profiling support is not implemented on Linux/S390x.
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava) {
+ Unimplemented(); // reports the missing implementation; no argument is used
+ return false;
+}
+
+void JavaThread::cache_global_variables() { } // empty: nothing is cached here
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
+
+ private:
+
+  void pd_initialize() {
+    _last_interpreter_fp = NULL;  // no interpreter fp recorded yet
+    _anchor.clear();              // reset the frame anchor
+  }
+
+  // The `last' frame is the youngest Java frame on the thread's stack.
+  frame pd_last_frame() {
+    assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
+
+    intptr_t* const anchor_sp = last_Java_sp();
+    address const anchor_pc = _anchor.last_Java_pc();
+
+    // Last_Java_pc is not set if we come here from compiled code; in that
+    // case the pc is read from the 14th intptr_t slot above the sp.
+    if (anchor_pc != NULL) {
+      return frame(anchor_sp, anchor_pc);
+    }
+    return frame(anchor_sp, (address) *(anchor_sp + 14));
+  }
+
+ public:
+ void set_base_of_stack_pointer(intptr_t* base_sp) {} // no-op: base_sp is not stored
+ intptr_t* base_of_stack_pointer() { return NULL; } // always NULL
+ void record_base_of_stack_pointer() {} // no-op
+
+ // These routines are only used on cpu architectures that have separate
+ // register stacks (Itanium); here they report false / do nothing.
+ static bool register_stack_overflow() { return false; }
+ static void enable_register_stack_guard() {}
+ static void disable_register_stack_guard() {}
+
+ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava);
+
+ protected:
+
+ // -Xprof support
+ //
+ // In order to find the last Java fp from an async profile
+ // tick, we store the current interpreter fp in the thread.
+ // This value is only valid while we are in the C++ interpreter
+ // and profiling.
+ intptr_t *_last_interpreter_fp;
+
+ public:
+
+ static ByteSize last_interpreter_fp_offset() { // byte offset of _last_interpreter_fp within JavaThread
+ return byte_offset_of(JavaThread, _last_interpreter_fp);
+ }
+
+ intptr_t* last_interpreter_fp() { return _last_interpreter_fp; } // plain getter for the stored value
+
+#endif // OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_s390/vm/vmStructs_linux_s390.hpp Thu Oct 13 14:49:34 2016 +0200
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ /* Only nonstatic_field is used here; the other macro parameters are unused. */ \
+ /******************************/ \
+ /* Threads (NOTE: incomplete) */ \
+ /******************************/ \
+ nonstatic_field(OSThread, _thread_id, pid_t) \
+ nonstatic_field(OSThread, _pthread_id, pthread_t)
+
+
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+ /* Only declare_integer_type and declare_unsigned_integer_type are used here. */ \
+ /**********************/ \
+ /* Posix Thread IDs */ \
+ /**********************/ \
+ \
+ declare_integer_type(pid_t) \
+ declare_unsigned_integer_type(pthread_t)
+
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+// VM_INT_CONSTANTS_OS_CPU and VM_LONG_CONSTANTS_OS_CPU both expand to nothing: no constants are declared here.
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP
+