8168503: JEP 297: Unified arm32/arm64 Port
Reviewed-by: kvn, enevill, ihse, dholmes, erikj, coleenp, cjplummer
--- a/hotspot/make/gensrc/GensrcAdlc.gmk Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/make/gensrc/GensrcAdlc.gmk Mon Dec 19 12:39:01 2016 -0500
@@ -114,6 +114,10 @@
ADLCFLAGS += -U_LP64
endif
+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), arm) # adlc input comes from the arm port: define ARM for it
+ ADLCFLAGS += -DARM=1
+ endif # HOTSPOT_TARGET_CPU_ARCH == arm
+
##############################################################################
# Concatenate all ad source files into a single file, which will be fed to
# adlc. Also include a #line directive at the start of every included file
--- a/hotspot/make/lib/CompileJvm.gmk Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/make/lib/CompileJvm.gmk Mon Dec 19 12:39:01 2016 -0500
@@ -63,8 +63,8 @@
# INCLUDE_SUFFIX_* is only meant for including the proper
# platform files. Don't use it to guard code. Use the value of
# HOTSPOT_TARGET_CPU_DEFINE etc. instead.
-# Remaining TARGET_ARCH_* is needed to distinguish closed and open
-# 64-bit ARM ports (also called AARCH64).
+# Remaining TARGET_ARCH_* is needed to select the cpu specific
+# sources for 64-bit ARM ports (arm versus aarch64).
JVM_CFLAGS_TARGET_DEFINES += \
-DTARGET_ARCH_$(HOTSPOT_TARGET_CPU_ARCH) \
-DINCLUDE_SUFFIX_OS=_$(HOTSPOT_TARGET_OS) \
@@ -139,6 +139,20 @@
################################################################################
# Platform specific setup
+# ARM source selection: exclude the arm_32 / arm_64 / aarch64 sources that do not belong in this build.
+
+ifeq ($(OPENJDK_TARGET_OS)-$(OPENJDK_TARGET_CPU), linux-arm)
+ JVM_EXCLUDE_PATTERNS += arm_64
+
+else ifeq ($(OPENJDK_TARGET_OS)-$(OPENJDK_TARGET_CPU), linux-aarch64)
+ # A 64-bit ARM build uses the 64-bit sources under hotspot/src/cpu/arm
+ # only when HOTSPOT_TARGET_CPU_ARCH is set to arm. In that case the
+ # aarch64 files and the 32-bit arm files are excluded from the build.
+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), arm)
+ JVM_EXCLUDE_PATTERNS += arm_32 aarch64
+ endif # HOTSPOT_TARGET_CPU_ARCH == arm
+endif # linux-arm / linux-aarch64
+
ifneq ($(filter $(OPENJDK_TARGET_OS), linux macosx windows), )
JVM_PRECOMPILED_HEADER := $(HOTSPOT_TOPDIR)/src/share/vm/precompiled/precompiled.hpp
endif
--- a/hotspot/make/lib/JvmFeatures.gmk Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/make/lib/JvmFeatures.gmk Mon Dec 19 12:39:01 2016 -0500
@@ -154,3 +154,108 @@
compiledIC_aot_x86_64.cpp compilerRuntime.cpp \
aotCodeHeap.cpp aotCompiledMethod.cpp aotLoader.cpp compiledIC_aot.cpp
endif
+################################################################################
+
+ifeq ($(call check-jvm-feature, link-time-opt), true)
+ # NOTE: Disable the automatic optimization level and let the explicit cflag
+ # control the optimization level instead. This activates O3 on slowdebug
+ # builds, just like the old build, but it's probably not right.
+ JVM_OPTIMIZATION :=
+ JVM_CFLAGS_FEATURES += -O3 -flto
+ JVM_LDFLAGS_FEATURES += -O3 -flto -fwhole-program -fno-strict-aliasing
+endif # link-time-opt
+
+ifeq ($(call check-jvm-feature, minimal), true) # minimal JVM variant
+ ifeq ($(call check-jvm-feature, link-time-opt), false) # only when link-time-opt has not taken over optimization
+ JVM_OPTIMIZATION := SIZE
+ OPT_SPEED_SRC := \
+ allocation.cpp \
+ assembler.cpp \
+ assembler_linux_arm.cpp \
+ barrierSet.cpp \
+ basicLock.cpp \
+ biasedLocking.cpp \
+ bytecode.cpp \
+ bytecodeInterpreter.cpp \
+ bytecodeInterpreter_x86.cpp \
+ c1_Compilation.cpp \
+ c1_Compiler.cpp \
+ c1_GraphBuilder.cpp \
+ c1_LinearScan.cpp \
+ c1_LIR.cpp \
+ ciEnv.cpp \
+ ciObjectFactory.cpp \
+ codeBlob.cpp \
+ constantPool.cpp \
+ constMethod.cpp \
+ classLoader.cpp \
+ classLoaderData.cpp \
+ classFileParser.cpp \
+ classFileStream.cpp \
+ cpCache.cpp \
+ defNewGeneration.cpp \
+ frame_arm.cpp \
+ genCollectedHeap.cpp \
+ generation.cpp \
+ genMarkSweep.cpp \
+ growableArray.cpp \
+ handles.cpp \
+ hashtable.cpp \
+ heap.cpp \
+ icache.cpp \
+ icache_arm.cpp \
+ instanceKlass.cpp \
+ invocationCounter.cpp \
+ iterator.cpp \
+ javaCalls.cpp \
+ javaClasses.cpp \
+ jniFastGetField_arm.cpp \
+ jvm.cpp \
+ jvm_linux.cpp \
+ linkResolver.cpp \
+ klass.cpp \
+ klassVtable.cpp \
+ markSweep.cpp \
+ memRegion.cpp \
+ memoryPool.cpp \
+ method.cpp \
+ methodHandles.cpp \
+ methodHandles_arm.cpp \
+ methodLiveness.cpp \
+ metablock.cpp \
+ metaspace.cpp \
+ mutex.cpp \
+ mutex_linux.cpp \
+ mutexLocker.cpp \
+ nativeLookup.cpp \
+ objArrayKlass.cpp \
+ os_linux.cpp \
+ os_linux_arm.cpp \
+ placeHolders.cpp \
+ quickSort.cpp \
+ resourceArea.cpp \
+ rewriter.cpp \
+ sharedRuntime.cpp \
+ signature.cpp \
+ space.cpp \
+ stackMapTable.cpp \
+ symbolTable.cpp \
+ systemDictionary.cpp \
+ symbol.cpp \
+ synchronizer.cpp \
+ threadLS_bsd_x86.cpp \
+ threadLS_linux_arm.cpp \
+ threadLS_linux_x86.cpp \
+ timer.cpp \
+ typeArrayKlass.cpp \
+ unsafe.cpp \
+ utf8.cpp \
+ vmSymbols.cpp \
+ #
+
+ $(foreach s, $(OPT_SPEED_SRC), \
+ $(eval BUILD_LIBJVM_$s_OPTIMIZATION := HIGHEST_JVM)) # per-file override: each listed file gets HIGHEST_JVM instead of SIZE
+
+ BUILD_LIBJVM_systemDictionary.cpp_CXXFLAGS := -fno-optimize-sibling-calls
+ endif # link-time-opt == false
+endif # minimal
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/abstractInterpreter_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "interpreter/bytecode.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/constMethod.hpp"
+#include "oops/method.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/synchronizer.hpp"
+#include "utilities/macros.hpp"
+
+// Translates a BasicType into an index that is asserted to lie below
+// AbstractInterpreter::number_of_result_handlers.
+// AARCH64 groups several types under one index (0..5); the 32-bit build
+// uses indices 0..9, sharing one only between T_OBJECT and T_ARRAY.
+// An unexpected type hits ShouldNotReachHere().
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
+  int i = 0;
+  switch (type) {
+#ifdef AARCH64
+    case T_BOOLEAN:
+      i = 0;
+      break;
+    case T_CHAR:
+      i = 1;
+      break;
+    case T_BYTE:
+      i = 2;
+      break;
+    case T_SHORT:
+      i = 3;
+      break;
+    // int, long, void, float and double all share index 4
+    case T_INT:
+    case T_LONG:
+    case T_VOID:
+    case T_FLOAT:
+    case T_DOUBLE:
+      i = 4;
+      break;
+    // objects and arrays share index 5
+    case T_OBJECT:
+    case T_ARRAY:
+      i = 5;
+      break;
+#else
+    case T_VOID:
+      i = 0;
+      break;
+    case T_BOOLEAN:
+      i = 1;
+      break;
+    case T_CHAR:
+      i = 2;
+      break;
+    case T_BYTE:
+      i = 3;
+      break;
+    case T_SHORT:
+      i = 4;
+      break;
+    case T_INT:
+      i = 5;
+      break;
+    // objects and arrays share index 6
+    case T_OBJECT:
+    case T_ARRAY:
+      i = 6;
+      break;
+    case T_LONG:
+      i = 7;
+      break;
+    case T_FLOAT:
+      i = 8;
+      break;
+    case T_DOUBLE:
+      i = 9;
+      break;
+#endif // AARCH64
+    default:
+      ShouldNotReachHere();
+  }
+  assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
+  return i;
+}
+
+// The math method kinds listed below must never be compiled, because the
+// interpreter would prefer a compiled version over its intrinsic version.
+// Every other method kind may be compiled.
+bool AbstractInterpreter::can_be_compiled(methodHandle m) {
+  bool compilable = true;
+  switch (method_kind(m)) {
+    case Interpreter::java_lang_math_sin:
+    case Interpreter::java_lang_math_cos:
+    case Interpreter::java_lang_math_tan:
+    case Interpreter::java_lang_math_abs:
+    case Interpreter::java_lang_math_log:
+    case Interpreter::java_lang_math_log10:
+    case Interpreter::java_lang_math_sqrt:
+      compilable = false;
+      break;
+    default:
+      break;
+  }
+  return compilable;
+}
+
+// Returns how much stack, in words, an activation of `method` needs.
+// The result is the sum of the fixed frame overhead, one monitor if the
+// method is synchronized, the method's locals and expression stack, and
+// a platform constant for the call stub.
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
+  // Space for one monitor, needed to enter the interpreted method when it
+  // is synchronized.
+  int monitor_words = 0;
+  if (method->is_synchronized()) {
+    monitor_words = 1*frame::interpreter_frame_monitor_size();
+  }
+
+  // Fixed part of the frame: sender SP through the expression stack bottom.
+  // Change this if anything is added to or removed from the overhead area.
+  const int fixed_words =
+      frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset;
+
+  // Locals plus the maximum expression stack, scaled to stack elements.
+  const int locals_and_stack_words =
+      (method->max_locals() + method->max_stack()) * Interpreter::stackElementWords;
+
+  // see generate_call_stub
+  const int call_stub_words = AARCH64_ONLY(24) NOT_AARCH64(12);
+
+  return monitor_words + fixed_words + locals_and_stack_words + call_stub_words;
+}
+
+// asm based interpreter deoptimization helper.
+// Computes the size of an interpreter activation from its temp count,
+// extra arguments, monitor count and the callee's locals beyond its
+// parameters. max_stack and is_top_frame are not used by this port.
+// Note: This calculation must exactly parallel the frame setup
+// in TemplateInterpreterGenerator::generate_fixed_frame.
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int tempcount,
+                                         int extra_args,
+                                         int moncount,
+                                         int callee_param_count,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Fixed size of an interpreter frame.
+  const int fixed_part = frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset;
+
+  // Our own locals were accounted for by the caller (or by last_frame_adjust
+  // on the transition). The callee's parameters are already covered as well,
+  // so only the callee's extra locals are added here.
+  const int callee_extra_locals = (callee_locals - callee_param_count) * Interpreter::stackElementWords;
+  const int monitors = moncount * frame::interpreter_frame_monitor_size();
+  const int temps = tempcount * Interpreter::stackElementWords;
+
+  int total = fixed_part + callee_extra_locals + monitors + temps + extra_args;
+
+#ifdef AARCH64
+  // Round up to a whole number of stack-alignment units, expressed in words.
+  total = round_to(total, StackAlignmentInBytes/BytesPerWord);
+#endif // AARCH64
+
+  return total;
+}
+
+void AbstractInterpreter::layout_activation(Method* method,
+ int tempcount,
+ int popframe_extra_args,
+ int moncount,
+ int caller_actual_parameters,
+ int callee_param_count,
+ int callee_locals,
+ frame* caller,
+ frame* interpreter_frame,
+ bool is_top_frame,
+ bool is_bottom_frame) {
+
+ // Set up the method, locals, and monitors.
+ // The frame interpreter_frame is guaranteed to be the right size,
+ // as determined by a previous call to the size_activation() method.
+ // It is also guaranteed to be walkable even though it is in a skeletal state
+ // NOTE: nothing is returned; the local counts below are scaled by Interpreter::stackElementWords (words, not bytes)
+
+ // number of local slots of the method, and how many of them are not incoming parameters:
+ int max_locals = method->max_locals() * Interpreter::stackElementWords;
+ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords;
+
+#ifdef ASSERT
+ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable");
+#endif
+
+ interpreter_frame->interpreter_frame_set_method(method);
+ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp
+ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp)
+ // and sender_sp is (fp + sender_sp_offset*wordSize)
+
+#ifdef AARCH64
+ intptr_t* locals;
+ if (caller->is_interpreted_frame()) {
+ // attach locals to the expression stack of caller interpreter frame
+ locals = caller->interpreter_frame_tos_address() + caller_actual_parameters*Interpreter::stackElementWords - 1;
+ } else {
+ assert (is_bottom_frame, "should be");
+ locals = interpreter_frame->fp() + frame::sender_sp_offset + method->max_locals() - 1;
+ }
+
+ if (TraceDeoptimization) {
+ tty->print_cr("layout_activation:");
+
+ if (caller->is_entry_frame()) {
+ tty->print("entry ");
+ }
+ if (caller->is_compiled_frame()) {
+ tty->print("compiled ");
+ }
+ if (caller->is_interpreted_frame()) {
+ tty->print("interpreted ");
+ }
+ tty->print_cr("caller: sp=%p, unextended_sp=%p, fp=%p, pc=%p", caller->sp(), caller->unextended_sp(), caller->fp(), caller->pc());
+ tty->print_cr("interpreter_frame: sp=%p, unextended_sp=%p, fp=%p, pc=%p", interpreter_frame->sp(), interpreter_frame->unextended_sp(), interpreter_frame->fp(), interpreter_frame->pc());
+ tty->print_cr("method: max_locals = %d, size_of_parameters = %d", method->max_locals(), method->size_of_parameters());
+ tty->print_cr("caller_actual_parameters = %d", caller_actual_parameters);
+ tty->print_cr("locals = %p", locals);
+ }
+
+#ifdef ASSERT
+ if (caller_actual_parameters != method->size_of_parameters()) { // the parameter count was adjusted: check that the adjustment is one of the expected kinds
+ assert(caller->is_interpreted_frame(), "adjusted caller_actual_parameters, but caller is not interpreter frame");
+ Bytecode_invoke inv(caller->interpreter_frame_method(), caller->interpreter_frame_bci());
+
+ if (is_bottom_frame) {
+ assert(caller_actual_parameters == 0, "invalid adjusted caller_actual_parameters value for bottom frame");
+ assert(inv.is_invokedynamic() || inv.is_invokehandle(), "adjusted caller_actual_parameters for bottom frame, but not invokedynamic/invokehandle");
+ } else {
+ assert(caller_actual_parameters == method->size_of_parameters()+1, "invalid adjusted caller_actual_parameters value");
+ assert(!inv.is_invokedynamic() && MethodHandles::has_member_arg(inv.klass(), inv.name()), "adjusted caller_actual_parameters, but no member arg");
+ }
+ }
+ if (caller->is_interpreted_frame()) { // check that the locals area does not overlap the caller, per caller kind
+ intptr_t* locals_base = (locals - method->max_locals()*Interpreter::stackElementWords + 1);
+ locals_base = (intptr_t*)round_down((intptr_t)locals_base, StackAlignmentInBytes);
+ assert(interpreter_frame->sender_sp() <= locals_base, "interpreter-to-interpreter frame chaining");
+
+ } else if (caller->is_compiled_frame()) {
+ assert(locals + 1 <= caller->unextended_sp(), "compiled-to-interpreter frame chaining");
+
+ } else {
+ assert(caller->is_entry_frame(), "should be");
+ assert(locals + 1 <= caller->fp(), "entry-to-interpreter frame chaining");
+ }
+#endif // ASSERT
+
+#else
+ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
+#endif // AARCH64
+
+ interpreter_frame->interpreter_frame_set_locals(locals);
+ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
+ BasicObjectLock* monbot = montop - moncount; // monitor end lies moncount entries below monitor begin
+ interpreter_frame->interpreter_frame_set_monitor_end(monbot);
+
+ // Compute the stack top below the monitors, the temps and the popframe extra args (stored as stack_top on AARCH64, last_sp otherwise)
+ intptr_t* stack_top = (intptr_t*) monbot -
+ tempcount*Interpreter::stackElementWords -
+ popframe_extra_args;
+#ifdef AARCH64
+ interpreter_frame->interpreter_frame_set_stack_top(stack_top);
+
+ intptr_t* extended_sp = (intptr_t*) monbot -
+ (method->max_stack() + 1) * Interpreter::stackElementWords - // +1 is reserved slot for exception handler
+ popframe_extra_args;
+ extended_sp = (intptr_t*)round_down((intptr_t)extended_sp, StackAlignmentInBytes);
+ interpreter_frame->interpreter_frame_set_extended_sp(extended_sp);
+#else
+ interpreter_frame->interpreter_frame_set_last_sp(stack_top);
+#endif // AARCH64
+
+ // All frames but the initial (oldest) interpreter frame we fill in have a
+ // value for sender_sp that allows walking the stack but isn't
+ // truly correct. Correct the value here.
+
+#ifdef AARCH64
+ if (caller->is_interpreted_frame()) {
+ intptr_t* sender_sp = (intptr_t*)round_down((intptr_t)caller->interpreter_frame_tos_address(), StackAlignmentInBytes);
+ interpreter_frame->set_interpreter_frame_sender_sp(sender_sp);
+
+ } else {
+ // in case of non-interpreter caller sender_sp of the oldest frame is already
+ // set to valid value
+ }
+#else
+ if (extra_locals != 0 &&
+ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) {
+ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals);
+ }
+#endif // AARCH64
+
+ *interpreter_frame->interpreter_frame_cache_addr() =
+ method->constants()->cache(); // store the method's constant pool cache in the frame
+ *interpreter_frame->interpreter_frame_mirror_addr() =
+ method->method_holder()->java_mirror(); // store the holder class's java mirror in the frame
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/arm.ad Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,14428 @@
+//
+// Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+// ARM Architecture Description File
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+// int_def <name> ( <int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+// #define <name> (<expression>)
+// // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+definitions %{
+// The default cost (of an ALU instruction), and a very large cost.
+ int_def DEFAULT_COST ( 100, 100);
+ int_def HUGE_COST (1000000, 1000000);
+
+// Memory refs are twice as expensive as the default (ALU) cost.
+ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2);
+
+// Branches and calls are even more expensive: three times the default cost.
+ int_def BRANCH_COST ( 300, DEFAULT_COST * 3);
+ int_def CALL_COST ( 300, DEFAULT_COST * 3);
+%}
+
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+source_hpp %{
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ %} and source_hpp %{ %} freely as needed.
+
+// Does destination need to be loaded in a register then passed to a
+// branch instruction?
+extern bool maybe_far_call(const CallNode *n);
+extern bool maybe_far_call(const MachCallNode *n);
+static inline bool cache_reachable() {
+ return MacroAssembler::_cache_fully_reachable();
+}
+
+#ifdef AARCH64
+#define ldr_32 ldr_w
+#define str_32 str_w
+#else
+#define ldr_32 ldr
+#define str_32 str
+#define tst_32 tst
+#define teq_32 teq
+#endif
+#if 1
+extern bool PrintOptoAssembly;
+#endif
+
+class c2 {
+public:
+ static OptoRegPair return_value(int ideal_reg);
+};
+
+class CallStubImpl {
+
+ //--------------------------------------------------------------
+ //---< Used for optimization in Compile::Shorten_branches >---
+ //--------------------------------------------------------------
+
+ public:
+ // Size of call trampoline stub; always 0 here.
+ static uint size_call_trampoline() {
+ return 0; // no call trampolines on this platform
+ }
+
+ // Number of relocations needed by a call trampoline stub; always 0 here.
+ static uint reloc_call_trampoline() {
+ return 0; // no call trampolines on this platform
+ }
+};
+
+class HandlerImpl {
+
+ public:
+
+ static int emit_exception_handler(CodeBuffer &cbuf); // declared here, defined outside this block
+ static int emit_deopt_handler(CodeBuffer& cbuf); // declared here, defined outside this block
+
+ static uint size_exception_handler() {
+#ifdef AARCH64
+ // ldr_literal; br; (pad); <literal> -- three instruction slots plus one word for the literal
+ return 3 * Assembler::InstructionSize + wordSize;
+#else
+ return ( 3 * 4 ); // NOTE(review): presumably three 4-byte instructions -- confirm against emit_exception_handler
+#endif
+ }
+
+
+ static uint size_deopt_handler() {
+ return ( 9 * 4 ); // same value for AARCH64 and 32-bit; NOTE(review): presumably nine 4-byte slots -- confirm against emit_deopt_handler
+ }
+
+};
+
+%}
+
+source %{
+#define __ _masm.
+
+static FloatRegister reg_to_FloatRegister_object(int register_encoding);
+static Register reg_to_register_object(int register_encoding);
+
+
+// ****************************************************************************
+
+// REQUIRED FUNCTIONALITY
+
+// Indicate if the safepoint node needs the polling page as an input.
+// Since ARM does not have absolute addressing, it does.
+bool SafePointNode::needs_polling_address_input() {
+ return true;
+}
+
+// emit an interrupt that is caught by the debugger (for debugging compiler)
+void emit_break(CodeBuffer &cbuf) {
+ MacroAssembler _masm(&cbuf);
+ __ breakpoint();
+}
+
+#ifndef PRODUCT
+void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const {
+ st->print("TA");
+}
+#endif
+
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ emit_break(cbuf);
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+ return MachNode::size(ra_);
+}
+
+
+void emit_nop(CodeBuffer &cbuf) {
+ MacroAssembler _masm(&cbuf);
+ __ nop();
+}
+
+
+void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
+ int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset(); // offset the node reports; verified against the emitted code at the end
+ int call_site_offset = cbuf.insts()->mark_off(); // mark offset of the instruction section, sampled before anything is emitted here
+ MacroAssembler _masm(&cbuf);
+ __ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
+ address target = (address)m->method();
+ assert(n->as_MachCall()->entry_point() == target, "sanity");
+ assert(maybe_far_call(n) == !__ reachable_from_cache(target), "sanity");
+ assert(cache_reachable() == __ cache_fully_reachable(), "sanity");
+
+ assert(target != NULL, "need real address");
+
+ int ret_addr_offset = -1;
+ if (rspec.type() == relocInfo::runtime_call_type) { // runtime calls use a plain call; the return address is the offset right after it
+ __ call(target, rspec);
+ ret_addr_offset = __ offset();
+ } else {
+ // every other relocation type uses a patchable call, which scratches Rtemp and returns the return-address offset
+ ret_addr_offset = __ patchable_call(target, rspec, true);
+ }
+ assert(ret_addr_offset - call_site_offset == ret_addr_offset0, "fix ret_addr_offset()");
+}
+
+//=============================================================================
+// REQUIRED FUNCTIONALITY for encoding
+void emit_lo(CodeBuffer &cbuf, int val) { }
+void emit_hi(CodeBuffer &cbuf, int val) { }
+
+
+//=============================================================================
+const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask();
+
+int Compile::ConstantTable::calculate_table_base_offset() const {
+#ifdef AARCH64
+ return 0;
+#else
+ int offset = -(size() / 2); // start from minus half the table size
+ // flds, fldd: 8-bit offset multiplied by 4: +/- 1024
+ // ldr, ldrb : 12-bit offset: +/- 4096
+ if (!Assembler::is_simm10(offset)) { // offset fails the is_simm10 check
+ offset = Assembler::min_simm10(); // fall back to the value of Assembler::min_simm10()
+ }
+ return offset;
+#endif
+}
+
+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
+void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
+ ShouldNotReachHere();
+}
+
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+ Compile* C = ra_->C;
+ Compile::ConstantTable& constant_table = C->constant_table();
+ MacroAssembler _masm(&cbuf);
+
+ Register r = as_Register(ra_->get_encode(this)); // register encoding the allocator reports for this node
+ CodeSection* consts_section = __ code()->consts();
+ int consts_size = consts_section->align_at_start(consts_section->size());
+ assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size);
+
+ // Materialize the constant table base: load its address into r, tagged with an internal-word relocation.
+ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); // consts section start minus the table base offset
+ RelocationHolder rspec = internal_word_Relocation::spec(baseaddr);
+ __ mov_address(r, baseaddr, rspec);
+}
+
+uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
+#ifdef AARCH64
+ return 5 * Assembler::InstructionSize;
+#else
+ return 8;
+#endif
+}
+
+#ifndef PRODUCT
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+ char reg[128];
+ ra_->dump_register(this, reg);
+ st->print("MOV_SLOW &constanttable,%s\t! constant table base", reg);
+}
+#endif
+
+#ifndef PRODUCT
+void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+ Compile* C = ra_->C;
+
+ for (int i = 0; i < OptoPrologueNops; i++) {
+ st->print_cr("NOP"); st->print("\t");
+ }
+#ifdef AARCH64
+ if (OptoPrologueNops <= 0) {
+ st->print_cr("NOP\t! required for safe patching");
+ st->print("\t");
+ }
+#endif
+
+ size_t framesize = C->frame_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ int bangsize = C->bang_size_in_bytes();
+ // Remove two words for return addr and fp (they are printed as a separate PUSH below),
+ framesize -= 2*wordSize;
+ bangsize -= 2*wordSize;
+
+ // Calls to C2R adapters often do not accept exceptional returns.
+ // We require that their callers must bang for them. But be careful, because
+ // some VM calls (such as call site linkage) can use several kilobytes of
+ // stack. But the stack safety zone should account for that.
+ // See bugs 4446381, 4468289, 4497237.
+ if (C->need_stack_bang(bangsize)) {
+ st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t");
+ }
+ st->print_cr("PUSH R_FP|R_LR_LR"); st->print("\t"); // NOTE(review): "R_LR_LR" looks like a typo for "R_LR" -- confirm before changing the printed text
+ if (framesize != 0) {
+ st->print ("SUB R_SP, R_SP, " SIZE_FORMAT,framesize);
+ }
+}
+#endif
+
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ Compile* C = ra_->C;
+ MacroAssembler _masm(&cbuf);
+
+ for (int i = 0; i < OptoPrologueNops; i++) {
+ __ nop();
+ }
+#ifdef AARCH64
+ if (OptoPrologueNops <= 0) {
+ __ nop(); // required for safe patching by patch_verified_entry()
+ }
+#endif
+
+ size_t framesize = C->frame_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ int bangsize = C->bang_size_in_bytes();
+ // Remove two words for return addr and fp (pushed separately by raw_push below),
+ framesize -= 2*wordSize;
+ bangsize -= 2*wordSize;
+
+ // Calls to C2R adapters often do not accept exceptional returns.
+ // We require that their callers must bang for them. But be careful, because
+ // some VM calls (such as call site linkage) can use several kilobytes of
+ // stack. But the stack safety zone should account for that.
+ // See bugs 4446381, 4468289, 4497237.
+ if (C->need_stack_bang(bangsize)) {
+ __ arm_stack_overflow_check(bangsize, Rtemp);
+ }
+
+ __ raw_push(FP, LR);
+ if (framesize != 0) {
+ __ sub_slow(SP, SP, framesize); // allocate the rest of the frame
+ }
+
+ // Record frame completion only for the buffer named "Compile::Fill_buffer": an offset from a scratch buffer is not valid
+ if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) {
+ C->set_frame_complete( __ offset() );
+ }
+
+ if (C->has_mach_constant_base_node()) {
+ // NOTE: We set the table base offset here because users might be
+ // emitted before MachConstantBaseNode.
+ Compile::ConstantTable& constant_table = C->constant_table();
+ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+ }
+}
+
+uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
+ return MachNode::size(ra_);
+}
+
+int MachPrologNode::reloc() const {
+ return 10; // a large enough number
+}
+
+//=============================================================================
+#ifndef PRODUCT
+void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+ Compile* C = ra_->C;
+
+ size_t framesize = C->frame_size_in_bytes();
+ framesize -= 2*wordSize; // two words are printed as a separate POP below
+
+ if (framesize != 0) {
+ st->print("ADD R_SP, R_SP, " SIZE_FORMAT "\n\t",framesize);
+ }
+ st->print("POP R_FP|R_LR_LR"); // NOTE(review): "R_LR_LR" looks like a typo for "R_LR" -- confirm before changing the printed text
+
+ if (do_polling() && ra_->C->is_method_compilation()) { // a safepoint poll is printed only under these two conditions
+ st->print("\n\t");
+#ifdef AARCH64
+ if (MacroAssembler::page_reachable_from_cache(os::get_polling_page())) {
+ st->print("ADRP Rtemp, #PollAddr\t! Load Polling address\n\t");
+ st->print("LDR ZR,[Rtemp + #PollAddr & 0xfff]\t!Poll for Safepointing");
+ } else {
+ st->print("mov_slow Rtemp, #PollAddr\t! Load Polling address\n\t");
+ st->print("LDR ZR,[Rtemp]\t!Poll for Safepointing");
+ }
+#else
+ st->print("MOV Rtemp, #PollAddr\t! Load Polling address\n\t");
+ st->print("LDR Rtemp,[Rtemp]\t!Poll for Safepointing");
+#endif
+ }
+}
+#endif
+
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ MacroAssembler _masm(&cbuf);
+ Compile* C = ra_->C;
+
+ size_t framesize = C->frame_size_in_bytes();
+ framesize -= 2*wordSize; // two words are restored by raw_pop(FP, LR) below
+ if (framesize != 0) {
+ __ add_slow(SP, SP, framesize);
+ }
+ __ raw_pop(FP, LR);
+
+ // If this does safepoint polling, then do it here
+ if (do_polling() && ra_->C->is_method_compilation()) {
+#ifdef AARCH64
+ if (false && MacroAssembler::page_reachable_from_cache(os::get_polling_page())) { // branch disabled by the constant false; its body is only the commented-out sketch below
+/* FIXME: TODO
+ __ relocate(relocInfo::xxx);
+ __ adrp(Rtemp, (intptr_t)os::get_polling_page());
+ __ relocate(relocInfo::poll_return_type);
+ int offset = os::get_polling_page() & 0xfff;
+ __ ldr(ZR, Address(Rtemp + offset));
+*/
+ } else {
+ __ mov_address(Rtemp, (address)os::get_polling_page(), symbolic_Relocation::polling_page_reference);
+ __ relocate(relocInfo::poll_return_type);
+ __ ldr(ZR, Address(Rtemp)); // poll: load from the polling page into ZR
+ }
+#else
+ // the address load here is usually one or two instructions (this comment originally named mov_slow; the code calls mov_address)
+ __ mov_address(Rtemp, (address)os::get_polling_page(), symbolic_Relocation::polling_page_reference);
+ __ relocate(relocInfo::poll_return_type);
+ __ ldr(Rtemp, Address(Rtemp)); // poll: load from the polling page, overwriting Rtemp
+#endif
+ }
+}
+
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+#ifdef AARCH64
+ // allow for added alignment nop from mov_address bind_literal
+ return MachNode::size(ra_) + 1 * Assembler::InstructionSize;
+#else
+ return MachNode::size(ra_);
+#endif
+}
+
+int MachEpilogNode::reloc() const {
+ return 16; // a large enough number
+}
+
+const Pipeline * MachEpilogNode::pipeline() const {
+ return MachNode::pipeline_class();
+}
+
+int MachEpilogNode::safepoint_offset() const {
+ assert( do_polling(), "no return for this epilog node");
+ // return MacroAssembler::size_of_sethi(os::get_polling_page());
+ Unimplemented();
+ return 0;
+}
+
+//=============================================================================
+
+// Register classes used by the spill-copy code: rc_bad for an invalid
+// register, rc_stack for a stack slot, rc_int for a general register and
+// rc_float for a float register.
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+
+// Classifies `reg` into one of the RC values above.
+static enum RC rc_class( OptoReg::Name reg ) {
+  enum RC result = rc_bad;
+  if (OptoReg::is_valid(reg)) {
+    if (OptoReg::is_stack(reg)) {
+      result = rc_stack;
+    } else {
+      VMReg vm_reg = OptoReg::as_VMReg(reg);
+      if (vm_reg->is_Register()) {
+        result = rc_int;
+      } else {
+        // Anything that is neither a stack slot nor a general register
+        // must be a float register.
+        assert(vm_reg->is_FloatRegister(), "must be");
+        result = rc_float;
+      }
+    }
+  }
+  return result;
+}
+
+// Returns true when the register pair (src_first, src_second) and `offset`
+// pass the checks below. On AARCH64 only the offset is checked, through
+// is_memoryHD(). Otherwise the pair's encodings must also be an even
+// number followed by its successor; any other pair is reported on tty as
+// a bad LDRD/STRD and rejected.
+static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_second, int offset) {
+#ifdef AARCH64
+  return is_memoryHD(offset);
+#else
+  const int lo_enc = Matcher::_regEncode[src_first];
+  const int hi_enc = Matcher::_regEncode[src_second];
+  const bool even_consecutive_pair = ((lo_enc & 1) == 0) && (lo_enc + 1 == hi_enc);
+  if (!even_consecutive_pair) {
+    tty->print_cr("CAUGHT BAD LDRD/STRD");
+    return false;
+  }
+  return is_memoryHD(offset);
+#endif
+}
+
+uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, // non-NULL: emit instructions; NULL: listing or size query only
+ PhaseRegAlloc *ra_, // supplies the assigned registers and stack offsets
+ bool do_size, // true: size query, no listing text is printed
+ outputStream* st ) const { // listing stream, used only when cbuf is NULL and do_size is false
+ // Shared worker behind format(), emit() and size(); returns the byte count of the move(s) handled, 0 on bailout. First, get registers to move
+ OptoReg::Name src_second = ra_->get_reg_second(in(1));
+ OptoReg::Name src_first = ra_->get_reg_first(in(1));
+ OptoReg::Name dst_second = ra_->get_reg_second(this );
+ OptoReg::Name dst_first = ra_->get_reg_first(this );
+
+ enum RC src_second_rc = rc_class(src_second);
+ enum RC src_first_rc = rc_class(src_first);
+ enum RC dst_second_rc = rc_class(dst_second);
+ enum RC dst_first_rc = rc_class(dst_first);
+
+ assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
+
+ // Generate spill code!
+ int size = 0; // bytes accounted for so far
+
+ if (src_first == dst_first && src_second == dst_second)
+ return size; // Self copy, no move
+
+#ifdef TODO
+ if (bottom_type()->isa_vect() != NULL) {
+ }
+#endif
+
+ // Shared code does not expect instruction set capability based bailouts here.
+ // Handle offset unreachable bailout with minimal change in shared code.
+ // Bailout only for real instruction emit.
+ // This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case )
+
+ MacroAssembler _masm(cbuf);
+
+ // --------------------------------------
+ // Check for mem-mem move. Load into unused float registers and fall into
+ // the float-store case (the source is re-labelled as the float copy register below).
+ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
+ int offset = ra_->reg2offset(src_first);
+ if (cbuf && !is_memoryfp(offset)) {
+ ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+ return 0;
+ } else {
+ if (src_second_rc != rc_bad) {
+ assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
+ src_first = OptoReg::Name(R_mem_copy_lo_num);
+ src_second = OptoReg::Name(R_mem_copy_hi_num);
+ src_first_rc = rc_float;
+ src_second_rc = rc_float;
+ if (cbuf) {
+ __ ldr_double(Rmemcopy, Address(SP, offset));
+ } else if (!do_size) {
+ st->print(LDR_DOUBLE " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
+ }
+ } else {
+ src_first = OptoReg::Name(R_mem_copy_lo_num);
+ src_first_rc = rc_float;
+ if (cbuf) {
+ __ ldr_float(Rmemcopy, Address(SP, offset));
+ } else if (!do_size) {
+ st->print(LDR_FLOAT " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
+ }
+ }
+ size += 4;
+ }
+ }
+
+ if (src_second_rc == rc_stack && dst_second_rc == rc_stack) {
+ Unimplemented(); // second halves both on the stack without having been handled as a pair above
+ }
+
+ // --------------------------------------
+ // Check for integer reg-reg copy
+ if (src_first_rc == rc_int && dst_first_rc == rc_int) {
+ // Normal reg-reg copy
+ assert( src_second != dst_first, "smashed second before evacuating it" );
+ if (cbuf) {
+ __ mov(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ st->print("MOV R_%s, R_%s\t# spill",
+ Matcher::regName[dst_first],
+ Matcher::regName[src_first]);
+#endif
+ }
+#ifdef AARCH64
+ if (src_first+1 == src_second && dst_first+1 == dst_second) {
+ return size + 4; // adjacent pair on both sides: nothing further to move after the MOV above
+ }
+#endif
+ size += 4;
+ }
+
+ // Check for integer store
+ if (src_first_rc == rc_int && dst_first_rc == rc_stack) {
+ int offset = ra_->reg2offset(dst_first);
+ if (cbuf && !is_memoryI(offset)) {
+ ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+ return 0;
+ } else {
+ if (src_second_rc != rc_bad && is_iRegLd_memhd(src_first, src_second, offset)) { // pair that qualifies for a single 64-bit store
+ assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
+ if (cbuf) {
+ __ str_64(reg_to_register_object(Matcher::_regEncode[src_first]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(STR_64 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset);
+#endif
+ }
+ return size + 4;
+ } else {
+ if (cbuf) {
+ __ str_32(reg_to_register_object(Matcher::_regEncode[src_first]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(STR_32 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset);
+#endif
+ }
+ }
+ }
+ size += 4;
+ }
+
+ // Check for integer load
+ if (dst_first_rc == rc_int && src_first_rc == rc_stack) {
+ int offset = ra_->reg2offset(src_first);
+ if (cbuf && !is_memoryI(offset)) {
+ ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+ return 0;
+ } else {
+ if (src_second_rc != rc_bad && is_iRegLd_memhd(dst_first, dst_second, offset)) { // pair that qualifies for a single 64-bit load
+ assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
+ if (cbuf) {
+ __ ldr_64(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(LDR_64 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset);
+#endif
+ }
+ return size + 4;
+ } else {
+ if (cbuf) {
+ __ ldr_32(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(LDR_32 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset);
+#endif
+ }
+ }
+ }
+ size += 4;
+ }
+
+ // Check for float reg-reg copy
+ if (src_first_rc == rc_float && dst_first_rc == rc_float) {
+ if (src_second_rc != rc_bad) {
+ assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
+ if (cbuf) {
+ __ mov_double(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ st->print(MOV_DOUBLE " R_%s, R_%s\t# spill",
+ Matcher::regName[dst_first],
+ Matcher::regName[src_first]);
+#endif
+ }
+ return 4; // NOTE(review): returns a literal 4 rather than size + 4 as the other cases do
+ }
+ if (cbuf) {
+ __ mov_float(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ st->print(MOV_FLOAT " R_%s, R_%s\t# spill",
+ Matcher::regName[dst_first],
+ Matcher::regName[src_first]);
+#endif
+ }
+ size = 4; // NOTE(review): assigns rather than accumulates, unlike the other cases
+ }
+
+ // Check for float store
+ if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
+ int offset = ra_->reg2offset(dst_first);
+ if (cbuf && !is_memoryfp(offset)) {
+ ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+ return 0;
+ } else {
+ // Further check for aligned-adjacent pair, so we can use a double store
+ if (src_second_rc != rc_bad) {
+ assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
+ if (cbuf) {
+ __ str_double(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(STR_DOUBLE " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
+#endif
+ }
+ return size + 4;
+ } else {
+ if (cbuf) {
+ __ str_float(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(STR_FLOAT " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
+#endif
+ }
+ }
+ }
+ size += 4;
+ }
+
+ // Check for float load
+ if (dst_first_rc == rc_float && src_first_rc == rc_stack) {
+ int offset = ra_->reg2offset(src_first);
+ if (cbuf && !is_memoryfp(offset)) {
+ ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+ return 0;
+ } else {
+ // Further check for aligned-adjacent pair, so we can use a double load
+ if (src_second_rc != rc_bad) {
+ assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
+ if (cbuf) {
+ __ ldr_double(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(LDR_DOUBLE " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset);
+#endif
+ }
+ return size + 4;
+ } else {
+ if (cbuf) {
+ __ ldr_float(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(LDR_FLOAT " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset);
+#endif
+ }
+ }
+ }
+ size += 4;
+ }
+
+ // check for int reg -> float reg move
+ if (src_first_rc == rc_int && dst_first_rc == rc_float) {
+ // Further check for aligned-adjacent pair, so we can use a single instruction
+ if (src_second_rc != rc_bad) {
+ assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
+ assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
+ assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported");
+ if (cbuf) {
+#ifdef AARCH64
+ __ fmov_dx(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
+#else
+ __ fmdrr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second]));
+#endif
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+#ifdef AARCH64
+ st->print("FMOV_DX R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
+#else
+ st->print("FMDRR R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first), OptoReg::regname(src_second));
+#endif
+#endif
+ }
+ return size + 4;
+ } else {
+ if (cbuf) {
+ __ fmsr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(FMSR " R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
+#endif
+ }
+ size += 4;
+ }
+ }
+
+ // check for float reg -> int reg move
+ if (src_first_rc == rc_float && dst_first_rc == rc_int) {
+ // Further check for aligned-adjacent pair, so we can use a single instruction
+ if (src_second_rc != rc_bad) {
+ assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
+ assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
+ assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported");
+ if (cbuf) {
+#ifdef AARCH64
+ __ fmov_xd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#else
+ __ fmrrd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#endif
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+#ifdef AARCH64
+ st->print("FMOV_XD R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
+#else
+ st->print("FMRRD R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(dst_second), OptoReg::regname(src_first));
+#endif
+#endif
+ }
+ return size + 4;
+ } else {
+ if (cbuf) {
+ __ fmrs(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print(FMRS " R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
+#endif
+ }
+ size += 4;
+ }
+ }
+
+ // --------------------------------------------------------------------
+ // Check for hi bits still needing moving. Only happens for misaligned
+ // arguments to native calls.
+ if (src_second == dst_second)
+ return size; // Self copy; no move
+ assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
+
+#ifndef AARCH64
+ // Check for integer reg-reg copy. Hi bits are stuck up in the top
+ // 32-bits of a 64-bit register, but are needed in low bits of another
+ // register (else it's a hi-bits-to-hi-bits copy which should have
+ // happened already as part of a 64-bit move)
+ if (src_second_rc == rc_int && dst_second_rc == rc_int) {
+ if (cbuf) {
+ __ mov(reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_register_object(Matcher::_regEncode[src_second]));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print("MOV R_%s, R_%s\t# spill high",
+ Matcher::regName[dst_second],
+ Matcher::regName[src_second]);
+#endif
+ }
+ return size+4;
+ }
+
+ // Check for high word integer store
+ if (src_second_rc == rc_int && dst_second_rc == rc_stack) {
+ int offset = ra_->reg2offset(dst_second);
+
+ if (cbuf && !is_memoryP(offset)) {
+ ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+ return 0;
+ } else {
+ if (cbuf) {
+ __ str(reg_to_register_object(Matcher::_regEncode[src_second]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print("STR R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_second), offset);
+#endif
+ }
+ }
+ return size + 4;
+ }
+
+ // Check for high word integer load
+ if (dst_second_rc == rc_int && src_second_rc == rc_stack) {
+ int offset = ra_->reg2offset(src_second);
+ if (cbuf && !is_memoryP(offset)) {
+ ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+ return 0;
+ } else {
+ if (cbuf) {
+ __ ldr(reg_to_register_object(Matcher::_regEncode[dst_second]), Address(SP, offset));
+#ifndef PRODUCT
+ } else if (!do_size) {
+ if (size != 0) st->print("\n\t");
+ st->print("LDR R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_second), offset);
+#endif
+ }
+ }
+ return size + 4;
+ }
+#endif
+
+ Unimplemented(); // any other second-half move is not supported
+ return 0; // Mute compiler
+}
+
+#ifndef PRODUCT
+// Non-product listing: run the shared spill-copy worker without a code buffer
+// so that it prints the move(s) to 'st' instead of emitting them.
+void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  CodeBuffer* const no_cbuf = NULL;
+  const bool size_only = false;
+  implementation(no_cbuf, ra_, size_only, st);
+}
+#endif
+
+// Emit the spill copy into 'cbuf'; no listing stream is supplied.
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  outputStream* const no_stream = NULL;
+  const bool size_only = false;
+  implementation(&cbuf, ra_, size_only, no_stream);
+}
+
+// Size of the spill copy in bytes, obtained from a dry run of the shared
+// worker (no code buffer, no listing stream, do_size set).
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+  const uint byte_count = implementation(NULL, ra_, true, NULL);
+  return byte_count;
+}
+
+//=============================================================================
+#ifndef PRODUCT
+// Print the padding as one NOP line annotated with its length in bytes.
+void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
+  const int pad_bytes = 4 * _count;
+  st->print("NOP \t# %d bytes pad for loops and calls", pad_bytes);
+}
+#endif
+
+// Emit _count consecutive NOP instructions.
+void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
+  MacroAssembler _masm(&cbuf);
+  int remaining = _count;
+  while (remaining > 0) {
+    __ nop();
+    remaining--;
+  }
+}
+
+// Four bytes are accounted for each NOP.
+uint MachNopNode::size(PhaseRegAlloc *ra_) const {
+  const int bytes_per_nop = 4;
+  return bytes_per_nop * _count;
+}
+
+
+//=============================================================================
+#ifndef PRODUCT
+// Print the box address computation: destination register = SP + offset.
+void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  const int dst_reg = ra_->get_reg_first(this);
+  st->print("ADD %s,R_SP+#%d", Matcher::regName[dst_reg], box_offset);
+}
+#endif
+
+// Compute SP + offset into the node's destination register.  An offset that
+// is_aimm() accepts is added directly; otherwise it is first built in the
+// destination register with mov_slow() and SP is added to it.
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  Register dst = reg_to_register_object(ra_->get_encode(this));
+
+  if (!is_aimm(box_offset)) {
+    __ mov_slow(dst, box_offset);
+#ifdef AARCH64
+    __ add(dst, SP, dst, ex_lsl);
+#else
+    __ add(dst, SP, dst);
+#endif
+    return;
+  }
+  __ add(dst, SP, box_offset);
+}
+
+// BoxLockNode is not a MachNode, so MachNode::size(ra_) cannot be used here;
+// the size is measured with a scratch emit instead.
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  assert(ra_ == C->regalloc(), "sanity");
+  return C->scratch_emit_size(this);
+}
+
+//=============================================================================
+#ifndef PRODUCT
+// Name printed for the temporary register in the listing below.
+#ifndef AARCH64
+#define R_RTEMP "R_R12"
+#else
+#define R_RTEMP "R_R16"
+#endif
+// Listing of the unverified entry point: load the receiver's klass, compare
+// it with the inline cache register and branch to the IC miss stub on mismatch.
+void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  st->print_cr("\nUEP:");
+  if (!UseCompressedClassPointers) {
+    st->print_cr("\tLDR " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check");
+  } else {
+    st->print_cr("\tLDR_w " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check");
+    st->print_cr("\tdecode_klass " R_RTEMP);
+  }
+  st->print_cr("\tCMP " R_RTEMP ",R_R8" );
+  st->print ("\tB.NE SharedRuntime::handle_ic_miss_stub");
+}
+#endif
+
+// Emit the inline cache check: load the klass of the receiver (R0) into Rtemp,
+// compare it with the inline cache register and jump to the IC miss stub when
+// they differ.
+void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  Register receiver = R0;
+  Register expected_klass = reg_to_register_object(Matcher::inline_cache_reg_encode());
+  assert(expected_klass == Ricklass, "should be");
+
+  __ load_klass(Rtemp, receiver);
+  __ cmp(Rtemp, expected_klass);
+#ifndef AARCH64
+  // Conditional jump taken only on "not equal".
+  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
+#else
+  // Branch around the unconditional jump to the miss stub when the klasses match.
+  Label match;
+  __ b(match, eq);
+  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
+  __ bind(match);
+#endif
+}
+
+// No port-specific sizing: defer to the generic MachNode computation.
+uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
+  const uint uep_bytes = MachNode::size(ra_);
+  return uep_bytes;
+}
+
+
+//=============================================================================
+
+// Emit the exception handler stub: a jump to the OptoRuntime exception blob.
+// Returns the assembler offset at which the handler starts, or 0 (after
+// recording a failure) when the stub could not be started.
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
+  MacroAssembler _masm(&cbuf);
+
+  if (__ start_a_stub(size_exception_handler()) == NULL) {
+    ciEnv::current()->record_failure("CodeCache is full");
+    return 0;  // CodeBuffer::expand failed
+  }
+
+  const int handler_offset = __ offset();
+
+  // OK to trash LR, because exception blob will kill it
+  __ jump(OptoRuntime::exception_blob()->entry_point(), relocInfo::runtime_call_type, LR_tmp);
+
+  assert(__ offset() - handler_offset <= (int) size_exception_handler(), "overflow");
+
+  __ end_a_stub();
+  return handler_offset;
+}
+
+// Emit the deoptimization handler stub: store the handler's own pc on the
+// stack and jump to the deopt blob's unpack entry.  Returns the assembler
+// offset at which the handler starts, or 0 (after recording a failure) when
+// the stub could not be started.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+  // Can't use any of the current frame's registers as we may have deopted
+  // at a poll and everything can be live.
+  MacroAssembler _masm(&cbuf);
+
+  if (__ start_a_stub(size_deopt_handler()) == NULL) {
+    ciEnv::current()->record_failure("CodeCache is full");
+    return 0;  // CodeBuffer::expand failed
+  }
+
+  const int handler_offset = __ offset();
+  address deopt_pc = __ pc();
+
+#ifndef AARCH64
+  __ sub(SP, SP, wordSize);                    // make room for saved PC
+  __ push(LR);                                 // save LR that may be live when we get here
+  __ mov_relative_address(LR, deopt_pc);
+  __ str(LR, Address(SP, wordSize));           // save deopt PC
+  __ pop(LR);                                  // restore LR
+  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg);
+#else
+  // See LR saved by caller in sharedRuntime_arm.cpp
+  // see also hse1 ws
+  // see also LIR_Assembler::emit_deopt_handler
+
+  __ raw_push(LR, LR);                         // preserve LR in both slots
+  __ mov_relative_address(LR, deopt_pc);
+  __ str(LR, Address(SP, 1 * wordSize));       // save deopt PC
+  // OK to kill LR, because deopt blob will restore it from SP[0]
+  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, LR_tmp);
+#endif
+
+  assert(__ offset() - handler_offset <= (int) size_deopt_handler(), "overflow");
+
+  __ end_a_stub();
+  return handler_offset;
+}
+
+// Report whether the ideal opcode can be matched on the running CPU.  An
+// opcode without a match rule is never supported; population count depends on
+// UsePopCountInstruction; vector opcodes depend on the SIMD/VFP capabilities
+// reported by VM_Version.  Everything else with a match rule is supported.
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode)) {
+    return false;
+  }
+
+  bool supported = true;  // Per default match rules are supported.
+
+  switch (opcode) {
+    case Op_PopCountI:
+    case Op_PopCountL:
+      if (!UsePopCountInstruction) {
+        supported = false;
+      }
+      break;
+
+    // Integer vector arithmetic, shifts and logic.
+    case Op_LShiftCntV:
+    case Op_RShiftCntV:
+    case Op_AddVB:
+    case Op_AddVS:
+    case Op_AddVI:
+    case Op_AddVL:
+    case Op_SubVB:
+    case Op_SubVS:
+    case Op_SubVI:
+    case Op_SubVL:
+    case Op_MulVS:
+    case Op_MulVI:
+    case Op_LShiftVB:
+    case Op_LShiftVS:
+    case Op_LShiftVI:
+    case Op_LShiftVL:
+    case Op_RShiftVB:
+    case Op_RShiftVS:
+    case Op_RShiftVI:
+    case Op_RShiftVL:
+    case Op_URShiftVB:
+    case Op_URShiftVS:
+    case Op_URShiftVI:
+    case Op_URShiftVL:
+    case Op_AndV:
+    case Op_OrV:
+    case Op_XorV:
+      supported = VM_Version::has_simd();
+      break;
+
+    // Vector load/store and single-precision add/sub/mul.
+    case Op_LoadVector:
+    case Op_StoreVector:
+    case Op_AddVF:
+    case Op_SubVF:
+    case Op_MulVF:
+#ifndef AARCH64
+      supported = VM_Version::has_vfp() || VM_Version::has_simd();
+#else
+      supported = VM_Version::has_simd();
+#endif
+      break;
+
+    // Double-precision vector arithmetic and vector division.
+    case Op_AddVD:
+    case Op_SubVD:
+    case Op_MulVD:
+    case Op_DivVF:
+    case Op_DivVD:
+#ifndef AARCH64
+      supported = VM_Version::has_vfp();
+#else
+      supported = VM_Version::has_simd();
+#endif
+      break;
+  }
+
+  return supported;
+}
+
+// Vector-length aware variant of match_rule_supported().  'vlen' is not
+// consulted yet; the answer is the same as for the plain opcode query.
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  // Add rules here.
+  return match_rule_supported(opcode);
+}
+
+// This port reports no predicated vectors.
+const bool Matcher::has_predicated_vectors(void) {
+  const bool predicated = false;
+  return predicated;
+}
+
+// The float register pressure threshold is passed through unchanged.
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  const int threshold = default_pressure_threshold;
+  return threshold;
+}
+
+// The FP registers are in the second chunk, so their offset is the register
+// number minus 32.
+int Matcher::regnum_to_fpu_offset(int regnum) {
+  const int first_fp_regnum = 32;
+  return regnum - first_fp_regnum;
+}
+
+// Vector width in bytes: MaxVectorSize, whatever the element type.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  const int width = MaxVectorSize;
+  return width;
+}
+
+// Ideal register class for a vector of the given size in bytes:
+// 8 -> Op_VecD, 16 -> Op_VecX.  Any other size is a fatal error.
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize >= size, "");
+  if (size == 8) {
+    return Op_VecD;
+  }
+  if (size == 16) {
+    return Op_VecX;
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
+// A vector shift count uses the same ideal register class as a vector of
+// that size.
+const int Matcher::vector_shift_count_ideal_reg(int size) {
+  const int ideal_reg = vector_ideal_reg(size);
+  return ideal_reg;
+}
+
+// Upper limit on the number of elements loaded into a vector: the vector
+// width in bytes divided by the element size.
+const int Matcher::max_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  const int width_in_bytes = vector_width_in_bytes(bt);
+  return width_in_bytes / type2aelembytes(bt);
+}
+
+// Lower limit on the number of elements in a vector: as many elements as fit
+// into 8 bytes.
+const int Matcher::min_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  const int min_width_in_bytes = 8;
+  return min_width_in_bytes / type2aelembytes(bt);
+}
+
+// Misaligned vector loads/stores are not supported on ARM.
+const bool Matcher::misaligned_vectors_ok() {
+  const bool misaligned_ok = false;
+  return misaligned_ok;
+}
+
+// There are no AES intrinsics on ARM, so the original key is never passed.
+const bool Matcher::pass_original_key_for_aes() {
+  const bool pass_key = false;
+  return pass_key;
+}
+
+// ConvL2F is reported as supported on AARCH64 only.
+const bool Matcher::convL2FSupported(void) {
+#ifndef AARCH64
+  return false;
+#else
+  return true;
+#endif
+}
+
+// Can a short branch variant be used for this branch offset?
+//
+// NOTE: A platform that does not provide any short branch variants should
+// return false for offset 0.  This port always answers false.
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // The offset passed in is relative to the address of the branch, whereas
+  // an ARM branch displacement is relative to the address of the branch + 8.
+  // A real check would therefore be:
+  //
+  //   offset -= 8;
+  //   return (Assembler::is_simm24(offset));
+  const bool short_branch = false;
+  return short_branch;
+}
+
+// Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
+// Only for the value zero on AARCH64; never on 32-bit ARM.
+const bool Matcher::isSimpleConstant64(jlong value) {
+#ifndef AARCH64
+  return false;
+#else
+  return (0 == value);
+#endif
+}
+
+// No scaling for the count parameter of the ClearArray node: it is given in bytes.
+const bool Matcher::init_array_count_is_in_bytes = true;
+
+// Cost of a conditional move of a long.
+#ifndef AARCH64
+// 32-bit ARM needs 2 CMOV's for longs.
+const int Matcher::long_cmove_cost() { return 2; }
+#else
+const int Matcher::long_cmove_cost() { return 1; }
+#endif
+
+// Cost of a conditional move of a float/double.
+#ifndef AARCH64
+// CMOVF/CMOVD are expensive on ARM.
+const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
+#else
+const int Matcher::float_cmove_cost() { return 1; }
+#endif
+
+// Does the CPU require late expand (see block.cpp for description of late expand)? Not for this port.
+const bool Matcher::require_postalloc_expand = false;
+
+// Must the count passed to shift instructions be masked explicitly, or does
+// the cpu only look at the lower 5/6 bits anyway?
+// Masking is requested on 32-bit ARM and not on AARCH64.
+// FIXME: does this handle vector shifts as well?
+#ifndef AARCH64
+const bool Matcher::need_masked_shift_count = true;
+#else
+const bool Matcher::need_masked_shift_count = false;
+#endif
+
+const bool Matcher::convi2l_type_required = true; // NOTE(review): presumably tells shared code to keep the type on ConvI2L nodes - confirm against the Matcher declaration
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions, or compute them
+// into registers?  This port delegates the decision to the shared
+// base-plus-offset helper.
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  const bool cloned = clone_base_plus_offset_address(m, mstack, address_visited);
+  return cloned;
+}
+
+// Addresses are left as they are on this port.
+void Compile::reshape_address(AddPNode* addp) {
+  // intentionally empty
+}
+
+// Complex addressing is not used for narrow oops.  Must not be called on a
+// non-LP64 build, and only makes sense with compressed oops enabled.
+bool Matcher::narrow_oop_use_complex_address() {
+  NOT_LP64(ShouldNotCallThis());
+  assert(UseCompressedOops, "only for compressed oops code");
+  const bool use_complex = false;
+  return use_complex;
+}
+
+// Complex addressing is not used for narrow klass pointers.  Must not be
+// called on a non-LP64 build, and only makes sense with compressed class
+// pointers enabled.
+bool Matcher::narrow_klass_use_complex_address() {
+  NOT_LP64(ShouldNotCallThis());
+  assert(UseCompressedClassPointers, "only for compressed klass code");
+  const bool use_complex = false;
+  return use_complex;
+}
+
+// Decoding is preferred for constant oops.  Must not be called on a
+// non-LP64 build.
+bool Matcher::const_oop_prefer_decode() {
+  NOT_LP64(ShouldNotCallThis());
+  const bool prefer_decode = true;
+  return prefer_decode;
+}
+
+// Decoding is preferred for constant klass pointers.  Must not be called on
+// a non-LP64 build.
+bool Matcher::const_klass_prefer_decode() {
+  NOT_LP64(ShouldNotCallThis());
+  const bool prefer_decode = true;
+  return prefer_decode;
+}
+
+// Is it better to copy float constants, or load them directly from memory?
+// Intel can load a float constant from a direct address, requiring no
+// extra registers. Most RISCs will have to materialize an address into a
+// register first, so they would do better to copy the constant from stack.
+const bool Matcher::rematerialize_float_constants = false; // float constants are not rematerialized on this port
+
+// If the CPU can load and store mis-aligned doubles directly then no fixup
+// is needed.  Otherwise the double is split into 2 integer pieces and moved
+// piece-by-piece.  This only happens when passing doubles into C code, as
+// the Java calling convention forces doubles to be aligned.
+#ifndef AARCH64
+const bool Matcher::misaligned_doubles_ok = false;
+#else
+// On stack replacement support:
+// We don't need Load[DL]_unaligned support, because interpreter stack
+// has correct alignment
+const bool Matcher::misaligned_doubles_ok = true;
+#endif
+
+// Platform hook for implicit null check fixups; nothing to do on ARM.
+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
+  // intentionally empty
+}
+
+// Advertise here if the CPU requires explicit rounding operations
+// to implement the UseStrictFP mode. This port does not.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+// Are floats converted to double when stored to stack during deoptimization?
+// No: ARM does not handle callee-save floats.
+bool Matcher::float_in_double() {
+  const bool widened = false;
+  return widened;
+}
+
+// Do ints take an entire long register or just half?
+// The answer is keyed off _LP64.  The relevant question is how the int is
+// callee-saved: in _LP64 the whole long is written, and de-opt'ing will have
+// to extract the relevant 32 bits; in not-_LP64 only the low 32 bits are
+// written.
+#ifndef _LP64
+const bool Matcher::int_in_long = false;
+#else
+const bool Matcher::int_in_long = true;
+#endif
+
+// Return whether or not this register is ever used as an argument.  This
+// function is used on startup to build the trampoline stubs in
+// generateOptoStub.  Registers not mentioned will be killed by the VM call
+// in the trampoline, and arguments in those registers will not be available
+// to the callee.
+bool Matcher::can_be_java_arg( int reg ) {
+#ifdef AARCH64
+  const bool is_core_arg = (R_R0_num <= reg) && (reg < R_R8_num);
+  const bool is_fp_arg   = (R_V0_num <= reg) && (reg <= R_V7b_num) && ((reg & 3) < 2);
+#else
+  const bool is_core_arg = (reg == R_R0_num) ||
+                           (reg == R_R1_num) ||
+                           (reg == R_R2_num) ||
+                           (reg == R_R3_num);
+  const bool is_fp_arg   = (R_S0_num <= reg) && (reg <= R_S13_num);
+#endif
+  return is_core_arg || is_fp_arg;
+}
+
+// A register is a spillable argument exactly when it can be a Java argument.
+bool Matcher::is_spillable_arg( int reg ) {
+  const bool spillable = can_be_java_arg(reg);
+  return spillable;
+}
+
+// Assembly is never used for long division by a constant, whatever the divisor.
+bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
+  const bool use_asm = false;
+  return use_asm;
+}
+
+// The four divmod projection masks below must never be requested on this
+// port: each one fails with ShouldNotReachHere() and returns an empty mask
+// only to satisfy the compiler.
+
+// Register for DIVI projection of divmodI
+RegMask Matcher::divI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();  // not reached
+}
+
+// Register for MODI projection of divmodI
+RegMask Matcher::modI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();  // not reached
+}
+
+// Register for DIVL projection of divmodL
+RegMask Matcher::divL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();  // not reached
+}
+
+// Register for MODL projection of divmodL
+RegMask Matcher::modL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();  // not reached
+}
+
+// Register mask used to save SP across method handle invokes: FP_REGP_mask().
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+  return FP_REGP_mask();
+}
+
+// True when the entry point of the ideal call node is not reported as
+// reachable from the code cache by the MacroAssembler.
+bool maybe_far_call(const CallNode *n) {
+  address target = n->as_Call()->entry_point();
+  return !MacroAssembler::_reachable_from_cache(target);
+}
+
+// Same query for a machine call node.
+bool maybe_far_call(const MachCallNode *n) {
+  address target = n->as_MachCall()->entry_point();
+  return !MacroAssembler::_reachable_from_cache(target);
+}
+
+%}
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to output
+// byte streams. Encoding classes are parameterized macros used by
+// Machine Instruction Nodes in order to generate the bit encoding of the
+// instruction. Operands specify their base encoding interface with the
+// interface keyword. Four interfaces are currently supported:
+// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
+// operand to generate a function which returns its register number when
+// queried. CONST_INTER causes an operand to generate a function which
+// returns the value of the constant when queried. MEMORY_INTER causes an
+// operand to generate four functions which return the Base Register, the
+// Index Register, the Scale Value, and the Offset Value of the operand when
+// queried. COND_INTER causes an operand to generate six functions which
+// return the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional instruction.
+//
+// Instructions specify two basic values for encoding. Again, a function
+// is available to check if the constant displacement is an oop. They use the
+// ins_encode keyword to specify their encoding classes (which must be
+// a sequence of enc_class names, and their parameters, specified in
+// the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode. Only the opcode sections which a particular instruction
+// needs for encoding need to be specified.
+encode %{
+ enc_class call_epilog %{
+   // No code is emitted after a call on this port.
+ %}
+
+ enc_class Java_To_Runtime (method meth) %{
+   // Direct CALL into the runtime, tagged with a runtime-call relocation.
+   RelocationHolder rspec = runtime_call_Relocation::spec();
+   emit_call_reloc(cbuf, as_MachCall(), $meth, rspec);
+ %}
+
+ enc_class Java_Static_Call (method meth) %{
+   // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
+   // who we intended to call.
+
+   if (_method) {
+     // Java target: choose the relocation by call kind, emit the call, then
+     // emit the to-interpreter stub that goes with a static call.
+     int method_index = resolved_method_index(cbuf);
+     RelocationHolder rspec;
+     if (_optimized_virtual) {
+       rspec = opt_virtual_call_Relocation::spec(method_index);
+     } else {
+       rspec = static_call_Relocation::spec(method_index);
+     }
+     emit_call_reloc(cbuf, as_MachCall(), $meth, rspec);
+
+     // Emit stubs for static call.
+     address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
+     if (stub == NULL) {
+       ciEnv::current()->record_failure("CodeCache is full");
+       return;
+     }
+   } else {
+     // No method attached: plain runtime call.
+     emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
+   }
+ %}
+
+ enc_class save_last_PC %{
+   // Store the call's return address into the thread's last_Java_pc slot.
+   // The code buffer mark is saved here and put back at the end.
+   address saved_mark = cbuf.insts()->mark();
+   debug_only(int off0 = cbuf.insts_size());
+   MacroAssembler _masm(&cbuf);
+   address return_pc = saved_mark + as_MachCall()->ret_addr_offset();
+   __ adr(LR, return_pc);
+   __ str(LR, Address(Rthread, JavaThread::last_Java_pc_offset()));
+   debug_only(int off1 = cbuf.insts_size());
+   assert(off1 - off0 == 2 * Assembler::InstructionSize, "correct size prediction");
+   // restore mark
+   cbuf.insts()->set_mark(saved_mark);
+ %}
+
+ enc_class preserve_SP %{
+   // Copy SP into Rmh_SP_save.  The code buffer mark is saved here and put
+   // back at the end.
+   address saved_mark = cbuf.insts()->mark();
+   debug_only(int off0 = cbuf.insts_size());
+   MacroAssembler _masm(&cbuf);
+   // FP is preserved across all calls, even compiled calls.
+   // Use it to preserve SP in places where the callee might change the SP.
+   __ mov(Rmh_SP_save, SP);
+   debug_only(int off1 = cbuf.insts_size());
+   assert(off1 - off0 == 4, "correct size prediction");
+   // restore mark
+   cbuf.insts()->set_mark(saved_mark);
+ %}
+
+ enc_class restore_SP %{
+   // Copy the value kept in Rmh_SP_save back into SP.
+   MacroAssembler _masm(&cbuf);
+   __ mov(SP, Rmh_SP_save);
+ %}
+
+ enc_class Java_Dynamic_Call (method meth) %{
+   // Inline-cache call: load Universe::non_oop_word() into the inline cache
+   // register, then emit the call under a virtual-call relocation that
+   // refers back to the start of that load.
+   MacroAssembler _masm(&cbuf);
+   Register ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
+   assert(ic_reg == Ricklass, "should be");
+   __ set_inst_mark();
+#ifdef AARCH64
+// TODO: see C1 LIR_Assembler::ic_call()
+   InlinedAddress oop_literal((address)Universe::non_oop_word());
+   int start_offset = __ offset();
+   int fixed_size = mov_oop_size * 4;
+   if (!VM_Version::prefer_moves_over_load_literal()) {
+     __ ldr_literal(ic_reg, oop_literal);
+   } else {
+     // Build the 64-bit value from four 16-bit pieces.
+     uintptr_t non_oop = (uintptr_t)Universe::non_oop_word();
+     __ movz(ic_reg, (non_oop >> 0) & 0xffff, 0);
+     __ movk(ic_reg, (non_oop >> 16) & 0xffff, 16);
+     __ movk(ic_reg, (non_oop >> 32) & 0xffff, 32);
+     __ movk(ic_reg, (non_oop >> 48) & 0xffff, 48);
+   }
+   assert(__ offset() - start_offset == fixed_size, "bad mov_oop size");
+#else
+   // Build the 32-bit value from its low and high halfwords.
+   unsigned int non_oop = (unsigned int)Universe::non_oop_word();
+   __ movw(ic_reg, non_oop & 0xffff);
+   __ movt(ic_reg, non_oop >> 16);
+#endif
+   address virtual_call_oop_addr = __ inst_mark();
+   // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
+   // who we intended to call.
+   int method_index = resolved_method_index(cbuf);
+   __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
+   emit_call_reloc(cbuf, as_MachCall(), $meth, RelocationHolder::none);
+#ifdef AARCH64
+   if (!VM_Version::prefer_moves_over_load_literal()) {
+     // Place the literal after the call and branch over it.
+     Label skip_literal;
+     __ b(skip_literal);
+     int literal_start = __ offset();
+     __ bind_literal(oop_literal);
+     if (__ offset() - literal_start == wordSize) {
+       // no padding, so insert nop for worst-case sizing
+       __ nop();
+     }
+     __ bind(skip_literal);
+   }
+#endif
+ %}
+
+ enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{
+ // FIXME: load from constant table?
+ // Load a constant replicated "count" times with width "width"
+ // (width in bytes; count * width must be 4). The resulting 32-bit pattern
+ // is materialized in $tmp and passed as both source operands of the final fmdrr.
+ int count = $cnt$$constant;
+ int width = $wth$$constant;
+ assert(count*width == 4, "sanity");
+ // Build the pattern in unsigned arithmetic: replicating with left shifts
+ // on a signed int can shift set bits into the sign position.
+ unsigned int bits = (unsigned int)$src$$constant;
+ if (width < 4) {
+ int bit_width = width * 8;
+ bits &= (1u << bit_width) - 1; // mask off sign bits
+ for (int i = 0; i < count - 1; i++) {
+ bits |= (bits << bit_width);
+ }
+ }
+ int val = (int)bits;
+ MacroAssembler _masm(&cbuf);
+
+ if (val == -1) {
+ __ mvn($tmp$$Register, 0); // all ones in one instruction
+ } else if (val == 0) {
+ __ mov($tmp$$Register, 0);
+ } else {
+ __ movw($tmp$$Register, val & 0xffff); // low half
+ __ movt($tmp$$Register, (unsigned int)val >> 16); // high half
+ }
+ __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
+ %}
+
+ enc_class LdReplImmF(immF src, regD dst, iRegI tmp) %{
+ // Replicate float con 2 times and pack into vector (8 bytes) in regD.
+ // The float's 32-bit pattern is materialized in $tmp and passed as both
+ // source operands of the final fmdrr.
+ float fval = $src$$constant;
+ // Use jint_cast (as the immF0 predicate in this file does) rather than the
+ // pointer type-pun *((int*)&fval), which violates the strict-aliasing rule.
+ int val = jint_cast(fval);
+ MacroAssembler _masm(&cbuf);
+
+ if (val == -1) {
+ __ mvn($tmp$$Register, 0); // all ones in one instruction
+ } else if (val == 0) {
+ __ mov($tmp$$Register, 0);
+ } else {
+ __ movw($tmp$$Register, val & 0xffff); // low half
+ __ movt($tmp$$Register, (unsigned int)val >> 16); // high half
+ }
+ __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
+ %}
+
+ enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, iRegI tmp1, iRegI tmp2) %{
+ Label Ldone, Lloop;
+ MacroAssembler _masm(&cbuf);
+
+ Register str1_reg = $str1$$Register;
+ Register str2_reg = $str2$$Register;
+ Register cnt1_reg = $cnt1$$Register; // int
+ Register cnt2_reg = $cnt2$$Register; // int
+ Register tmp1_reg = $tmp1$$Register;
+ Register tmp2_reg = $tmp2$$Register;
+ Register result_reg = $result$$Register;
+
+ assert_different_registers(str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp1_reg, tmp2_reg); // result_reg is not in the list; the code below copes with it aliasing a temp
+
+ // Compute the minimum of the string lengths (kept in cnt1_reg) and the
+ // difference of the string lengths (kept in tmp2_reg)
+
+ // See if the lengths are different, and calculate min in cnt1_reg.
+ // Stash diff in tmp2 in case we need it for a tie-breaker.
+ __ subs_32(tmp2_reg, cnt1_reg, cnt2_reg);
+#ifdef AARCH64
+ Label Lskip;
+ __ _lsl_w(cnt1_reg, cnt1_reg, exact_log2(sizeof(jchar))); // scale the limit
+ __ b(Lskip, mi);
+ __ _lsl_w(cnt1_reg, cnt2_reg, exact_log2(sizeof(jchar))); // scale the limit
+ __ bind(Lskip);
+#else
+ __ mov(cnt1_reg, AsmOperand(cnt1_reg, lsl, exact_log2(sizeof(jchar)))); // scale the limit
+ __ mov(cnt1_reg, AsmOperand(cnt2_reg, lsl, exact_log2(sizeof(jchar))), pl); // scale the limit
+#endif
+
+ // reuse cnt1_reg, cnt2_reg and tmp1_reg under new names
+ // Note: limit_reg holds the minimum string length scaled by sizeof(jchar)
+ Register limit_reg = cnt1_reg;
+ Register chr2_reg = cnt2_reg;
+ Register chr1_reg = tmp1_reg;
+ // str{12} are the base pointers
+
+ // Is the minimum length zero?
+ __ cmp_32(limit_reg, 0);
+ if (result_reg != tmp2_reg) {
+ __ mov(result_reg, tmp2_reg, eq);
+ }
+ __ b(Ldone, eq);
+
+ // Load first characters
+ __ ldrh(chr1_reg, Address(str1_reg, 0));
+ __ ldrh(chr2_reg, Address(str2_reg, 0));
+
+ // Compare first characters
+ __ subs(chr1_reg, chr1_reg, chr2_reg);
+ if (result_reg != chr1_reg) {
+ __ mov(result_reg, chr1_reg, ne);
+ }
+ __ b(Ldone, ne);
+
+ {
+ // Check after comparing first character to see if strings are equivalent
+ // Check if the strings start at same location
+ __ cmp(str1_reg, str2_reg);
+ // Check if the length difference is zero
+ __ cond_cmp(tmp2_reg, 0, eq);
+ __ mov(result_reg, 0, eq); // result is zero
+ __ b(Ldone, eq);
+ // Strings might not be equal
+ }
+
+ __ subs(chr1_reg, limit_reg, 1 * sizeof(jchar)); // chr1_reg = limit - sizeof(jchar); zero when only one character was to be compared
+ if (result_reg != tmp2_reg) {
+ __ mov(result_reg, tmp2_reg, eq);
+ }
+ __ b(Ldone, eq);
+
+ // Shift str1_reg and str2_reg to the end of the arrays, negate limit
+ __ add(str1_reg, str1_reg, limit_reg);
+ __ add(str2_reg, str2_reg, limit_reg);
+ __ neg(limit_reg, chr1_reg); // limit = -(limit-2)
+
+ // Compare the rest of the characters
+ __ bind(Lloop);
+ __ ldrh(chr1_reg, Address(str1_reg, limit_reg));
+ __ ldrh(chr2_reg, Address(str2_reg, limit_reg));
+ __ subs(chr1_reg, chr1_reg, chr2_reg);
+ if (result_reg != chr1_reg) {
+ __ mov(result_reg, chr1_reg, ne);
+ }
+ __ b(Ldone, ne);
+
+ __ adds(limit_reg, limit_reg, sizeof(jchar)); // advance the negative offset; the loop ends when it reaches zero
+ __ b(Lloop, ne);
+
+ // If strings are equal up to min length, return the length difference.
+ if (result_reg != tmp2_reg) {
+ __ mov(result_reg, tmp2_reg);
+ }
+
+ // Otherwise, return the difference between the first mismatched chars.
+ __ bind(Ldone);
+ %}
+
+ enc_class enc_String_Equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2) %{
+ Label Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone, Lequal; // NOTE(review): Lword_loop and Lpost_word are not referenced in this body
+ MacroAssembler _masm(&cbuf);
+
+ Register str1_reg = $str1$$Register;
+ Register str2_reg = $str2$$Register;
+ Register cnt_reg = $cnt$$Register; // int
+ Register tmp1_reg = $tmp1$$Register;
+ Register tmp2_reg = $tmp2$$Register;
+ Register result_reg = $result$$Register;
+
+ assert_different_registers(str1_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, result_reg);
+
+ __ cmp(str1_reg, str2_reg); //same char[] ?
+ __ b(Lequal, eq);
+
+ __ cbz_32(cnt_reg, Lequal); // count == 0
+
+ //rename registers
+ Register limit_reg = cnt_reg;
+ Register chr1_reg = tmp1_reg;
+ Register chr2_reg = tmp2_reg;
+
+ __ logical_shift_left(limit_reg, limit_reg, exact_log2(sizeof(jchar))); // character count -> byte count
+
+ //check for alignment and position the pointers to the ends
+ __ orr(chr1_reg, str1_reg, str2_reg);
+ __ tst(chr1_reg, 0x3);
+
+ // notZero means at least one not 4-byte aligned.
+ // We could optimize the case when both arrays are not aligned
+ // but it is not frequent case and it requires additional checks.
+ __ b(Lchar, ne);
+
+ // Compare char[] arrays aligned to 4 bytes.
+ __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
+ chr1_reg, chr2_reg, Ldone); // NOTE(review): presumably sets result_reg to 0 and branches to Ldone on a mismatch - confirm in the helper
+
+ __ b(Lequal); // equal
+
+ // char by char compare
+ __ bind(Lchar);
+ __ mov(result_reg, 0); // assume not equal; only Lequal overwrites this with 1
+ __ add(str1_reg, limit_reg, str1_reg);
+ __ add(str2_reg, limit_reg, str2_reg);
+ __ neg(limit_reg, limit_reg); //negate count
+
+ // Lchar_loop
+ __ bind(Lchar_loop);
+ __ ldrh(chr1_reg, Address(str1_reg, limit_reg));
+ __ ldrh(chr2_reg, Address(str2_reg, limit_reg));
+ __ cmp(chr1_reg, chr2_reg);
+ __ b(Ldone, ne);
+ __ adds(limit_reg, limit_reg, sizeof(jchar)); // advance the negative offset; the loop ends when it reaches zero
+ __ b(Lchar_loop, ne);
+
+ __ bind(Lequal);
+ __ mov(result_reg, 1); //equal
+
+ __ bind(Ldone);
+ %}
+
+ enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result) %{
+ Label Lvector, Ldone, Lloop, Lequal; // NOTE(review): Lvector and Lloop are not referenced in this body
+ MacroAssembler _masm(&cbuf);
+
+ Register ary1_reg = $ary1$$Register;
+ Register ary2_reg = $ary2$$Register;
+ Register tmp1_reg = $tmp1$$Register;
+ Register tmp2_reg = $tmp2$$Register;
+ Register tmp3_reg = $tmp3$$Register;
+ Register result_reg = $result$$Register;
+
+ assert_different_registers(ary1_reg, ary2_reg, tmp1_reg, tmp2_reg, tmp3_reg, result_reg);
+
+ int length_offset = arrayOopDesc::length_offset_in_bytes();
+ int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+
+ // return true if the same array; return false if exactly one of them is null
+#ifdef AARCH64
+ __ cmp(ary1_reg, ary2_reg);
+ __ b(Lequal, eq);
+
+ __ mov(result_reg, 0); // result stays 0 on every later branch to Ldone
+
+ __ cbz(ary1_reg, Ldone); // not equal
+
+ __ cbz(ary2_reg, Ldone); // not equal
+#else
+ __ teq(ary1_reg, ary2_reg);
+ __ mov(result_reg, 1, eq);
+ __ b(Ldone, eq); // equal
+
+ __ tst(ary1_reg, ary1_reg);
+ __ mov(result_reg, 0, eq);
+ __ b(Ldone, eq); // not equal
+
+ __ tst(ary2_reg, ary2_reg);
+ __ mov(result_reg, 0, eq);
+ __ b(Ldone, eq); // not equal
+#endif
+
+ //load the lengths of arrays
+ __ ldr_s32(tmp1_reg, Address(ary1_reg, length_offset)); // int
+ __ ldr_s32(tmp2_reg, Address(ary2_reg, length_offset)); // int
+
+ // return false if the two arrays are not equal length
+#ifdef AARCH64
+ __ cmp_w(tmp1_reg, tmp2_reg);
+ __ b(Ldone, ne); // not equal
+
+ __ cbz_w(tmp1_reg, Lequal); // zero-length arrays are equal
+#else
+ __ teq_32(tmp1_reg, tmp2_reg);
+ __ mov(result_reg, 0, ne);
+ __ b(Ldone, ne); // not equal
+
+ __ tst(tmp1_reg, tmp1_reg);
+ __ mov(result_reg, 1, eq);
+ __ b(Ldone, eq); // zero-length arrays are equal
+#endif
+
+ // advance both pointers from the array header to the first element
+ __ add(ary1_reg, ary1_reg, base_offset);
+ __ add(ary2_reg, ary2_reg, base_offset);
+
+ // renaming registers
+ Register chr1_reg = tmp3_reg; // for characters in ary1
+ Register chr2_reg = tmp2_reg; // for characters in ary2
+ Register limit_reg = tmp1_reg; // length
+
+ // set byte count
+ __ logical_shift_left_32(limit_reg, limit_reg, exact_log2(sizeof(jchar)));
+
+ // Compare char[] arrays aligned to 4 bytes.
+ __ char_arrays_equals(ary1_reg, ary2_reg, limit_reg, result_reg,
+ chr1_reg, chr2_reg, Ldone); // NOTE(review): presumably sets result_reg to 0 and branches to Ldone on a mismatch - confirm in the helper
+ __ bind(Lequal);
+ __ mov(result_reg, 1); //equal
+
+ __ bind(Ldone);
+ %}
+%}
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+//
+// S T A C K L A Y O U T Allocators stack-slot number
+// | (to get allocators register number
+// G Owned by | | v add VMRegImpl::stack0)
+// r CALLER | |
+// o | +--------+ pad to even-align allocators stack-slot
+// w V | pad0 | numbers; owned by CALLER
+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned
+// h ^ | in | 5
+// | | args | 4 Holes in incoming args owned by SELF
+// | | | | 3
+// | | +--------+
+// V | | old out| Empty on Intel, window on Sparc
+// | old |preserve| Must be even aligned.
+// | SP-+--------+----> Matcher::_old_SP, 8 (or 16 in LP64)-byte aligned
+// | | in | 3 area for Intel ret address
+// Owned by |preserve| Empty on Sparc.
+// SELF +--------+
+// | | pad2 | 2 pad to align old SP
+// | +--------+ 1
+// | | locks | 0
+// | +--------+----> VMRegImpl::stack0, 8 (or 16 in LP64)-byte aligned
+// | | pad1 | 11 pad to align new SP
+// | +--------+
+// | | | 10
+// | | spills | 9 spills
+// V | | 8 (pad0 slot for callee)
+// -----------+--------+----> Matcher::_out_arg_limit, unaligned
+// ^ | out | 7
+// | | args | 6 Holes in outgoing args owned by CALLEE
+// Owned by +--------+
+// CALLEE | new out| 6 Empty on Intel, window on Sparc
+// | new |preserve| Must be even-aligned.
+// | SP-+--------+----> Matcher::_new_SP, even aligned
+// | | |
+//
+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
+// known from SELF's arguments and the Java calling convention.
+// Region 6-7 is determined per call site.
+// Note 2: If the calling convention leaves holes in the incoming argument
+// area, those holes are owned by SELF. Holes in the outgoing area
+// are owned by the CALLEE. Holes should not be necessary in the
+// incoming area, as the Java calling convention is completely under
+// the control of the AD file. Doubles can be sorted and packed to
+// avoid holes. Holes in the outgoing arguments may be necessary for
+// varargs C calling conventions.
+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
+// even aligned with pad0 as needed.
+// Region 6 is even aligned. Region 6-7 is NOT even aligned;
+// region 6-11 is even aligned; it may be padded out more so that
+// the region from SP to FP meets the minimum stack alignment.
+
+frame %{
+ // What direction does stack grow in (assumed to be same for native & Java)
+ stack_direction(TOWARDS_LOW);
+
+ // These two registers define part of the calling convention
+ // between compiled code and the interpreter.
+ inline_cache_reg(R_Ricklass); // Inline Cache Register or Method* for I2C
+ interpreter_method_oop_reg(R_Rmethod); // Method Oop Register when calling interpreter
+
+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
+ cisc_spilling_operand_name(indOffset);
+
+ // Number of stack slots consumed by a Monitor enter (one machine word)
+ sync_stack_slots(1 * VMRegImpl::slots_per_word);
+
+ // Compiled code's Frame Pointer: R_SP on AARCH64, R_R13 otherwise
+#ifdef AARCH64
+ frame_pointer(R_SP);
+#else
+ frame_pointer(R_R13);
+#endif
+
+ // Stack alignment requirement
+ stack_alignment(StackAlignmentInBytes);
+ // LP64: Alignment size in bytes (128-bit -> 16 bytes)
+ // !LP64: Alignment size in bytes (64-bit -> 8 bytes)
+
+ // Number of stack slots between incoming argument block and the start of
+ // a new frame. The PROLOG must add this many slots to the stack. The
+ // EPILOG must remove this many slots.
+ // Two words: FP + LR
+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word);
+
+ // Number of outgoing stack slots killed above the out_preserve_stack_slots
+ // for calls to C. Supports the var-args backing area for register parms.
+ // ADLC doesn't support parsing expressions, so I folded the math by hand.
+ varargs_C_out_slots_killed( 0);
+
+ // The after-PROLOG location of the return address. Location of
+ // return address specifies a type (REG or STACK) and a number
+ // representing the register number (i.e. - use a register name) or
+ // stack slot.
+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
+ // Otherwise, it is above the locks and verification slot and alignment word
+ return_addr(STACK - 1*VMRegImpl::slots_per_word +
+ round_to((Compile::current()->in_preserve_stack_slots() +
+ Compile::current()->fixed_slots()),
+ stack_alignment_in_slots()));
+
+ // Body of function which returns an OptoRegs array locating
+ // arguments either in registers or in stack slots for calling
+ // java. Delegates to SharedRuntime::java_calling_convention.
+ calling_convention %{
+ (void) SharedRuntime::java_calling_convention(sig_bt, regs, length, is_outgoing);
+
+ %}
+
+ // Body of function which returns an OptoRegs array locating
+ // arguments either in registers or in stack slots for calling
+ // C. Delegates to SharedRuntime::c_calling_convention.
+ c_calling_convention %{
+ // This is obviously always outgoing
+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
+ %}
+
+ // Location of compiled Java return values. Same as C
+ return_value %{
+ return c2::return_value(ideal_reg);
+ %}
+
+%}
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Instruction Attributes---------------------------------------------
+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute; defaults to DEFAULT_COST
+ins_attrib ins_size(32); // Required size attribute (in bits); defaults to 32
+ins_attrib ins_short_branch(0); // Required flag (defaults to 0): is this instruction a
+ // non-matching short branch variant of some
+ // long branch?
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+// Immediate Operands
+// Integer Immediate: any 32-bit int constant (no predicate restricts the value)
+operand immI() %{
+ match(ConI);
+
+ op_cost(0);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: 8-bit unsigned (0..255) - for VMOV
+operand immU8() %{
+ predicate(0 <= n->get_int() && (n->get_int() <= 255));
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: 16-bit (no bits set above bit 15), and only when VM_Version::supports_movw()
+operand immI16() %{
+ predicate((n->get_int() >> 16) == 0 && VM_Version::supports_movw());
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+#ifndef AARCH64
+// Integer Immediate: offset for half and double word loads and stores (accepted when is_memoryHD() holds)
+operand immIHD() %{
+ predicate(is_memoryHD(n->get_int()));
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: offset for fp loads and stores (is_memoryfp() must hold and the value must be a multiple of 4)
+operand immIFP() %{
+ predicate(is_memoryfp(n->get_int()) && ((n->get_int() & 3) == 0));
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+#endif
+
+// Valid scale values for addressing modes and shifts: 5-bit unsigned (0..31)
+operand immU5() %{
+ predicate(0 <= n->get_int() && (n->get_int() <= 31));
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: upper half of the 6-bit unsigned range (32..63); same predicate as immI_32_63
+operand immU6Big() %{
+ predicate(n->get_int() >= 32 && n->get_int() <= 63);
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 0
+operand immI0() %{
+ predicate(n->get_int() == 0);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 1
+operand immI_1() %{
+ predicate(n->get_int() == 1);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 2
+operand immI_2() %{
+ predicate(n->get_int() == 2);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 3
+operand immI_3() %{
+ predicate(n->get_int() == 3);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 4
+operand immI_4() %{
+ predicate(n->get_int() == 4);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 8
+operand immI_8() %{
+ predicate(n->get_int() == 8);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Int Immediate: any non-negative value (>= 0)
+operand immU31()
+%{
+ predicate(n->get_int() >= 0);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: the values 32-63 (inclusive)
+operand immI_32_63() %{
+ predicate(n->get_int() >= 32 && n->get_int() <= 63);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Immediates for special shifts (sign extend)
+
+// Integer Immediate: exactly the value 16
+operand immI_16() %{
+ predicate(n->get_int() == 16);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 24
+operand immI_24() %{
+ predicate(n->get_int() == 24);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 255 (0xFF)
+operand immI_255() %{
+ predicate( n->get_int() == 255 );
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediate: exactly the value 65535 (0xFFFF)
+operand immI_65535() %{
+ predicate(n->get_int() == 65535);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediates for arithmetic instructions
+
+operand aimmI() %{
+ predicate(is_aimm(n->get_int())); // int constant accepted when is_aimm() holds for the value itself
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand aimmIneg() %{
+ predicate(is_aimm(-n->get_int())); // int constant accepted when is_aimm() holds for its negation
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand aimmU31() %{
+ predicate((0 <= n->get_int()) && is_aimm(n->get_int())); // non-negative int constant for which is_aimm() holds
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Immediates for logical instructions
+
+operand limmI() %{
+ predicate(is_limmI(n->get_int())); // int constant accepted when is_limmI() holds for the value itself
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand limmIlow8() %{
+ predicate(is_limmI_low(n->get_int(), 8)); // NOTE(review): is_limmI_low() is defined outside this chunk; presumably checks only the low 8 bits - confirm
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand limmU31() %{
+ predicate(0 <= n->get_int() && is_limmI(n->get_int())); // non-negative int constant for which is_limmI() holds
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand limmIn() %{
+ predicate(is_limmI(~n->get_int())); // int constant accepted when is_limmI() holds for its bitwise complement
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+#ifdef AARCH64
+// Long Immediate: for logical instruction (accepted when is_limmL() holds for the value itself)
+operand limmL() %{
+ predicate(is_limmL(n->get_long()));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand limmLn() %{
+ predicate(is_limmL(~n->get_long())); // long constant accepted when is_limmL() holds for its bitwise complement
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: for arithmetic instruction (accepted when is_aimm() holds for the value itself)
+operand aimmL() %{
+ predicate(is_aimm(n->get_long()));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand aimmLneg() %{
+ predicate(is_aimm(-n->get_long())); // long constant accepted when is_aimm() holds for its negation
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+#endif // AARCH64
+
+// Long Immediate: exactly the value 0xFF
+operand immL_FF() %{
+ predicate( n->get_long() == 0xFFL );
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: exactly the value 0xFFFF
+operand immL_FFFF() %{
+ predicate( n->get_long() == 0xFFFFL );
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 32 or 64-bit; any ConP (no predicate), operand cost 5
+operand immP() %{
+ match(ConP);
+
+ op_cost(5);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immP0() %{
+ predicate(n->get_ptr() == 0); // only the null pointer constant
+ match(ConP);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immP_poll() %{
+ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); // non-null pointer constant equal to os::get_polling_page()
+ match(ConP);
+
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer Immediate: any ConN constant (no predicate), operand cost 10
+operand immN()
+%{
+ match(ConN);
+
+ op_cost(10);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immNKlass()
+%{
+ match(ConNKlass); // any ConNKlass constant (no predicate)
+
+ op_cost(10);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// NULL Pointer Immediate: a ConN whose narrow constant is 0
+operand immN0()
+%{
+ predicate(n->get_narrowcon() == 0);
+ match(ConN);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immL() %{
+ match(ConL); // any long constant (no predicate)
+ op_cost(40);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immL0() %{
+ predicate(n->get_long() == 0L); // only the long constant 0
+ match(ConL);
+ op_cost(0);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: 16-bit unsigned (0..65535), and only when VM_Version::supports_movw()
+operand immL16() %{
+ predicate(n->get_long() >= 0 && n->get_long() < (1<<16) && VM_Version::supports_movw());
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask (exactly 0xFFFFFFFF)
+operand immL_32bits() %{
+ predicate(n->get_long() == 0xFFFFFFFFL);
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Double Immediate: any ConD (no predicate), operand cost 40
+operand immD() %{
+ match(ConD);
+
+ op_cost(40);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Double Immediate: +0.0d (jlong_cast of the value must be 0; presumably a raw-bit test that rejects -0.0d - confirm)
+operand immD0() %{
+ predicate(jlong_cast(n->getd()) == 0);
+
+ match(ConD);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand imm8D() %{
+ predicate(Assembler::double_num(n->getd()).can_be_imm8()); // double constant for which can_be_imm8() holds
+ match(ConD);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float Immediate: any ConF (no predicate), operand cost 20
+operand immF() %{
+ match(ConF);
+
+ op_cost(20);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float Immediate: +0.0f (jint_cast of the value must be 0; presumably a raw-bit test that rejects -0.0f - confirm)
+operand immF0() %{
+ predicate(jint_cast(n->getf()) == 0);
+ match(ConF);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float Immediate: encoded as 8 bits (accepted when can_be_imm8() holds)
+operand imm8F() %{
+ predicate(Assembler::float_num(n->getf()).can_be_imm8());
+ match(ConF);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer Register Operands
+// Integer Register: allocated from register class int_reg; also matches the fixed-register int operands listed below
+operand iRegI() %{
+ constraint(ALLOC_IN_RC(int_reg));
+ match(RegI);
+ match(R0RegI);
+ match(R1RegI);
+ match(R2RegI);
+ match(R3RegI);
+#ifdef AARCH64
+ match(ZRRegI);
+#else
+ match(R12RegI);
+#endif
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer Register: allocated from register class ptr_reg; also matches the fixed-register and wider pointer operands listed below
+operand iRegP() %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(RegP);
+ match(R0RegP);
+ match(R1RegP);
+ match(R2RegP);
+ match(RExceptionRegP);
+ match(R8RegP);
+ match(R9RegP);
+ match(RthreadRegP); // FIXME: move to sp_ptr_RegP?
+ match(R12RegP);
+ match(LRRegP);
+
+ match(sp_ptr_RegP);
+ match(store_ptr_RegP); // NOTE(review): within this chunk store_ptr_RegP is only defined under #ifdef AARCH64 - confirm a 32-bit definition exists elsewhere
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// GPRs + Rthread + SP (register class sp_ptr_reg)
+operand sp_ptr_RegP() %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(RegP);
+ match(iRegP);
+ match(SPRegP); // FIXME: check cost
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+#ifdef AARCH64
+// Like sp_ptr_reg, but exclude regs (Aarch64 SP) that can't be
+// stored directly. Includes ZR, so can't be used as a destination. Register class: store_ptr_reg.
+operand store_ptr_RegP() %{
+ constraint(ALLOC_IN_RC(store_ptr_reg));
+ match(RegP);
+ match(iRegP);
+ match(ZRRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand store_RegI() %{
+ constraint(ALLOC_IN_RC(store_reg)); // int operand from register class store_reg
+ match(RegI);
+ match(iRegI);
+ match(ZRRegI); // the zero register is accepted as well
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand store_RegL() %{
+ constraint(ALLOC_IN_RC(store_ptr_reg)); // NOTE(review): same class as store_ptr_RegP rather than a long-specific one; presumably intentional - confirm
+ match(RegL);
+ match(iRegL);
+ match(ZRRegL); // the zero register is accepted as well
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand store_RegN() %{
+ constraint(ALLOC_IN_RC(store_reg)); // RegN operand from register class store_reg
+ match(RegN);
+ match(iRegN);
+ match(ZRRegN); // the zero register is accepted as well
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+#endif
+
+operand R0RegP() %{
+ constraint(ALLOC_IN_RC(R0_regP)); // pointer operand restricted to register class R0_regP
+ match(iRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand R1RegP() %{
+ constraint(ALLOC_IN_RC(R1_regP)); // pointer operand restricted to register class R1_regP
+ match(iRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand R2RegP() %{
+ constraint(ALLOC_IN_RC(R2_regP)); // pointer operand restricted to register class R2_regP
+ match(iRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand RExceptionRegP() %{
+ constraint(ALLOC_IN_RC(Rexception_regP)); // pointer operand restricted to register class Rexception_regP
+ match(iRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand RthreadRegP() %{
+ constraint(ALLOC_IN_RC(Rthread_regP)); // pointer operand restricted to register class Rthread_regP
+ match(iRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand IPRegP() %{
+ constraint(ALLOC_IN_RC(IP_regP)); // pointer operand restricted to register class IP_regP
+ match(iRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand LRRegP() %{
+ constraint(ALLOC_IN_RC(LR_regP)); // pointer operand restricted to register class LR_regP
+ match(iRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand R0RegI() %{
+ constraint(ALLOC_IN_RC(R0_regI)); // int operand restricted to register class R0_regI
+ match(iRegI);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand R1RegI() %{
+ constraint(ALLOC_IN_RC(R1_regI)); // int operand restricted to register class R1_regI
+ match(iRegI);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand R2RegI() %{
+ constraint(ALLOC_IN_RC(R2_regI)); // int operand restricted to register class R2_regI
+ match(iRegI);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand R3RegI() %{
+ constraint(ALLOC_IN_RC(R3_regI)); // int operand restricted to register class R3_regI
+ match(iRegI);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+#ifndef AARCH64
+operand R12RegI() %{
+ constraint(ALLOC_IN_RC(R12_regI)); // int operand restricted to register class R12_regI (this operand exists only when AARCH64 is not defined)
+ match(iRegI);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+#endif
+
+// Long Register: allocated from register class long_reg
+operand iRegL() %{
+ constraint(ALLOC_IN_RC(long_reg));
+ match(RegL);
+#ifdef AARCH64
+ match(iRegLd);
+#else
+ match(R0R1RegL);
+ match(R2R3RegL);
+#endif
+//match(iRegLex);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand iRegLd() %{
+ constraint(ALLOC_IN_RC(long_reg_align)); // long operand from register class long_reg_align
+ match(iRegL); // FIXME: allows unaligned R11/R12?
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+#ifndef AARCH64
+// first long arg, or return value (register class R0R1_regL)
+operand R0R1RegL() %{
+ constraint(ALLOC_IN_RC(R0R1_regL));
+ match(iRegL);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand R2R3RegL() %{
+ constraint(ALLOC_IN_RC(R2R3_regL)); // long operand restricted to register class R2R3_regL
+ match(iRegL);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+#endif
+
+// Condition Code Flag Register (register class int_flags, printed as "apsr")
+operand flagsReg() %{
+ constraint(ALLOC_IN_RC(int_flags));
+ match(RegFlags);
+
+ format %{ "apsr" %}
+ interface(REG_INTER);
+%}
+
+// Result of compare to 0 (TST); presumably only EQ/NE/LT/GE are meaningful, as the name suggests - confirm
+operand flagsReg_EQNELTGE() %{
+ constraint(ALLOC_IN_RC(int_flags));
+ match(RegFlags);
+
+ format %{ "apsr_EQNELTGE" %}
+ interface(REG_INTER);
+%}
+
+// Condition Code Register, unsigned comparisons (register class int_flags, printed as "apsr_U").
+operand flagsRegU() %{
+ constraint(ALLOC_IN_RC(int_flags));
+ match(RegFlags);
+#ifdef TODO
+ match(RegFlagsP);
+#endif
+
+ format %{ "apsr_U" %}
+ interface(REG_INTER);
+%}
+
+// Condition Code Register, pointer comparisons (register class int_flags, printed as "apsr_P").
+operand flagsRegP() %{
+ constraint(ALLOC_IN_RC(int_flags));
+ match(RegFlags);
+
+ format %{ "apsr_P" %}
+ interface(REG_INTER);
+%}
+
+// Condition Code Register, long comparisons.
+#ifndef AARCH64
+operand flagsRegL_LTGE() %{
+ constraint(ALLOC_IN_RC(int_flags)); // same register class as the other integer flags operands
+ match(RegFlags);
+
+ format %{ "apsr_L_LTGE" %}
+ interface(REG_INTER); // NOTE(review): presumably flags of a long compare valid for LT/GE only, per the name - confirm
+%}
+
+operand flagsRegL_EQNE() %{
+ constraint(ALLOC_IN_RC(int_flags)); // same register class as the other integer flags operands
+ match(RegFlags);
+
+ format %{ "apsr_L_EQNE" %}
+ interface(REG_INTER); // NOTE(review): presumably flags of a long compare valid for EQ/NE only, per the name - confirm
+%}
+
+operand flagsRegL_LEGT() %{
+ constraint(ALLOC_IN_RC(int_flags)); // same register class as the other integer flags operands
+ match(RegFlags);
+
+ format %{ "apsr_L_LEGT" %}
+ interface(REG_INTER); // NOTE(review): presumably flags of a long compare valid for LE/GT only, per the name - confirm
+%}
+#endif
+
+// Condition Code Register, floating comparisons, unordered same as "less" (register class float_flags, printed as "fpscr_F").
+operand flagsRegF() %{
+ constraint(ALLOC_IN_RC(float_flags));
+ match(RegFlags);
+
+ format %{ "fpscr_F" %}
+ interface(REG_INTER);
+%}
+
+// Vectors
+operand vecD() %{
+ constraint(ALLOC_IN_RC(actual_dflt_reg)); // VecD operand; same register class as regD
+ match(VecD);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vecX() %{
+ constraint(ALLOC_IN_RC(vectorx_reg)); // VecX operand from register class vectorx_reg
+ match(VecX);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand regD() %{
+ constraint(ALLOC_IN_RC(actual_dflt_reg)); // double operand from register class actual_dflt_reg
+ match(RegD);
+ match(regD_low); // the more restricted regD_low operand is accepted as well
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand regF() %{
+ constraint(ALLOC_IN_RC(sflt_reg)); // float operand from register class sflt_reg
+ match(RegF);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand regD_low() %{
+ constraint(ALLOC_IN_RC(dflt_low_reg)); // double operand from register class dflt_low_reg
+ match(RegD);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Inline cache register: pointer operand restricted to register class Ricklass_regP
+operand inline_cache_regP(iRegP reg) %{
+ constraint(ALLOC_IN_RC(Ricklass_regP));
+ match(reg);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand interpreter_method_oop_regP(iRegP reg) %{
+ constraint(ALLOC_IN_RC(Rmethod_regP)); // pointer operand restricted to register class Rmethod_regP
+ match(reg);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+
+//----------Complex Operands---------------------------------------------------
+// Indirect Memory Reference: plain [reg] with no index, scale 0 and displacement 0
+operand indirect(sp_ptr_RegP reg) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(reg);
+
+ op_cost(100);
+ format %{ "[$reg]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+#ifdef AARCH64
+ index(0xff); // 0xff => no index
+#else
+ index(0xf); // PC => no index
+#endif
+ scale(0x0);
+ disp(0x0);
+ %}
+%}
+
+#ifdef AARCH64
+// Indirect with scaled*1 uimm12 offset: [reg + offset], no index register.
+// This operand sits inside the surrounding #ifdef AARCH64 region, so only
+// the AArch64 "no index" encoding (0xff) applies here.
+operand indOffsetU12ScaleB(sp_ptr_RegP reg, immUL12 offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xff); // 0xff => no index
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with scaled*2 uimm12 offset: [reg + offset], no index register.
+// This operand sits inside the surrounding #ifdef AARCH64 region, so only
+// the AArch64 "no index" encoding (0xff) applies here.
+operand indOffsetU12ScaleS(sp_ptr_RegP reg, immUL12x2 offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xff); // 0xff => no index
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with scaled*4 uimm12 offset: [reg + offset], no index register.
+// This operand sits inside the surrounding #ifdef AARCH64 region, so only
+// the AArch64 "no index" encoding (0xff) applies here.
+operand indOffsetU12ScaleI(sp_ptr_RegP reg, immUL12x4 offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xff); // 0xff => no index
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with scaled*8 uimm12 offset: [reg + offset], no index register.
+// This operand sits inside the surrounding #ifdef AARCH64 region, so only
+// the AArch64 "no index" encoding (0xff) applies here.
+operand indOffsetU12ScaleL(sp_ptr_RegP reg, immUL12x8 offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xff); // 0xff => no index
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with scaled*16 uimm12 offset: base register + immUL12x16 displacement, no index
+operand indOffsetU12ScaleQ(sp_ptr_RegP reg, immUL12x16 offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+#ifdef AARCH64
+ index(0xff); // 0xff => no index
+#else
+ index(0xf); // PC => no index (not selected: this operand sits in an AARCH64-only section)
+#endif
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+#else // ! AARCH64
+
+// Indirect with Offset in ]-4096, 4096[ (immI12): base register + displacement, no index
+operand indOffset12(sp_ptr_RegP reg, immI12 offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+#ifdef AARCH64
+ index(0xff); // 0xff => no index (not selected: this operand sits in the !AARCH64 section)
+#else
+ index(0xf); // PC => no index
+#endif
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with offset for float load/store (immIFP): base register + displacement, no index
+operand indOffsetFP(sp_ptr_RegP reg, immIFP offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+#ifdef AARCH64
+ index(0xff); // 0xff => no index (not selected: this operand sits in the !AARCH64 section)
+#else
+ index(0xf); // PC => no index
+#endif
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with Offset for half and double words (immIHD): base register + displacement, no index
+operand indOffsetHD(sp_ptr_RegP reg, immIHD offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+#ifdef AARCH64
+ index(0xff); // 0xff => no index (not selected: this operand sits in the !AARCH64 section)
+#else
+ index(0xf); // PC => no index
+#endif
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with Offset and Offset+4 in ]-1024, 1024[ (immX10x2): base register + displacement, no index
+operand indOffsetFPx2(sp_ptr_RegP reg, immX10x2 offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+#ifdef AARCH64
+ index(0xff); // 0xff => no index (not selected: this operand sits in the !AARCH64 section)
+#else
+ index(0xf); // PC => no index
+#endif
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+
+// Indirect with Offset and Offset+4 in ]-4096, 4096[ (immI12x2): base register + displacement, no index
+operand indOffset12x2(sp_ptr_RegP reg, immI12x2 offset) %{
+ constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(AddP reg offset);
+
+ op_cost(100);
+ format %{ "[$reg + $offset]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+#ifdef AARCH64
+ index(0xff); // 0xff => no index (not selected: this operand sits in the !AARCH64 section)
+#else
+ index(0xf); // PC => no index
+#endif
+ scale(0x0);
+ disp($offset);
+ %}
+%}
+#endif // !AARCH64
+
+// Indirect with Register Index: base + unscaled index register, zero displacement
+operand indIndex(iRegP addr, iRegX index) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr index);
+
+ op_cost(100);
+ format %{ "[$addr + $index]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale(0x0);
+ disp(0x0);
+ %}
+%}
+
+#ifdef AARCH64
+// Indirect Memory Times Scale Plus Index Register: base + (index << scale), scale operand is immI_1
+operand indIndexScaleS(iRegP addr, iRegX index, immI_1 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX index scale));
+
+ op_cost(100);
+ format %{"[$addr + $index << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x0);
+ %}
+%}
+
+// Indirect Memory Times Scale Plus 32-bit Index Register: base + (ConvI2L(index) << scale), scale operand is immI_1
+operand indIndexIScaleS(iRegP addr, iRegI index, immI_1 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX (ConvI2L index) scale));
+
+ op_cost(100);
+ format %{"[$addr + $index.w << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x7fffffff); // sxtw -- NOTE(review): looks like a marker value, not a real displacement; confirm where the disp field is decoded
+ %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register: base + (index << scale), scale operand is immI_2
+operand indIndexScaleI(iRegP addr, iRegX index, immI_2 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX index scale));
+
+ op_cost(100);
+ format %{"[$addr + $index << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x0);
+ %}
+%}
+
+// Indirect Memory Times Scale Plus 32-bit Index Register: base + (ConvI2L(index) << scale), scale operand is immI_2
+operand indIndexIScaleI(iRegP addr, iRegI index, immI_2 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX (ConvI2L index) scale));
+
+ op_cost(100);
+ format %{"[$addr + $index.w << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x7fffffff); // sxtw -- NOTE(review): looks like a marker value, not a real displacement; confirm where the disp field is decoded
+ %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register: base + (index << scale), scale operand is immI_3
+operand indIndexScaleL(iRegP addr, iRegX index, immI_3 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX index scale));
+
+ op_cost(100);
+ format %{"[$addr + $index << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x0);
+ %}
+%}
+
+// Indirect Memory Times Scale Plus 32-bit Index Register: base + (ConvI2L(index) << scale), scale operand is immI_3
+operand indIndexIScaleL(iRegP addr, iRegI index, immI_3 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX (ConvI2L index) scale));
+
+ op_cost(100);
+ format %{"[$addr + $index.w << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x7fffffff); // sxtw -- NOTE(review): looks like a marker value, not a real displacement; confirm where the disp field is decoded
+ %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register: base + (index << scale), scale operand is immI_4
+operand indIndexScaleQ(iRegP addr, iRegX index, immI_4 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX index scale));
+
+ op_cost(100);
+ format %{"[$addr + $index << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x0);
+ %}
+%}
+
+// Indirect Memory Times Scale Plus 32-bit Index Register: base + (ConvI2L(index) << scale), scale operand is immI_4
+operand indIndexIScaleQ(iRegP addr, iRegI index, immI_4 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX (ConvI2L index) scale));
+
+ op_cost(100);
+ format %{"[$addr + $index.w << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x7fffffff); // sxtw -- NOTE(review): looks like a marker value, not a real displacement; confirm where the disp field is decoded
+ %}
+%}
+#else
+// Indirect Memory Times Scale Plus Index Register: base + (index << scale), scale is an immU5 operand
+operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP addr (LShiftX index scale));
+
+ op_cost(100);
+ format %{"[$addr + $index << $scale]" %}
+ interface(MEMORY_INTER) %{
+ base($addr);
+ index($index);
+ scale($scale);
+ disp(0x0);
+ %}
+%}
+#endif
+
+// Operands for expressing Control Flow
+// NOTE: Label is a predefined operand which should not be redefined in
+// the AD file. It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op - This is the operation of the comparison, and is limited to
+// the following set of codes:
+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below, such as cmpOpU.
+
+operand cmpOp() %{
+ match(Bool); // default Bool operand; cmpOpL uses the same eight values
+
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x0);
+ not_equal(0x1);
+ less(0xb);
+ greater_equal(0xa);
+ less_equal(0xd);
+ greater(0xc);
+ overflow(0x0); // unsupported/unimplemented (placeholder: same value as equal)
+ no_overflow(0x0); // unsupported/unimplemented (placeholder: same value as equal)
+ %}
+%}
+
+// integer comparison with 0, signed (only equal, not_equal, less and greater_equal carry dedicated codes)
+operand cmpOp0() %{
+ match(Bool);
+
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x0);
+ not_equal(0x1);
+ less(0x4); // NOTE(review): presumably the ARM MI (negative) condition -- confirm
+ greater_equal(0x5); // NOTE(review): presumably the ARM PL (non-negative) condition -- confirm
+ less_equal(0xd); // unsupported (same value as cmpOp less_equal)
+ greater(0xc); // unsupported (same value as cmpOp greater)
+ overflow(0x0); // unsupported/unimplemented
+ no_overflow(0x0); // unsupported/unimplemented
+ %}
+%}
+
+// Comparison Op, unsigned (cmpOpP uses the same eight values)
+operand cmpOpU() %{
+ match(Bool);
+
+ format %{ "u" %}
+ interface(COND_INTER) %{
+ equal(0x0);
+ not_equal(0x1);
+ less(0x3); // NOTE(review): 0x3/0x2/0x9/0x8 are presumably the ARM LO/HS/LS/HI conditions -- confirm
+ greater_equal(0x2);
+ less_equal(0x9);
+ greater(0x8);
+ overflow(0x0); // unsupported/unimplemented
+ no_overflow(0x0); // unsupported/unimplemented
+ %}
+%}
+
+// Comparison Op, pointer (same encodings as cmpOpU; only the format letter differs)
+operand cmpOpP() %{
+ match(Bool);
+
+ format %{ "p" %}
+ interface(COND_INTER) %{
+ equal(0x0);
+ not_equal(0x1);
+ less(0x3);
+ greater_equal(0x2);
+ less_equal(0x9);
+ greater(0x8);
+ overflow(0x0); // unsupported/unimplemented
+ no_overflow(0x0); // unsupported/unimplemented
+ %}
+%}
+
+operand cmpOpL() %{
+ match(Bool); // long comparison: same eight encodings as cmpOp, format letter L
+
+ format %{ "L" %}
+ interface(COND_INTER) %{
+ equal(0x0);
+ not_equal(0x1);
+ less(0xb);
+ greater_equal(0xa);
+ less_equal(0xd);
+ greater(0xc);
+ overflow(0x0); // unsupported/unimplemented
+ no_overflow(0x0); // unsupported/unimplemented
+ %}
+%}
+
+operand cmpOpL_commute() %{
+ match(Bool); // cmpOpL table with the ordering codes mirrored (see per-line notes)
+
+ format %{ "L" %}
+ interface(COND_INTER) %{
+ equal(0x0);
+ not_equal(0x1);
+ less(0xc); // value cmpOpL uses for greater
+ greater_equal(0xd); // value cmpOpL uses for less_equal
+ less_equal(0xa); // value cmpOpL uses for greater_equal
+ greater(0xb); // value cmpOpL uses for less
+ overflow(0x0); // unsupported/unimplemented
+ no_overflow(0x0); // unsupported/unimplemented
+ %}
+%}
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify separate
+// instructions for every form of operand when the instruction accepts
+// multiple operand types with the same basic encoding and format. The classic
+// case of this is memory operands.
+#ifdef AARCH64
+opclass memoryB(indirect, indIndex, indOffsetU12ScaleB); // byte class: no scaled-index member
+opclass memoryS(indirect, indIndex, indIndexScaleS, indIndexIScaleS, indOffsetU12ScaleS);
+opclass memoryI(indirect, indIndex, indIndexScaleI, indIndexIScaleI, indOffsetU12ScaleI);
+opclass memoryL(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL);
+opclass memoryP(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL); // same members as memoryL
+opclass memoryQ(indirect, indIndex, indIndexScaleQ, indIndexIScaleQ, indOffsetU12ScaleQ);
+opclass memoryF(indirect, indIndex, indIndexScaleI, indIndexIScaleI, indOffsetU12ScaleI); // same members as memoryI
+opclass memoryD(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL); // same members as memoryL
+
+opclass memoryScaledS(indIndexScaleS, indIndexIScaleS);
+opclass memoryScaledI(indIndexScaleI, indIndexIScaleI);
+opclass memoryScaledL(indIndexScaleL, indIndexIScaleL);
+opclass memoryScaledP(indIndexScaleL, indIndexIScaleL); // same members as memoryScaledL
+opclass memoryScaledQ(indIndexScaleQ, indIndexIScaleQ);
+opclass memoryScaledF(indIndexScaleI, indIndexIScaleI); // same members as memoryScaledI
+opclass memoryScaledD(indIndexScaleL, indIndexIScaleL); // same members as memoryScaledL
+// when ldrex/strex is used: only the plain base-register form is accepted
+opclass memoryex ( indirect );
+opclass indIndexMemory( indIndex );
+opclass memoryvld ( indirect /* , write back mode not implemented */ );
+
+#else
+
+opclass memoryI ( indirect, indOffset12, indIndex, indIndexScale );
+opclass memoryP ( indirect, indOffset12, indIndex, indIndexScale ); // same members as memoryI
+opclass memoryF ( indirect, indOffsetFP );
+opclass memoryF2 ( indirect, indOffsetFPx2 );
+opclass memoryD ( indirect, indOffsetFP ); // same members as memoryF
+opclass memoryfp( indirect, indOffsetFP ); // same members as memoryF
+opclass memoryB ( indirect, indIndex, indOffsetHD );
+opclass memoryS ( indirect, indIndex, indOffsetHD ); // same members as memoryB
+opclass memoryL ( indirect, indIndex, indOffsetHD ); // same members as memoryB
+
+opclass memoryScaledI(indIndexScale);
+opclass memoryScaledP(indIndexScale);
+
+// when ldrex/strex is used: only the plain base-register form is accepted
+opclass memoryex ( indirect );
+opclass indIndexMemory( indIndex );
+opclass memorylong ( indirect, indOffset12x2 );
+opclass memoryvld ( indirect /* , write back mode not implemented */ );
+#endif
+
+//----------PIPELINE-----------------------------------------------------------
+pipeline %{
+
+//----------ATTRIBUTES---------------------------------------------------------
+attributes %{
+ fixed_size_instructions; // Fixed size instructions
+ max_instructions_per_bundle = 4; // Up to 4 instructions per bundle
+ instruction_unit_size = 4; // An instruction is 4 bytes long
+ instruction_fetch_unit_size = 16; // The processor fetches one line
+ instruction_fetch_units = 1; // of 16 bytes
+
+ // List of nop instructions
+ nops( Nop_A0, Nop_A1, Nop_MS, Nop_FA, Nop_BR );
+%}
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine; IALU is declared as A0 | A1
+resources(A0, A1, MS, BR, FA, FM, IDIV, FDIV, IALU = A0 | A1);
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline
+
+pipe_desc(A, P, F, B, I, J, S, R, E, C, M, W, X, T, D);
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
+
+// Integer ALU reg-reg operation
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ single_instruction;
+ dst : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU reg-reg long operation (two instructions, one IALU use each)
+pipe_class ialu_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{
+ instruction_count(2);
+ dst : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ IALU : R;
+ IALU : R;
+%}
+
+// Integer ALU reg-reg long dependent operation
+pipe_class ialu_reg_reg_2_dep(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
+ instruction_count(1); multiple_bundles;
+ dst : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ cr : E(write);
+ IALU : R(2);
+%}
+
+// Integer ALU reg-imm operation
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1) %{
+ single_instruction;
+ dst : E(write);
+ src1 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU reg-reg operation with condition code
+pipe_class ialu_cc_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+ single_instruction;
+ dst : E(write);
+ cr : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU zero-reg operation
+pipe_class ialu_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{
+ single_instruction;
+ dst : E(write);
+ src2 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU zero-reg operation with condition code only
+pipe_class ialu_cconly_zero_reg(flagsReg cr, iRegI src) %{
+ single_instruction;
+ cr : E(write);
+ src : R(read);
+ IALU : R;
+%}
+
+// Integer ALU reg-reg operation with condition code only
+pipe_class ialu_cconly_reg_reg(flagsReg cr, iRegI src1, iRegI src2) %{
+ single_instruction;
+ cr : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU reg-imm operation with condition code only
+pipe_class ialu_cconly_reg_imm(flagsReg cr, iRegI src1) %{
+ single_instruction;
+ cr : E(write);
+ src1 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU reg-reg-zero operation with condition code only
+pipe_class ialu_cconly_reg_reg_zero(flagsReg cr, iRegI src1, iRegI src2, immI0 zero) %{
+ single_instruction;
+ cr : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU reg-imm-zero operation with condition code only
+pipe_class ialu_cconly_reg_imm_zero(flagsReg cr, iRegI src1, immI0 zero) %{
+ single_instruction;
+ cr : E(write);
+ src1 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU reg-reg operation with condition code, src1 modified
+pipe_class ialu_cc_rwreg_reg(flagsReg cr, iRegI src1, iRegI src2) %{
+ single_instruction;
+ cr : E(write);
+ src1 : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ IALU : R;
+%}
+
+pipe_class cmpL_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr ) %{
+ multiple_bundles;
+ dst : E(write)+4;
+ cr : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ IALU : R(3);
+ BR : R(2);
+%}
+
+// Integer ALU operation
+pipe_class ialu_none(iRegI dst) %{
+ single_instruction;
+ dst : E(write);
+ IALU : R;
+%}
+
+// Integer ALU reg operation
+pipe_class ialu_reg(iRegI dst, iRegI src) %{
+ single_instruction; may_have_no_code;
+ dst : E(write);
+ src : R(read);
+ IALU : R;
+%}
+
+// Integer ALU reg conditional operation
+// This instruction has a 1 cycle stall, and cannot execute
+// in the same cycle as the instruction setting the condition
+// code. We kludge this by pretending to read the condition code
+// 1 cycle earlier, and by marking the functional units as busy
+// for 2 cycles with the result available 1 cycle later than
+// is really the case.
+pipe_class ialu_reg_flags( iRegI op2_out, iRegI op2_in, iRegI op1, flagsReg cr ) %{
+ single_instruction;
+ op2_out : C(write);
+ op1 : R(read);
+ cr : R(read); // This is really E, with a 1 cycle stall
+ BR : R(2);
+ MS : R(2);
+%}
+
+// Integer ALU reg operation (long -> int move)
+pipe_class ialu_move_reg_L_to_I(iRegI dst, iRegL src) %{
+ single_instruction; may_have_no_code;
+ dst : E(write);
+ src : R(read);
+ IALU : R;
+%}
+pipe_class ialu_move_reg_I_to_L(iRegL dst, iRegI src) %{
+ single_instruction; may_have_no_code;
+ dst : E(write);
+ src : R(read);
+ IALU : R;
+%}
+
+// Two integer ALU reg operations
+pipe_class ialu_reg_2(iRegL dst, iRegL src) %{
+ instruction_count(2);
+ dst : E(write);
+ src : R(read);
+ A0 : R;
+ A1 : R;
+%}
+
+// Two integer ALU reg operations (long -> long move)
+pipe_class ialu_move_reg_L_to_L(iRegL dst, iRegL src) %{
+ instruction_count(2); may_have_no_code;
+ dst : E(write);
+ src : R(read);
+ A0 : R;
+ A1 : R;
+%}
+
+// Integer ALU imm operation
+pipe_class ialu_imm(iRegI dst) %{
+ single_instruction;
+ dst : E(write);
+ IALU : R;
+%}
+
+pipe_class ialu_imm_n(iRegI dst) %{
+ single_instruction;
+ dst : E(write);
+ IALU : R;
+%}
+
+// Integer ALU reg-reg with carry operation
+pipe_class ialu_reg_reg_cy(iRegI dst, iRegI src1, iRegI src2, iRegI cy) %{
+ single_instruction;
+ dst : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ IALU : R;
+%}
+
+// Integer ALU cc operation
+pipe_class ialu_cc(iRegI dst, flagsReg cc) %{
+ single_instruction;
+ dst : E(write);
+ cc : R(read);
+ IALU : R;
+%}
+
+// Integer ALU cc / second IALU operation
+pipe_class ialu_reg_ialu( iRegI dst, iRegI src ) %{
+ instruction_count(1); multiple_bundles;
+ dst : E(write)+1;
+ src : R(read);
+ IALU : R;
+%}
+
+// Integer ALU cc / second IALU operation
+pipe_class ialu_reg_reg_ialu( iRegI dst, iRegI p, iRegI q ) %{
+ instruction_count(1); multiple_bundles;
+ dst : E(write)+1;
+ p : R(read);
+ q : R(read);
+ IALU : R;
+%}
+
+// Integer ALU hi-lo-reg operation
+pipe_class ialu_hi_lo_reg(iRegI dst, immI src) %{
+ instruction_count(1); multiple_bundles;
+ dst : E(write)+1;
+ IALU : R(2);
+%}
+
+// Long Constant
+pipe_class loadConL( iRegL dst, immL src ) %{
+ instruction_count(2); multiple_bundles;
+ dst : E(write)+1;
+ IALU : R(2);
+ IALU : R(2);
+%}
+
+// Pointer Constant
+pipe_class loadConP( iRegP dst, immP src ) %{
+ instruction_count(0); multiple_bundles;
+ fixed_latency(6);
+%}
+
+// Polling Address
+pipe_class loadConP_poll( iRegP dst, immP_poll src ) %{
+ dst : E(write);
+ IALU : R;
+%}
+
+// Long Constant small
+pipe_class loadConLlo( iRegL dst, immL src ) %{
+ instruction_count(2);
+ dst : E(write);
+ IALU : R;
+ IALU : R;
+%}
+
+// [PHH] This is wrong for 64-bit. See LdImmF/D.
+pipe_class loadConFD(regF dst, immF src, iRegP tmp) %{
+ instruction_count(1); multiple_bundles;
+ src : R(read);
+ dst : M(write)+1;
+ IALU : R;
+ MS : E;
+%}
+
+// Integer ALU nop operation
+pipe_class ialu_nop() %{
+ single_instruction;
+ IALU : R;
+%}
+
+// Integer ALU nop operation on unit A0
+pipe_class ialu_nop_A0() %{
+ single_instruction;
+ A0 : R;
+%}
+
+// Integer ALU nop operation on unit A1
+pipe_class ialu_nop_A1() %{
+ single_instruction;
+ A1 : R;
+%}
+
+// Integer Multiply reg-reg operation
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ single_instruction;
+ dst : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ MS : R(5);
+%}
+
+pipe_class mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+ single_instruction;
+ dst : E(write)+4;
+ src1 : R(read);
+ src2 : R(read);
+ MS : R(6);
+%}
+
+// Integer Divide reg-reg
+pipe_class sdiv_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI temp, flagsReg cr) %{
+ instruction_count(1); multiple_bundles;
+ dst : E(write);
+ temp : E(write);
+ src1 : R(read);
+ src2 : R(read);
+ temp : R(read);
+ MS : R(38);
+%}
+
+// Long Divide
+pipe_class divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+ dst : E(write)+71;
+ src1 : R(read);
+ src2 : R(read)+1;
+ MS : R(70);
+%}
+
+// Floating Point Add Float
+pipe_class faddF_reg_reg(regF dst, regF src1, regF src2) %{
+ single_instruction;
+ dst : X(write);
+ src1 : E(read);
+ src2 : E(read);
+ FA : R;
+%}
+
+// Floating Point Add Double
+pipe_class faddD_reg_reg(regD dst, regD src1, regD src2) %{
+ single_instruction;
+ dst : X(write);
+ src1 : E(read);
+ src2 : E(read);
+ FA : R;
+%}
+
+// Floating Point Conditional Move based on integer flags (float)
+pipe_class int_conditional_float_move (cmpOp cmp, flagsReg cr, regF dst, regF src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ cr : R(read);
+ FA : R(2);
+ BR : R(2);
+%}
+
+// Floating Point Conditional Move based on integer flags (double)
+pipe_class int_conditional_double_move (cmpOp cmp, flagsReg cr, regD dst, regD src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ cr : R(read);
+ FA : R(2);
+ BR : R(2);
+%}
+
+// Floating Point Multiply Float
+pipe_class fmulF_reg_reg(regF dst, regF src1, regF src2) %{
+ single_instruction;
+ dst : X(write);
+ src1 : E(read);
+ src2 : E(read);
+ FM : R;
+%}
+
+// Floating Point Multiply Double
+pipe_class fmulD_reg_reg(regD dst, regD src1, regD src2) %{
+ single_instruction;
+ dst : X(write);
+ src1 : E(read);
+ src2 : E(read);
+ FM : R;
+%}
+
+// Floating Point Divide Float
+pipe_class fdivF_reg_reg(regF dst, regF src1, regF src2) %{
+ single_instruction;
+ dst : X(write);
+ src1 : E(read);
+ src2 : E(read);
+ FM : R;
+ FDIV : C(14);
+%}
+
+// Floating Point Divide Double
+pipe_class fdivD_reg_reg(regD dst, regD src1, regD src2) %{
+ single_instruction;
+ dst : X(write);
+ src1 : E(read);
+ src2 : E(read);
+ FM : R;
+ FDIV : C(17);
+%}
+
+// Floating Point Move/Negate/Abs Float
+pipe_class faddF_reg(regF dst, regF src) %{
+ single_instruction;
+ dst : W(write);
+ src : E(read);
+ FA : R(1);
+%}
+
+// Floating Point Move/Negate/Abs Double
+pipe_class faddD_reg(regD dst, regD src) %{
+ single_instruction;
+ dst : W(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert F->D
+pipe_class fcvtF2D(regD dst, regF src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert I->D
+pipe_class fcvtI2D(regD dst, regF src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert LHi->D
+pipe_class fcvtLHi2D(regD dst, regD src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert L->D
+pipe_class fcvtL2D(regD dst, iRegL src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert L->F
+pipe_class fcvtL2F(regF dst, iRegL src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert D->F
+pipe_class fcvtD2F(regD dst, regF src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert I->L
+pipe_class fcvtI2L(regD dst, regF src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert D->I
+pipe_class fcvtD2I(iRegI dst, regD src, flagsReg cr) %{
+ instruction_count(1); multiple_bundles;
+ dst : X(write)+6;
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert D->L
+pipe_class fcvtD2L(regD dst, regD src, flagsReg cr) %{
+ instruction_count(1); multiple_bundles;
+ dst : X(write)+6;
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert F->I
+pipe_class fcvtF2I(regF dst, regF src, flagsReg cr) %{
+ instruction_count(1); multiple_bundles;
+ dst : X(write)+6;
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert F->L
+pipe_class fcvtF2L(regD dst, regF src, flagsReg cr) %{
+ instruction_count(1); multiple_bundles;
+ dst : X(write)+6;
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Convert I->F
+pipe_class fcvtI2F(regF dst, regF src) %{
+ single_instruction;
+ dst : X(write);
+ src : E(read);
+ FA : R;
+%}
+
+// Floating Point Compare (float)
+pipe_class faddF_fcc_reg_reg_zero(flagsRegF cr, regF src1, regF src2, immI0 zero) %{
+ single_instruction;
+ cr : X(write);
+ src1 : E(read);
+ src2 : E(read);
+ FA : R;
+%}
+
+// Floating Point Compare (double)
+pipe_class faddD_fcc_reg_reg_zero(flagsRegF cr, regD src1, regD src2, immI0 zero) %{
+ single_instruction;
+ cr : X(write);
+ src1 : E(read);
+ src2 : E(read);
+ FA : R;
+%}
+
+// Floating Add Nop
+pipe_class fadd_nop() %{
+ single_instruction;
+ FA : R;
+%}
+
+// Integer Store to Memory
+pipe_class istore_mem_reg(memoryI mem, iRegI src) %{
+ single_instruction;
+ mem : R(read);
+ src : C(read);
+ MS : R;
+%}
+
+// Integer Store to Memory (source register taken from the sp_ptr_RegP class)
+pipe_class istore_mem_spORreg(memoryI mem, sp_ptr_RegP src) %{
+ single_instruction;
+ mem : R(read);
+ src : C(read);
+ MS : R;
+%}
+
+// Float Store
+pipe_class fstoreF_mem_reg(memoryF mem, RegF src) %{
+ single_instruction;
+ mem : R(read);
+ src : C(read);
+ MS : R;
+%}
+
+// Float Store (immF0 source, no register read)
+pipe_class fstoreF_mem_zero(memoryF mem, immF0 src) %{
+ single_instruction;
+ mem : R(read);
+ MS : R;
+%}
+
+// Double Store
+pipe_class fstoreD_mem_reg(memoryD mem, RegD src) %{
+ instruction_count(1);
+ mem : R(read);
+ src : C(read);
+ MS : R;
+%}
+
+// Double Store (immD0 source, no register read)
+pipe_class fstoreD_mem_zero(memoryD mem, immD0 src) %{
+ single_instruction;
+ mem : R(read);
+ MS : R;
+%}
+
+// Integer Load (when sign bit propagation not needed)
+pipe_class iload_mem(iRegI dst, memoryI mem) %{
+ single_instruction;
+ mem : R(read);
+ dst : C(write);
+ MS : R;
+%}
+
+// Integer Load (when sign bit propagation or masking is needed)
+pipe_class iload_mask_mem(iRegI dst, memoryI mem) %{
+ single_instruction;
+ mem : R(read);
+ dst : M(write);
+ MS : R;
+%}
+
+// Float Load
+pipe_class floadF_mem(regF dst, memoryF mem) %{
+ single_instruction;
+ mem : R(read);
+ dst : M(write);
+ MS : R;
+%}
+
+// Double Load
+pipe_class floadD_mem(regD dst, memoryD mem) %{
+ instruction_count(1); multiple_bundles; // Again, unaligned argument is only multiple case
+ mem : R(read);
+ dst : M(write);
+ MS : R;
+%}
+
+// Memory Nop
+pipe_class mem_nop() %{
+ single_instruction;
+ MS : R;
+%}
+
+pipe_class sethi(iRegP dst, immI src) %{
+ single_instruction;
+ dst : E(write);
+ IALU : R;
+%}
+
+pipe_class loadPollP(iRegP poll) %{
+ single_instruction;
+ poll : R(read);
+ MS : R;
+%}
+
+pipe_class br(Universe br, label labl) %{
+ single_instruction_with_delay_slot;
+ BR : R;
+%}
+
+pipe_class br_cc(Universe br, cmpOp cmp, flagsReg cr, label labl) %{
+ single_instruction_with_delay_slot;
+ cr : E(read);
+ BR : R;
+%}
+
+pipe_class br_reg(Universe br, cmpOp cmp, iRegI op1, label labl) %{
+ single_instruction_with_delay_slot;
+ op1 : E(read);
+ BR : R;
+ MS : R;
+%}
+
+pipe_class br_nop() %{
+ single_instruction;
+ BR : R;
+%}
+
+pipe_class simple_call(method meth) %{
+ instruction_count(2); multiple_bundles; force_serialization;
+ fixed_latency(100);
+ BR : R(1);
+ MS : R(1);
+ A0 : R(1);
+%}
+
+pipe_class compiled_call(method meth) %{
+ instruction_count(1); multiple_bundles; force_serialization;
+ fixed_latency(100);
+ MS : R(1);
+%}
+
+pipe_class call(method meth) %{
+ instruction_count(0); multiple_bundles; force_serialization;
+ fixed_latency(100);
+%}
+
+pipe_class tail_call(Universe ignore, label labl) %{
+ single_instruction; has_delay_slot;
+ fixed_latency(100);
+ BR : R(1);
+ MS : R(1);
+%}
+
+pipe_class ret(Universe ignore) %{
+ single_instruction; has_delay_slot;
+ BR : R(1);
+ MS : R(1);
+%}
+
+// The real do-nothing guy
+pipe_class empty( ) %{
+ instruction_count(0);
+%}
+
+pipe_class long_memory_op() %{
+ instruction_count(0); multiple_bundles; force_serialization;
+ fixed_latency(25);
+ MS : R(1);
+%}
+
+// Check-cast
+pipe_class partial_subtype_check_pipe(Universe ignore, iRegP array, iRegP match ) %{
+ array : R(read);
+ match : R(read);
+ IALU : R(2);
+ BR : R(2);
+ MS : R;
+%}
+
+// Convert FPU flags into +1,0,-1
+pipe_class floating_cmp( iRegI dst, regF src1, regF src2 ) %{
+ src1 : E(read);
+ src2 : E(read);
+ dst : E(write);
+ FA : R;
+ MS : R(2);
+ BR : R(2);
+%}
+
+// Compare for p < q, and conditionally add y
+pipe_class cadd_cmpltmask( iRegI p, iRegI q, iRegI y ) %{
+ p : E(read);
+ q : E(read);
+ y : E(read);
+ IALU : R(3);
+%}
+
+// Perform a compare, then move conditionally in a branch delay slot.
+pipe_class min_max( iRegI src2, iRegI srcdst ) %{
+ src2 : E(read);
+ srcdst : E(read);
+ IALU : R;
+ BR : R;
+%}
+
+// Define the class for the Nop node
+define %{
+ MachNop = ialu_nop;
+%}
+
+%}
+
+//----------INSTRUCTIONS-------------------------------------------------------
+
+//------------Special Nop instructions for bundling - no match rules-----------
+// Nop using the A0 functional unit (declares only its pipeline class, ialu_nop_A0)
+instruct Nop_A0() %{
+ ins_pipe(ialu_nop_A0);
+%}
+
+// Nop using the A1 functional unit (declares only its pipeline class, ialu_nop_A1)
+instruct Nop_A1( ) %{
+ ins_pipe(ialu_nop_A1);
+%}
+
+// Nop using the memory functional unit (declares only its pipeline class, mem_nop)
+instruct Nop_MS( ) %{
+ ins_pipe(mem_nop);
+%}
+
+// Nop using the floating add functional unit (declares only its pipeline class, fadd_nop)
+instruct Nop_FA( ) %{
+ ins_pipe(fadd_nop);
+%}
+
+// Nop using the branch functional unit (declares only its pipeline class, br_nop)
+instruct Nop_BR( ) %{
+ ins_pipe(br_nop);
+%}
+
+//----------Load/Store/Move Instructions---------------------------------------
+//----------Load Instructions--------------------------------------------------
+// Load Byte (8bit signed) into an int register with a single LDRSB
+instruct loadB(iRegI dst, memoryB mem) %{
+ match(Set dst (LoadB mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRSB $dst,$mem\t! byte -> int" %}
+ ins_encode %{
+ // High 32 bits are harmlessly set on Aarch64
+ __ ldrsb($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Byte (8bit signed) into a Long Register (folds the ConvI2L into the load)
+instruct loadB2L(iRegL dst, memoryB mem) %{
+ match(Set dst (ConvI2L (LoadB mem)));
+ ins_cost(MEMORY_REF_COST);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "LDRSB $dst,$mem\t! byte -> long" %}
+ ins_encode %{
+ __ ldrsb($dst$$Register, $mem$$Address);
+ %}
+#else
+ size(8);
+ format %{ "LDRSB $dst.lo,$mem\t! byte -> long\n\t"
+ "ASR $dst.hi,$dst.lo,31" %}
+ ins_encode %{
+ __ ldrsb($dst$$Register, $mem$$Address);
+ __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31)); // hi word = lo word shifted right arithmetically by 31
+ %}
+#endif
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into an int reg with a single LDRB
+instruct loadUB(iRegI dst, memoryB mem) %{
+ match(Set dst (LoadUB mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRB $dst,$mem\t! ubyte -> int" %}
+ ins_encode %{
+ __ ldrb($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into a Long Register (folds the ConvI2L into the load)
+instruct loadUB2L(iRegL dst, memoryB mem) %{
+ match(Set dst (ConvI2L (LoadUB mem)));
+ ins_cost(MEMORY_REF_COST);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "LDRB $dst,$mem\t! ubyte -> long" %}
+ ins_encode %{
+ __ ldrb($dst$$Register, $mem$$Address);
+ %}
+#else
+ size(8);
+ format %{ "LDRB $dst.lo,$mem\t! ubyte -> long\n\t"
+ "MOV $dst.hi,0" %}
+ ins_encode %{
+ __ ldrb($dst$$Register, $mem$$Address);
+ __ mov($dst$$Register->successor(), 0); // hi word = 0
+ %}
+#endif
+ ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Byte (8 bit UNsigned) with immediate mask into Long Register (load, then AND with the low 8 bits of mask)
+instruct loadUB2L_limmI(iRegL dst, memoryB mem, limmIlow8 mask) %{
+ match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
+
+#ifdef AARCH64
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+ size(8);
+ format %{ "LDRB $dst,$mem\t! ubyte -> long\n\t"
+ "AND $dst,$dst,$mask" %}
+ ins_encode %{
+ __ ldrb($dst$$Register, $mem$$Address);
+ __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8));
+ %}
+#else
+ ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
+ size(12);
+ format %{ "LDRB $dst.lo,$mem\t! ubyte -> long\n\t"
+ "MOV $dst.hi,0\n\t"
+ "AND $dst.lo,$dst.lo,$mask" %}
+ ins_encode %{
+ __ ldrb($dst$$Register, $mem$$Address);
+ __ mov($dst$$Register->successor(), 0); // hi word = 0
+ __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8)); // mask applied to the lo word only
+ %}
+#endif
+ ins_pipe(iload_mem);
+%}
+
+// Load Short (16bit signed)
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Signed short load from (AddP mem off): the constant offset is first added
+// to the base in $tmp, then the load uses $tmp as its base register.
+instruct loadSoff(iRegI dst, memoryScaledS mem, aimmX off, iRegP tmp) %{
+ match(Set dst (LoadS (AddP mem off)));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "LDRSH $dst,$mem+$off\t! short temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ ldrsh($dst$$Register, addr);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+#endif
+
+// Plain LoadS: one LDRSH (4 bytes) into an int register.
+instruct loadS(iRegI dst, memoryS mem) %{
+ match(Set dst (LoadS mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRSH $dst,$mem\t! short" %}
+ ins_encode %{
+ __ ldrsh($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Short (16 bit signed) to Byte (8 bit signed)
+// Matches (LoadS << twentyfour) >> twentyfour and replaces the whole
+// expression with a single signed byte load from the same address.
+instruct loadS2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{
+ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+
+ format %{ "LDRSB $dst,$mem\t! short -> byte" %}
+ ins_encode %{
+ // High 32 bits are harmlessly set on Aarch64
+ __ ldrsb($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Short (16bit signed) into a Long Register.
+// Fuses ConvI2L(LoadS mem); the 32-bit build derives the high word from the
+// sign of the loaded low word.
+instruct loadS2L(iRegL dst, memoryS mem) %{
+ match(Set dst (ConvI2L (LoadS mem)));
+ ins_cost(MEMORY_REF_COST);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "LDRSH $dst,$mem\t! short -> long" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ ldrsh(dst, $mem$$Address);
+ %}
+#else
+ size(8);
+ format %{ "LDRSH $dst.lo,$mem\t! short -> long\n\t"
+ "ASR $dst.hi,$dst.lo,31" %}
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ __ ldrsh(lo, $mem$$Address);
+ // hi = lo ASR 31: every bit of hi becomes the sign bit of lo.
+ __ mov(hi, AsmOperand(lo, asr, 31));
+ %}
+#endif
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned)
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Unsigned short/char load from (AddP mem off): the offset is folded into
+// $tmp, which then serves as the base of the actual LDRH.
+instruct loadUSoff(iRegI dst, memoryScaledS mem, aimmX off, iRegP tmp) %{
+ match(Set dst (LoadUS (AddP mem off)));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "LDRH $dst,$mem+$off\t! ushort/char temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ ldrh($dst$$Register, addr);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+// Plain LoadUS: one LDRH (4 bytes) into an int register.
+instruct loadUS(iRegI dst, memoryS mem) %{
+ match(Set dst (LoadUS mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRH $dst,$mem\t! ushort/char" %}
+ ins_encode %{
+ __ ldrh($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
+// Matches (LoadUS << twentyfour) >> twentyfour and replaces the whole
+// expression with a single signed byte load from the same address.
+instruct loadUS2B(iRegI dst, memoryB mem, immI_24 twentyfour) %{
+ match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRSB $dst,$mem\t! ushort -> byte" %}
+ ins_encode %{
+ __ ldrsb($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
+// Fuses ConvI2L(LoadUS mem). The load is unsigned (LDRH), so the listing
+// comment reads "ushort -> long", in line with loadUS/loadUS2B; the 32-bit
+// build zeroes the high word explicitly.
+instruct loadUS2L(iRegL dst, memoryS mem) %{
+ match(Set dst (ConvI2L (LoadUS mem)));
+ ins_cost(MEMORY_REF_COST);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "LDRH $dst,$mem\t! ushort -> long" %}
+ ins_encode %{
+ __ ldrh($dst$$Register, $mem$$Address);
+ %}
+#else
+ size(8);
+ format %{ "LDRH $dst.lo,$mem\t! ushort -> long\n\t"
+ "MOV $dst.hi, 0" %}
+ ins_encode %{
+ __ ldrh($dst$$Register, $mem$$Address);
+ // Unsigned source: the upper half of the pair is always zero.
+ __ mov($dst$$Register->successor(), 0);
+ %}
+#endif
+ ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) with mask 0xFF into a Long Register.
+// ConvI2L(AndI(LoadUS mem, mask)) is encoded as one unsigned byte load.
+instruct loadUS2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
+ match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+ ins_cost(MEMORY_REF_COST);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "LDRB $dst,$mem" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ ldrb(dst, $mem$$Address);
+ %}
+#else
+ size(8);
+ format %{ "LDRB $dst.lo,$mem\t! \n\t"
+ "MOV $dst.hi, 0" %}
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ __ ldrb(lo, $mem$$Address);
+ // The masked value is non-negative, so the high word is zero.
+ __ mov(hi, 0);
+ %}
+#endif
+ ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) with a immediate mask into a Long Register.
+// Fuses ConvI2L(AndI(LoadUS mem, mask)) into LDRH followed by AND.
+instruct loadUS2L_limmI(iRegL dst, memoryS mem, limmI mask) %{
+ match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+#ifdef AARCH64
+ ins_cost(MEMORY_REF_COST + 1*DEFAULT_COST);
+
+ size(8);
+ format %{ "LDRH $dst,$mem\t! ushort/char & mask -> long\n\t"
+ "AND $dst,$dst,$mask" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ ldrh(dst, $mem$$Address);
+ __ andr(dst, dst, (uintx)$mask$$constant);
+ %}
+#else
+ ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
+
+ size(12);
+ format %{ "LDRH $dst,$mem\t! ushort/char & mask -> long\n\t"
+ "MOV $dst.hi, 0\n\t"
+ "AND $dst,$dst,$mask" %}
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ __ ldrh(lo, $mem$$Address);
+ __ mov(hi, 0);
+ // The mask is applied to the low word only.
+ __ andr(lo, lo, $mask$$constant);
+ %}
+#endif
+ ins_pipe(iload_mem);
+%}
+
+// Load Integer
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Int load from (AddP mem off): the offset is folded into $tmp, which then
+// serves as the base of the actual ldr_s32.
+instruct loadIoff(iRegI dst, memoryScaledI mem, aimmX off, iRegP tmp) %{
+ match(Set dst (LoadI (AddP mem off)));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "ldr_s32 $dst,$mem+$off\t! int temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ ldr_s32($dst$$Register, addr);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+// Plain LoadI: one ldr_s32 (4 bytes) into an int register.
+instruct loadI(iRegI dst, memoryI mem) %{
+ match(Set dst (LoadI mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "ldr_s32 $dst,$mem\t! int" %}
+ ins_encode %{
+ __ ldr_s32($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// Load Integer to Byte (8 bit signed)
+// Matches (LoadI << twentyfour) >> twentyfour and replaces the whole
+// expression with a single signed byte load from the same address.
+instruct loadI2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{
+ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+
+ format %{ "LDRSB $dst,$mem\t! int -> byte" %}
+ ins_encode %{
+ __ ldrsb($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer to Unsigned Byte (8 bit UNsigned)
+// Matches AndI(LoadI mem, mask) for the immI_255 mask operand and replaces
+// it with a single unsigned byte load.
+instruct loadI2UB(iRegI dst, memoryB mem, immI_255 mask) %{
+ match(Set dst (AndI (LoadI mem) mask));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+
+ format %{ "LDRB $dst,$mem\t! int -> ubyte" %}
+ ins_encode %{
+ __ ldrb($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer to Short (16 bit signed)
+// Matches (LoadI << sixteen) >> sixteen and replaces the whole expression
+// with a single signed halfword load from the same address.
+instruct loadI2S(iRegI dst, memoryS mem, immI_16 sixteen) %{
+ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRSH $dst,$mem\t! int -> short" %}
+ ins_encode %{
+ __ ldrsh($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer to Unsigned Short (16 bit UNsigned)
+// Matches AndI(LoadI mem, mask) for the immI_65535 mask operand and replaces
+// it with a single unsigned halfword load.
+instruct loadI2US(iRegI dst, memoryS mem, immI_65535 mask) %{
+ match(Set dst (AndI (LoadI mem) mask));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRH $dst,$mem\t! int -> ushort/char" %}
+ ins_encode %{
+ __ ldrh($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer into a Long Register.
+// Fuses ConvI2L(LoadI mem); the 32-bit build derives the high word from the
+// sign of the loaded low word.
+instruct loadI2L(iRegL dst, memoryI mem) %{
+ match(Set dst (ConvI2L (LoadI mem)));
+#ifdef AARCH64
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRSW $dst.lo,$mem\t! int -> long" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ ldr_s32(dst, $mem$$Address);
+ %}
+#else
+ ins_cost(MEMORY_REF_COST);
+
+ size(8);
+ format %{ "LDR $dst.lo,$mem\t! int -> long\n\t"
+ "ASR $dst.hi,$dst.lo,31\t! int->long" %}
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ __ ldr(lo, $mem$$Address);
+ // hi = lo ASR 31: every bit of hi becomes the sign bit of lo.
+ __ mov(hi, AsmOperand(lo, asr, 31));
+ %}
+#endif
+ ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer with mask 0xFF into a Long Register.
+// ConvI2L(AndI(LoadI mem, mask)) is encoded as one unsigned byte load.
+instruct loadI2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
+ match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+#ifdef AARCH64
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDRB $dst.lo,$mem\t! int & 0xFF -> long" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ ldrb(dst, $mem$$Address);
+ %}
+#else
+ ins_cost(MEMORY_REF_COST);
+
+ size(8);
+ format %{ "LDRB $dst.lo,$mem\t! int & 0xFF -> long\n\t"
+ "MOV $dst.hi, 0" %}
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ __ ldrb(lo, $mem$$Address);
+ // The masked value is non-negative, so the high word is zero.
+ __ mov(hi, 0);
+ %}
+#endif
+ ins_pipe(iload_mem);
+%}
+
+// Load Integer with mask 0xFFFF into a Long Register.
+// ConvI2L(AndI(LoadI mem, mask)) is encoded as one unsigned halfword load.
+instruct loadI2L_immI_65535(iRegL dst, memoryS mem, immI_65535 mask) %{
+ match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+ ins_cost(MEMORY_REF_COST);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "LDRH $dst,$mem\t! int & 0xFFFF -> long" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ ldrh(dst, $mem$$Address);
+ %}
+#else
+ size(8);
+ format %{ "LDRH $dst,$mem\t! int & 0xFFFF -> long\n\t"
+ "MOV $dst.hi, 0" %}
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ __ ldrh(lo, $mem$$Address);
+ // The masked value is non-negative, so the high word is zero.
+ __ mov(hi, 0);
+ %}
+#endif
+ ins_pipe(iload_mask_mem);
+%}
+
+#ifdef AARCH64
+// Load Integer with an immediate mask into a Long Register.
+// Fuses ConvI2L(AndI(LoadI mem, mask)) into a load followed by an AND with
+// the mask constant cast to uintx.
+instruct loadI2L_limmI(iRegL dst, memoryI mem, limmI mask) %{
+ match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+ ins_cost(MEMORY_REF_COST + 1*DEFAULT_COST);
+
+ size(8);
+ format %{ "LDRSW $dst,$mem\t! int -> long\n\t"
+ "AND $dst,$dst,$mask" %}
+
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ ldr_s32(dst, $mem$$Address);
+ __ andr(dst, dst, (uintx)$mask$$constant);
+ %}
+ ins_pipe(iload_mem);
+%}
+#else
+// Load Integer with a 31-bit immediate mask into a Long Register.
+// Fuses ConvI2L(AndI(LoadI mem, mask)): load the low word, zero the high
+// word, then AND the low word with the mask.
+instruct loadI2L_limmU31(iRegL dst, memoryI mem, limmU31 mask) %{
+ match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+ ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
+
+ size(12);
+ format %{ "LDR $dst.lo,$mem\t! int -> long\n\t"
+ "MOV $dst.hi, 0\n\t"
+ "AND $dst,$dst,$mask" %}
+
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ __ ldr(lo, $mem$$Address);
+ __ mov(hi, 0);
+ // The mask is applied to the low word only.
+ __ andr(lo, lo, $mask$$constant);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+#ifdef AARCH64
+// Load Integer with mask into a Long Register
+// FIXME: use signedRegI mask, remove tmp?
+// Fallback for ConvI2L(AndI(LoadI mem, mask)) with an arbitrary immI mask:
+// the mask is materialised in $tmp with mov_slow and then ANDed in.
+// The listing prints operands in the order the instructions take them
+// ("$dst,$mem") and no longer calls the unrestricted immI mask "31-bit".
+instruct loadI2L_immI(iRegL dst, memoryI mem, immI mask, iRegI tmp) %{
+ match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+ effect(TEMP dst, TEMP tmp);
+
+ ins_cost(MEMORY_REF_COST + 3*DEFAULT_COST);
+ format %{ "LDRSW $dst,$mem\t! int & mask -> long\n\t"
+ "MOV_SLOW $tmp,$mask\n\t"
+ "AND $dst,$dst,$tmp" %}
+ ins_encode %{
+ __ ldrsw($dst$$Register, $mem$$Address);
+ __ mov_slow($tmp$$Register, $mask$$constant);
+ __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(iload_mem);
+%}
+#else
+// Load Integer with a 31-bit mask into a Long Register
+// FIXME: use iRegI mask, remove tmp?
+// Fallback for ConvI2L(AndI(LoadI mem, mask)) when the mask must be
+// materialised in $tmp with mov_slow. The listing prints operands in the
+// order the instructions take them ("$dst.lo,$mem") and names the low word
+// explicitly, as the sibling loadI2L_* rules do.
+instruct loadI2L_immU31(iRegL dst, memoryI mem, immU31 mask, iRegI tmp) %{
+ match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+ effect(TEMP dst, TEMP tmp);
+
+ ins_cost(MEMORY_REF_COST + 4*DEFAULT_COST);
+ size(20);
+ format %{ "LDR $dst.lo,$mem\t! int & 31-bit mask -> long\n\t"
+ "MOV $dst.hi, 0\n\t"
+ "MOV_SLOW $tmp,$mask\n\t"
+ "AND $dst.lo,$dst.lo,$tmp" %}
+ ins_encode %{
+ __ ldr($dst$$Register, $mem$$Address);
+ // High word is zero; only the low word is masked below.
+ __ mov($dst$$Register->successor(), 0);
+ __ mov_slow($tmp$$Register, $mask$$constant);
+ __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+// Load Unsigned Integer into a Long Register
+// Matches AndL(ConvI2L(LoadI mem), mask) for the immL_32bits mask operand
+// and encodes it as a load whose upper 32 bits end up zero.
+instruct loadUI2L(iRegL dst, memoryI mem, immL_32bits mask) %{
+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+ ins_cost(MEMORY_REF_COST);
+
+#ifdef AARCH64
+//size(4);
+ // NOTE(review): the size clause above is commented out in this branch only;
+ // the reason is not visible here.
+ format %{ "LDR_w $dst,$mem\t! uint -> long" %}
+ ins_encode %{
+ __ ldr_w($dst$$Register, $mem$$Address);
+ %}
+#else
+ size(8);
+ format %{ "LDR $dst.lo,$mem\t! uint -> long\n\t"
+ "MOV $dst.hi,0" %}
+ ins_encode %{
+ __ ldr($dst$$Register, $mem$$Address);
+ // Unsigned source: the upper half of the pair is always zero.
+ __ mov($dst$$Register->successor(), 0);
+ %}
+#endif
+ ins_pipe(iload_mem);
+%}
+
+// Load Long
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Long load from (AddP mem off): the offset is folded into $tmp, which then
+// serves as the base of the actual LDR.
+instruct loadLoff(iRegLd dst, memoryScaledL mem, aimmX off, iRegP tmp) %{
+ match(Set dst (LoadL (AddP mem off)));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "LDR $dst,$mem+$off\t! long temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ ldr($dst$$Register, addr);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+// Single-instruction LoadL via ldr_64. On the 32-bit build the rule applies
+// only to loads that do not require atomic access; $dst is declared TEMP.
+instruct loadL(iRegLd dst, memoryL mem ) %{
+#ifdef AARCH64
+ // already atomic for Aarch64
+#else
+ predicate(!((LoadLNode*)n)->require_atomic_access());
+#endif
+ match(Set dst (LoadL mem));
+ effect(TEMP dst);
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "ldr_64 $dst,$mem\t! long" %}
+ ins_encode %{
+ __ ldr_64($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+#ifndef AARCH64
+// Non-atomic LoadL done as two word loads (low word at $mem, high word at
+// $mem+4). The load that overwrites the base register is issued last.
+instruct loadL_2instr(iRegL dst, memorylong mem ) %{
+ predicate(!((LoadLNode*)n)->require_atomic_access());
+ match(Set dst (LoadL mem));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+
+ size(8);
+ format %{ "LDR $dst.lo,$mem \t! long order of instrs reversed if $dst.lo == base($mem)\n\t"
+ "LDR $dst.hi,$mem+4 or $mem" %}
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ Address lo_addr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ Address hi_addr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
+
+ if (lo != reg_to_register_object($mem$$base)) {
+ // Common case: the low word does not clobber the base.
+ __ ldr(lo, lo_addr);
+ __ ldr(hi, hi_addr);
+ } else {
+ // lo aliases the base: fetch the high word while the base is intact.
+ __ ldr(hi, hi_addr);
+ __ ldr(lo, lo_addr);
+ }
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// LoadL for nodes that require atomic access, using a register-indirect
+// address: one LDMIA fills the two registers of the $dst pair.
+instruct loadL_volatile(iRegL dst, indirect mem ) %{
+ predicate(((LoadLNode*)n)->require_atomic_access());
+ match(Set dst (LoadL mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDMIA $dst,$mem\t! long" %}
+ ins_encode %{
+ // FIXME: why is ldmia considered atomic? Should be ldrexd
+ // Register set = { $dst, register with encoding $dst + 1 }.
+ RegisterSet set($dst$$Register);
+ set = set | reg_to_register_object($dst$$reg + 1);
+ __ ldmia(reg_to_register_object($mem$$base), set);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// LoadL for nodes that require atomic access, routed through a floating
+// point register: FLDD reads the doubleword into S14 and FMRRD then copies
+// it into the $dst register pair.
+instruct loadL_volatile_fp(iRegL dst, memoryD mem ) %{
+ predicate(((LoadLNode*)n)->require_atomic_access());
+ match(Set dst (LoadL mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(8);
+ // One listing line per emitted instruction. The original strings had no
+ // separator between the two mnemonics and ended in a mistyped "\n't".
+ format %{ "FLDD S14, $mem\n\t"
+ "FMRRD $dst, S14\t! long" %}
+ ins_encode %{
+ __ fldd(S14, $mem$$Address);
+ __ fmrrd($dst$$Register, $dst$$Register->successor(), S14);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// LoadL_unaligned done as two word loads (low word at $mem, high word at
+// $mem+4). The load that overwrites the base register is issued last.
+instruct loadL_unaligned(iRegL dst, memorylong mem ) %{
+ match(Set dst (LoadL_unaligned mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(8);
+ format %{ "LDR $dst.lo,$mem\t! long order of instrs reversed if $dst.lo == base($mem)\n\t"
+ "LDR $dst.hi,$mem+4" %}
+ ins_encode %{
+ Register lo = $dst$$Register;
+ Register hi = lo->successor();
+ Address lo_addr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ Address hi_addr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
+
+ if (lo != reg_to_register_object($mem$$base)) {
+ // Common case: the low word does not clobber the base.
+ __ ldr(lo, lo_addr);
+ __ ldr(hi, hi_addr);
+ } else {
+ // lo aliases the base: fetch the high word while the base is intact.
+ __ ldr(hi, hi_addr);
+ __ ldr(lo, lo_addr);
+ }
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif // !AARCH64
+
+// Load Range
+// Matches LoadRange and encodes it as one ldr_u32 (4 bytes).
+instruct loadRange(iRegI dst, memoryI mem) %{
+ match(Set dst (LoadRange mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "LDR_u32 $dst,$mem\t! range" %}
+ ins_encode %{
+ __ ldr_u32($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// Load Pointer
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Pointer load from (AddP mem off): the offset is folded into $tmp, which
+// then serves as the base of the actual LDR.
+instruct loadPoff(iRegP dst, memoryScaledP mem, aimmX off, iRegP tmp) %{
+ match(Set dst (LoadP (AddP mem off)));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "LDR $dst,$mem+$off\t! ptr temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ ldr($dst$$Register, addr);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+// Plain LoadP: one LDR (4 bytes) into a pointer register.
+instruct loadP(iRegP dst, memoryP mem) %{
+ match(Set dst (LoadP mem));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "LDR $dst,$mem\t! ptr" %}
+ ins_encode %{
+ __ ldr($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+#ifdef XXX
+// FIXME XXXX
+// This rule sits under "#ifdef XXX". It loads a pointer into $tmp and then
+// moves it into the SPRegP destination, at a cost one above MEMORY_REF_COST.
+//instruct loadSP(iRegP dst, memoryP mem) %{
+instruct loadSP(SPRegP dst, memoryP mem, iRegP tmp) %{
+ match(Set dst (LoadP mem));
+ effect(TEMP tmp);
+ ins_cost(MEMORY_REF_COST+1);
+ size(8);
+
+ format %{ "LDR $tmp,$mem\t! ptr\n\t"
+ "MOV $dst,$tmp\t! ptr" %}
+ ins_encode %{
+ __ ldr($tmp$$Register, $mem$$Address);
+ __ mov($dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+#ifdef _LP64
+// Load Compressed Pointer
+
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Compressed pointer load from (AddP mem off): the offset is folded into
+// $tmp, which then serves as the base of the actual ldr_u32.
+instruct loadNoff(iRegN dst, memoryScaledI mem, aimmX off, iRegP tmp) %{
+ match(Set dst (LoadN (AddP mem off)));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "ldr_u32 $dst,$mem+$off\t! compressed ptr temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ ldr_u32($dst$$Register, addr);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// Plain LoadN (compressed pointer): one ldr_u32 (4 bytes).
+instruct loadN(iRegN dst, memoryI mem) %{
+ match(Set dst (LoadN mem));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "ldr_u32 $dst,$mem\t! compressed ptr" %}
+ ins_encode %{
+ __ ldr_u32($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+// Load Klass Pointer
+// Matches LoadKlass and encodes it as one LDR (4 bytes).
+// NOTE(review): the operand class is memoryI although loadP uses memoryP for
+// the same LDR - confirm this is intended.
+instruct loadKlass(iRegP dst, memoryI mem) %{
+ match(Set dst (LoadKlass mem));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "LDR $dst,$mem\t! klass ptr" %}
+ ins_encode %{
+ __ ldr($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+
+#ifdef _LP64
+// Load narrow Klass Pointer
+// Matches LoadNKlass and encodes it as one ldr_u32 (4 bytes).
+instruct loadNKlass(iRegN dst, memoryI mem) %{
+ match(Set dst (LoadNKlass mem));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "ldr_u32 $dst,$mem\t! compressed klass ptr" %}
+ ins_encode %{
+ __ ldr_u32($dst$$Register, $mem$$Address);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Double load from (AddP mem off): the offset is folded into $tmp, which
+// then serves as the base of the actual ldr_d.
+instruct loadDoff(regD dst, memoryScaledD mem, aimmX off, iRegP tmp) %{
+ match(Set dst (LoadD (AddP mem off)));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "ldr $dst,$mem+$off\t! double temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ ldr_d($dst$$FloatRegister, addr);
+ %}
+ ins_pipe(floadD_mem);
+%}
+#endif
+
+// Plain LoadD: one ldr_double (4 bytes) into a double register.
+instruct loadD(regD dst, memoryD mem) %{
+ match(Set dst (LoadD mem));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ // FIXME: needs to be atomic, but ARMv7 A.R.M. guarantees
+ // only LDREXD and STREXD are 64-bit single-copy atomic
+ format %{ "FLDD $dst,$mem" %}
+ ins_encode %{
+ __ ldr_double($dst$$FloatRegister, $mem$$Address);
+ %}
+ ins_pipe(floadD_mem);
+%}
+
+#ifndef AARCH64
+// Load Double - UNaligned.
+// Reads the double as two single-precision loads: $mem into $dst and
+// $mem+4 into its successor register.
+instruct loadD_unaligned(regD_low dst, memoryF2 mem ) %{
+ match(Set dst (LoadD_unaligned mem));
+ ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
+ size(8);
+ format %{ "FLDS $dst.lo,$mem\t! misaligned double\n"
+ "\tFLDS $dst.hi,$mem+4\t!" %}
+ ins_encode %{
+ Address lo_addr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ flds($dst$$FloatRegister, lo_addr);
+ Address hi_addr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
+ __ flds($dst$$FloatRegister->successor(), hi_addr);
+ %}
+ ins_pipe(iload_mem);
+%}
+#endif
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Float load from (AddP mem off): the offset is folded into $tmp, which
+// then serves as the base of the actual ldr_s.
+instruct loadFoff(regF dst, memoryScaledF mem, aimmX off, iRegP tmp) %{
+ match(Set dst (LoadF (AddP mem off)));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "ldr $dst,$mem+$off\t! float temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ ldr_s($dst$$FloatRegister, addr);
+ %}
+ ins_pipe(floadF_mem);
+%}
+#endif
+
+// Plain LoadF: one ldr_float (4 bytes) into a float register.
+instruct loadF(regF dst, memoryF mem) %{
+ match(Set dst (LoadF mem));
+
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "FLDS $dst,$mem" %}
+ ins_encode %{
+ __ ldr_float($dst$$FloatRegister, $mem$$Address);
+ %}
+ ins_pipe(floadF_mem);
+%}
+
+#ifdef AARCH64
+// Int constant accepted by the limmI operand: built with one ORR_w of the
+// immediate against ZR.
+instruct load_limmI(iRegI dst, limmI src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST + 1); // + 1 because MOV is preferred
+ format %{ "ORR_w $dst, ZR, $src\t! int" %}
+ ins_encode %{
+ __ orr_w($dst$$Register, ZR, (uintx)$src$$constant);
+ %}
+ ins_pipe(ialu_imm);
+%}
+#endif
+
+// Load Constant
+// General int constant: handed to mov_slow. Costed at 3/2 of DEFAULT_COST,
+// above the immediate forms that declare DEFAULT_COST.
+instruct loadConI( iRegI dst, immI src ) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 3/2);
+ format %{ "MOV_SLOW $dst, $src" %}
+ ins_encode %{
+ __ mov_slow($dst$$Register, $src$$constant);
+ %}
+ ins_pipe(ialu_hi_lo_reg);
+%}
+
+// Int constant accepted by the immIMov operand: a single MOV (4 bytes).
+instruct loadConIMov( iRegI dst, immIMov src ) %{
+ match(Set dst src);
+ size(4);
+ format %{ "MOV $dst, $src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$constant);
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+#ifndef AARCH64
+// Int constant accepted by the immIRotn operand: a single MVN (4 bytes) of
+// the bitwise complement of the constant.
+instruct loadConIMovn( iRegI dst, immIRotn src ) %{
+ match(Set dst src);
+ size(4);
+ format %{ "MVN $dst, ~$src" %}
+ ins_encode %{
+ __ mvn($dst$$Register, ~$src$$constant);
+ %}
+ ins_pipe(ialu_imm_n);
+%}
+#endif
+
+// Int constant accepted by the immI16 operand: one instruction (4 bytes),
+// mov_w on AArch64 and movw on the 32-bit build.
+instruct loadConI16( iRegI dst, immI16 src ) %{
+ match(Set dst src);
+ size(4);
+#ifdef AARCH64
+ format %{ "MOVZ_w $dst, $src" %}
+#else
+ format %{ "MOVW $dst, $src" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ __ mov_w($dst$$Register, $src$$constant);
+#else
+ __ movw($dst$$Register, $src$$constant);
+#endif
+ %}
+ ins_pipe(ialu_imm_n);
+%}
+
+// Pointer constant. The relocation type of the constant operand selects the
+// emitter: mov_oop for oops, mov_metadata for metadata, mov_slow otherwise.
+instruct loadConP(iRegP dst, immP src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 3/2);
+ format %{ "MOV_SLOW $dst,$src\t!ptr" %}
+ ins_encode %{
+ intptr_t ptr_bits = $src$$constant;
+ switch (_opnds[1]->constant_reloc()) {
+ case relocInfo::oop_type:
+ __ mov_oop($dst$$Register, (jobject)ptr_bits);
+ break;
+ case relocInfo::metadata_type:
+ __ mov_metadata($dst$$Register, (Metadata*)ptr_bits);
+ break;
+ default:
+ // Any other relocation type: emit the raw value.
+ __ mov_slow($dst$$Register, ptr_bits);
+ break;
+ }
+ %}
+ ins_pipe(loadConP);
+%}
+
+
+// Pointer constant accepted by the immP_poll operand: handed to mov_slow
+// unconditionally, with no relocation dispatch as in loadConP.
+instruct loadConP_poll(iRegP dst, immP_poll src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST);
+ format %{ "MOV_SLOW $dst,$src\t!ptr" %}
+ ins_encode %{
+ __ mov_slow($dst$$Register, $src$$constant);
+ %}
+ ins_pipe(loadConP_poll);
+%}
+
+#ifdef AARCH64
+// Pointer constant accepted by the immP0 operand: copied from ZR.
+instruct loadConP0(iRegP dst, immP0 src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST);
+ format %{ "MOV $dst,ZR\t!ptr" %}
+ ins_encode %{
+ __ mov($dst$$Register, ZR);
+ %}
+ ins_pipe(ialu_none);
+%}
+
+// Compressed oop constant: emitted through set_narrow_oop.
+instruct loadConN(iRegN dst, immN src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 3/2);
+ format %{ "SET $dst,$src\t! compressed ptr" %}
+ ins_encode %{
+ // FIXME: use $constanttablebase?
+ jobject oop_handle = (jobject)$src$$constant;
+ __ set_narrow_oop($dst$$Register, oop_handle);
+ %}
+ ins_pipe(ialu_hi_lo_reg);
+%}
+
+// Compressed pointer constant accepted by the immN0 operand: copied from ZR.
+instruct loadConN0(iRegN dst, immN0 src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST);
+ format %{ "MOV $dst,ZR\t! compressed ptr" %}
+ ins_encode %{
+ __ mov($dst$$Register, ZR);
+ %}
+ ins_pipe(ialu_none);
+%}
+
+// Compressed klass constant: emitted through set_narrow_klass.
+instruct loadConNKlass(iRegN dst, immNKlass src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 3/2);
+ format %{ "SET $dst,$src\t! compressed klass ptr" %}
+ ins_encode %{
+ // FIXME: use $constanttablebase?
+ Klass* klass = (Klass*)$src$$constant;
+ __ set_narrow_klass($dst$$Register, klass);
+ %}
+ ins_pipe(ialu_hi_lo_reg);
+%}
+
+// Long constant accepted by the limmL operand: built with one ORR of the
+// immediate against ZR.
+instruct load_limmL(iRegL dst, limmL src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST);
+ format %{ "ORR $dst, ZR, $src\t! long" %}
+ ins_encode %{
+ __ orr($dst$$Register, ZR, (uintx)$src$$constant);
+ %}
+ ins_pipe(loadConL);
+%}
+// Long constant accepted by the immLMov operand: emitted with mov.
+instruct load_immLMov(iRegL dst, immLMov src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST);
+ format %{ "MOV $dst, $src\t! long" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$constant);
+ %}
+ ins_pipe(loadConL);
+%}
+// General long constant (AArch64 build): handed to mov_slow. The cost of
+// 4 * DEFAULT_COST is a worst-case figure, above load_limmL/load_immLMov.
+instruct loadConL(iRegL dst, immL src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 4); // worst case
+ format %{ "mov_slow $dst, $src\t! long" %}
+ ins_encode %{
+ // FIXME: use $constanttablebase?
+ __ mov_slow($dst$$Register, $src$$constant);
+ %}
+ ins_pipe(loadConL);
+%}
+#else
+// General long constant (32-bit build): the two halves of the constant are
+// handed to mov_slow separately, low half into $dst and high half into the
+// register with the next encoding.
+instruct loadConL(iRegL dst, immL src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 4);
+ format %{ "MOV_SLOW $dst.lo, $src & 0x0FFFFFFFFL \t! long\n\t"
+ "MOV_SLOW $dst.hi, $src >> 32" %}
+ ins_encode %{
+ Register lo = reg_to_register_object($dst$$reg);
+ Register hi = reg_to_register_object($dst$$reg + 1);
+ __ mov_slow(lo, $src$$constant & 0x0FFFFFFFFL);
+ // Cast to julong before shifting so the shift is unsigned.
+ __ mov_slow(hi, ((julong)($src$$constant)) >> 32);
+ %}
+ ins_pipe(loadConL);
+%}
+
+// Long constant accepted by the immL16 operand: MOVW of the constant into
+// the low word and MOVW 0 into the high word (8 bytes).
+// The listing no longer ends in "\n\t", which left an empty continuation
+// line after the last instruction.
+instruct loadConL16( iRegL dst, immL16 src ) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 2);
+
+ size(8);
+ format %{ "MOVW $dst.lo, $src \n\t"
+ "MOVW $dst.hi, 0" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant);
+ __ movw($dst$$Register->successor(), 0);
+ %}
+ ins_pipe(ialu_imm);
+%}
+#endif
+
+// Float constant accepted by the imm8F operand: one FCONSTS (4 bytes) whose
+// immediate comes from Assembler::float_num(...).imm8().
+instruct loadConF_imm8(regF dst, imm8F src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST);
+ size(4);
+
+ format %{ "FCONSTS $dst, $src"%}
+
+ ins_encode %{
+ __ fconsts($dst$$FloatRegister, Assembler::float_num($src$$constant).imm8());
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+#ifdef AARCH64
+// Float constant delivered in an integer register: the bit pattern of the
+// float is read back through a union and handed to mov_slow.
+instruct loadIConF(iRegI dst, immF src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 2);
+
+ format %{ "MOV_SLOW $dst, $src\t! loadIConF" %}
+
+ ins_encode %{
+ // FIXME revisit once 6961697 is in
+ union {
+ jfloat as_float;
+ int as_bits;
+ } pun;
+ pun.as_float = $src$$constant;
+ __ mov_slow($dst$$Register, pun.as_bits);
+ %}
+ ins_pipe(ialu_imm);
+%}
+#endif
+
+// General float constant: the bit pattern is built in the integer TEMP $tmp
+// with mov_slow and then transferred to $dst with FMSR.
+instruct loadConF(regF dst, immF src, iRegI tmp) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST * 2);
+ effect(TEMP tmp);
+ size(3*4);
+
+ format %{ "MOV_SLOW $tmp, $src\n\t"
+ "FMSR $dst, $tmp"%}
+
+ ins_encode %{
+ // FIXME revisit once 6961697 is in
+ union {
+ jfloat as_float;
+ int as_bits;
+ } pun;
+ pun.as_float = $src$$constant;
+ Register scratch = $tmp$$Register;
+ __ mov_slow(scratch, pun.as_bits);
+ __ fmsr($dst$$FloatRegister, scratch);
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+// Double constant accepted by the imm8D operand: one FCONSTD (4 bytes) whose
+// immediate comes from Assembler::double_num(...).imm8().
+instruct loadConD_imm8(regD dst, imm8D src) %{
+ match(Set dst src);
+ ins_cost(DEFAULT_COST);
+ size(4);
+
+ format %{ "FCONSTD $dst, $src"%}
+
+ ins_encode %{
+ __ fconstd($dst$$FloatRegister, Assembler::double_num($src$$constant).imm8());
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+// General double constant, loaded from the constant table. When the table
+// offset fails is_memoryD(), the full address is first formed in $tmp with
+// add_slow and the load then uses offset 0.
+instruct loadConD(regD dst, immD src, iRegP tmp) %{
+ match(Set dst src);
+ effect(TEMP tmp);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "FLDD $dst, [$constanttablebase + $constantoffset]\t! load from constant table: double=$src" %}
+
+ ins_encode %{
+ Register table = $constanttablebase;
+ int disp = $constantoffset($src);
+ // can't use a predicate in load constant instructs, so the range check
+ // is made here at emit time
+ if (!is_memoryD(disp)) {
+ __ add_slow($tmp$$Register, table, disp);
+ table = $tmp$$Register;
+ disp = 0;
+ }
+ __ ldr_double($dst$$FloatRegister, Address(table, disp));
+ %}
+ ins_pipe(loadConFD);
+%}
+
+// Prefetch instructions.
+// Must be safe to execute with invalid address (cannot fault).
+
+// PrefetchAllocation when os::is_MP() is true. The 32-bit build emits PLDW;
+// the AArch64 build emits PRFM pstl1keep. The listing is now selected per
+// build (as loadConI16 does) so it names the instruction actually emitted.
+instruct prefetchAlloc_mp( memoryP mem ) %{
+ predicate(os::is_MP());
+ match( PrefetchAllocation mem );
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+#ifdef AARCH64
+ format %{ "PRFM pstl1keep,$mem\t! Prefetch allocation" %}
+#else
+ format %{ "PLDW $mem\t! Prefetch allocation" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ __ prfm(pstl1keep, $mem$$Address);
+#else
+ __ pldw($mem$$Address);
+#endif
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// PrefetchAllocation when os::is_MP() is false. The 32-bit build emits PLD;
+// the AArch64 build emits PRFM pstl1keep. The listing is now selected per
+// build (as loadConI16 does) so it names the instruction actually emitted.
+instruct prefetchAlloc_sp( memoryP mem ) %{
+ predicate(!os::is_MP());
+ match( PrefetchAllocation mem );
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+#ifdef AARCH64
+ format %{ "PRFM pstl1keep,$mem\t! Prefetch allocation" %}
+#else
+ format %{ "PLD $mem\t! Prefetch allocation" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ __ prfm(pstl1keep, $mem$$Address);
+#else
+ __ pld($mem$$Address);
+#endif
+ %}
+ ins_pipe(iload_mem);
+%}
+
+//----------Store Instructions-------------------------------------------------
+// Store Byte
+// Matches StoreB and encodes it as one STRB (4 bytes).
+instruct storeB(memoryB mem, store_RegI src) %{
+ match(Set mem (StoreB mem src));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "STRB $src,$mem\t! byte" %}
+ ins_encode %{
+ __ strb($src$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+// Card-mark store: matches StoreCM and uses the same one-STRB encoding as
+// storeB.
+instruct storeCM(memoryB mem, store_RegI src) %{
+ match(Set mem (StoreCM mem src));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "STRB $src,$mem\t! CMS card-mark byte" %}
+ ins_encode %{
+ __ strb($src$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+// Store Char/Short
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Char/short store to (AddP mem off): the offset is folded into $tmp, which
+// then serves as the base of the actual STRH.
+instruct storeCoff(store_RegI src, memoryScaledS mem, aimmX off, iRegP tmp) %{
+ match(Set mem (StoreC (AddP mem off) src));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "STRH $src,$mem+$off\t! short temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ strh($src$$Register, addr);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif
+
+// Plain StoreC (char/short): one STRH (4 bytes).
+instruct storeC(memoryS mem, store_RegI src) %{
+ match(Set mem (StoreC mem src));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "STRH $src,$mem\t! short" %}
+ ins_encode %{
+ __ strh($src$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+// Store Integer
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// Int store to (AddP mem off): the offset is folded into $tmp, which then
+// serves as the base of the actual str_32.
+instruct storeIoff(store_RegI src, memoryScaledI mem, aimmX off, iRegP tmp) %{
+ match(Set mem (StoreI (AddP mem off) src));
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ effect(TEMP tmp);
+ size(4 * 2);
+
+ format %{ "str_32 $src,$mem+$off\t! int temp=$tmp" %}
+ ins_encode %{
+ // $tmp = base($mem) + $off
+ Register sum = $tmp$$Register;
+ __ add(sum, reg_to_register_object($mem$$base), $off$$constant);
+ // Re-create $mem's address with $tmp substituted for the base.
+ Address addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+ __ str_32($src$$Register, addr);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif
+
+// Plain StoreI: one str_32 (4 bytes).
+instruct storeI(memoryI mem, store_RegI src) %{
+ match(Set mem (StoreI mem src));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "str_32 $src,$mem" %}
+ ins_encode %{
+ __ str_32($src$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+// Store Long
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// StoreL form (built only under AARCH64) for an address of the shape
+// (AddP mem off): $tmp receives base+off, then str_64 stores through $tmp
+// combined with $mem's index, scale and displacement.
+instruct storeLoff(store_RegLd src, memoryScaledL mem, aimmX off, iRegP tmp) %{
+ match(Set mem (StoreL (AddP mem off) src));
+ effect(TEMP tmp);
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ size(8); // 4 * 2: the ADD plus the store
+
+ format %{ "str_64 $src,$mem+$off\t! long temp=$tmp" %}
+ ins_encode %{
+   // Fold the constant offset into the base register, result in tmp.
+   __ add($tmp$$Register, reg_to_register_object($mem$$base), $off$$constant);
+   // Rebuild the operand's address with tmp standing in for the base.
+   Address off_addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+   __ str_64($src$$Register, off_addr);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif
+
+// Long store: matches (StoreL mem src) and emits a single str_64 of $src
+// to $mem. In non-AARCH64 builds the predicate limits this rule to stores
+// that do not require atomic access; the AARCH64 build has no predicate.
+instruct storeL(memoryL mem, store_RegLd src) %{
+#ifdef AARCH64
+ // already atomic for Aarch64
+#else
+ predicate(!((StoreLNode*)n)->require_atomic_access());
+#endif
+ match(Set mem (StoreL mem src));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ // One instruction, one listing line: the former trailing "\n\t" only
+ // produced an empty continuation line in the printed assembly.
+ format %{ "str_64 $src,$mem\t! long" %}
+
+ ins_encode %{
+ __ str_64($src$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+#ifndef AARCH64
+// Long store without atomicity requirement, done as two STRs: $src goes to
+// $mem and its successor register goes to $mem with the displacement
+// increased by 4 (shown as $src.lo / $src.hi in the listing).
+instruct storeL_2instr(memorylong mem, iRegL src) %{
+ predicate(!((StoreLNode*)n)->require_atomic_access());
+ match(Set mem (StoreL mem src));
+ size(8);
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+
+ format %{ "STR $src.lo,$mem\t! long\n\t"
+ "STR $src.hi,$mem+4" %}
+
+ ins_encode %{
+   // First word at the operand's own displacement.
+   __ str($src$$Register,
+          Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
+   // Second word comes from the successor register, 4 bytes further on.
+   __ str($src$$Register->successor(),
+          Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+// Long store that requires atomic access, through an `indirect` address:
+// emits one STMIA storing $src and the register numbered $src+1 at the
+// base register of $mem.
+instruct storeL_volatile(indirect mem, iRegL src) %{
+ predicate(((StoreLNode*)n)->require_atomic_access());
+ match(Set mem (StoreL mem src));
+ size(4);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "STMIA $src,$mem\t! long" %}
+ ins_encode %{
+   // FIXME: why is stmia considered atomic? Should be strexd
+   // Register set = { $src, register with the next encoding }.
+   RegisterSet reg_pair = RegisterSet($src$$Register) | reg_to_register_object($src$$reg + 1);
+   __ stmia(reg_to_register_object($mem$$base), reg_pair);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif // !AARCH64
+
+#ifndef AARCH64
+// Long store that requires atomic access, via a floating-point register:
+// FMDRR packs $src and its successor register into S14, then FSTD stores
+// S14 to $mem. S14 is hard-coded and not listed as an effect of this rule.
+// NOTE(review): presumably S14 is reserved as a scratch register - confirm.
+instruct storeL_volatile_fp(memoryD mem, iRegL src) %{
+ predicate(((StoreLNode*)n)->require_atomic_access());
+ match(Set mem (StoreL mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(8);
+ format %{ "FMDRR S14, $src\t! long \n\t"
+ "FSTD S14, $mem" %}
+ ins_encode %{
+ __ fmdrr(S14, $src$$Register, $src$$Register->successor());
+ __ fstd(S14, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif
+
+#ifdef XXX
+// Move SP Pointer
+//instruct movSP(sp_ptr_RegP dst, SPRegP src) %{
+//instruct movSP(iRegP dst, SPRegP src) %{
+// Disabled rule (only compiled under #ifdef XXX): moves an SPRegP source
+// into $dst. The encoding asserts false, so it is not expected to be
+// reached.
+// NOTE(review): it emits two MOVs while size(4) is declared - reconcile
+// before ever enabling this rule.
+instruct movSP(store_ptr_RegP dst, SPRegP src) %{
+ match(Set dst src);
+//predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con == TypeFunc::FramePtr);
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "MOV $dst,$src\t! SP ptr\n\t" %}
+ ins_encode %{
+ assert(false, "XXX1 got here");
+ __ mov($dst$$Register, SP);
+ __ mov($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif
+
+#ifdef AARCH64
+// FIXME
+// Store SP Pointer
+// Stores the stack pointer (AARCH64 builds only): selected when the stored
+// value is a Proj whose _con is TypeFunc::FramePtr. SP is first copied into
+// $tmp and $tmp is then stored, giving two instructions (size 8).
+instruct storeSP(memoryP mem, SPRegP src, iRegP tmp) %{
+ match(Set mem (StoreP mem src));
+ predicate(_kids[1]->_leaf->is_Proj() && _kids[1]->_leaf->as_Proj()->_con == TypeFunc::FramePtr);
+ // Multiple StoreP rules, different only in register mask.
+ // Matcher makes the last always valid. The others will
+ // only be valid if they cost less than the last valid
+ // rule. So cost(rule1) < cost(rule2) < cost(last)
+ // Unlike immediates, register constraints are not checked
+ // at match time.
+ ins_cost(MEMORY_REF_COST+DEFAULT_COST+4);
+ effect(TEMP tmp);
+ size(8);
+
+ format %{ "MOV $tmp,$src\t! SP ptr\n\t"
+ "STR $tmp,$mem\t! SP ptr" %}
+ ins_encode %{
+ // The source operand must have been allocated to SP itself.
+ assert($src$$Register == SP, "SP expected");
+ __ mov($tmp$$Register, $src$$Register);
+ __ str($tmp$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_spORreg); // FIXME
+%}
+#endif // AARCH64
+
+// Store Pointer
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// StoreP form (built only under AARCH64) for an address of the shape
+// (AddP mem off), excluded when the stored value is the FramePtr
+// projection. $tmp receives base+off, then STR stores through $tmp combined
+// with $mem's index, scale and displacement.
+instruct storePoff(store_ptr_RegP src, memoryScaledP mem, aimmX off, iRegP tmp) %{
+ predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con != TypeFunc::FramePtr);
+ match(Set mem (StoreP (AddP mem off) src));
+ effect(TEMP tmp);
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ size(8); // 4 * 2: the ADD plus the store
+
+ format %{ "STR $src,$mem+$off\t! ptr temp=$tmp" %}
+ ins_encode %{
+   // Fold the constant offset into the base register, result in tmp.
+   __ add($tmp$$Register, reg_to_register_object($mem$$base), $off$$constant);
+   // Rebuild the operand's address with tmp standing in for the base.
+   Address off_addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+   __ str($src$$Register, off_addr);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif
+
+// Pointer store: matches (StoreP mem src) and emits a single STR.
+// Under AARCH64 the predicate excludes stores of the FramePtr projection
+// (the Proj whose _con is TypeFunc::FramePtr).
+instruct storeP(memoryP mem, store_ptr_RegP src) %{
+ match(Set mem (StoreP mem src));
+#ifdef AARCH64
+ predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con != TypeFunc::FramePtr);
+#endif
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "STR $src,$mem\t! ptr" %}
+ ins_encode %{
+ __ str($src$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_spORreg);
+%}
+
+#ifdef AARCH64
+// Store NULL Pointer
+// AARCH64-only: stores the immP0 pointer constant by storing ZR directly,
+// so no register has to be loaded with the constant.
+instruct storeP0(memoryP mem, immP0 src) %{
+ match(Set mem (StoreP mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "STR ZR,$mem\t! ptr" %}
+ ins_encode %{
+ __ str(ZR, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_spORreg);
+%}
+#endif // AARCH64
+
+#ifdef _LP64
+// Store Compressed Pointer
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// StoreN form (built only under AARCH64 and _LP64) for an address of the
+// shape (AddP mem off): $tmp receives base+off, then str_32 stores through
+// $tmp combined with $mem's index, scale and displacement.
+instruct storeNoff(store_RegN src, memoryScaledI mem, aimmX off, iRegP tmp) %{
+ match(Set mem (StoreN (AddP mem off) src));
+ effect(TEMP tmp);
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ size(8); // 4 * 2: the ADD plus the store
+
+ format %{ "str_32 $src,$mem+$off\t! compressed ptr temp=$tmp" %}
+ ins_encode %{
+   // Fold the constant offset into the base register, result in tmp.
+   __ add($tmp$$Register, reg_to_register_object($mem$$base), $off$$constant);
+   // Rebuild the operand's address with tmp standing in for the base.
+   Address off_addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+   __ str_32($src$$Register, off_addr);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif
+
+// Compressed-pointer store: matches (StoreN mem src) and emits a single
+// str_32 of $src to $mem.
+instruct storeN(memoryI mem, store_RegN src) %{
+ match(Set mem (StoreN mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "str_32 $src,$mem\t! compressed ptr" %}
+ ins_encode %{
+ __ str_32($src$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+#ifdef AARCH64
+// Store NULL Compressed Pointer
+// AARCH64-only: stores the immN0 compressed-pointer constant by writing ZR
+// with a 32-bit store, so no register has to be loaded with the constant.
+instruct storeN0(memoryI mem, immN0 src) %{
+ match(Set mem (StoreN mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "str_32 ZR,$mem\t! compressed ptr" %}
+ ins_encode %{
+ __ str_32(ZR, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif
+
+// Store Compressed Klass Pointer
+// Matches (StoreNKlass mem src) and emits a single str_32 of $src to $mem;
+// the encoding is the same as for storeN.
+instruct storeNKlass(memoryI mem, store_RegN src) %{
+ match(Set mem (StoreNKlass mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+
+ format %{ "str_32 $src,$mem\t! compressed klass ptr" %}
+ ins_encode %{
+ __ str_32($src$$Register, $mem$$Address);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+#endif
+
+// Store Double
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// StoreD form (built only under AARCH64) for an address of the shape
+// (AddP mem off): $tmp receives base+off, then str_d stores the double
+// register through $tmp combined with $mem's index, scale and displacement.
+instruct storeDoff(regD src, memoryScaledD mem, aimmX off, iRegP tmp) %{
+ match(Set mem (StoreD (AddP mem off) src));
+ effect(TEMP tmp);
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ size(8); // 4 * 2: the ADD plus the store
+
+ format %{ "STR $src,$mem+$off\t! double temp=$tmp" %}
+ ins_encode %{
+   // Fold the constant offset into the base register, result in tmp.
+   __ add($tmp$$Register, reg_to_register_object($mem$$base), $off$$constant);
+   // Rebuild the operand's address with tmp standing in for the base.
+   Address off_addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+   __ str_d($src$$FloatRegister, off_addr);
+ %}
+ ins_pipe(fstoreD_mem_reg);
+%}
+#endif
+
+// Double store: matches (StoreD mem src) and emits one str_double of the
+// float register $src to $mem. No predicate: the rule applies to every
+// StoreD regardless of atomicity requirements (see FIXME below).
+instruct storeD(memoryD mem, regD src) %{
+ match(Set mem (StoreD mem src));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ // FIXME: needs to be atomic, but ARMv7 A.R.M. guarantees
+ // only LDREXD and STREXD are 64-bit single-copy atomic
+ format %{ "FSTD $src,$mem" %}
+ ins_encode %{
+ __ str_double($src$$FloatRegister, $mem$$Address);
+ %}
+ ins_pipe(fstoreD_mem_reg);
+%}
+
+#ifdef AARCH64
+// AARCH64-only move from an int register operand to a float register
+// operand (Set dst src) using one fmov_sw. No ins_cost is specified.
+instruct movI2F(regF dst, iRegI src) %{
+ match(Set dst src);
+ size(4);
+
+ format %{ "FMOV_sw $dst,$src\t! movI2F" %}
+ ins_encode %{
+ __ fmov_sw($dst$$FloatRegister, $src$$Register);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+// AARCH64-only move from a float register operand to an int register
+// operand (Set dst src) using one fmov_ws. No ins_cost is specified.
+instruct movF2I(iRegI dst, regF src) %{
+ match(Set dst src);
+ size(4);
+
+ format %{ "FMOV_ws $dst,$src\t! movF2I" %}
+ ins_encode %{
+ __ fmov_ws($dst$$Register, $src$$FloatRegister);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#endif
+
+// Store Float
+
+#ifdef AARCH64
+// XXX This variant shouldn't be necessary if 6217251 is implemented
+// StoreF form (built only under AARCH64) for an address of the shape
+// (AddP mem off): $tmp receives base+off, then str_s stores the float
+// register through $tmp combined with $mem's index, scale and displacement.
+instruct storeFoff(regF src, memoryScaledF mem, aimmX off, iRegP tmp) %{
+ match(Set mem (StoreF (AddP mem off) src));
+ effect(TEMP tmp);
+ ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
+ size(8); // 4 * 2: the ADD plus the store
+
+ format %{ "str_s $src,$mem+$off\t! float temp=$tmp" %}
+ ins_encode %{
+   // Fold the constant offset into the base register, result in tmp.
+   __ add($tmp$$Register, reg_to_register_object($mem$$base), $off$$constant);
+   // Rebuild the operand's address with tmp standing in for the base.
+   Address off_addr = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+   __ str_s($src$$FloatRegister, off_addr);
+ %}
+ ins_pipe(fstoreF_mem_reg);
+%}
+#endif
+
+// Float store: matches (StoreF mem src) and emits one str_float of the
+// float register $src to $mem.
+instruct storeF( memoryF mem, regF src) %{
+ match(Set mem (StoreF mem src));
+ ins_cost(MEMORY_REF_COST);
+
+ size(4);
+ format %{ "FSTS $src,$mem" %}
+ ins_encode %{
+ __ str_float($src$$FloatRegister, $mem$$Address);
+ %}
+ ins_pipe(fstoreF_mem_reg);
+%}
+
+#ifdef AARCH64
+// Convert oop pointer into compressed form
+// EncodeP for a value whose pointer type is not TypePtr::NotNull: delegates
+// to MacroAssembler::encode_heap_oop. The flags register is declared killed.
+// No size() is given for this rule.
+instruct encodeHeapOop(iRegN dst, iRegP src, flagsReg ccr) %{
+ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
+ match(Set dst (EncodeP src));
+ effect(KILL ccr);
+ format %{ "encode_heap_oop $dst, $src" %}
+ ins_encode %{
+ __ encode_heap_oop($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// EncodeP for a value whose pointer type is TypePtr::NotNull: delegates to
+// MacroAssembler::encode_heap_oop_not_null. Unlike encodeHeapOop, no flags
+// kill is declared.
+instruct encodeHeapOop_not_null(iRegN dst, iRegP src) %{
+ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
+ match(Set dst (EncodeP src));
+ format %{ "encode_heap_oop_not_null $dst, $src" %}
+ ins_encode %{
+ __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// DecodeN for a result whose oop pointer type is neither NotNull nor
+// Constant: delegates to MacroAssembler::decode_heap_oop. The flags
+// register is declared killed.
+instruct decodeHeapOop(iRegP dst, iRegN src, flagsReg ccr) %{
+ predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
+ n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
+ match(Set dst (DecodeN src));
+ effect(KILL ccr);
+ format %{ "decode_heap_oop $dst, $src" %}
+ ins_encode %{
+ __ decode_heap_oop($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// DecodeN for a result whose oop pointer type is NotNull or Constant (the
+// complement of decodeHeapOop's predicate): delegates to
+// MacroAssembler::decode_heap_oop_not_null, with no flags kill declared.
+instruct decodeHeapOop_not_null(iRegP dst, iRegN src) %{
+ predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
+ n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
+ match(Set dst (DecodeN src));
+ format %{ "decode_heap_oop_not_null $dst, $src" %}
+ ins_encode %{
+ __ decode_heap_oop_not_null($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Matches (EncodePKlass src) unconditionally and delegates to
+// MacroAssembler::encode_klass_not_null.
+instruct encodeKlass_not_null(iRegN dst, iRegP src) %{
+ match(Set dst (EncodePKlass src));
+ format %{ "encode_klass_not_null $dst, $src" %}
+ ins_encode %{
+ __ encode_klass_not_null($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Matches (DecodeNKlass src) unconditionally and delegates to
+// MacroAssembler::decode_klass_not_null.
+instruct decodeKlass_not_null(iRegP dst, iRegN src) %{
+ match(Set dst (DecodeNKlass src));
+ format %{ "decode_klass_not_null $dst, $src" %}
+ ins_encode %{
+ __ decode_klass_not_null($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif // AARCH64
+
+//----------MemBar Instructions-----------------------------------------------
+// Memory barrier flavors
+
+// TODO: take advantage of Aarch64 load-acquire, store-release, etc
+// pattern-match out unnecessary membars
+// MemBarStoreStore: emits a membar with the StoreStore mask; noreg is
+// passed as the second argument.
+instruct membar_storestore() %{
+ match(MemBarStoreStore);
+ ins_cost(4*MEMORY_REF_COST);
+
+ size(4);
+ format %{ "MEMBAR-storestore" %}
+ ins_encode %{
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
+// Acquire barrier: matches both MemBarAcquire and LoadFence and emits a
+// membar with the LoadLoad | LoadStore mask.
+instruct membar_acquire() %{
+ match(MemBarAcquire);
+ match(LoadFence);
+ ins_cost(4*MEMORY_REF_COST);
+
+ size(4);
+ format %{ "MEMBAR-acquire" %}
+ ins_encode %{
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), noreg);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
+// MemBarAcquireLock: emits nothing (size 0, cost 0, empty encoding). Per
+// the format text, the CAS in the preceding FastLock already provides the
+// ordering.
+instruct membar_acquire_lock() %{
+ match(MemBarAcquireLock);
+ ins_cost(0);
+
+ size(0);
+ format %{ "!MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
+%}
+
+// Release barrier: matches both MemBarRelease and StoreFence and emits a
+// membar with the StoreStore | LoadStore mask.
+instruct membar_release() %{
+ match(MemBarRelease);
+ match(StoreFence);
+ ins_cost(4*MEMORY_REF_COST);
+
+ size(4);
+ format %{ "MEMBAR-release" %}
+ ins_encode %{
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), noreg);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
+// MemBarReleaseLock: emits nothing (size 0, cost 0, empty encoding). Per
+// the format text, the CAS in the following FastUnlock already provides
+// the ordering.
+instruct membar_release_lock() %{
+ match(MemBarReleaseLock);
+ ins_cost(0);
+
+ size(0);
+ format %{ "!MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
+%}
+
+// MemBarVolatile: emits a membar with the StoreLoad mask. This rule has no
+// predicate; unnecessary_membar_volatile matches the same node at cost 0
+// when its predicate holds.
+instruct membar_volatile() %{
+ match(MemBarVolatile);
+ ins_cost(4*MEMORY_REF_COST);
+
+ size(4);
+ format %{ "MEMBAR-volatile" %}
+ ins_encode %{
+ // StoreLoad is passed directly, without the Membar_mask_bits(...)
+ // wrapper the other membar rules use around their masks.
+ __ membar(MacroAssembler::StoreLoad, noreg);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
+// MemBarVolatile that Matcher::post_store_load_barrier(n) reports as
+// redundant: emits nothing (size 0, cost 0, empty encoding).
+instruct unnecessary_membar_volatile() %{
+ match(MemBarVolatile);
+ predicate(Matcher::post_store_load_barrier(n));
+ ins_cost(0);
+
+ size(0);
+ format %{ "!MEMBAR-volatile (unnecessary so empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
+%}
+
+//----------Register Move Instructions-----------------------------------------
+// instruct roundDouble_nop(regD dst) %{
+// match(Set dst (RoundDouble dst));
+// ins_pipe(empty);
+// %}
+
+
+// instruct roundFloat_nop(regF dst) %{
+// match(Set dst (RoundFloat dst));
+// ins_pipe(empty);
+// %}
+
+
+#ifdef AARCH64
+// 0 constant in register
+// AARCH64-only: int constant 0 (immI0) produced in a ZRRegI destination.
+// Emits no code: size 0, cost 0, empty encoding.
+instruct zrImmI0(ZRRegI dst, immI0 imm) %{
+ match(Set dst imm);
+ size(0);
+ ins_cost(0);
+
+ format %{ "! ZR (int 0)" %}
+ ins_encode( /*empty encoding*/ );
+ ins_pipe(ialu_none);
+%}
+
+// 0 constant in register
+// AARCH64-only: long constant 0 (immL0) produced in a ZRRegL destination.
+// Emits no code: size 0, cost 0, empty encoding.
+instruct zrImmL0(ZRRegL dst, immL0 imm) %{
+ match(Set dst imm);
+ size(0);
+ ins_cost(0);
+
+ format %{ "! ZR (long 0)" %}
+ ins_encode( /*empty encoding*/ );
+ ins_pipe(ialu_none);
+%}
+
+#ifdef XXX
+// 0 constant in register
+// Disabled (inside #ifdef XXX): compressed-pointer NULL constant (immN0)
+// produced in a ZRRegN destination, with no code emitted.
+instruct zrImmN0(ZRRegN dst, immN0 imm) %{
+ match(Set dst imm);
+ size(0);
+ ins_cost(0);
+
+ format %{ "! ZR (compressed pointer NULL)" %}
+ ins_encode( /*empty encoding*/ );
+ ins_pipe(ialu_none);
+%}
+
+// 0 constant in register
+// Disabled (inside #ifdef XXX): NULL pointer constant (immP0) produced in
+// a ZRRegP destination, with no code emitted.
+instruct zrImmP0(ZRRegP dst, immP0 imm) %{
+ match(Set dst imm);
+ size(0);
+ ins_cost(0);
+
+ format %{ "! ZR (NULL)" %}
+ ins_encode( /*empty encoding*/ );
+ ins_pipe(ialu_none);
+%}
+#endif
+#endif // AARCH64
+
+// Cast Index to Pointer for unsafe natives
+// CastX2P: reinterprets an integer-width register as a pointer. A MOV is
+// emitted only when source and destination were allocated to different
+// registers; otherwise the rule emits nothing.
+instruct castX2P(iRegX src, iRegP dst) %{
+ match(Set dst (CastX2P src));
+
+ format %{ "MOV $dst,$src\t! IntX->Ptr if $dst != $src" %}
+ ins_encode %{
+   const Register Rdst = $dst$$Register;
+   const Register Rsrc = $src$$Register;
+   // Same register on both sides means there is nothing to copy.
+   if (Rdst != Rsrc) {
+     __ mov(Rdst, Rsrc);
+   }
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Cast Pointer to Index for unsafe natives
+// CastP2X: reinterprets a pointer register as an integer-width value. A MOV
+// is emitted only when source and destination were allocated to different
+// registers; otherwise the rule emits nothing.
+instruct castP2X(iRegP src, iRegX dst) %{
+ match(Set dst (CastP2X src));
+
+ format %{ "MOV $dst,$src\t! Ptr->IntX if $dst != $src" %}
+ ins_encode %{
+   const Register Rdst = $dst$$Register;
+   const Register Rsrc = $src$$Register;
+   // Same register on both sides means there is nothing to copy.
+   if (Rdst != Rsrc) {
+     __ mov(Rdst, Rsrc);
+   }
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+#ifndef AARCH64
+//----------Conditional Move---------------------------------------------------
+// Conditional move
+// Int conditional move on cmpOpP / flagsRegP operands (non-AARCH64 builds):
+// $dst is both an input and the result of the CMoveI, and one MOV
+// predicated on $cmp copies $src into it.
+instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{
+ match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
+ ins_cost(150);
+ size(4);
+ format %{ "MOV$cmp $dst,$src\t! int" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif
+
+#ifdef AARCH64
+// AARCH64 three-operand int conditional move on cmpOp / flagsReg: emits one
+// CSEL $dst,$src1,$src2 under condition $cmp. Note the match rule lists the
+// inputs as (Binary src2 src1).
+instruct cmovI_reg3(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand long conditional move on cmpOp / flagsReg: emits
+// one CSEL $dst,$src1,$src2 under condition $cmp. The match rule lists the
+// inputs as (Binary src2 src1).
+instruct cmovL_reg3(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand pointer conditional move on cmpOp / flagsReg: emits
+// one CSEL $dst,$src1,$src2 under condition $cmp. The match rule lists the
+// inputs as (Binary src2 src1).
+instruct cmovP_reg3(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src1, iRegP src2) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand compressed-pointer conditional move on cmpOp /
+// flagsReg: emits one CSEL $dst,$src1,$src2 under condition $cmp. The match
+// rule lists the inputs as (Binary src2 src1).
+instruct cmovN_reg3(cmpOp cmp, flagsReg icc, iRegN dst, iRegN src1, iRegN src2) %{
+ match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand int conditional move on cmpOpP / flagsRegP
+// operands: emits one CSEL $dst,$src1,$src2 under condition $cmp. The flags
+// operand is named icc here although its class is flagsRegP.
+instruct cmovIP_reg3(cmpOpP cmp, flagsRegP icc, iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand long conditional move on cmpOpP / flagsRegP
+// operands: emits one CSEL $dst,$src1,$src2 under condition $cmp.
+instruct cmovLP_reg3(cmpOpP cmp, flagsRegP icc, iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand pointer conditional move on cmpOpP / flagsRegP
+// operands: emits one CSEL $dst,$src1,$src2 under condition $cmp.
+instruct cmovPP_reg3(cmpOpP cmp, flagsRegP icc, iRegP dst, iRegP src1, iRegP src2) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand compressed-pointer conditional move on cmpOpP /
+// flagsRegP operands: emits one CSEL $dst,$src1,$src2 under condition $cmp.
+instruct cmovNP_reg3(cmpOpP cmp, flagsRegP icc, iRegN dst, iRegN src1, iRegN src2) %{
+ match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand int conditional move on cmpOpU / flagsRegU
+// operands: emits one CSEL $dst,$src1,$src2 under condition $cmp.
+instruct cmovIU_reg3(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand long conditional move on cmpOpU / flagsRegU
+// operands: emits one CSEL $dst,$src1,$src2 under condition $cmp.
+instruct cmovLU_reg3(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand pointer conditional move on cmpOpU / flagsRegU
+// operands: emits one CSEL $dst,$src1,$src2 under condition $cmp.
+instruct cmovPU_reg3(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src1, iRegP src2) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand compressed-pointer conditional move on cmpOpU /
+// flagsRegU operands: emits one CSEL $dst,$src1,$src2 under condition $cmp.
+instruct cmovNU_reg3(cmpOpU cmp, flagsRegU icc, iRegN dst, iRegN src1, iRegN src2) %{
+ match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand int conditional move on cmpOp0 / flagsReg_EQNELTGE
+// operands: emits one CSEL $dst,$src1,$src2 under condition $cmp. Unlike
+// the two-operand *_EQNELTGE rules, no predicate on the Bool test is given.
+instruct cmovIZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand long conditional move on cmpOp0 / flagsReg_EQNELTGE
+// operands: emits one CSEL $dst,$src1,$src2 under condition $cmp.
+instruct cmovLZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand pointer conditional move on cmpOp0 /
+// flagsReg_EQNELTGE operands: emits one CSEL $dst,$src1,$src2 under
+// condition $cmp.
+instruct cmovPZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src1, iRegP src2) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// AARCH64 three-operand compressed-pointer conditional move on cmpOp0 /
+// flagsReg_EQNELTGE operands: emits one CSEL $dst,$src1,$src2 under
+// condition $cmp.
+instruct cmovNZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegN dst, iRegN src1, iRegN src2) %{
+ match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %}
+ ins_encode %{
+ __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif // AARCH64
+
+#ifndef AARCH64
+// Int conditional move of an immIMov constant on cmpOpP / flagsRegP
+// operands (non-AARCH64 builds): one MOV predicated on $cmp. Its cost of
+// 140 is lower than the 150 of the register form cmovIP_reg.
+instruct cmovIP_immMov(cmpOpP cmp, flagsRegP pcc, iRegI dst, immIMov src) %{
+ match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
+ ins_cost(140);
+ size(4);
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// Int conditional move of an immI16 constant on cmpOpP / flagsRegP operands
+// (non-AARCH64 builds): one MOVW predicated on $cmp loads the constant
+// into $dst, which is both an input and the result of the CMoveI.
+instruct cmovIP_imm16(cmpOpP cmp, flagsRegP pcc, iRegI dst, immI16 src) %{
+ match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
+ ins_cost(140);
+ size(4);
+ // Mnemonic spelled "MOVW", matching the other imm16 conditional-move
+ // rules (cmovII_imm16_EQNELTGE, cmovIIu_imm16) in the printed listing.
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_imm);
+%}
+#endif
+
+// Int conditional move on cmpOp / flagsReg operands: $dst is both an input
+// and the result of the CMoveI, and one MOV predicated on $cmp copies $src
+// into it. This rule is not guarded by any AARCH64 conditional.
+instruct cmovI_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+ size(4);
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+#ifdef AARCH64
+// AARCH64-only long conditional move on cmpOp / flagsReg operands: $dst is
+// both an input and the result of the CMoveL, and a mov predicated on $cmp
+// copies $src into it.
+instruct cmovL_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+ size(4);
+ format %{ "MOV$cmp $dst,$src\t! long" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif
+
+#ifndef AARCH64
+// Int conditional move of an immIMov constant on cmpOp / flagsReg operands
+// (non-AARCH64 builds): one MOV predicated on $cmp, at cost 140.
+instruct cmovI_immMov(cmpOp cmp, flagsReg icc, iRegI dst, immIMov src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ ins_cost(140);
+ size(4);
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// Int conditional move of an immI16 constant on cmpOp / flagsReg operands
+// (non-AARCH64 builds): one MOVW predicated on $cmp loads the constant
+// into $dst, which is both an input and the result of the CMoveI.
+instruct cmovII_imm16(cmpOp cmp, flagsReg icc, iRegI dst, immI16 src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ ins_cost(140);
+ size(4);
+ // Mnemonic spelled "MOVW", matching the other imm16 conditional-move
+ // rules (cmovII_imm16_EQNELTGE, cmovIIu_imm16) in the printed listing.
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_imm);
+%}
+#endif
+
+// Int conditional move on cmpOp0 / flagsReg_EQNELTGE operands: one MOV
+// predicated on $cmp. The predicate admits the rule only when the Bool
+// test under the CMoveI is eq, ne, lt or ge.
+instruct cmovII_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(150);
+ size(4);
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+#ifndef AARCH64
+// Int conditional move of an immIMov constant on cmpOp0 / flagsReg_EQNELTGE
+// operands (non-AARCH64 builds): one MOV predicated on $cmp. The predicate
+// admits the rule only when the Bool test is eq, ne, lt or ge.
+instruct cmovII_immMov_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immIMov src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(140);
+ size(4);
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// Int conditional move of an immI16 constant on cmpOp0 / flagsReg_EQNELTGE
+// operands (non-AARCH64 builds): one MOVW predicated on $cmp. The predicate
+// admits the rule only when the Bool test is eq, ne, lt or ge.
+instruct cmovII_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immI16 src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(140);
+ size(4);
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_imm);
+%}
+#endif
+
+// Int conditional move on cmpOpU / flagsRegU operands: $dst is both an
+// input and the result of the CMoveI, and one MOV predicated on $cmp copies
+// $src into it.
+instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+ size(4);
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+#ifndef AARCH64
+// Int conditional move of an immIMov constant on cmpOpU / flagsRegU
+// operands (non-AARCH64 builds): one MOV predicated on $cmp, at cost 140.
+instruct cmovIIu_immMov(cmpOpU cmp, flagsRegU icc, iRegI dst, immIMov src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ ins_cost(140);
+ size(4);
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// Int conditional move of an immI16 constant on cmpOpU / flagsRegU operands
+// (non-AARCH64 builds): one MOVW predicated on $cmp, at cost 140.
+instruct cmovIIu_imm16(cmpOpU cmp, flagsRegU icc, iRegI dst, immI16 src) %{
+ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+ ins_cost(140);
+ size(4);
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_imm);
+%}
+#endif
+
+// Conditional move
+// Pointer conditional move on cmpOpP / flagsRegP operands: $dst is both an
+// input and the result of the CMoveP, and one MOV predicated on $cmp copies
+// $src into it.
+instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{
+ match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
+ ins_cost(150);
+ size(4);
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Pointer conditional move of the immP0 constant on cmpOpP / flagsRegP
+// operands. AARCH64 builds move ZR into $dst; other builds move the
+// immediate $src$$constant. Either way it is one MOV predicated on $cmp.
+instruct cmovPP_imm(cmpOpP cmp, flagsRegP pcc, iRegP dst, immP0 src) %{
+ match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
+ ins_cost(140);
+ size(4);
+#ifdef AARCH64
+ format %{ "MOV$cmp $dst,ZR" %}
+#else
+ format %{ "MOV$cmp $dst,$src" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode));
+#else
+ __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+#endif
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// This instruction also works with CmpN so we don't need cmovPN_reg.
+// Pointer conditional move on cmpOp / flagsReg operands: $dst is both an
+// input and the result of the CMoveP, and one MOV predicated on $cmp copies
+// $src into it.
+instruct cmovPI_reg(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+
+ size(4);
+ format %{ "MOV$cmp $dst,$src\t! ptr" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Pointer conditional move on cmpOp0 / flagsReg_EQNELTGE operands: one MOV
+// predicated on $cmp. The predicate admits the rule only when the Bool
+// test under the CMoveP is eq, ne, lt or ge.
+instruct cmovPI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(150);
+
+ size(4);
+ format %{ "MOV$cmp $dst,$src\t! ptr" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Pointer conditional move on cmpOpU / flagsRegU operands: $dst is both an
+// input and the result of the CMoveP, and one MOV predicated on $cmp copies
+// $src into it.
+instruct cmovPIu_reg(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+
+ size(4);
+ format %{ "MOV$cmp $dst,$src\t! ptr" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Pointer conditional move of the immP0 constant on cmpOp / flagsReg
+// operands. AARCH64 builds move ZR into $dst; other builds move the
+// immediate $src$$constant. Either way it is one MOV predicated on $cmp.
+instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+ ins_cost(140);
+
+ size(4);
+#ifdef AARCH64
+ format %{ "MOV$cmp $dst,ZR\t! ptr" %}
+#else
+ format %{ "MOV$cmp $dst,$src\t! ptr" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode));
+#else
+ __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+#endif
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// Pointer conditional move of the immP0 constant on cmpOp0 /
+// flagsReg_EQNELTGE operands, admitted only when the Bool test is eq, ne,
+// lt or ge. AARCH64 builds move ZR into $dst; other builds move the
+// immediate $src$$constant.
+instruct cmovPI_imm_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, immP0 src) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(140);
+
+ size(4);
+#ifdef AARCH64
+ format %{ "MOV$cmp $dst,ZR\t! ptr" %}
+#else
+ format %{ "MOV$cmp $dst,$src\t! ptr" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode));
+#else
+ __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+#endif
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// Pointer conditional move of the immP0 constant on cmpOpU / flagsRegU
+// operands. AARCH64 builds move ZR into $dst; other builds move the
+// immediate $src$$constant. Either way it is one MOV predicated on $cmp.
+instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{
+ match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+ ins_cost(140);
+
+ size(4);
+#ifdef AARCH64
+ format %{ "MOV$cmp $dst,ZR\t! ptr" %}
+#else
+ format %{ "MOV$cmp $dst,$src\t! ptr" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode));
+#else
+ __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
+#endif
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+#ifdef AARCH64
+// Conditional move
+// AARCH64 float conditional move on cmpOp / flagsReg operands: emits one
+// fcsel_s $dst,$src1,$src2 under condition $cmp. The match rule lists the
+// inputs as (Binary src2 src1).
+instruct cmovF_reg(cmpOp cmp, flagsReg icc, regF dst, regF src1, regF src2) %{
+ match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %}
+ ins_encode %{
+ __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// AARCH64 double conditional move on cmpOp / flagsReg operands: emits one
+// fcsel_d $dst,$src1,$src2 under condition $cmp. The match rule lists the
+// inputs as (Binary src2 src1).
+instruct cmovD_reg(cmpOp cmp, flagsReg icc, regD dst, regD src1, regD src2) %{
+ match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %}
+ ins_encode %{
+ __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// AArch64 float CMoveF for the cmpOpP / flagsRegP pair; same FCSEL_s
+// encoding as cmovF_reg, only the compare/flags operand classes differ.
+instruct cmovFP_reg(cmpOpP cmp, flagsRegP icc, regF dst, regF src1, regF src2) %{
+ match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %}
+ ins_encode %{
+ __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// AArch64 double CMoveD for the cmpOpP / flagsRegP pair; same FCSEL_d
+// encoding as cmovD_reg, only the compare/flags operand classes differ.
+instruct cmovDP_reg(cmpOpP cmp, flagsRegP icc, regD dst, regD src1, regD src2) %{
+ match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %}
+ ins_encode %{
+ __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// AArch64 float CMoveF for the cmpOpU / flagsRegU pair; same FCSEL_s
+// encoding as cmovF_reg.
+instruct cmovFU_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src1, regF src2) %{
+ match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %}
+ ins_encode %{
+ __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// AArch64 double CMoveD for the cmpOpU / flagsRegU pair; same FCSEL_d
+// encoding as cmovD_reg.
+instruct cmovDU_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src1, regD src2) %{
+ match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %}
+ ins_encode %{
+ __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// AArch64 float CMoveF for the cmpOp0 / flagsReg_EQNELTGE pair; same
+// FCSEL_s encoding as cmovF_reg. Unlike the ARM32 *_EQNELTGE rules, no
+// predicate on the Bool test is declared here.
+instruct cmovFZ_reg(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src1, regF src2) %{
+ match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %}
+ ins_encode %{
+ __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// AArch64 double CMoveD for the cmpOp0 / flagsReg_EQNELTGE pair; same
+// FCSEL_d encoding as cmovD_reg. No predicate on the Bool test is declared.
+instruct cmovDZ_reg(cmpOp0 cmp, flagsReg_EQNELTGE icc, regD dst, regD src1, regD src2) %{
+ match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %}
+ ins_encode %{
+ __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+#else // !AARCH64
+
+// Conditional move
+// ARM32 float CMoveF for the cmpOpP / flagsRegP pair. The match ties one
+// CMove input to $dst itself, so a single FCPYS predicated on $cmp is all
+// that is emitted.
+instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{
+ match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCPYS$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// ARM32 float CMoveF for the cmpOp / flagsReg pair: one FCPYS predicated
+// on $cmp, with $dst doubling as the other CMove input.
+instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{
+ match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+
+ size(4);
+ format %{ "FCPYS$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// ARM32 float CMoveF for the cmpOp0 / flagsReg_EQNELTGE pair. The predicate
+// restricts the rule to Bool tests eq, ne, lt and ge.
+instruct cmovFI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src) %{
+ match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(150);
+
+ size(4);
+ format %{ "FCPYS$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// ARM32 float CMoveF for the cmpOpU / flagsRegU pair: one FCPYS predicated
+// on $cmp.
+instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{
+ match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+
+ size(4);
+ format %{ "FCPYS$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional move
+// ARM32 double CMoveD for the cmpOpP / flagsRegP pair: one FCPYD predicated
+// on $cmp, with $dst doubling as the other CMove input.
+instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{
+ match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src)));
+ ins_cost(150);
+ size(4);
+ format %{ "FCPYD$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_double_move);
+%}
+
+// ARM32 double CMoveD for the cmpOp / flagsReg pair: one FCPYD predicated
+// on $cmp.
+instruct cmovDI_reg(cmpOp cmp, flagsReg icc, regD dst, regD src) %{
+ match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+
+ size(4);
+ format %{ "FCPYD$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_double_move);
+%}
+
+// ARM32 double CMoveD for the cmpOp0 / flagsReg_EQNELTGE pair. The predicate
+// restricts the rule to Bool tests eq, ne, lt and ge.
+instruct cmovDI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regD dst, regD src) %{
+ match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(150);
+
+ size(4);
+ format %{ "FCPYD$cmp $dst,$src" %}
+ ins_encode %{
+ // One FCPYD executed under the condition carried by $cmp.
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, cond);
+ %}
+ ins_pipe(int_conditional_double_move);
+%}
+
+// ARM32 double CMoveD for the cmpOpU / flagsRegU pair: one FCPYD predicated
+// on $cmp.
+instruct cmovDIu_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src) %{
+ match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+
+ size(4);
+ format %{ "FCPYD$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+ %}
+ ins_pipe(int_conditional_double_move);
+%}
+
+// Conditional move
+// ARM32 long CMoveL for the cmpOpP / flagsRegP pair. A long occupies a
+// register and its successor(), so two MOVs are emitted, both predicated on
+// the same condition.
+instruct cmovLP_reg(cmpOpP cmp, flagsRegP pcc, iRegL dst, iRegL src) %{
+ match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
+ ins_cost(150);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t"
+ "MOV$cmp $dst.hi,$src.hi" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ Register dst_lo = $dst$$Register;
+ Register src_lo = $src$$Register;
+ __ mov(dst_lo, src_lo, cond);
+ __ mov(dst_lo->successor(), src_lo->successor(), cond);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// TODO: try immLRot2 instead, so that the fixed (0, $src$$constant) pair
+// becomes (hi($src$$constant), lo($src$$constant)).
+// ARM32 long CMoveL of an immLlowRot constant for the cmpOpP / flagsRegP
+// pair: the low word receives the constant, the high word receives 0.
+instruct cmovLP_immRot(cmpOpP cmp, flagsRegP pcc, iRegL dst, immLlowRot src) %{
+ match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
+ ins_cost(140);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ Register dst_lo = $dst$$Register;
+ __ mov(dst_lo, $src$$constant, cond);
+ __ mov(dst_lo->successor(), 0, cond);
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// ARM32 long CMoveL of an immL16 constant for the cmpOpP / flagsRegP pair:
+// MOVW loads the constant into the low word, MOV zeroes the high word.
+instruct cmovLP_imm16(cmpOpP cmp, flagsRegP pcc, iRegL dst, immL16 src) %{
+ match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
+ ins_cost(140);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ Register dst_lo = $dst$$Register;
+ __ movw(dst_lo, $src$$constant, cond);
+ __ mov(dst_lo->successor(), 0, cond);
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// ARM32 long CMoveL for the cmpOp / flagsReg pair: low and high words are
+// moved by two MOVs predicated on the same condition.
+instruct cmovLI_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t"
+ "MOV$cmp $dst.hi,$src.hi" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ Register dst_lo = $dst$$Register;
+ Register src_lo = $src$$Register;
+ __ mov(dst_lo, src_lo, cond);
+ __ mov(dst_lo->successor(), src_lo->successor(), cond);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// ARM32 long CMoveL for the cmpOp0 / flagsReg_EQNELTGE pair. The predicate
+// restricts the rule to Bool tests eq, ne, lt and ge.
+instruct cmovLI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(150);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t"
+ "MOV$cmp $dst.hi,$src.hi" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ Register dst_lo = $dst$$Register;
+ Register src_lo = $src$$Register;
+ __ mov(dst_lo, src_lo, cond);
+ __ mov(dst_lo->successor(), src_lo->successor(), cond);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// TODO: try immLRot2 instead, so that the fixed (0, $src$$constant) pair
+// becomes (hi($src$$constant), lo($src$$constant)).
+// ARM32 long CMoveL of an immLlowRot constant for the cmpOp / flagsReg pair:
+// the low word receives the constant, the high word receives 0.
+instruct cmovLI_immRot(cmpOp cmp, flagsReg icc, iRegL dst, immLlowRot src) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+ ins_cost(140);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ Register dst_lo = $dst$$Register;
+ __ mov(dst_lo, $src$$constant, cond);
+ __ mov(dst_lo->successor(), 0, cond);
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// TODO: try immLRot2 instead, so that the fixed (0, $src$$constant) pair
+// becomes (hi($src$$constant), lo($src$$constant)).
+// ARM32 long CMoveL of an immLlowRot constant for the cmpOp0 /
+// flagsReg_EQNELTGE pair. The predicate restricts the rule to Bool tests
+// eq, ne, lt and ge.
+instruct cmovLI_immRot_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immLlowRot src) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(140);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ Register dst_lo = $dst$$Register;
+ __ mov(dst_lo, $src$$constant, cond);
+ __ mov(dst_lo->successor(), 0, cond);
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// ARM32 long CMoveL of an immL16 constant for the cmpOp / flagsReg pair:
+// MOVW loads the constant into the low word and the high word is zeroed.
+instruct cmovLI_imm16(cmpOp cmp, flagsReg icc, iRegL dst, immL16 src) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+ ins_cost(140);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ __ movw($dst$$Register, $src$$constant, cond);
+ // Zero the high word with a plain MOV, matching the format string above
+ // and the encoding used by cmovLP_imm16 (was MOVW #0).
+ __ mov($dst$$Register->successor(), 0, cond);
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// ARM32 long CMoveL of an immL16 constant for the cmpOp0 /
+// flagsReg_EQNELTGE pair. The predicate restricts the rule to Bool tests
+// eq, ne, lt and ge.
+instruct cmovLI_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immL16 src) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+ _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+ ins_cost(140);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ __ movw($dst$$Register, $src$$constant, cond);
+ // Zero the high word with a plain MOV, matching the format string above
+ // and the encoding used by cmovLP_imm16 (was MOVW #0).
+ __ mov($dst$$Register->successor(), 0, cond);
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+// ARM32 long CMoveL for the cmpOpU / flagsRegU pair: low and high words are
+// moved by two MOVs predicated on the same condition.
+instruct cmovLIu_reg(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src) %{
+ match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+ ins_cost(150);
+
+ size(8);
+ format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t"
+ "MOV$cmp $dst.hi,$src.hi" %}
+ ins_encode %{
+ const AsmCondition cond = (AsmCondition)($cmp$$cmpcode);
+ Register dst_lo = $dst$$Register;
+ Register src_lo = $src$$Register;
+ __ mov(dst_lo, src_lo, cond);
+ __ mov(dst_lo->successor(), src_lo->successor(), cond);
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif // !AARCH64
+
+
+//----------OS and Locking Instructions----------------------------------------
+
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this instruction.
+// Matches ThreadLocal with $dst constrained to the RthreadRegP operand; the
+// rule has size 0, cost 0 and an empty encoding, so no code is emitted.
+instruct tlsLoadP(RthreadRegP dst) %{
+ match(Set dst (ThreadLocal));
+
+ size(0);
+ ins_cost(0);
+ format %{ "! TLS is in $dst" %}
+ ins_encode( /*empty encoding*/ );
+ ins_pipe(ialu_none);
+%}
+
+// CheckCastPP on a pointer register: matched in place ($dst is both input
+// and output) with size 0 and an empty encoding, so no code is emitted.
+instruct checkCastPP( iRegP dst ) %{
+ match(Set dst (CheckCastPP dst));
+
+ size(0);
+ format %{ "! checkcastPP of $dst" %}
+ ins_encode( /*empty encoding*/ );
+ ins_pipe(empty);
+%}
+
+
+// CastPP on a pointer register: matched in place with an empty encoding.
+// NOTE(review): unlike checkCastPP, no size(0) is declared here, and unlike
+// castII no ins_cost(0) -- confirm whether that is intentional.
+instruct castPP( iRegP dst ) %{
+ match(Set dst (CastPP dst));
+ format %{ "! castPP of $dst" %}
+ ins_encode( /*empty encoding*/ );
+ ins_pipe(empty);
+%}
+
+// CastII on an int register: matched in place with an empty encoding and an
+// explicit cost of 0.
+instruct castII( iRegI dst ) %{
+ match(Set dst (CastII dst));
+ format %{ "! castII of $dst" %}
+ ins_encode( /*empty encoding*/ );
+ ins_cost(0);
+ ins_pipe(empty);
+%}
+
+//----------Arithmetic Instructions--------------------------------------------
+// Addition Instructions
+// Register Addition
+// int add of two registers: dst = src1 + src2 via a single add_32.
+instruct addI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (AddI src1 src2));
+
+ size(4);
+ format %{ "add_32 $dst,$src1,$src2\t! int" %}
+ ins_encode %{
+ __ add_32($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+// ARM32 only: folds (src1 << src2) + src3 into one add_32 by passing the
+// left shift as a register-shifted operand (lsl by $src2).
+instruct addshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (AddI (LShiftI src1 src2) src3));
+
+ size(4);
+ format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %}
+ ins_encode %{
+ __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+#ifdef AARCH64
+#ifdef TODO
+// AArch64 long variant of the shift-and-add fold: (src1 << imm) + src3 in
+// one ADD. Only compiled when both AARCH64 and TODO are defined.
+instruct addshlL_reg_imm_reg(iRegL dst, iRegL src1, immU6 src2, iRegL src3) %{
+ match(Set dst (AddL (LShiftL src1 src2) src3));
+
+ size(4);
+ format %{ "ADD $dst,$src3,$src1<<$src2\t! long" %}
+ ins_encode %{
+ __ add($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+#endif
+
+// Folds (src1 << imm) + src3 into one add_32, with the shift amount taken
+// from the immU5 constant $src2.
+instruct addshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+ match(Set dst (AddI (LShiftI src1 src2) src3));
+
+ size(4);
+ format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %}
+ ins_encode %{
+ __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+// ARM32 only: folds (src1 >> src2) + src3 (RShiftI) into one add_32 using
+// an asr register-shifted operand.
+instruct addsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (AddI (RShiftI src1 src2) src3));
+
+ size(4);
+ format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %}
+ ins_encode %{
+ __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, asr, $src2$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// Folds (src1 >> imm) + src3 (RShiftI) into one add_32 using an asr operand
+// whose shift amount is the immU5 constant $src2.
+instruct addsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+ match(Set dst (AddI (RShiftI src1 src2) src3));
+
+ size(4);
+ format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %}
+ ins_encode %{
+ __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, asr, $src2$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+// ARM32 only: folds (src1 >>> src2) + src3 (URShiftI) into one add_32 using
+// an lsr register-shifted operand.
+instruct addshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (AddI (URShiftI src1 src2) src3));
+
+ size(4);
+ format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %}
+ ins_encode %{
+ __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// Folds (src1 >>> imm) + src3 (URShiftI) into one add_32 using an lsr
+// operand whose shift amount is the immU5 constant $src2.
+instruct addshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+ match(Set dst (AddI (URShiftI src1 src2) src3));
+
+ size(4);
+ format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %}
+ ins_encode %{
+ __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Addition
+// int add of a register and an aimmI constant: dst = src1 + imm via a
+// single add_32.
+instruct addI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{
+ match(Set dst (AddI src1 src2));
+
+ size(4);
+ format %{ "add_32 $dst,$src1,$src2\t! int" %}
+ ins_encode %{
+ __ add_32($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Pointer Register Addition
+// AddP of a pointer register and an iRegX offset register via a single ADD.
+instruct addP_reg_reg(iRegP dst, iRegP src1, iRegX src2) %{
+ match(Set dst (AddP src1 src2));
+
+ size(4);
+ format %{ "ADD $dst,$src1,$src2\t! ptr" %}
+ ins_encode %{
+ __ add($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifdef AARCH64
+// unshifted I2L operand
+// Matches (ConvI2L src2) as an operand so that consumers can absorb the
+// int-to-long conversion. It is described through MEMORY_INTER with the int
+// register as base, index 0xff, and scale and disp of 0; users in this file
+// read $base and $scale from it.
+operand unshiftedI2L(iRegI src2) %{
+//constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(ConvI2L src2);
+
+ op_cost(1);
+ format %{ "$src2.w" %}
+ interface(MEMORY_INTER) %{
+ base($src2);
+ index(0xff);
+ scale(0x0);
+ disp(0x0);
+ %}
+%}
+
+// shifted I2L operand
+// Matches (LShiftX (ConvI2L src2) src3) with an immI_0_4 shift amount. Same
+// MEMORY_INTER layout as unshiftedI2L, except that the shift constant is
+// carried in the scale field.
+operand shiftedI2L(iRegI src2, immI_0_4 src3) %{
+//constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(LShiftX (ConvI2L src2) src3);
+
+ op_cost(1);
+ format %{ "$src2.w << $src3" %}
+ interface(MEMORY_INTER) %{
+ base($src2);
+ index(0xff);
+ scale($src3);
+ disp(0x0);
+ %}
+%}
+
+// Operand class covering both the shifted and the unshifted I2L operands.
+opclass shiftedRegI(shiftedI2L, unshiftedI2L);
+
+// AArch64: (long)src1 << c for an int register src1 and an immU6 constant c,
+// emitted as a single SBFM (the SBFIZ form: sign-extend the low bits of
+// src1 and insert them at bit position c).
+instruct shlL_reg_regI(iRegL dst, iRegI src1, immU6 src2) %{
+ match(Set dst (LShiftL (ConvI2L src1) src2));
+
+ size(4);
+ format %{ "LSL $dst,$src1.w,$src2\t! ptr" %}
+ ins_encode %{
+ int c = $src2$$constant;
+ // immr is (-c) mod 64. Masking keeps it inside the 6-bit field when
+ // c == 0 (previously 64 - 0 = 64, which is not encodable); for c in
+ // 1..63 the value is unchanged.
+ int r = (64 - c) & 0x3f;
+ // imms is (field width - 1): at most 32 source bits, and no more than
+ // the 64 - c bits that remain after the shift.
+ int s = 31;
+ if (s >= 64 - c) {
+ s = 63 - c;
+ }
+ __ sbfm($dst$$Register, $src1$$Register, r, s);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+// AArch64: AddP of a pointer and a shiftedRegI operand (an int register
+// converted to long, optionally shifted). Emitted as one extended-register
+// ADD using ex_sxtw and the operand's scale as the shift amount.
+instruct addP_reg_regI(iRegP dst, iRegP src1, shiftedRegI src2) %{
+ match(Set dst (AddP src1 src2));
+
+ ins_cost(DEFAULT_COST * 3/2);
+ size(4);
+ format %{ "ADD $dst,$src1,$src2, sxtw\t! ptr" %}
+ ins_encode %{
+ // The operand exposes its int register through the MEMORY_INTER base field.
+ Register base = reg_to_register_object($src2$$base);
+ __ add($dst$$Register, $src1$$Register, base, ex_sxtw, $src2$$scale);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// shifted iRegX operand
+// Matches (LShiftX src2 src3) with a shimmX shift amount. Described through
+// MEMORY_INTER with the register as base, index 0xff, the shift constant as
+// scale, and disp 0.
+operand shiftedX(iRegX src2, shimmX src3) %{
+//constraint(ALLOC_IN_RC(sp_ptr_reg));
+ match(LShiftX src2 src3);
+
+ op_cost(1);
+ format %{ "$src2 << $src3" %}
+ interface(MEMORY_INTER) %{
+ base($src2);
+ index(0xff);
+ scale($src3);
+ disp(0x0);
+ %}
+%}
+
+// AddP of a pointer and a shiftedX operand: src1 + (reg << imm) in one ADD,
+// using the operand's base register and scale as an lsl shifted operand.
+instruct addshlP_reg_reg_imm(iRegP dst, iRegP src1, shiftedX src2) %{
+ match(Set dst (AddP src1 src2));
+
+ ins_cost(DEFAULT_COST * 3/2);
+ size(4);
+ format %{ "ADD $dst,$src1,$src2\t! ptr" %}
+ ins_encode %{
+ // The operand exposes its register through the MEMORY_INTER base field.
+ Register base = reg_to_register_object($src2$$base);
+ __ add($dst$$Register, $src1$$Register, AsmOperand(base, lsl, $src2$$scale));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Pointer Immediate Addition
+// AddP of a pointer register and an aimmX constant via a single ADD.
+instruct addP_reg_aimmX(iRegP dst, iRegP src1, aimmX src2) %{
+ match(Set dst (AddP src1 src2));
+
+ size(4);
+ format %{ "ADD $dst,$src1,$src2\t! ptr" %}
+ ins_encode %{
+ __ add($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Long Addition
+#ifdef AARCH64
+// AArch64: long add of two registers in a single ADD. No flags operand is
+// declared for this variant.
+instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (AddL src1 src2));
+ size(4);
+ format %{ "ADD $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ add($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+// AArch64: AddL of a long and a shiftedRegI operand (an int register
+// converted to long, optionally shifted). Emitted as one extended-register
+// ADD using ex_sxtw and the operand's scale as the shift amount.
+instruct addL_reg_regI(iRegL dst, iRegL src1, shiftedRegI src2) %{
+ match(Set dst (AddL src1 src2));
+
+ ins_cost(DEFAULT_COST * 3/2);
+ size(4);
+ format %{ "ADD $dst,$src1,$src2, sxtw\t! long" %}
+ ins_encode %{
+ // The operand exposes its int register through the MEMORY_INTER base field.
+ Register base = reg_to_register_object($src2$$base);
+ __ add($dst$$Register, $src1$$Register, base, ex_sxtw, $src2$$scale);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#else
+// ARM32: long add on register pairs. ADDS adds the low words and ADC adds
+// the high words (successor() registers); the flags register is declared
+// as killed.
+instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg ccr) %{
+ match(Set dst (AddL src1 src2));
+ effect(KILL ccr);
+ size(8);
+ format %{ "ADDS $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
+ "ADC $dst.hi,$src1.hi,$src2.hi" %}
+ ins_encode %{
+ __ adds($dst$$Register, $src1$$Register, $src2$$Register);
+ __ adc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+#ifdef AARCH64
+// Immediate Addition
+// AArch64: long add of a register and an aimmL constant in a single ADD.
+instruct addL_reg_aimm(iRegL dst, iRegL src1, aimmL src2) %{
+ match(Set dst (AddL src1 src2));
+
+ size(4);
+ format %{ "ADD $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ add($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+
+// AArch64: despite the "add" name this rule matches SubL with an aimmLneg
+// constant and implements src1 - imm as an ADD of the negated constant.
+instruct addL_reg_immLneg(iRegL dst, iRegL src1, aimmLneg src2) %{
+ match(Set dst (SubL src1 src2));
+
+ size(4);
+ format %{ "ADD $dst,$src1,-($src2)\t! long" %}
+ ins_encode %{
+ __ add($dst$$Register, $src1$$Register, -$src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#else
+// TODO
+#endif
+
+#ifndef AARCH64
+// TODO: try immLRot2 instead, so that the fixed (0, $con$$constant) pair
+// becomes (hi($con$$constant), lo($con$$constant)).
+// ARM32: long add of a register pair and an immLlowRot constant. ADDS adds
+// the constant to the low word and ADC adds 0 to the high word; the flags
+// register is declared as killed.
+instruct addL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg ccr) %{
+ match(Set dst (AddL src1 con));
+ effect(KILL ccr);
+ size(8);
+ format %{ "ADDS $dst.lo,$src1.lo,$con\t! long\n\t"
+ "ADC $dst.hi,$src1.hi,0" %}
+ ins_encode %{
+ __ adds($dst$$Register, $src1$$Register, $con$$constant);
+ __ adc($dst$$Register->successor(), $src1$$Register->successor(), 0);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif
+
+//----------Conditional_store--------------------------------------------------
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// Sets flags (EQ) on success.
+
+// TODO: optimize out barriers with AArch64 load-acquire/store-release
+// LoadP-locked.
+// Loads the pointer at $mem with LDXR (AArch64) or LDREX (ARM32). The format
+// string is selected per architecture so the printed mnemonic matches the
+// instruction that is actually emitted.
+instruct loadPLocked(iRegP dst, memoryex mem) %{
+ match(Set dst (LoadPLocked mem));
+ size(4);
+#ifdef AARCH64
+ format %{ "LDXR $dst,$mem" %}
+#else
+ format %{ "LDREX $dst,$mem" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ // Only the base register of the memoryex operand is used.
+ Register base = reg_to_register_object($mem$$base);
+ __ ldxr($dst$$Register, base);
+#else
+ __ ldrex($dst$$Register,$mem$$Address);
+#endif
+ %}
+ ins_pipe(iload_mem);
+%}
+
+// Conditional store of $newval to the heap-top pointer, paired with a
+// preceding LoadPLocked. STXR/STREX writes its status into $tmp and the
+// following CMP $tmp, 0 produces the flags result (EQ on success). $oldval
+// is not referenced by the encoding. The format string is selected per
+// architecture so the printed mnemonic matches the emitted instruction.
+instruct storePConditional( memoryex heap_top_ptr, iRegP oldval, iRegP newval, iRegI tmp, flagsRegP pcc ) %{
+ predicate(_kids[1]->_kids[0]->_leaf->Opcode() == Op_LoadPLocked); // only works in conjunction with a LoadPLocked node
+ match(Set pcc (StorePConditional heap_top_ptr (Binary oldval newval)));
+ effect( TEMP tmp );
+ size(8);
+#ifdef AARCH64
+ format %{ "STXR $tmp,$newval,$heap_top_ptr\n\t"
+ "CMP $tmp, 0" %}
+#else
+ format %{ "STREX $tmp,$newval,$heap_top_ptr\n\t"
+ "CMP $tmp, 0" %}
+#endif
+ ins_encode %{
+#ifdef AARCH64
+ // Only the base register of the memoryex operand is used.
+ Register base = reg_to_register_object($heap_top_ptr$$base);
+ __ stxr($tmp$$Register, $newval$$Register, base);
+#else
+ __ strex($tmp$$Register, $newval$$Register, $heap_top_ptr$$Address);
+#endif
+ __ cmp($tmp$$Register, 0);
+ %}
+ ins_pipe( long_memory_op );
+%}
+
+// Conditional-store of an intx value.
+// Matches StoreLConditional on AArch64 and StoreIConditional on ARM32. The
+// loop stores $newval only if [$mem] equals $oldval, retries when the
+// exclusive store reports failure, and leaves the outcome in the flags (set
+// by the final compare of $tmp against 0). A LoadStore|LoadLoad membar
+// follows on both paths.
+instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp, flagsReg icc ) %{
+#ifdef AARCH64
+ match(Set icc (StoreLConditional mem (Binary oldval newval)));
+ effect( TEMP tmp );
+ size(28);
+ format %{ "loop:\n\t"
+ "LDXR $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t"
+ "SUBS $tmp, $tmp, $oldval\n\t"
+ "B.ne done\n\t"
+ "STXR $tmp, $newval, $mem\n\t"
+ "CBNZ_w $tmp, loop\n\t"
+ "CMP $tmp, 0\n\t"
+ "done:\n\t"
+ "membar LoadStore|LoadLoad" %}
+#else
+ match(Set icc (StoreIConditional mem (Binary oldval newval)));
+ effect( TEMP tmp );
+ size(28);
+ format %{ "loop: \n\t"
+ "LDREX $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t"
+ "EORS $tmp,$tmp, $oldval\n\t"
+ "STREX.eq $tmp, $newval, $mem\n\t"
+ "CMP.eq $tmp, 1 \n\t"
+ "B.eq loop \n\t"
+ "TEQ $tmp, 0\n\t"
+ "membar LoadStore|LoadLoad" %}
+#endif
+ ins_encode %{
+ Label loop;
+ __ bind(loop);
+#ifdef AARCH64
+// FIXME: use load-acquire/store-release, remove membar?
+ Label done;
+ Register base = reg_to_register_object($mem$$base);
+ __ ldxr($tmp$$Register, base);
+ // $tmp becomes zero (flags EQ) exactly when the loaded value equals $oldval.
+ __ subs($tmp$$Register, $tmp$$Register, $oldval$$Register);
+ __ b(done, ne);
+ __ stxr($tmp$$Register, $newval$$Register, base);
+ // Non-zero store status: retry from the load.
+ __ cbnz_w($tmp$$Register, loop);
+ __ cmp($tmp$$Register, 0);
+ __ bind(done);
+#else
+ __ ldrex($tmp$$Register, $mem$$Address);
+ // $tmp becomes zero (flags EQ) exactly when the loaded value equals $oldval.
+ __ eors($tmp$$Register, $tmp$$Register, $oldval$$Register);
+ __ strex($tmp$$Register, $newval$$Register, $mem$$Address, eq);
+ // Store status 1: retry from the load.
+ __ cmp($tmp$$Register, 1, eq);
+ __ b(loop, eq);
+ __ teq($tmp$$Register, 0);
+#endif
+ // used by biased locking only. Requires a membar.
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::LoadLoad), noreg);
+ %}
+ ins_pipe( long_memory_op );
+%}
+
+// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
+
+#ifdef AARCH64
+// TODO: if combined with membar, elide membar and use
+// load-acquire/store-release if appropriate
+// AArch64 CompareAndSwapL producing a boolean in $res. LDXR/STXR loop on the
+// base register of $mem: leaves the loop when the loaded value differs from
+// $oldval, retries while STXR reports a non-zero status, then CSET_w
+// materializes the eq flag into $res.
+instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegL newval, iRegI res, iRegI tmp, flagsReg ccr) %{
+ match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+ effect( KILL ccr, TEMP tmp);
+ size(24);
+ format %{ "loop:\n\t"
+ "LDXR $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP $tmp, $oldval\n\t"
+ "B.ne done\n\t"
+ "STXR $tmp, $newval, $mem\n\t"
+ "CBNZ_w $tmp, loop\n\t"
+ "done:\n\t"
+ "CSET_w $res, eq" %}
+ ins_encode %{
+ Register base = reg_to_register_object($mem$$base);
+ Label loop, done;
+ __ bind(loop);
+ __ ldxr($tmp$$Register, base);
+ __ cmp($tmp$$Register, $oldval$$Register);
+ __ b(done, ne);
+ __ stxr($tmp$$Register, $newval$$Register, base);
+ __ cbnz_w($tmp$$Register, loop);
+ __ bind(done);
+ __ cset_w($res$$Register, eq);
+ %}
+ ins_pipe( long_memory_op );
+%}
+
+// AArch64 CompareAndSwapI producing a boolean in $res. Same loop shape as
+// compareAndSwapL_bool, using the _w forms of the load, compare and store.
+instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{
+ match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+ effect( KILL ccr, TEMP tmp);
+ size(24);
+ format %{ "loop:\n\t"
+ "LDXR_w $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP_w $tmp, $oldval\n\t"
+ "B.ne done\n\t"
+ "STXR_w $tmp, $newval, $mem\n\t"
+ "CBNZ_w $tmp, loop\n\t"
+ "done:\n\t"
+ "CSET_w $res, eq" %}
+ ins_encode %{
+ Register base = reg_to_register_object($mem$$base);
+ Label loop, done;
+ __ bind(loop);
+ __ ldxr_w($tmp$$Register, base);
+ __ cmp_w($tmp$$Register, $oldval$$Register);
+ __ b(done, ne);
+ __ stxr_w($tmp$$Register, $newval$$Register, base);
+ __ cbnz_w($tmp$$Register, loop);
+ __ bind(done);
+ __ cset_w($res$$Register, eq);
+ %}
+ ins_pipe( long_memory_op );
+%}
+
+// tmp must use iRegI instead of iRegN until 8051805 is fixed.
+// AArch64 CompareAndSwapN producing a boolean in $res. The encoding is the
+// same _w loop as compareAndSwapI_bool; only the oldval/newval operand
+// classes (iRegN) differ.
+instruct compareAndSwapN_bool(memoryex mem, iRegN oldval, iRegN newval, iRegI res, iRegI tmp, flagsReg ccr) %{
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ effect( KILL ccr, TEMP tmp);
+ size(24);
+ format %{ "loop:\n\t"
+ "LDXR_w $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP_w $tmp, $oldval\n\t"
+ "B.ne done\n\t"
+ "STXR_w $tmp, $newval, $mem\n\t"
+ "CBNZ_w $tmp, loop\n\t"
+ "done:\n\t"
+ "CSET_w $res, eq" %}
+ ins_encode %{
+ Register base = reg_to_register_object($mem$$base);
+ Label loop, done;
+ __ bind(loop);
+ __ ldxr_w($tmp$$Register, base);
+ __ cmp_w($tmp$$Register, $oldval$$Register);
+ __ b(done, ne);
+ __ stxr_w($tmp$$Register, $newval$$Register, base);
+ __ cbnz_w($tmp$$Register, loop);
+ __ bind(done);
+ __ cset_w($res$$Register, eq);
+ %}
+ ins_pipe( long_memory_op );
+%}
+
+// AArch64 CompareAndSwapP producing a boolean in $res. The encoding is the
+// same LDXR/STXR loop as compareAndSwapL_bool; only the oldval/newval
+// operand classes (iRegP) differ.
+instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr) %{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ effect( KILL ccr, TEMP tmp);
+ size(24);
+ format %{ "loop:\n\t"
+ "LDXR $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP $tmp, $oldval\n\t"
+ "B.ne done\n\t"
+ "STXR $tmp, $newval, $mem\n\t"
+ "CBNZ_w $tmp, loop\n\t"
+ "done:\n\t"
+ "CSET_w $res, eq" %}
+ ins_encode %{
+ Register base = reg_to_register_object($mem$$base);
+ Label loop, done;
+ __ bind(loop);
+ __ ldxr($tmp$$Register, base);
+ __ cmp($tmp$$Register, $oldval$$Register);
+ __ b(done, ne);
+ __ stxr($tmp$$Register, $newval$$Register, base);
+ __ cbnz_w($tmp$$Register, loop);
+ __ bind(done);
+ __ cset_w($res$$Register, eq);
+ %}
+ ins_pipe( long_memory_op );
+%}
+#else // !AARCH64
+// ARM32 CompareAndSwapL producing a boolean in $res. LDREXD/STREXD loop on a
+// register pair: both halves are compared against $oldval, the store is
+// attempted only on a match, and $tmp ends up holding the boolean result
+// that is finally copied to $res. The format string prints EORS, the
+// mnemonic of the instruction actually emitted (it previously said XORS).
+instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegLd newval, iRegI res, iRegLd tmp, flagsReg ccr ) %{
+ match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+ effect( KILL ccr, TEMP tmp);
+ size(32);
+ format %{ "loop: \n\t"
+ "LDREXD $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP $tmp.lo, $oldval.lo\n\t"
+ "CMP.eq $tmp.hi, $oldval.hi\n\t"
+ "STREXD.eq $tmp, $newval, $mem\n\t"
+ "MOV.ne $tmp, 0 \n\t"
+ "EORS.eq $tmp,$tmp, 1 \n\t"
+ "B.eq loop \n\t"
+ "MOV $res, $tmp" %}
+ ins_encode %{
+ Label loop;
+ __ bind(loop);
+ __ ldrexd($tmp$$Register, $mem$$Address);
+ __ cmp($tmp$$Register, $oldval$$Register);
+ __ cmp($tmp$$Register->successor(), $oldval$$Register->successor(), eq);
+ __ strexd($tmp$$Register, $newval$$Register, $mem$$Address, eq);
+ // Mismatch: result is 0 and the flags stay NE, so the loop exits.
+ __ mov($tmp$$Register, 0, ne);
+ // Match: flip the store status. A failed store yields 0 with flags EQ
+ // (retry); a successful store yields 1 with flags NE (exit).
+ __ eors($tmp$$Register, $tmp$$Register, 1, eq);
+ __ b(loop, eq);
+ __ mov($res$$Register, $tmp$$Register);
+ %}
+ ins_pipe( long_memory_op );
+%}
+
+
+// ARM32 CompareAndSwapI producing a boolean in $res. LDREX/STREX loop: the
+// store is attempted only when the loaded value equals $oldval, and $tmp
+// ends up holding the boolean result that is finally copied to $res. The
+// format string prints EORS, the mnemonic of the instruction actually
+// emitted (it previously said XORS).
+instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr ) %{
+ match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+ effect( KILL ccr, TEMP tmp);
+ size(28);
+ format %{ "loop: \n\t"
+ "LDREX $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP $tmp, $oldval\n\t"
+ "STREX.eq $tmp, $newval, $mem\n\t"
+ "MOV.ne $tmp, 0 \n\t"
+ "EORS.eq $tmp,$tmp, 1 \n\t"
+ "B.eq loop \n\t"
+ "MOV $res, $tmp" %}
+
+ ins_encode %{
+ Label loop;
+ __ bind(loop);
+ __ ldrex($tmp$$Register,$mem$$Address);
+ __ cmp($tmp$$Register, $oldval$$Register);
+ __ strex($tmp$$Register, $newval$$Register, $mem$$Address, eq);
+ // Mismatch: result is 0 and the flags stay NE, so the loop exits.
+ __ mov($tmp$$Register, 0, ne);
+ // Match: flip the store status. A failed store yields 0 with flags EQ
+ // (retry); a successful store yields 1 with flags NE (exit).
+ __ eors($tmp$$Register, $tmp$$Register, 1, eq);
+ __ b(loop, eq);
+ __ mov($res$$Register, $tmp$$Register);
+ %}
+ ins_pipe( long_memory_op );
+%}
+
+// ARM32 CompareAndSwapP producing a boolean in $res. Same LDREX/STREX loop
+// as the ARM32 compareAndSwapI_bool; only the oldval/newval operand classes
+// (iRegP) differ.
+instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ effect( KILL ccr, TEMP tmp);
+ size(28);
+ format %{ "loop: \n\t"
+ "LDREX $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP $tmp, $oldval\n\t"
+ "STREX.eq $tmp, $newval, $mem\n\t"
+ "MOV.ne $tmp, 0 \n\t"
+ "EORS.eq $tmp,$tmp, 1 \n\t"
+ "B.eq loop \n\t"
+ "MOV $res, $tmp" %}
+
+ ins_encode %{
+ Label loop;
+ __ bind(loop);
+ __ ldrex($tmp$$Register,$mem$$Address);
+ __ cmp($tmp$$Register, $oldval$$Register);
+ __ strex($tmp$$Register, $newval$$Register, $mem$$Address, eq);
+ // Mismatch: result is 0 and the flags stay NE, so the loop exits.
+ __ mov($tmp$$Register, 0, ne);
+ // Match: flip the store status; EQ afterwards means the store failed.
+ __ eors($tmp$$Register, $tmp$$Register, 1, eq);
+ __ b(loop, eq);
+ __ mov($res$$Register, $tmp$$Register);
+ %}
+ ins_pipe( long_memory_op );
+%}
+#endif // !AARCH64
+
+#ifdef AARCH64
+// AArch64 GetAndAddI with an aimmI addend, selected only when the predicate
+// reports that the result of the LoadStore node is not used. LDXR_w/STXR_w
+// loop that retries while the store status in $tmp2 is non-zero; the old
+// value is not preserved.
+instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2) %{
+ predicate(n->as_LoadStore()->result_not_used());
+ match(Set dummy (GetAndAddI mem add));
+ effect(TEMP tmp1, TEMP tmp2);
+ size(16);
+ format %{ "loop:\n\t"
+ "LDXR_w $tmp1, $mem\n\t"
+ "ADD_w $tmp1, $tmp1, $add\n\t"
+ "STXR_w $tmp2, $tmp1, $mem\n\t"
+ "CBNZ_w $tmp2, loop" %}
+
+ ins_encode %{
+ Label loop;
+ Register base = reg_to_register_object($mem$$base);
+ __ bind(loop);
+ __ ldxr_w($tmp1$$Register, base);
+ __ add_w($tmp1$$Register, $tmp1$$Register, $add$$constant);
+ __ stxr_w($tmp2$$Register, $tmp1$$Register, base);
+ __ cbnz_w($tmp2$$Register, loop);
+ %}
+ ins_pipe( long_memory_op );
+%}
+#else
+// ARM32 GetAndAddI with an aimmI addend, selected only when the predicate
+// reports that the result of the LoadStore node is not used. LDREX/STREX
+// loop that retries while the store status in $tmp2 is non-zero; the CMP
+// is why the flags register is declared as killed. The format string no
+// longer ends in a dangling line separator.
+instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+ predicate(n->as_LoadStore()->result_not_used());
+ match(Set dummy (GetAndAddI mem add));
+ effect(KILL ccr, TEMP tmp1, TEMP tmp2);
+ size(20);
+ format %{ "loop: \n\t"
+ "LDREX $tmp1, $mem\n\t"
+ "ADD $tmp1, $tmp1, $add\n\t"
+ "STREX $tmp2, $tmp1, $mem\n\t"
+ "CMP $tmp2, 0 \n\t"
+ "B.ne loop" %}
+
+ ins_encode %{
+ Label loop;
+ __ bind(loop);
+ __ ldrex($tmp1$$Register,$mem$$Address);
+ __ add($tmp1$$Register, $tmp1$$Register, $add$$constant);
+ __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+ __ cmp($tmp2$$Register, 0);
+ __ b(loop, ne);
+ %}
+ ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic int add of a register operand when the fetched value is
+// not needed (only matched when result_not_used()).
+instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem add));
+  effect(TEMP tmp1, TEMP tmp2);
+  size(16);
+  format %{ "loop:\n\t"
+            "LDXR_w $tmp1, $mem\n\t"
+            "ADD_w $tmp1, $tmp1, $add\n\t"
+            "STXR_w $tmp2, $tmp1, $mem\n\t"
+            "CBNZ_w $tmp2, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register value  = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr_w(value, addr);
+    __ add_w(value, value, $add$$Register);
+    __ stxr_w(status, value, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic int add of a register operand when the fetched value
+// is not needed (only matched when result_not_used()). Flags are killed by
+// the CMP used for the retry test.
+instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem add));
+  effect(KILL ccr, TEMP tmp1, TEMP tmp2);
+  size(20);
+  format %{ "loop: \n\t"
+            "LDREX $tmp1, $mem\n\t"
+            "ADD $tmp1, $tmp1, $add\n\t"
+            "STREX $tmp2, $tmp1, $mem\n\t"
+            "CMP $tmp2, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register value  = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrex(value,$mem$$Address);
+    __ add(value, value, $add$$Register);
+    __ strex(status, value, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic int fetch-and-add of an immediate. The value loaded
+// before the add is left in res; the sum is built in tmp1 and stored.
+instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2) %{
+  match(Set res (GetAndAddI mem add));
+  effect(TEMP tmp1, TEMP tmp2, TEMP res);
+  size(16);
+  format %{ "loop:\n\t"
+            "LDXR_w $res, $mem\n\t"
+            "ADD_w $tmp1, $res, $add\n\t"
+            "STXR_w $tmp2, $tmp1, $mem\n\t"
+            "CBNZ_w $tmp2, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register old    = $res$$Register;
+    Register sum    = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr_w(old, addr);
+    __ add_w(sum, old, $add$$constant);
+    __ stxr_w(status, sum, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic int fetch-and-add of an immediate. The value loaded
+// before the add is left in res; the sum is built in tmp1 and stored.
+instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+  match(Set res (GetAndAddI mem add));
+  effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
+  size(20);
+  format %{ "loop: \n\t"
+            "LDREX $res, $mem\n\t"
+            "ADD $tmp1, $res, $add\n\t"
+            "STREX $tmp2, $tmp1, $mem\n\t"
+            "CMP $tmp2, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register old    = $res$$Register;
+    Register sum    = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrex(old,$mem$$Address);
+    __ add(sum, old, $add$$constant);
+    __ strex(status, sum, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic int fetch-and-add of a register operand. The value loaded
+// before the add is left in res.
+instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2) %{
+  match(Set res (GetAndAddI mem add));
+  effect(TEMP tmp1, TEMP tmp2, TEMP res);
+  size(16);
+  format %{ "loop:\n\t"
+            "LDXR_w $res, $mem\n\t"
+            "ADD_w $tmp1, $res, $add\n\t"
+            "STXR_w $tmp2, $tmp1, $mem\n\t"
+            "CBNZ_w $tmp2, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register old    = $res$$Register;
+    Register sum    = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr_w(old, addr);
+    __ add_w(sum, old, $add$$Register);
+    __ stxr_w(status, sum, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic int fetch-and-add of a register operand. The value
+// loaded before the add is left in res.
+instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+  match(Set res (GetAndAddI mem add));
+  effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
+  size(20);
+  format %{ "loop: \n\t"
+            "LDREX $res, $mem\n\t"
+            "ADD $tmp1, $res, $add\n\t"
+            "STREX $tmp2, $tmp1, $mem\n\t"
+            "CMP $tmp2, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register old    = $res$$Register;
+    Register sum    = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrex(old,$mem$$Address);
+    __ add(sum, old, $add$$Register);
+    __ strex(status, sum, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic long add of a register operand when the fetched value is
+// not needed (only matched when result_not_used()).
+instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegL tmp1, iRegI tmp2) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem add));
+  effect(TEMP tmp1, TEMP tmp2);
+  size(16);
+  format %{ "loop:\n\t"
+            "LDXR $tmp1, $mem\n\t"
+            "ADD $tmp1, $tmp1, $add\n\t"
+            "STXR $tmp2, $tmp1, $mem\n\t"
+            "CBNZ_w $tmp2, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register value  = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr(value, addr);
+    __ add(value, value, $add$$Register);
+    __ stxr(status, value, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic long add of a register pair when the fetched value is
+// not needed (only matched when result_not_used()). The 64-bit sum is formed
+// with ADDS on the low words and ADC on the high words (successor registers).
+instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem add));
+  effect( KILL ccr, TEMP tmp1, TEMP tmp2);
+  size(24);
+  format %{ "loop: \n\t"
+            "LDREXD $tmp1, $mem\n\t"
+            "ADDS $tmp1.lo, $tmp1.lo, $add.lo\n\t"
+            "ADC $tmp1.hi, $tmp1.hi, $add.hi\n\t"
+            "STREXD $tmp2, $tmp1, $mem\n\t"
+            "CMP $tmp2, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register val_lo = $tmp1$$Register;
+    Register val_hi = val_lo->successor();
+    Register add_lo = $add$$Register;
+    Register add_hi = add_lo->successor();
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrexd(val_lo, $mem$$Address);
+    __ adds(val_lo, val_lo, add_lo);
+    __ adc(val_hi, val_hi, add_hi);
+    __ strexd(status, val_lo, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic long add of an immediate when the fetched value is not
+// needed (only matched when result_not_used()).
+instruct xaddL_imm_no_res(memoryex mem, aimmL add, Universe dummy, iRegL tmp1, iRegI tmp2) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem add));
+  effect(TEMP tmp1, TEMP tmp2);
+  size(16);
+  format %{ "loop:\n\t"
+            "LDXR $tmp1, $mem\n\t"
+            "ADD $tmp1, $tmp1, $add\n\t"
+            "STXR $tmp2, $tmp1, $mem\n\t"
+            "CBNZ_w $tmp2, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register value  = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr(value, addr);
+    __ add(value, value, $add$$constant);
+    __ stxr(status, value, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic long add of an immLlowRot immediate when the fetched
+// value is not needed (only matched when result_not_used()). The immediate
+// is added to the low word with ADDS; the high word only absorbs the carry.
+// TODO: try immLRot2 instead, so that the (0, constant) immediate pair
+// becomes (hi(constant), lo(constant)).
+instruct xaddL_immRot_no_res(memoryex mem, immLlowRot add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem add));
+  effect( KILL ccr, TEMP tmp1, TEMP tmp2);
+  size(24);
+  format %{ "loop: \n\t"
+            "LDREXD $tmp1, $mem\n\t"
+            "ADDS $tmp1.lo, $tmp1.lo, $add\n\t"
+            "ADC $tmp1.hi, $tmp1.hi, 0\n\t"
+            "STREXD $tmp2, $tmp1, $mem\n\t"
+            "CMP $tmp2, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register val_lo = $tmp1$$Register;
+    Register val_hi = val_lo->successor();
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrexd(val_lo, $mem$$Address);
+    __ adds(val_lo, val_lo, $add$$constant);
+    __ adc(val_hi, val_hi, 0);
+    __ strexd(status, val_lo, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic long fetch-and-add of a register operand. The value
+// loaded before the add is left in res.
+instruct xaddL_reg(memoryex mem, iRegL add, iRegL res, iRegL tmp1, iRegI tmp2) %{
+  match(Set res (GetAndAddL mem add));
+  effect(TEMP tmp1, TEMP tmp2, TEMP res);
+  size(16);
+  format %{ "loop:\n\t"
+            "LDXR $res, $mem\n\t"
+            "ADD $tmp1, $res, $add\n\t"
+            "STXR $tmp2, $tmp1, $mem\n\t"
+            "CBNZ_w $tmp2, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register old    = $res$$Register;
+    Register sum    = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr(old, addr);
+    __ add(sum, old, $add$$Register);
+    __ stxr(status, sum, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic long fetch-and-add of a register pair. The pair loaded
+// before the add is left in res; the sum is built in tmp1 with ADDS/ADC.
+instruct xaddL_reg(memoryex mem, iRegL add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
+  match(Set res (GetAndAddL mem add));
+  effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
+  size(24);
+  format %{ "loop: \n\t"
+            "LDREXD $res, $mem\n\t"
+            "ADDS $tmp1.lo, $res.lo, $add.lo\n\t"
+            "ADC $tmp1.hi, $res.hi, $add.hi\n\t"
+            "STREXD $tmp2, $tmp1, $mem\n\t"
+            "CMP $tmp2, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register old_lo = $res$$Register;
+    Register old_hi = old_lo->successor();
+    Register sum_lo = $tmp1$$Register;
+    Register sum_hi = sum_lo->successor();
+    Register add_lo = $add$$Register;
+    Register add_hi = add_lo->successor();
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrexd(old_lo, $mem$$Address);
+    __ adds(sum_lo, old_lo, add_lo);
+    __ adc(sum_hi, old_hi, add_hi);
+    __ strexd(status, sum_lo, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic long fetch-and-add of an immediate. The value loaded
+// before the add is left in res.
+instruct xaddL_imm(memoryex mem, aimmL add, iRegL res, iRegL tmp1, iRegI tmp2) %{
+  match(Set res (GetAndAddL mem add));
+  effect(TEMP tmp1, TEMP tmp2, TEMP res);
+  size(16);
+  format %{ "loop:\n\t"
+            "LDXR $res, $mem\n\t"
+            "ADD $tmp1, $res, $add\n\t"
+            "STXR $tmp2, $tmp1, $mem\n\t"
+            "CBNZ_w $tmp2, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register old    = $res$$Register;
+    Register sum    = $tmp1$$Register;
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr(old, addr);
+    __ add(sum, old, $add$$constant);
+    __ stxr(status, sum, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic long fetch-and-add of an immLlowRot immediate. The pair
+// loaded before the add is left in res; the immediate is added to the low
+// word with ADDS and the high word only absorbs the carry.
+// TODO: try immLRot2 instead, so that the (0, constant) immediate pair
+// becomes (hi(constant), lo(constant)).
+instruct xaddL_immRot(memoryex mem, immLlowRot add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
+  match(Set res (GetAndAddL mem add));
+  effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
+  size(24);
+  format %{ "loop: \n\t"
+            "LDREXD $res, $mem\n\t"
+            "ADDS $tmp1.lo, $res.lo, $add\n\t"
+            "ADC $tmp1.hi, $res.hi, 0\n\t"
+            "STREXD $tmp2, $tmp1, $mem\n\t"
+            "CMP $tmp2, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register old_lo = $res$$Register;
+    Register old_hi = old_lo->successor();
+    Register sum_lo = $tmp1$$Register;
+    Register sum_hi = sum_lo->successor();
+    Register status = $tmp2$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrexd(old_lo, $mem$$Address);
+    __ adds(sum_lo, old_lo, $add$$constant);
+    __ adc(sum_hi, old_hi, 0);
+    __ strexd(status, sum_lo, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic int exchange. The previous memory value is returned in
+// res and newval is stored; retried until the exclusive store succeeds.
+instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp) %{
+  match(Set res (GetAndSetI mem newval));
+  effect(TEMP tmp, TEMP res);
+  size(12);
+  format %{ "loop:\n\t"
+            "LDXR_w $res, $mem\n\t"
+            "STXR_w $tmp, $newval, $mem\n\t"
+            "CBNZ_w $tmp, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register old    = $res$$Register;
+    Register status = $tmp$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr_w(old, addr);
+    __ stxr_w(status, $newval$$Register, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic exchange of a narrow (iRegN) value using 32-bit exclusive
+// load/store. Compiled out: XXX is used here as a guard to keep the
+// instruct disabled (see the bug reference below).
+#ifdef XXX
+// Disabled until 8051805 is fixed.
+instruct xchgN(memoryex mem, iRegN newval, iRegN res, iRegN tmp) %{
+ match(Set res (GetAndSetN mem newval));
+ effect(TEMP tmp, TEMP res);
+ size(12);
+ format %{ "loop:\n\t"
+ "LDXR_w $res, $mem\n\t"
+ "STXR_w $tmp, $newval, $mem\n\t"
+ "CBNZ_w $tmp, loop" %}
+
+ ins_encode %{
+ Label loop;
+ Register base = reg_to_register_object($mem$$base);
+ __ bind(loop);
+ __ ldxr_w($res$$Register, base);
+ __ stxr_w($tmp$$Register, $newval$$Register, base);
+ // Retry while the store-exclusive status in tmp is non-zero.
+ __ cbnz_w($tmp$$Register, loop);
+ %}
+ ins_pipe( long_memory_op );
+%}
+#endif
+#else
+// 32-bit ARM: atomic int exchange. The previous memory value is returned
+// in res and newval is stored; the CMP used for the retry test kills flags.
+instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{
+  match(Set res (GetAndSetI mem newval));
+  effect(KILL ccr, TEMP tmp, TEMP res);
+  size(16);
+  format %{ "loop: \n\t"
+            "LDREX $res, $mem\n\t"
+            "STREX $tmp, $newval, $mem\n\t"
+            "CMP $tmp, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register old    = $res$$Register;
+    Register status = $tmp$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrex(old,$mem$$Address);
+    __ strex(status, $newval$$Register, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: atomic long exchange. The previous memory value is returned in
+// res and newval is stored; retried until the exclusive store succeeds.
+instruct xchgL(memoryex mem, iRegL newval, iRegL res, iRegI tmp) %{
+  match(Set res (GetAndSetL mem newval));
+  effect(TEMP tmp, TEMP res);
+  size(12);
+  format %{ "loop:\n\t"
+            "LDXR $res, $mem\n\t"
+            "STXR $tmp, $newval, $mem\n\t"
+            "CBNZ_w $tmp, loop" %}
+
+  ins_encode %{
+    Register addr   = reg_to_register_object($mem$$base);
+    Register old    = $res$$Register;
+    Register status = $tmp$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldxr(old, addr);
+    __ stxr(status, $newval$$Register, addr);
+    // Branch back while the store-exclusive status is non-zero.
+    __ cbnz_w(status, retry);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic long exchange on register pairs using the doubleword
+// exclusive load/store. The previous memory value is returned in res.
+instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr) %{
+  match(Set res (GetAndSetL mem newval));
+  effect( KILL ccr, TEMP tmp, TEMP res);
+  size(16);
+  format %{ "loop: \n\t"
+            "LDREXD $res, $mem\n\t"
+            "STREXD $tmp, $newval, $mem\n\t"
+            "CMP $tmp, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register old    = $res$$Register;
+    Register status = $tmp$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrexd(old, $mem$$Address);
+    __ strexd(status, $newval$$Register, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif // !AARCH64
+
+#ifdef AARCH64
+// AArch64: atomic pointer exchange. The previous memory value is returned
+// in res and newval is stored; retried until the exclusive store succeeds.
+// NOTE(review): unlike the other AArch64 variants in this file (which use
+// LDXR/STXR and ldxr/stxr), this one prints LDREX/STREX and calls
+// ldrex/strex with a base register. Presumably the assembler maps these to
+// the 64-bit exclusive forms on AArch64 -- TODO confirm, and consider
+// aligning the mnemonics in the format string.
+instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp) %{
+ match(Set res (GetAndSetP mem newval));
+ effect(TEMP tmp, TEMP res);
+ size(12);
+ format %{ "loop:\n\t"
+ "LDREX $res, $mem\n\t"
+ "STREX $tmp, $newval, $mem\n\t"
+ "CBNZ_w $tmp, loop" %}
+
+ ins_encode %{
+ Label loop;
+ Register base = reg_to_register_object($mem$$base);
+ __ bind(loop);
+ __ ldrex($res$$Register, base);
+ __ strex($tmp$$Register, $newval$$Register, base);
+ // Retry while the store-exclusive status in tmp is non-zero.
+ __ cbnz_w($tmp$$Register, loop);
+ %}
+ ins_pipe( long_memory_op );
+%}
+#else
+// 32-bit ARM: atomic pointer exchange. The previous memory value is
+// returned in res and newval is stored; the CMP used for the retry test
+// kills flags.
+instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{
+  match(Set res (GetAndSetP mem newval));
+  effect(KILL ccr, TEMP tmp, TEMP res);
+  size(16);
+  format %{ "loop: \n\t"
+            "LDREX $res, $mem\n\t"
+            "STREX $tmp, $newval, $mem\n\t"
+            "CMP $tmp, 0 \n\t"
+            "B.ne loop \n\t" %}
+
+  ins_encode %{
+    Register old    = $res$$Register;
+    Register status = $tmp$$Register;
+    Label retry;
+    __ bind(retry);
+    __ ldrex(old,$mem$$Address);
+    __ strex(status, $newval$$Register, $mem$$Address);
+    // Loop until the exclusive store reports status 0.
+    __ cmp(status, 0);
+    __ b(retry, ne);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif // !AARCH64
+
+//---------------------
+// Subtraction Instructions
+// Register Subtraction
+// Int subtraction of two registers: dst = src1 - src2.
+instruct subI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (SubI src1 src2));
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    Register result     = $dst$$Register;
+    Register minuend    = $src1$$Register;
+    Register subtrahend = $src2$$Register;
+    __ sub_32(result, minuend, subtrahend);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+// Int subtract with a register-shifted operand:
+// dst = src1 - (src2 << src3), folded into one SUB.
+instruct subshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI src1 (LShiftI src2 src3)));
+  size(4);
+  format %{ "SUB $dst,$src1,$src2<<$src3" %}
+  ins_encode %{
+    Register result  = $dst$$Register;
+    Register minuend = $src1$$Register;
+    Register value   = $src2$$Register;
+    Register count   = $src3$$Register;
+    __ sub(result, minuend, AsmOperand(value, lsl, count));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// Int subtract with an immediate-shifted operand:
+// dst = src1 - (src2 << src3), src3 being an immU5 constant.
+instruct subshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  match(Set dst (SubI src1 (LShiftI src2 src3)));
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2<<$src3\t! int" %}
+  ins_encode %{
+    Register result  = $dst$$Register;
+    Register minuend = $src1$$Register;
+    Register value   = $src2$$Register;
+    __ sub_32(result, minuend, AsmOperand(value, lsl, $src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+// Int subtract with an arithmetic-right-shifted operand:
+// dst = src1 - (src2 >> src3), shift count in a register.
+instruct subsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI src1 (RShiftI src2 src3)));
+  size(4);
+  format %{ "SUB $dst,$src1,$src2>>$src3" %}
+  ins_encode %{
+    Register result  = $dst$$Register;
+    Register minuend = $src1$$Register;
+    Register value   = $src2$$Register;
+    Register count   = $src3$$Register;
+    __ sub(result, minuend, AsmOperand(value, asr, count));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// Int subtract with an arithmetic-right-shifted operand:
+// dst = src1 - (src2 >> src3), src3 being an immU5 constant.
+instruct subsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  match(Set dst (SubI src1 (RShiftI src2 src3)));
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2>>$src3\t! int" %}
+  ins_encode %{
+    Register result  = $dst$$Register;
+    Register minuend = $src1$$Register;
+    Register value   = $src2$$Register;
+    __ sub_32(result, minuend, AsmOperand(value, asr, $src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+// Int subtract with a logical-right-shifted operand:
+// dst = src1 - (src2 >>> src3), shift count in a register.
+instruct subshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI src1 (URShiftI src2 src3)));
+  size(4);
+  format %{ "SUB $dst,$src1,$src2>>>$src3" %}
+  ins_encode %{
+    Register result  = $dst$$Register;
+    Register minuend = $src1$$Register;
+    Register value   = $src2$$Register;
+    Register count   = $src3$$Register;
+    __ sub(result, minuend, AsmOperand(value, lsr, count));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// Int subtract with a logical-right-shifted operand:
+// dst = src1 - (src2 >>> src3), src3 being an immU5 constant.
+instruct subshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  match(Set dst (SubI src1 (URShiftI src2 src3)));
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2>>>$src3\t! int" %}
+  ins_encode %{
+    Register result  = $dst$$Register;
+    Register minuend = $src1$$Register;
+    Register value   = $src2$$Register;
+    __ sub_32(result, minuend, AsmOperand(value, lsr, $src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+// Int reverse subtract with a shifted first operand:
+// dst = (src1 << src2) - src3, shift count in a register.
+instruct rsbshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI (LShiftI src1 src2) src3));
+  size(4);
+  format %{ "RSB $dst,$src3,$src1<<$src2" %}
+  ins_encode %{
+    Register result     = $dst$$Register;
+    Register value      = $src1$$Register;
+    Register count      = $src2$$Register;
+    Register subtrahend = $src3$$Register;
+    __ rsb(result, subtrahend, AsmOperand(value, lsl, count));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Int reverse subtract with a shifted first operand:
+// dst = (src1 << src2) - src3, src2 being an immU5 constant.
+instruct rsbshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (SubI (LShiftI src1 src2) src3));
+  size(4);
+  format %{ "RSB $dst,$src3,$src1<<$src2" %}
+  ins_encode %{
+    Register result     = $dst$$Register;
+    Register value      = $src1$$Register;
+    Register subtrahend = $src3$$Register;
+    __ rsb(result, subtrahend, AsmOperand(value, lsl, $src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Int reverse subtract with an arithmetic-right-shifted first operand:
+// dst = (src1 >> src2) - src3, shift count in a register.
+instruct rsbsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI (RShiftI src1 src2) src3));
+  size(4);
+  format %{ "RSB $dst,$src3,$src1>>$src2" %}
+  ins_encode %{
+    Register result     = $dst$$Register;
+    Register value      = $src1$$Register;
+    Register count      = $src2$$Register;
+    Register subtrahend = $src3$$Register;
+    __ rsb(result, subtrahend, AsmOperand(value, asr, count));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Int reverse subtract with an arithmetic-right-shifted first operand:
+// dst = (src1 >> src2) - src3, src2 being an immU5 constant.
+instruct rsbsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (SubI (RShiftI src1 src2) src3));
+  size(4);
+  format %{ "RSB $dst,$src3,$src1>>$src2" %}
+  ins_encode %{
+    Register result     = $dst$$Register;
+    Register value      = $src1$$Register;
+    Register subtrahend = $src3$$Register;
+    __ rsb(result, subtrahend, AsmOperand(value, asr, $src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Int reverse subtract with a logical-right-shifted first operand:
+// dst = (src1 >>> src2) - src3, shift count in a register.
+instruct rsbshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI (URShiftI src1 src2) src3));
+  size(4);
+  format %{ "RSB $dst,$src3,$src1>>>$src2" %}
+  ins_encode %{
+    Register result     = $dst$$Register;
+    Register value      = $src1$$Register;
+    Register count      = $src2$$Register;
+    Register subtrahend = $src3$$Register;
+    __ rsb(result, subtrahend, AsmOperand(value, lsr, count));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Int reverse subtract with a logical-right-shifted first operand:
+// dst = (src1 >>> src2) - src3, src2 being an immU5 constant.
+instruct rsbshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (SubI (URShiftI src1 src2) src3));
+  size(4);
+  format %{ "RSB $dst,$src3,$src1>>>$src2" %}
+  ins_encode %{
+    Register result     = $dst$$Register;
+    Register value      = $src1$$Register;
+    Register subtrahend = $src3$$Register;
+    __ rsb(result, subtrahend, AsmOperand(value, lsr, $src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// Immediate Subtraction
+// Int subtraction of an arithmetic immediate: dst = src1 - src2.
+instruct subI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{
+  match(Set dst (SubI src1 src2));
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    Register result  = $dst$$Register;
+    Register minuend = $src1$$Register;
+    __ sub_32(result, minuend, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Int addition of an aimmIneg immediate, emitted as a subtraction of the
+// negated constant: dst = src1 - (-src2).
+instruct subI_reg_immRotneg(iRegI dst, iRegI src1, aimmIneg src2) %{
+  match(Set dst (AddI src1 src2));
+  size(4);
+  format %{ "sub_32 $dst,$src1,-($src2)\t! int" %}
+  ins_encode %{
+    Register result = $dst$$Register;
+    Register addend = $src1$$Register;
+    __ sub_32(result, addend, -$src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+#ifndef AARCH64
+// Int subtraction with an immediate minuend: dst = src1 - src2, where src1
+// is an immIRot constant. Emitted as a reverse subtract (RSB) so the
+// constant can be the instruction's immediate operand.
+instruct subI_immRot_reg(iRegI dst, immIRot src1, iRegI src2) %{
+ match(Set dst (SubI src1 src2));
+
+ size(4);
+ // Fixed: the last operand was the literal text "src1" (missing '$'), so
+ // the immediate value was never printed in the disassembly listing.
+ format %{ "RSB $dst,$src2,$src1" %}
+ ins_encode %{
+ __ rsb($dst$$Register, $src2$$Register, $src1$$constant);
+ %}
+ ins_pipe(ialu_zero_reg);
+%}
+#endif
+
+// Register Subtraction
+#ifdef AARCH64
+// AArch64: long subtraction of two registers in a single SUB.
+instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  match(Set dst (SubL src1 src2));
+  size(4);
+  format %{ "SUB $dst,$src1,$src2\t! long" %}
+  ins_encode %{
+    Register result     = $dst$$Register;
+    Register minuend    = $src1$$Register;
+    Register subtrahend = $src2$$Register;
+    __ sub(result, minuend, subtrahend);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+#else
+// 32-bit ARM: long subtraction on register pairs. SUBS on the low words
+// sets the flags that SBC then uses for the high words (the successor
+// registers), so the flags register is killed.
+instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg icc ) %{
+  match(Set dst (SubL src1 src2));
+  effect (KILL icc);
+  size(8);
+  format %{ "SUBS $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
+            "SBC $dst.hi,$src1.hi,$src2.hi" %}
+  ins_encode %{
+    Register dst_lo = $dst$$Register;
+    Register a_lo   = $src1$$Register;
+    Register b_lo   = $src2$$Register;
+    __ subs(dst_lo, a_lo, b_lo);
+    __ sbc(dst_lo->successor(), a_lo->successor(), b_lo->successor());
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+#ifdef AARCH64
+// Immediate Subtraction
+// AArch64: long subtraction of an arithmetic immediate: dst = src1 - src2.
+instruct subL_reg_aimm(iRegL dst, iRegL src1, aimmL src2) %{
+  match(Set dst (SubL src1 src2));
+  size(4);
+  format %{ "SUB $dst,$src1,$src2\t! long" %}
+  ins_encode %{
+    Register result  = $dst$$Register;
+    Register minuend = $src1$$Register;
+    __ sub(result, minuend, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// AArch64: long addition of an aimmLneg immediate, emitted as a subtraction
+// of the negated constant: dst = src1 - (-src2).
+instruct subL_reg_immLneg(iRegL dst, iRegL src1, aimmLneg src2) %{
+  match(Set dst (AddL src1 src2));
+  size(4);
+  format %{ "SUB $dst,$src1,-($src2)\t! long" %}
+  ins_encode %{
+    Register result = $dst$$Register;
+    Register addend = $src1$$Register;
+    __ sub(result, addend, -$src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+#else
+// TODO
+#endif
+
+#ifndef AARCH64
+// Immediate Subtraction
+// 32-bit ARM: long subtraction of an immLlowRot immediate. The constant is
+// subtracted from the low word with SUBS (which sets the flags), and SBC
+// then subtracts 0 plus the borrow from the high word, so flags are killed.
+// TODO: try immLRot2 instead, so that the (0, $con$$constant) immediate pair
+// becomes (hi($con$$constant), lo($con$$constant)).
+instruct subL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg icc) %{
+ match(Set dst (SubL src1 con));
+ effect (KILL icc);
+
+ size(8);
+ // Fixed: the listing said SUB although the encoding emits the
+ // flag-setting SUBS that the following SBC depends on.
+ format %{ "SUBS $dst.lo,$src1.lo,$con\t! long\n\t"
+ "SBC $dst.hi,$src1.hi,0" %}
+ ins_encode %{
+ __ subs($dst$$Register, $src1$$Register, $con$$constant);
+ __ sbc($dst$$Register->successor(), $src1$$Register->successor(), 0);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Long negation
+// 32-bit ARM: long negation, matched as (0 - src2). RSBS computes
+// 0 - src2.lo and sets the flags; RSC completes the high word.
+instruct negL_reg_reg(iRegL dst, immL0 zero, iRegL src2, flagsReg icc) %{
+  match(Set dst (SubL zero src2));
+  effect (KILL icc);
+  size(8);
+  format %{ "RSBS $dst.lo,$src2.lo,0\t! long\n\t"
+            "RSC $dst.hi,$src2.hi,0" %}
+  ins_encode %{
+    Register dst_lo = $dst$$Register;
+    Register src_lo = $src2$$Register;
+    __ rsbs(dst_lo, src_lo, 0);
+    __ rsc(dst_lo->successor(), src_lo->successor(), 0);
+  %}
+  ins_pipe(ialu_zero_reg);
+%}
+#endif // !AARCH64
+
+// Multiplication Instructions
+// Integer Multiplication
+// Register Multiplication
+// Int multiplication of two registers: dst = src1 * src2.
+instruct mulI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (MulI src1 src2));
+  size(4);
+  format %{ "mul_32 $dst,$src1,$src2" %}
+  ins_encode %{
+    Register product = $dst$$Register;
+    Register lhs     = $src1$$Register;
+    Register rhs     = $src2$$Register;
+    __ mul_32(product, lhs, rhs);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+#ifdef AARCH64
+// AArch64: long multiplication of two registers in a single MUL.
+instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  match(Set dst (MulL src1 src2));
+  size(4);
+  format %{ "MUL $dst,$src1,$src2\t! long" %}
+  ins_encode %{
+    Register product = $dst$$Register;
+    Register lhs     = $src1$$Register;
+    Register rhs     = $src2$$Register;
+    __ mul(product, lhs, rhs);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+#else
+// 32-bit long multiply, step 1 (no match rule; used via expand from
+// mulL_reg_reg): dst.hi = src1.lo * src2.hi.
+instruct mulL_lo1_hi2(iRegL dst, iRegL src1, iRegL src2) %{
+  effect(DEF dst, USE src1, USE src2);
+  size(4);
+  format %{ "MUL $dst.hi,$src1.lo,$src2.hi\t! long" %}
+  ins_encode %{
+    Register dst_hi = $dst$$Register->successor();
+    Register a_lo   = $src1$$Register;
+    Register b_hi   = $src2$$Register->successor();
+    __ mul(dst_hi, a_lo, b_hi);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+// 32-bit long multiply, step 2 (no match rule; used via expand from
+// mulL_reg_reg): dst.hi += src1.hi * src2.lo, then dst.lo is cleared.
+instruct mulL_hi1_lo2(iRegL dst, iRegL src1, iRegL src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(8);
+  format %{ "MLA $dst.hi,$src1.hi,$src2.lo,$dst.hi\t! long\n\t"
+            "MOV $dst.lo, 0"%}
+  ins_encode %{
+    Register dst_lo = $dst$$Register;
+    Register dst_hi = dst_lo->successor();
+    Register a_hi   = $src1$$Register->successor();
+    Register b_lo   = $src2$$Register;
+    __ mla(dst_hi, a_hi, b_lo, dst_hi);
+    __ mov(dst_lo, 0);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+// 32-bit long multiply, step 3 (no match rule; used via expand from
+// mulL_reg_reg): UMLAL accumulates src1.lo * src2.lo into dst.lo/dst.hi.
+instruct mulL_lo1_lo2(iRegL dst, iRegL src1, iRegL src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(4);
+  format %{ "UMLAL $dst.lo,$dst.hi,$src1,$src2\t! long" %}
+  ins_encode %{
+    Register dst_lo = $dst$$Register;
+    Register dst_hi = dst_lo->successor();
+    Register a_lo   = $src1$$Register;
+    Register b_lo   = $src2$$Register;
+    __ umlal(dst_lo, dst_hi, a_lo, b_lo);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+// 32-bit ARM: long multiplication, expanded into three helper instructs
+// that build the result in dst step by step:
+//   mulL_lo1_hi2: dst.hi  = src1.lo * src2.hi
+//   mulL_hi1_lo2: dst.hi += src1.hi * src2.lo; dst.lo = 0
+//   mulL_lo1_lo2: dst.lo/dst.hi += src1.lo * src2.lo (UMLAL)
+// The order of the three steps matters: each later step accumulates into
+// what the earlier one left in dst.
+instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (MulL src1 src2));
+
+ expand %{
+ mulL_lo1_hi2(dst, src1, src2);
+ mulL_hi1_lo2(dst, src1, src2);
+ mulL_lo1_lo2(dst, src1, src2);
+ %}
+%}
+#endif // !AARCH64
+
+// Integer Division
+// Register Division
+#ifdef AARCH64
+// AArch64: int division using the 32-bit signed divide instruction.
+instruct divI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (DivI src1 src2));
+  size(4);
+  format %{ "SDIV $dst,$src1,$src2\t! 32-bit" %}
+  ins_encode %{
+    Register quotient = $dst$$Register;
+    Register dividend = $src1$$Register;
+    Register divisor  = $src2$$Register;
+    __ sdiv_w(quotient, dividend, divisor);
+  %}
+  ins_pipe(ialu_reg_reg); // FIXME
+%}
+#else
+// 32-bit ARM: int division via a call to the idiv/irem stub. Operands are
+// pinned through the R0RegI/R1RegI/R2RegI operand classes; both sources,
+// the flags and LR are declared killed by the call.
+instruct divI_reg_reg(R1RegI dst, R0RegI src1, R2RegI src2, LRRegP lr, flagsReg ccr) %{
+  match(Set dst (DivI src1 src2));
+  effect( KILL ccr, KILL src1, KILL src2, KILL lr);
+  ins_cost((2+71)*DEFAULT_COST);
+
+  format %{ "DIV $dst,$src1,$src2 ! call to StubRoutines::Arm::idiv_irem_entry()" %}
+  ins_encode %{
+    address stub = StubRoutines::Arm::idiv_irem_entry();
+    __ call(stub, relocInfo::runtime_call_type);
+  %}
+  ins_pipe(sdiv_reg_reg);
+%}
+#endif
+
+// Register Long Division
+#ifdef AARCH64
+// AArch64: long division using the signed divide instruction.
+instruct divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  match(Set dst (DivL src1 src2));
+  size(4);
+  format %{ "SDIV $dst,$src1,$src2" %}
+  ins_encode %{
+    Register quotient = $dst$$Register;
+    Register dividend = $src1$$Register;
+    Register divisor  = $src2$$Register;
+    __ sdiv(quotient, dividend, divisor);
+  %}
+  ins_pipe(ialu_reg_reg); // FIXME
+%}
+#else
+// 32-bit ARM: long division via a runtime call to SharedRuntime::ldiv.
+// Operands are pinned through the R0R1RegL/R2R3RegL operand classes and the
+// instruct is marked with the CALL effect.
+instruct divL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
+  match(Set dst (DivL src1 src2));
+  effect(CALL);
+  ins_cost(DEFAULT_COST*71);
+  format %{ "DIVL $src1,$src2,$dst\t! long ! call to SharedRuntime::ldiv" %}
+  ins_encode %{
+    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), relocInfo::runtime_call_type);
+  %}
+  ins_pipe(divL_reg_reg);
+%}
+#endif
+
+// Integer Remainder
+// Register Remainder
+#ifdef AARCH64
+// Int multiply-subtract: dst = src1 - (src2 * src3) in a single MSUB.
+// Compiled out: guarded by the TODO macro, so this instruct is only built
+// if TODO is defined.
+#ifdef TODO
+instruct msubI_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (SubI src1 (MulI src2 src3)));
+
+ size(4);
+ format %{ "MSUB $dst,$src2,$src3,$src1\t! 32-bit\n\t" %}
+ ins_encode %{
+ __ msub_w($dst$$Register, $src2$$Register, $src3$$Register, $src1$$Register);
+ %}
+ ins_pipe(ialu_reg_reg); // FIXME
+%}
+#endif
+
+// AArch64: int remainder. The quotient is computed into temp with SDIV,
+// then MSUB forms src1 - src2 * temp.
+instruct modI_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI temp) %{
+  match(Set dst (ModI src1 src2));
+  effect(TEMP temp);
+  size(8);
+  format %{ "SDIV $temp,$src1,$src2\t! 32-bit\n\t"
+            "MSUB $dst,$src2,$temp,$src1\t! 32-bit\n\t" %}
+  ins_encode %{
+    Register remainder = $dst$$Register;
+    Register dividend  = $src1$$Register;
+    Register divisor   = $src2$$Register;
+    Register quotient  = $temp$$Register;
+    __ sdiv_w(quotient, dividend, divisor);
+    __ msub_w(remainder, divisor, quotient, dividend);
+  %}
+  ins_pipe(ialu_reg_reg); // FIXME
+%}
+#else
+// 32-bit ARM: int remainder via a call to the idiv/irem stub. Operands are
+// pinned through the R0RegI/R1RegI/R2RegI operand classes; temp, src2, the
+// flags and LR are declared killed by the call.
+instruct modI_reg_reg(R0RegI dst, R0RegI src1, R2RegI src2, R1RegI temp, LRRegP lr, flagsReg ccr ) %{
+  match(Set dst (ModI src1 src2));
+  effect( KILL ccr, KILL temp, KILL src2, KILL lr);
+
+  format %{ "MODI $dst,$src1,$src2\t ! call to StubRoutines::Arm::idiv_irem_entry" %}
+  ins_encode %{
+    address stub = StubRoutines::Arm::idiv_irem_entry();
+    __ call(stub, relocInfo::runtime_call_type);
+  %}
+  ins_pipe(sdiv_reg_reg);
+%}
+#endif
+
+// Register Long Remainder
+#ifdef AARCH64
+// AArch64: long remainder. The quotient is computed into temp with SDIV,
+// then MSUB forms src1 - src2 * temp.
+instruct modL_reg_reg(iRegL dst, iRegL src1, iRegL src2, iRegL temp) %{
+  match(Set dst (ModL src1 src2));
+  effect(TEMP temp);
+  size(8);
+  format %{ "SDIV $temp,$src1,$src2\n\t"
+            "MSUB $dst,$src2,$temp,$src1" %}
+  ins_encode %{
+    Register remainder = $dst$$Register;
+    Register dividend  = $src1$$Register;
+    Register divisor   = $src2$$Register;
+    Register quotient  = $temp$$Register;
+    __ sdiv(quotient, dividend, divisor);
+    __ msub(remainder, divisor, quotient, dividend);
+  %}
+  ins_pipe(ialu_reg_reg); // FIXME
+%}
+#else
+// 32-bit ARM: long remainder via a runtime call to SharedRuntime::lrem.
+// Operands are pinned through the R0R1RegL/R2R3RegL operand classes and the
+// instruct is marked with the CALL effect.
+instruct modL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
+  match(Set dst (ModL src1 src2));
+  effect(CALL);
+  ins_cost(MEMORY_REF_COST); // FIXME
+  format %{ "modL $dst,$src1,$src2\t ! call to SharedRuntime::lrem" %}
+  ins_encode %{
+    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), relocInfo::runtime_call_type);
+  %}
+  ins_pipe(divL_reg_reg);
+%}
+#endif
+
+// Integer Shift Instructions
+
+// Register Shift Left
+// Int shift left by a register count: dst = src1 << src2.
+// AArch64 uses the 32-bit variable shift; 32-bit ARM uses MOV with an
+// LSL-shifted operand.
+instruct shlI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (LShiftI src1 src2));
+  size(4);
+#ifdef AARCH64
+  format %{ "LSLV $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    Register result = $dst$$Register;
+    Register value  = $src1$$Register;
+    Register count  = $src2$$Register;
+    __ lslv_w(result, value, count);
+  %}
+#else
+  format %{ "LSL $dst,$src1,$src2 \n\t" %}
+  ins_encode %{
+    Register result = $dst$$Register;
+    Register value  = $src1$$Register;
+    Register count  = $src2$$Register;
+    __ mov(result, AsmOperand(value, lsl, count));
+  %}
+#endif
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Register Shift Left Immediate
+// Int shift left by an immU5 constant: dst = src1 << src2.
+instruct shlI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
+  match(Set dst (LShiftI src1 src2));
+  size(4);
+#ifdef AARCH64
+  format %{ "LSL_w $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    // NOTE(review): the listing above says LSL_w but the call below is _lsl,
+    // whereas sarI_reg_imm5 pairs ASR_w with _asr_w. Confirm whether the
+    // _w form was intended here.
+    Register result = $dst$$Register;
+    Register value  = $src1$$Register;
+    __ _lsl(result, value, $src2$$constant);
+  %}
+#else
+  format %{ "LSL $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    Register result = $dst$$Register;
+    Register value  = $src1$$Register;
+    __ logical_shift_left(result, value, $src2$$constant);
+  %}
+#endif
+  ins_pipe(ialu_reg_imm);
+%}
+
+#ifndef AARCH64
+// 32-bit long shift-left helper (no match rule; used via expand from
+// shlL_reg_reg): ORs (src1.hi << src2) into the existing dst.hi.
+instruct shlL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(4);
+  format %{"OR $dst.hi,$dst.hi,($src1.hi << $src2)" %}
+  ins_encode %{
+    Register dst_hi = $dst$$Register->successor();
+    Register src_hi = $src1$$Register->successor();
+    Register count  = $src2$$Register;
+    __ orr(dst_hi, dst_hi, AsmOperand(src_hi, lsl, count));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// 32-bit long shift-left helper (no match rule; used via expand from
+// shlL_reg_reg): dst.lo = src1.lo << src2.
+instruct shlL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(4);
+  format %{ "LSL $dst.lo,$src1.lo,$src2 \n\t" %}
+  ins_encode %{
+    Register dst_lo = $dst$$Register;
+    Register src_lo = $src1$$Register;
+    Register count  = $src2$$Register;
+    __ mov(dst_lo, AsmOperand(src_lo, lsl, count));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// 32-bit long shift-left helper (no match rule; used via expand from
+// shlL_reg_reg): computes the bits of src1.lo that land in dst.hi.
+// SUBS leaves (src2 - 32) in dst.hi and sets the flags that predicate the
+// remaining three instructions, so the flags register is killed.
+instruct shlL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
+  effect(DEF dst, USE src1, USE src2, KILL ccr);
+  size(16);
+  format %{ "SUBS $dst.hi,$src2,32 \n\t"
+            "LSLpl $dst.hi,$src1.lo,$dst.hi \n\t"
+            "RSBmi $dst.hi,$dst.hi,0 \n\t"
+            "LSRmi $dst.hi,$src1.lo,$dst.hi" %}
+
+  ins_encode %{
+    // The low word of src1 and the high word of dst can't be the same register
+    Register dst_hi = $dst$$Register->successor();
+    Register src_lo = $src1$$Register;
+    __ subs(dst_hi, $src2$$Register, 32);
+    // Count of 32 or more: shift the low word left by (count - 32).
+    __ mov(dst_hi, AsmOperand(src_lo, lsl, dst_hi), pl);
+    // Count below 32: negate to get (32 - count), then shift right by it.
+    __ rsb(dst_hi, dst_hi, 0, mi);
+    __ mov(dst_hi, AsmOperand(src_lo, lsr, dst_hi), mi);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+#endif // !AARCH64
+
+// Long shift left by a register count: dst = src1 << src2.
+// AArch64 emits a single variable shift (LSLV). 32-bit ARM expands into
+// three helper instructs, in this order:
+//   shlL_reg_reg_overlap:  dst.hi = bits of src1.lo that move into the high word
+//   shlL_reg_reg_merge_hi: dst.hi |= src1.hi << src2
+//   shlL_reg_reg_merge_lo: dst.lo = src1.lo << src2
+// The flagsReg operand declared in the expand is the one the overlap
+// helper kills.
+instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+ match(Set dst (LShiftL src1 src2));
+
+#ifdef AARCH64
+ size(4);
+ format %{ "LSLV $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ lslv($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+#else
+ expand %{
+ flagsReg ccr;
+ shlL_reg_reg_overlap(dst, src1, src2, ccr);
+ shlL_reg_reg_merge_hi(dst, src1, src2);
+ shlL_reg_reg_merge_lo(dst, src1, src2);
+ %}
+#endif
+%}
+
+#ifdef AARCH64
+// AArch64: long shift left by an immU6 constant: dst = src1 << src2.
+instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
+  match(Set dst (LShiftL src1 src2));
+  size(4);
+  format %{ "LSL $dst,$src1,$src2\t! long" %}
+  ins_encode %{
+    Register result = $dst$$Register;
+    Register value  = $src1$$Register;
+    __ logical_shift_left(result, value, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+#else
+// Register Shift Left Immediate
+// 32-bit ARM: long shift left by an immU6Big constant. The high word is
+// produced from src1.lo (a plain move when the count is exactly 32,
+// otherwise a shift by count - 32) and the low word is cleared.
+instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
+  match(Set dst (LShiftL src1 src2));
+  size(8);
+  format %{ "LSL $dst.hi,$src1.lo,$src2-32\t! or mov if $src2==32\n\t"
+            "MOV $dst.lo, 0" %}
+  ins_encode %{
+    Register dst_lo = $dst$$Register;
+    Register dst_hi = dst_lo->successor();
+    Register src_lo = $src1$$Register;
+    if ($src2$$constant != 32) {
+      __ mov(dst_hi, AsmOperand(src_lo, lsl, $src2$$constant-32));
+    } else {
+      __ mov(dst_hi, src_lo);
+    }
+    __ mov(dst_lo, 0);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// 32-bit ARM: long shift left by an immU5 constant. The high word is
+// (src1.hi << src2) ORed with the top bits of src1.lo shifted down by
+// (32 - src2); the low word is src1.lo << src2.
+instruct shlL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
+ match(Set dst (LShiftL src1 src2));
+
+ size(12);
+ // Fixed: the first line of the listing named $src1.lo although the
+ // encoding shifts the high word (the successor register) of src1.
+ format %{ "LSL $dst.hi,$src1.hi,$src2\n\t"
+ "OR $dst.hi, $dst.hi, $src1.lo >> 32-$src2\n\t"
+ "LSL $dst.lo,$src1.lo,$src2" %}
+ ins_encode %{
+ // The order of the following 3 instructions matters: src1.lo and
+ // dst.hi can't overlap but src1.hi and dst.hi can.
+ __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), lsl, $src2$$constant));
+ __ orr($dst$$Register->successor(), $dst$$Register->successor(), AsmOperand($src1$$Register, lsr, 32-$src2$$constant));
+ __ mov($dst$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant));
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif // !AARCH64
+
+// Register Arithmetic Shift Right (int): matches RShiftI with the shift count in a register
+instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (RShiftI src1 src2));
+ size(4); // both variants are declared as 4 bytes
+#ifdef AARCH64
+ format %{ "ASRV $dst,$src1,$src2\t! int" %}
+ ins_encode %{
+ __ asrv_w($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+#else
+ format %{ "ASR $dst,$src1,$src2\t! int" %}
+ ins_encode %{
+ __ mov($dst$$Register, AsmOperand($src1$$Register, asr, $src2$$Register)); // shift expressed as a mov with a shifted operand
+ %}
+#endif
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Register Arithmetic Shift Right Immediate (int)
+instruct sarI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
+ match(Set dst (RShiftI src1 src2));
+ // Int arithmetic shift right by an immU5 constant: _asr_w on AArch64, mov with an asr-shifted operand on 32-bit ARM.
+ size(4);
+#ifdef AARCH64
+ format %{ "ASR_w $dst,$src1,$src2" %}
+ ins_encode %{
+ __ _asr_w($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+#else
+ format %{ "ASR $dst,$src1,$src2" %}
+ ins_encode %{
+ __ mov($dst$$Register, AsmOperand($src1$$Register, asr, $src2$$constant));
+ %}
+#endif
+ ins_pipe(ialu_reg_imm);
+%}
+
+#ifndef AARCH64
+// Register Shift Right Arithmetic Long
+// Helper for the sarL_reg_reg expansion: ORs the logically shifted low source word into dst.lo.
+instruct sarL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
+ effect(USE_DEF dst, USE src1, USE src2);
+ size(4);
+ format %{ "OR $dst.lo,$dst.lo,($src1.lo >>> $src2)" %}
+ ins_encode %{
+ // The low word is shifted with lsr (logical), hence ">>>" in the format.
+ __ orr($dst$$Register, $dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct sarL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
+ effect(USE_DEF dst, USE src1, USE src2); // no match rule: only used as the last step of the sarL_reg_reg expansion
+ size(4);
+ format %{ "ASR $dst.hi,$src1.hi,$src2 \n\t" %}
+ ins_encode %{
+ __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), asr, $src2$$Register)); // dst.hi = src1.hi >> src2 (arithmetic)
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct sarL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
+ effect(DEF dst, USE src1, USE src2, KILL ccr);
+ size(16);
+ format %{ "SUBS $dst.lo,$src2,32 \n\t"
+ "ASRpl $dst.lo,$src1.hi,$dst.lo \n\t"
+ "RSBmi $dst.lo,$dst.lo,0 \n\t"
+ "LSLmi $dst.lo,$src1.hi,$dst.lo" %}
+ // First step of the sarL_reg_reg expansion: computes the part of dst.lo that comes from src1.hi.
+ ins_encode %{
+ // $src1$$Register->successor() and $dst$$Register can't be the same
+ __ subs($dst$$Register, $src2$$Register, 32); // dst.lo = src2 - 32; the flags pick one of the two cases below
+ __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), asr, $dst$$Register), pl); // src2 - 32 >= 0: dst.lo = src1.hi >> (src2 - 32)
+ __ rsb($dst$$Register, $dst$$Register, 0, mi); // src2 - 32 < 0: dst.lo = 32 - src2
+ __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), lsl, $dst$$Register), mi); // src2 - 32 < 0: dst.lo = src1.hi << (32 - src2)
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif // !AARCH64
+
+instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+ match(Set dst (RShiftL src1 src2));
+ // Long arithmetic shift right by a register count: a single asrv on AArch64; on 32-bit ARM it expands into the three sarL_reg_reg_* helper instructs, run in the order listed.
+#ifdef AARCH64
+ size(4);
+ format %{ "ASRV $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ asrv($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+#else
+ expand %{
+ flagsReg ccr;
+ sarL_reg_reg_overlap(dst, src1, src2, ccr);
+ sarL_reg_reg_merge_lo(dst, src1, src2);
+ sarL_reg_reg_merge_hi(dst, src1, src2);
+ %}
+#endif
+%}
+
+// Register Arithmetic Shift Right Immediate (long)
+#ifdef AARCH64
+instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
+ match(Set dst (RShiftL src1 src2));
+ // AArch64: long arithmetic shift right by an immU6 constant, emitted through _asr.
+ size(4);
+ format %{ "ASR $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ _asr($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#else
+instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
+ match(Set dst (RShiftL src1 src2));
+ // 32-bit ARM, constant matched by immU6Big: dst.lo comes from src1.hi shifted right arithmetically by ($src2 - 32).
+ size(8);
+ format %{ "ASR $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t"
+ "ASR $dst.hi,$src1.hi, $src2" %}
+ ins_encode %{
+ if ($src2$$constant == 32) {
+ __ mov($dst$$Register, $src1$$Register->successor()); // shift by exactly 32: plain word move
+ } else{
+ __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), asr, $src2$$constant-32));
+ }
+ __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), asr, 0)); // NOTE(review): an immediate ASR count of 0 presumably encodes ASR #32 (sign fill of dst.hi) — confirm against AsmOperand
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+instruct sarL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
+ match(Set dst (RShiftL src1 src2)); // 32-bit ARM: long arithmetic shift right by an immU5 constant
+ size(12);
+ format %{ "LSR $dst.lo,$src1.lo,$src2\n\t"
+ "OR $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t"
+ "ASR $dst.hi,$src1.hi,$src2" %}
+ ins_encode %{
+ // The order of the following 3 instructions matters: dst.lo is written before src1.hi is read,
+ // so src1.hi and dst.lo must not overlap, while src1.hi and dst.hi can. NOTE(review): the earlier wording matched the left-shift variant; confirm the allocator guarantees this.
+ __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant));
+ __ orr($dst$$Register, $dst$$Register, AsmOperand($src1$$Register->successor(), lsl, 32-$src2$$constant));
+ __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), asr, $src2$$constant));
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif
+
+// Register Shift Right (int, logical): matches URShiftI with the shift count in a register
+instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (URShiftI src1 src2));
+ size(4); // both variants are declared as 4 bytes
+#ifdef AARCH64
+ format %{ "LSRV $dst,$src1,$src2\t! int" %}
+ ins_encode %{
+ __ lsrv_w($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+#else
+ format %{ "LSR $dst,$src1,$src2\t! int" %}
+ ins_encode %{
+ __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register)); // shift expressed as a mov with a shifted operand
+ %}
+#endif
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Register Shift Right Immediate (int, logical)
+instruct shrI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
+ match(Set dst (URShiftI src1 src2));
+ // Int logical shift right by an immU5 constant: _lsr_w on AArch64, mov with an lsr-shifted operand on 32-bit ARM.
+ size(4);
+#ifdef AARCH64
+ format %{ "LSR_w $dst,$src1,$src2" %}
+ ins_encode %{
+ __ _lsr_w($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+#else
+ format %{ "LSR $dst,$src1,$src2" %}
+ ins_encode %{
+ __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant));
+ %}
+#endif
+ ins_pipe(ialu_reg_imm);
+%}
+
+#ifndef AARCH64
+// Register Shift Right (long, logical)
+// Helper for the shrL_reg_reg expansion: ORs the logically shifted low source word into dst.lo.
+instruct shrL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
+ effect(USE_DEF dst, USE src1, USE src2);
+ size(4);
+ format %{ "OR $dst.lo,$dst.lo,($src1.lo >>> $src2)" %}
+ ins_encode %{
+ // Only the low word of dst is read and written here.
+ __ orr($dst$$Register, $dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct shrL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
+ effect(USE_DEF dst, USE src1, USE src2); // no match rule: only used as the last step of the shrL_reg_reg expansion
+ size(4);
+ format %{ "LSR $dst.hi,$src1.hi,$src2 \n\t" %}
+ ins_encode %{
+ __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), lsr, $src2$$Register)); // dst.hi = src1.hi >>> src2
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct shrL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
+ effect(DEF dst, USE src1, USE src2, KILL ccr);
+ size(16);
+ format %{ "SUBS $dst,$src2,32 \n\t"
+ "LSRpl $dst,$src1.hi,$dst \n\t"
+ "RSBmi $dst,$dst,0 \n\t"
+ "LSLmi $dst,$src1.hi,$dst" %}
+ // First step of the shrL_reg_reg expansion: computes the part of dst.lo that comes from src1.hi.
+ ins_encode %{
+ // $src1$$Register->successor() and $dst$$Register can't be the same
+ __ subs($dst$$Register, $src2$$Register, 32); // dst.lo = src2 - 32; the flags pick one of the two cases below
+ __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), lsr, $dst$$Register), pl); // src2 - 32 >= 0: dst.lo = src1.hi >>> (src2 - 32)
+ __ rsb($dst$$Register, $dst$$Register, 0, mi); // src2 - 32 < 0: dst.lo = 32 - src2
+ __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), lsl, $dst$$Register), mi); // src2 - 32 < 0: dst.lo = src1.hi << (32 - src2)
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif // !AARCH64
+
+instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+ match(Set dst (URShiftL src1 src2));
+ // Long logical shift right by a register count: a single lsrv on AArch64; on 32-bit ARM it expands into the three shrL_reg_reg_* helper instructs, run in the order listed.
+#ifdef AARCH64
+ size(4);
+ format %{ "LSRV $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ lsrv($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+#else
+ expand %{
+ flagsReg ccr;
+ shrL_reg_reg_overlap(dst, src1, src2, ccr);
+ shrL_reg_reg_merge_lo(dst, src1, src2);
+ shrL_reg_reg_merge_hi(dst, src1, src2);
+ %}
+#endif
+%}
+
+// Register Shift Right Immediate
+#ifdef AARCH64
+instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
+ match(Set dst (URShiftL src1 src2));
+ // AArch64: long logical shift right by an immU6 constant, emitted through _lsr.
+ size(4);
+ format %{ "LSR $dst,$src1,$src2" %}
+ ins_encode %{
+ __ _lsr($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#else
+instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
+ match(Set dst (URShiftL src1 src2));
+ // 32-bit ARM, constant matched by immU6Big: dst.lo receives src1.hi shifted right logically by ($src2 - 32) and dst.hi is cleared.
+ size(8);
+ format %{ "LSR $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t"
+ "MOV $dst.hi, 0" %}
+ ins_encode %{
+ if ($src2$$constant == 32) {
+ __ mov($dst$$Register, $src1$$Register->successor()); // shift by exactly 32: plain word move
+ } else {
+ __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), lsr, $src2$$constant-32));
+ }
+ __ mov($dst$$Register->successor(), 0); // high word of the result is always zero here
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+instruct shrL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
+ match(Set dst (URShiftL src1 src2));
+ // 32-bit ARM: long logical shift right by an immU5 constant, built from three 32-bit operations.
+ size(12);
+ format %{ "LSR $dst.lo,$src1.lo,$src2\n\t"
+ "OR $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t"
+ "LSR $dst.hi,$src1.hi,$src2" %}
+ ins_encode %{
+ // The order of the following 3 instructions matters: dst.lo is written before src1.hi is read,
+ // so src1.hi and dst.lo must not overlap, while src1.hi and dst.hi can. NOTE(review): the earlier wording matched the left-shift variant; confirm the allocator guarantees this.
+ __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant));
+ __ orr($dst$$Register, $dst$$Register, AsmOperand($src1$$Register->successor(), lsl, 32-$src2$$constant));
+ __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), lsr, $src2$$constant));
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif // !AARCH64
+
+
+instruct shrP_reg_imm5(iRegX dst, iRegP src1, immU5 src2) %{
+ match(Set dst (URShiftI (CastP2X src1) src2)); // folds the pointer-to-integer cast into the logical shift
+ size(4);
+ format %{ "LSR $dst,$src1,$src2\t! Cast ptr $src1 to int and shift" %}
+ ins_encode %{
+ __ logical_shift_right($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+
+//----------Floating Point Arithmetic Instructions-----------------------------
+
+// Add float single precision: matches AddF and emits it through add_float
+instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
+ match(Set dst (AddF src1 src2));
+
+ size(4);
+ format %{ "FADDS $dst,$src1,$src2" %}
+ ins_encode %{
+ __ add_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+
+ ins_pipe(faddF_reg_reg);
+%}
+
+// Add float double precision: matches AddD and emits it through add_double
+instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
+ match(Set dst (AddD src1 src2));
+
+ size(4);
+ format %{ "FADDD $dst,$src1,$src2" %}
+ ins_encode %{
+ __ add_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+
+ ins_pipe(faddD_reg_reg);
+%}
+
+// Sub float single precision: matches SubF and emits it through sub_float
+instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
+ match(Set dst (SubF src1 src2));
+
+ size(4);
+ format %{ "FSUBS $dst,$src1,$src2" %}
+ ins_encode %{
+ __ sub_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(faddF_reg_reg); // shares the pipeline class of the float add
+%}
+
+// Sub float double precision: matches SubD and emits it through sub_double
+instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
+ match(Set dst (SubD src1 src2));
+
+ size(4);
+ format %{ "FSUBD $dst,$src1,$src2" %}
+ ins_encode %{
+ __ sub_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(faddD_reg_reg); // shares the pipeline class of the double add
+%}
+
+// Mul float single precision: matches MulF and emits it through mul_float
+instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
+ match(Set dst (MulF src1 src2));
+
+ size(4);
+ format %{ "FMULS $dst,$src1,$src2" %}
+ ins_encode %{
+ __ mul_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+
+ ins_pipe(fmulF_reg_reg);
+%}
+
+// Mul float double precision: matches MulD and emits it through mul_double
+instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
+ match(Set dst (MulD src1 src2));
+
+ size(4);
+ format %{ "FMULD $dst,$src1,$src2" %}
+ ins_encode %{
+ __ mul_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+
+ ins_pipe(fmulD_reg_reg);
+%}
+
+// Div float single precision: matches DivF and emits it through div_float
+instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
+ match(Set dst (DivF src1 src2));
+
+ size(4);
+ format %{ "FDIVS $dst,$src1,$src2" %}
+ ins_encode %{
+ __ div_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+
+ ins_pipe(fdivF_reg_reg);
+%}
+
+// Div float double precision: matches DivD and emits it through div_double
+instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
+ match(Set dst (DivD src1 src2));
+
+ size(4);
+ format %{ "FDIVD $dst,$src1,$src2" %}
+ ins_encode %{
+ __ div_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+
+ ins_pipe(fdivD_reg_reg);
+%}
+
+// Absolute value, double precision: matches AbsD and emits it through abs_double
+instruct absD_reg(regD dst, regD src) %{
+ match(Set dst (AbsD src));
+
+ size(4);
+ format %{ "FABSd $dst,$src" %}
+ ins_encode %{
+ __ abs_double($dst$$FloatRegister, $src$$FloatRegister);
+ %}
+ ins_pipe(faddD_reg);
+%}
+
+// Absolute value, single precision: matches AbsF and emits it through abs_float
+instruct absF_reg(regF dst, regF src) %{
+ match(Set dst (AbsF src));
+
+ // Declared size added for consistency with absD_reg and negF_reg, which
+ // both declare 4 bytes for the equivalent single-helper encoding.
+ size(4);
+ format %{ "FABSs $dst,$src" %}
+ ins_encode %{
+ __ abs_float($dst$$FloatRegister, $src$$FloatRegister);
+ %}
+ ins_pipe(faddF_reg);
+%}
+
+instruct negF_reg(regF dst, regF src) %{
+ match(Set dst (NegF src));
+ // Negate, single precision: emitted through neg_float.
+ size(4);
+ format %{ "FNEGs $dst,$src" %}
+ ins_encode %{
+ __ neg_float($dst$$FloatRegister, $src$$FloatRegister);
+ %}
+ ins_pipe(faddF_reg);
+%}
+
+// Negate, double precision: matches NegD and emits it through neg_double
+instruct negD_reg(regD dst, regD src) %{
+ match(Set dst (NegD src));
+
+ // Declared size added for consistency with negF_reg and absD_reg, which
+ // both declare 4 bytes for the equivalent single-helper encoding.
+ size(4);
+ format %{ "FNEGd $dst,$src" %}
+ ins_encode %{
+ __ neg_double($dst$$FloatRegister, $src$$FloatRegister);
+ %}
+ ins_pipe(faddD_reg);
+%}
+
+// Sqrt float single precision
+instruct sqrtF_reg_reg(regF dst, regF src) %{
+ match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+ // Matches a float widened to double, square-rooted, and narrowed back to float, and emits the whole chain through sqrt_float.
+ size(4);
+ format %{ "FSQRTS $dst,$src" %}
+ ins_encode %{
+ __ sqrt_float($dst$$FloatRegister, $src$$FloatRegister);
+ %}
+ ins_pipe(fdivF_reg_reg); // scheduled with the float divide pipeline class
+%}
+
+// Sqrt float double precision: matches SqrtD and emits it through sqrt_double
+instruct sqrtD_reg_reg(regD dst, regD src) %{
+ match(Set dst (SqrtD src));
+
+ size(4);
+ format %{ "FSQRTD $dst,$src" %}
+ ins_encode %{
+ __ sqrt_double($dst$$FloatRegister, $src$$FloatRegister);
+ %}
+ ins_pipe(fdivD_reg_reg); // scheduled with the double divide pipeline class
+%}
+
+//----------Logical Instructions-----------------------------------------------
+// And Instructions
+// Register And
+instruct andI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (AndI src1 src2));
+ // Int AND of two registers, emitted through the and_32 helper.
+ size(4);
+ format %{ "and_32 $dst,$src1,$src2" %}
+ ins_encode %{
+ __ and_32($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct andshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (AndI src1 (LShiftI src2 src3)));
+ // 32-bit ARM only: int AND whose second operand is src2 << src3 (register count), folded into one andr with a shifted operand.
+ size(4);
+ format %{ "AND $dst,$src1,$src2<<$src3" %}
+ ins_encode %{
+ __ andr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct andshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (AndI src1 (LShiftI src2 src3)));
+ // Int AND whose second operand is src2 << src3 (immU5 constant), folded into one and_32 with a shifted operand.
+ size(4);
+ format %{ "and_32 $dst,$src1,$src2<<$src3" %}
+ ins_encode %{
+ __ and_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct andsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (AndI src1 (RShiftI src2 src3)));
+ // 32-bit ARM only: int AND whose second operand is src2 >> src3 (arithmetic, register count), folded into one andr with a shifted operand.
+ size(4);
+ format %{ "AND $dst,$src1,$src2>>$src3" %}
+ ins_encode %{
+ __ andr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct andsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (AndI src1 (RShiftI src2 src3)));
+ // Int AND whose second operand is src2 >> src3 (arithmetic, immU5 constant), folded into one and_32 with a shifted operand.
+ size(4);
+ format %{ "and_32 $dst,$src1,$src2>>$src3" %}
+ ins_encode %{
+ __ and_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct andshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (AndI src1 (URShiftI src2 src3)));
+ // 32-bit ARM only: int AND whose second operand is src2 >>> src3 (logical, register count), folded into one andr with a shifted operand.
+ size(4);
+ format %{ "AND $dst,$src1,$src2>>>$src3" %}
+ ins_encode %{
+ __ andr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct andshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (AndI src1 (URShiftI src2 src3)));
+ // Int AND whose second operand is src2 >>> src3 (logical, immU5 constant), folded into one and_32 with a shifted operand.
+ size(4);
+ format %{ "and_32 $dst,$src1,$src2>>>$src3" %}
+ ins_encode %{
+ __ and_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Immediate And (int): second operand is a constant matched by limmI
+instruct andI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{
+ match(Set dst (AndI src1 src2));
+
+ size(4);
+ format %{ "and_32 $dst,$src1,$src2\t! int" %}
+ ins_encode %{
+ __ and_32($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+
+#ifndef AARCH64
+instruct andI_reg_limmn(iRegI dst, iRegI src1, limmIn src2) %{
+ match(Set dst (AndI src1 src2));
+ // 32-bit ARM only: AND with a limmIn constant, implemented as BIC with the bitwise complement of that constant.
+ size(4);
+ format %{ "bic $dst,$src1,~$src2\t! int" %}
+ ins_encode %{
+ __ bic($dst$$Register, $src1$$Register, ~$src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif
+
+// Register And Long
+instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (AndL src1 src2));
+ // One andr on AArch64; on 32-bit ARM the low and high words are ANDed separately.
+ ins_cost(DEFAULT_COST);
+#ifdef AARCH64
+ size(4);
+ format %{ "AND $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ andr($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+#else
+ size(8);
+ format %{ "AND $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ andr($dst$$Register, $src1$$Register, $src2$$Register); // low words
+ __ andr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); // high words
+ %}
+#endif
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifdef AARCH64
+// Immediate And (long, AArch64): second operand is a constant matched by limmL
+instruct andL_reg_limm(iRegL dst, iRegL src1, limmL src2) %{
+ match(Set dst (AndL src1 src2));
+
+ size(4);
+ format %{ "AND $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ andr($dst$$Register, $src1$$Register, (uintx)$src2$$constant); // constant is passed as an unsigned word
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#else
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+instruct andL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
+ match(Set dst (AndL src1 con)); // 32-bit ARM: long AND with a constant matched by immLlowRot
+ ins_cost(DEFAULT_COST);
+ size(8);
+ format %{ "AND $dst,$src1,$con\t! long" %}
+ ins_encode %{
+ __ andr($dst$$Register, $src1$$Register, $con$$constant); // low word is ANDed with the constant
+ __ andr($dst$$Register->successor(), $src1$$Register->successor(), 0); // high word is ANDed with 0, i.e. cleared
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif
+
+// Or Instructions
+// Register Or (int): matches OrI and emits it through orr_32
+instruct orI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (OrI src1 src2));
+
+ size(4);
+ format %{ "orr_32 $dst,$src1,$src2\t! int" %}
+ ins_encode %{
+ __ orr_32($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct orshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (OrI src1 (LShiftI src2 src3)));
+ // 32-bit ARM only: int OR whose second operand is src2 << src3 (register count), folded into one orr with a shifted operand.
+ size(4);
+ format %{ "OR $dst,$src1,$src2<<$src3" %}
+ ins_encode %{
+ __ orr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct orshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (OrI src1 (LShiftI src2 src3)));
+ // Int OR whose second operand is src2 << src3 (immU5 constant), folded into one orr_32 with a shifted operand.
+ size(4);
+ format %{ "orr_32 $dst,$src1,$src2<<$src3" %}
+ ins_encode %{
+ __ orr_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct orsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (OrI src1 (RShiftI src2 src3)));
+ // 32-bit ARM only: int OR whose second operand is src2 >> src3 (arithmetic, register count), folded into one orr with a shifted operand.
+ size(4);
+ format %{ "OR $dst,$src1,$src2>>$src3" %}
+ ins_encode %{
+ __ orr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct orsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (OrI src1 (RShiftI src2 src3)));
+ // Int OR whose second operand is src2 >> src3 (arithmetic, immU5 constant), folded into one orr_32 with a shifted operand.
+ size(4);
+ format %{ "orr_32 $dst,$src1,$src2>>$src3" %}
+ ins_encode %{
+ __ orr_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct orshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (OrI src1 (URShiftI src2 src3)));
+ // 32-bit ARM only: int OR whose second operand is src2 >>> src3 (logical, register count), folded into one orr with a shifted operand.
+ size(4);
+ format %{ "OR $dst,$src1,$src2>>>$src3" %}
+ ins_encode %{
+ __ orr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct orshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (OrI src1 (URShiftI src2 src3)));
+ // Int OR whose second operand is src2 >>> src3 (logical, immU5 constant), folded into one orr_32 with a shifted operand.
+ size(4);
+ format %{ "orr_32 $dst,$src1,$src2>>>$src3" %}
+ ins_encode %{
+ __ orr_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Or (int): second operand is a constant matched by limmI
+instruct orI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{
+ match(Set dst (OrI src1 src2));
+
+ size(4);
+ format %{ "orr_32 $dst,$src1,$src2" %}
+ ins_encode %{
+ __ orr_32($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+// TODO: orn_32 with limmIn
+
+// Register Or Long
+instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (OrL src1 src2));
+ // One orr on AArch64; on 32-bit ARM the low and high words are ORed separately.
+ ins_cost(DEFAULT_COST);
+#ifdef AARCH64
+ size(4);
+ format %{ "OR $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ orr($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+#else
+ size(8);
+ format %{ "OR $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
+ "OR $dst.hi,$src1.hi,$src2.hi" %}
+ ins_encode %{
+ __ orr($dst$$Register, $src1$$Register, $src2$$Register); // low words
+ __ orr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); // high words
+ %}
+#endif
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifdef AARCH64
+instruct orL_reg_limm(iRegL dst, iRegL src1, limmL src2) %{
+ match(Set dst (OrL src1 src2));
+ // AArch64: long OR with a constant matched by limmL.
+ size(4);
+ format %{ "ORR $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ orr($dst$$Register, $src1$$Register, (uintx)$src2$$constant); // constant is passed as an unsigned word
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#else
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+// 32-bit ARM: long OR with a constant matched by immLlowRot. Only the low
+// word is ORed with the constant; the high word is ORed with 0.
+instruct orL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
+ match(Set dst (OrL src1 con));
+ ins_cost(DEFAULT_COST);
+ size(8);
+ format %{ "OR $dst.lo,$src1.lo,$con\t! long\n\t"
+ "OR $dst.hi,$src1.hi,0" %}
+ ins_encode %{
+ __ orr($dst$$Register, $src1$$Register, $con$$constant);
+ // High word: OR with 0 copies src1.hi into dst.hi unchanged.
+ __ orr($dst$$Register->successor(), $src1$$Register->successor(), 0);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif
+
+#ifdef TODO
+// Use SPRegP to match Rthread (TLS register) without spilling.
+// Use store_ptr_RegP to match Rthread (TLS register) without spilling.
+// Use sp_ptr_RegP to match Rthread (TLS register) without spilling.
+instruct orI_reg_castP2X(iRegI dst, iRegI src1, sp_ptr_RegP src2) %{
+ match(Set dst (OrI src1 (CastP2X src2))); // folds the pointer-to-integer cast into the OR; only compiled when TODO is defined
+ size(4);
+ format %{ "OR $dst,$src1,$src2" %}
+ ins_encode %{
+ __ orr($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// Xor Instructions
+// Register Xor (int): matches XorI and emits it through eor_32
+instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+ match(Set dst (XorI src1 src2));
+
+ size(4);
+ format %{ "eor_32 $dst,$src1,$src2" %}
+ ins_encode %{
+ __ eor_32($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct xorshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (XorI src1 (LShiftI src2 src3)));
+ // 32-bit ARM only: int XOR whose second operand is src2 << src3 (register count), folded into one eor with a shifted operand.
+ size(4);
+ format %{ "XOR $dst,$src1,$src2<<$src3" %}
+ ins_encode %{
+ __ eor($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct xorshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (XorI src1 (LShiftI src2 src3)));
+ // Int XOR whose second operand is src2 << src3 (immU5 constant), folded into one eor_32 with a shifted operand.
+ size(4);
+ format %{ "eor_32 $dst,$src1,$src2<<$src3" %}
+ ins_encode %{
+ __ eor_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct xorsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (XorI src1 (RShiftI src2 src3)));
+ // 32-bit ARM only: int XOR whose second operand is src2 >> src3 (arithmetic, register count), folded into one eor with a shifted operand.
+ size(4);
+ format %{ "XOR $dst,$src1,$src2>>$src3" %}
+ ins_encode %{
+ __ eor($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct xorsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (XorI src1 (RShiftI src2 src3)));
+ // Int XOR whose second operand is src2 >> src3 (arithmetic, immU5 constant), folded into one eor_32 with a shifted operand.
+ size(4);
+ format %{ "eor_32 $dst,$src1,$src2>>$src3" %}
+ ins_encode %{
+ __ eor_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifndef AARCH64
+instruct xorshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+ match(Set dst (XorI src1 (URShiftI src2 src3)));
+ // 32-bit ARM only: int XOR whose second operand is src2 >>> src3 (logical, register count), folded into one eor with a shifted operand.
+ size(4);
+ format %{ "XOR $dst,$src1,$src2>>>$src3" %}
+ ins_encode %{
+ __ eor($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+instruct xorshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+ match(Set dst (XorI src1 (URShiftI src2 src3)));
+ // Int XOR whose second operand is src2 >>> src3 (logical, immU5 constant), folded into one eor_32 with a shifted operand.
+ size(4);
+ format %{ "eor_32 $dst,$src1,$src2>>>$src3" %}
+ ins_encode %{
+ __ eor_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Xor (int): second operand is a constant matched by limmI
+instruct xorI_reg_imm(iRegI dst, iRegI src1, limmI src2) %{
+ match(Set dst (XorI src1 src2));
+
+ size(4);
+ format %{ "eor_32 $dst,$src1,$src2" %}
+ ins_encode %{
+ __ eor_32($dst$$Register, $src1$$Register, $src2$$constant);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Register Xor Long: one eor on AArch64; on 32-bit ARM the two words are XORed separately
+instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+ match(Set dst (XorL src1 src2));
+ ins_cost(DEFAULT_COST);
+#ifdef AARCH64
+ size(4);
+ format %{ "XOR $dst,$src1,$src2\t! long" %}
+ ins_encode %{
+ __ eor($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+#else
+ size(8);
+ format %{ "XOR $dst.hi,$src1.hi,$src2.hi\t! long\n\t"
+ "XOR $dst.lo,$src1.lo,$src2.lo\t! long" %}
+ ins_encode %{
+ // Note: the low-word EOR is emitted first, i.e. in the opposite order to the format text above.
+ __ eor($dst$$Register, $src1$$Register, $src2$$Register);
+ __ eor($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
+ %}
+#endif
+ ins_pipe(ialu_reg_reg);
+%}
+
+#ifdef AARCH64
+instruct xorL_reg_limmL(iRegL dst, iRegL src1, limmL con) %{
+ match(Set dst (XorL src1 con)); // AArch64: long XOR with a constant matched by limmL
+ ins_cost(DEFAULT_COST);
+ size(4);
+ format %{ "EOR $dst,$src1,$con\t! long" %}
+ ins_encode %{
+ __ eor($dst$$Register, $src1$$Register, (uintx)$con$$constant); // constant is passed as an unsigned word
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#else
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+// 32-bit ARM: long XOR with a constant matched by immLlowRot. Only the low
+// word is XORed with the constant; the high word is XORed with 0.
+instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
+ match(Set dst (XorL src1 con));
+ ins_cost(DEFAULT_COST);
+ size(8);
+ format %{ "XOR $dst.lo,$src1.lo,$con\t! long\n\t"
+ "XOR $dst.hi,$src1.hi,0\t! long" %}
+ ins_encode %{
+ __ eor($dst$$Register, $src1$$Register, $con$$constant);
+ // High word: XOR with 0 copies src1.hi into dst.hi unchanged.
+ __ eor($dst$$Register->successor(), $src1$$Register->successor(), 0);
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif // !AARCH64
+
+//----------Convert to Boolean-------------------------------------------------
+instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{
+ match(Set dst (Conv2B src)); // int to boolean: dst = (src != 0) ? 1 : 0
+ effect(KILL ccr); // the compare/test below overwrites the condition flags
+#ifdef AARCH64
+ size(8);
+ ins_cost(DEFAULT_COST*2);
+ format %{ "cmp_32 $src,ZR\n\t"
+ "cset_w $dst, ne" %}
+ ins_encode %{
+ __ cmp_32($src$$Register, ZR);
+ __ cset_w($dst$$Register, ne);
+ %}
+#else
+ size(12);
+ ins_cost(DEFAULT_COST*2);
+ format %{ "TST $src,$src \n\t"
+ "MOV $dst, 0 \n\t"
+ "MOV.ne $dst, 1" %}
+ ins_encode %{ // FIXME: can do better?
+ __ tst($src$$Register, $src$$Register);
+ __ mov($dst$$Register, 0); // default result
+ __ mov($dst$$Register, 1, ne); // overwritten only when the ne condition holds
+ %}
+#endif
+ ins_pipe(ialu_reg_ialu);
+%}
+
+instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{
+ match(Set dst (Conv2B src)); // pointer to boolean: dst = (src != 0) ? 1 : 0
+ effect(KILL ccr); // the compare/test below overwrites the condition flags
+#ifdef AARCH64
+ size(8);
+ ins_cost(DEFAULT_COST*2);
+ format %{ "CMP $src,ZR\n\t"
+ "cset $dst, ne" %}
+ ins_encode %{
+ __ cmp($src$$Register, ZR);
+ __ cset($dst$$Register, ne);
+ %}
+#else
+ size(12);
+ ins_cost(DEFAULT_COST*2);
+ format %{ "TST $src,$src \n\t"
+ "MOV $dst, 0 \n\t"
+ "MOV.ne $dst, 1" %}
+ ins_encode %{
+ __ tst($src$$Register, $src$$Register);
+ __ mov($dst$$Register, 0); // default result
+ __ mov($dst$$Register, 1, ne); // overwritten only when the ne condition holds
+ %}
+#endif
+ ins_pipe(ialu_reg_ialu);
+%}
+
+instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{
+ match(Set dst (CmpLTMask p q)); // dst = (p < q) ? -1 : 0, per the format text below
+ effect( KILL ccr );
+#ifdef AARCH64
+ size(8);
+ ins_cost(DEFAULT_COST*2);
+ format %{ "CMP_w $p,$q\n\t"
+ "CSETM_w $dst, lt" %}
+ ins_encode %{
+ __ cmp_w($p$$Register, $q$$Register);
+ __ csetm_w($dst$$Register, lt);
+ %}
+#else
+ ins_cost(DEFAULT_COST*3);
+ format %{ "CMP $p,$q\n\t"
+ "MOV $dst, #0\n\t"
+ "MOV.lt $dst, #-1" %}
+ ins_encode %{
+ __ cmp($p$$Register, $q$$Register);
+ __ mov($dst$$Register, 0); // default result
+ __ mvn($dst$$Register, 0, lt); // move-not of 0 under lt, shown as #-1 in the format
+ %}
+#endif
+ ins_pipe(ialu_reg_reg_ialu);
+%}
+
+instruct cmpLTMask_reg_imm( iRegI dst, iRegI p, aimmI q, flagsReg ccr ) %{
+ match(Set dst (CmpLTMask p q)); // same as cmpLTMask_reg_reg, with q a constant matched by aimmI
+ effect( KILL ccr );
+#ifdef AARCH64
+ size(8);
+ ins_cost(DEFAULT_COST*2);
+ format %{ "CMP_w $p,$q\n\t"
+ "CSETM_w $dst, lt" %}
+ ins_encode %{
+ __ cmp_w($p$$Register, $q$$constant);
+ __ csetm_w($dst$$Register, lt);
+ %}
+#else
+ ins_cost(DEFAULT_COST*3);
+ format %{ "CMP $p,$q\n\t"
+ "MOV $dst, #0\n\t"
+ "MOV.lt $dst, #-1" %}
+ ins_encode %{
+ __ cmp($p$$Register, $q$$constant);
+ __ mov($dst$$Register, 0); // default result
+ __ mvn($dst$$Register, 0, lt); // move-not of 0 under lt, shown as #-1 in the format
+ %}
+#endif
+ ins_pipe(ialu_reg_reg_ialu);
+%}
+
+#ifdef AARCH64
+instruct cadd_cmpLTMask3( iRegI dst, iRegI p, iRegI q, iRegI y, iRegI x, flagsReg ccr ) %{
+ match(Set dst (AddI (AndI (CmpLTMask p q) y) x)); // AArch64: dst = (p < q) ? y + x : x
+ effect( TEMP dst, KILL ccr );
+ size(12);
+ ins_cost(DEFAULT_COST*3);
+ format %{ "CMP_w $p,$q\n\t"
+ "ADD_w $dst,$y,$x\n\t"
+ "CSEL_w $dst,$dst,$x,lt" %}
+ ins_encode %{
+ __ cmp_w($p$$Register, $q$$Register);
+ __ add_w($dst$$Register, $y$$Register, $x$$Register); // speculatively compute y + x
+ __ csel_w($dst$$Register, $dst$$Register, $x$$Register, lt); // keep the sum only under lt, otherwise take x
+ %}
+ ins_pipe( cadd_cmpltmask );
+%}
+#else
+instruct cadd_cmpLTMask3( iRegI p, iRegI q, iRegI y, iRegI z, flagsReg ccr ) %{
+ match(Set z (AddI (AndI (CmpLTMask p q) y) z)); // 32-bit ARM: z is both input and result
+ effect( KILL ccr );
+ ins_cost(DEFAULT_COST*2);
+ format %{ "CMP $p,$q\n\t"
+ "ADD.lt $z,$y,$z" %}
+ ins_encode %{
+ __ cmp($p$$Register, $q$$Register);
+ __ add($z$$Register, $y$$Register, $z$$Register, lt); // conditional add: z is left untouched unless lt holds
+ %}
+ ins_pipe( cadd_cmpltmask );
+%}
+#endif
+
+#ifdef AARCH64
+instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI x, flagsReg ccr ) %{
+ match(Set dst (AddI (AndI (CmpLTMask p q) y) x)); // AArch64: as cadd_cmpLTMask3, with q a constant matched by aimmI
+ effect( TEMP dst, KILL ccr );
+ size(12);
+ ins_cost(DEFAULT_COST*3);
+ format %{ "CMP_w $p,$q\n\t"
+ "ADD_w $dst,$y,$x\n\t"
+ "CSEL_w $dst,$dst,$x,lt" %}
+ ins_encode %{
+ __ cmp_w($p$$Register, $q$$constant);
+ __ add_w($dst$$Register, $y$$Register, $x$$Register); // speculatively compute y + x
+ __ csel_w($dst$$Register, $dst$$Register, $x$$Register, lt); // keep the sum only under lt, otherwise take x
+ %}
+ ins_pipe( cadd_cmpltmask );
+%}
+#else
+// FIXME: remove unused "dst" (it appears in neither the match rule nor the encoding)
+instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI z, flagsReg ccr ) %{
+ match(Set z (AddI (AndI (CmpLTMask p q) y) z)); // 32-bit ARM: as cadd_cmpLTMask3, with q a constant matched by aimmI
+ effect( KILL ccr );
+ ins_cost(DEFAULT_COST*2);
+ format %{ "CMP $p,$q\n\t"
+ "ADD.lt $z,$y,$z" %}
+ ins_encode %{
+ __ cmp($p$$Register, $q$$constant);
+ __ add($z$$Register, $y$$Register, $z$$Register, lt); // conditional add: z is left untouched unless lt holds
+ %}
+ ins_pipe( cadd_cmpltmask );
+%}
+#endif // !AARCH64
+
+#ifdef AARCH64
+instruct cadd_cmpLTMask( iRegI dst, iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{
+ match(Set dst (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
+ effect( TEMP dst, KILL ccr );
+ // AArch64: dst = (p - q) + ((p < q) ? y : 0).
+ // p is only an input of this rule (the result goes to dst) and is not
+ // declared as killed, so it must not be written. The value is therefore
+ // built up in the TEMP dst instead of subtracting into p.
+ size(16);
+ ins_cost(DEFAULT_COST*4);
+ format %{ "CMP_w $p,$q\n\t"
+ "ADD_w $dst,$y,$p\n\t"
+ "CSEL_w $dst,$dst,$p,lt\n\t"
+ "SUB_w $dst,$dst,$q" %}
+ ins_encode %{
+ __ cmp_w($p$$Register, $q$$Register);
+ __ add_w($dst$$Register, $y$$Register, $p$$Register); // dst = y + p (flags untouched)
+ __ csel_w($dst$$Register, $dst$$Register, $p$$Register, lt); // dst = (p < q) ? y + p : p
+ __ sub_w($dst$$Register, $dst$$Register, $q$$Register); // dst -= q
+ %}
+ ins_pipe( cadd_cmpltmask ); // FIXME
+%}
+#else
+instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{
+ match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); // 32-bit ARM: p is both input and result
+ effect( KILL ccr );
+ ins_cost(DEFAULT_COST*2);
+ format %{ "SUBS $p,$p,$q\n\t"
+ "ADD.lt $p,$y,$p" %}
+ ins_encode %{
+ __ subs($p$$Register, $p$$Register, $q$$Register); // p = p - q, setting the flags used below
+ __ add($p$$Register, $y$$Register, $p$$Register, lt); // add y only when lt holds
+ %}
+ ins_pipe( cadd_cmpltmask );
+%}
+#endif
+
+//----------Arithmetic Conversion Instructions---------------------------------
+// The conversion operations are sorted alphabetically. Please keep it that way!
+
+// Narrow a double to a float (ConvD2F) with a single convert_d2f call;
+// the format string prints it as FCVTSD.
+instruct convD2F_reg(regF dst, regD src) %{
+ match(Set dst (ConvD2F src));
+ size(4);
+ format %{ "FCVTSD $dst,$src" %}
+ ins_encode %{
+ __ convert_d2f($dst$$FloatRegister, $src$$FloatRegister);
+ %}
+ ins_pipe(fcvtD2F);
+%}
+
+// Convert a double to an int in a float register.
+// If the double is a NAN, stuff a zero in instead.
+
+#ifdef AARCH64
+// AArch64: double -> int (ConvD2I) straight from an FP register into an
+// integer register with one FCVTZS_wd.  No size() is declared here.
+instruct convD2I_reg_reg(iRegI dst, regD src) %{
+ match(Set dst (ConvD2I src));
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ format %{ "FCVTZS_wd $dst, $src" %}
+ ins_encode %{
+ __ fcvtzs_wd($dst$$Register, $src$$FloatRegister);
+ %}
+ ins_pipe(fcvtD2I);
+%}
+
+// AArch64: double -> long (ConvD2L) with one FCVTZS_xd from an FP register
+// into an integer register.  No size() is declared here.
+instruct convD2L_reg_reg(iRegL dst, regD src) %{
+ match(Set dst (ConvD2L src));
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ format %{ "FCVTZS_xd $dst, $src" %}
+ ins_encode %{
+ __ fcvtzs_xd($dst$$Register, $src$$FloatRegister);
+ %}
+ ins_pipe(fcvtD2L);
+%}
+#else
+// 32-bit ARM: double -> int (ConvD2I) in two steps.  FTOSIZD converts into
+// the float scratch register tmp (declared TEMP), then FMRS copies that
+// value into the integer destination.
+instruct convD2I_reg_reg(iRegI dst, regD src, regF tmp) %{
+ match(Set dst (ConvD2I src));
+ effect( TEMP tmp );
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ format %{ "FTOSIZD $tmp,$src\n\t"
+ "FMRS $dst, $tmp" %}
+ ins_encode %{
+ __ ftosizd($tmp$$FloatRegister, $src$$FloatRegister);
+ __ fmrs($dst$$Register, $tmp$$FloatRegister);
+ %}
+ ins_pipe(fcvtD2I);
+%}
+#endif
+
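+// Convert a double to a long.  On 32-bit ARM this is done by calling
+// SharedRuntime::d2l; the AArch64 form (convD2L_reg_reg) is defined above.
+// If the double is a NaN, the result is expected to be zero.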
+
+#ifndef AARCH64
+// Double to Long conversion
+// 32-bit ARM implements ConvD2L as a runtime call (effect(CALL)) to
+// SharedRuntime::d2l, with the result bound to the R0R1RegL operand.
+// Argument setup depends on the ABI selected at build time:
+//  - without __ABI_HARD__ the double is moved into the dst register pair
+//    with FMRRD before the call;
+//  - with __ABI_HARD__ it is moved into D0 unless it is already there.
+instruct convD2L_reg(R0R1RegL dst, regD src) %{
+ match(Set dst (ConvD2L src));
+ effect(CALL);
+ ins_cost(MEMORY_REF_COST); // FIXME
+ format %{ "convD2L $dst,$src\t ! call to SharedRuntime::d2l" %}
+ ins_encode %{
+#ifndef __ABI_HARD__
+ __ fmrrd($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister);
+#else
+ if ($src$$FloatRegister != D0) {
+ __ mov_double(D0, $src$$FloatRegister);
+ }
+#endif
+ address target = CAST_FROM_FN_PTR(address, SharedRuntime::d2l);
+ __ call(target, relocInfo::runtime_call_type);
+ %}
+ ins_pipe(fcvtD2L);
+%}
+#endif
+
+// Widen a float to a double (ConvF2D) with a single convert_f2d call;
+// the format string prints it as FCVTDS.
+instruct convF2D_reg(regD dst, regF src) %{
+ match(Set dst (ConvF2D src));
+ size(4);
+ format %{ "FCVTDS $dst,$src" %}
+ ins_encode %{
+ __ convert_f2d($dst$$FloatRegister, $src$$FloatRegister);
+ %}
+ ins_pipe(fcvtF2D);
+%}
+
+#ifdef AARCH64
+// AArch64: float -> int (ConvF2I) with one FCVTZS_ws from an FP register
+// into an integer register.
+instruct convF2I_reg_reg(iRegI dst, regF src) %{
+ match(Set dst (ConvF2I src));
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ size(4);
+ format %{ "FCVTZS_ws $dst, $src" %}
+ ins_encode %{
+ __ fcvtzs_ws($dst$$Register, $src$$FloatRegister);
+ %}
+ ins_pipe(fcvtF2I);
+%}
+
+// AArch64: float -> long (ConvF2L) with one FCVTZS_xs from an FP register
+// into an integer register.
+instruct convF2L_reg_reg(iRegL dst, regF src) %{
+ match(Set dst (ConvF2L src));
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ size(4);
+ format %{ "FCVTZS_xs $dst, $src" %}
+ ins_encode %{
+ __ fcvtzs_xs($dst$$Register, $src$$FloatRegister);
+ %}
+ ins_pipe(fcvtF2L);
+%}
+#else
+// 32-bit ARM: float -> int (ConvF2I) in two instructions.  FTOSIZS converts
+// into the float scratch register tmp (declared TEMP), then FMRS copies the
+// value into the integer destination.
+instruct convF2I_reg_reg(iRegI dst, regF src, regF tmp) %{
+ match(Set dst (ConvF2I src));
+ effect( TEMP tmp );
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ size(8);
+ format %{ "FTOSIZS $tmp,$src\n\t"
+ "FMRS $dst, $tmp" %}
+ ins_encode %{
+ __ ftosizs($tmp$$FloatRegister, $src$$FloatRegister);
+ __ fmrs($dst$$Register, $tmp$$FloatRegister);
+ %}
+ ins_pipe(fcvtF2I);
+%}
+
+// Float to Long conversion
+// 32-bit ARM implements ConvF2L as a runtime call (effect(CALL)) to
+// SharedRuntime::f2l, with the result bound to the R0R1RegL operand.
+// Argument setup depends on the ABI selected at build time:
+//  - without __ABI_HARD__ the float bits are moved into the arg1 (R0RegI)
+//    operand with FMRS;
+//  - with __ABI_HARD__ the float is moved into S0 unless it is already there
+//    (arg1 is not referenced on that path).
+instruct convF2L_reg(R0R1RegL dst, regF src, R0RegI arg1) %{
+ match(Set dst (ConvF2L src));
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ effect(CALL);
+ format %{ "convF2L $dst,$src\t! call to SharedRuntime::f2l" %}
+ ins_encode %{
+#ifndef __ABI_HARD__
+ __ fmrs($arg1$$Register, $src$$FloatRegister);
+#else
+ if($src$$FloatRegister != S0) {
+ __ mov_float(S0, $src$$FloatRegister);
+ }
+#endif
+ address target = CAST_FROM_FN_PTR(address, SharedRuntime::f2l);
+ __ call(target, relocInfo::runtime_call_type);
+ %}
+ ins_pipe(fcvtF2L);
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: int -> double (ConvI2D) with one SCVTF_dw from an integer
+// register into an FP register.
+instruct convI2D_reg_reg(iRegI src, regD dst) %{
+ match(Set dst (ConvI2D src));
+ ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
+ size(4);
+ format %{ "SCVTF_dw $dst,$src" %}
+ ins_encode %{
+ __ scvtf_dw($dst$$FloatRegister, $src$$Register);
+ %}
+ ins_pipe(fcvtI2D);
+%}
+#else
+// 32-bit ARM: int -> double (ConvI2D) in two instructions.  FMSR first moves
+// the integer bits into the destination FP register, then FSITOD converts
+// them in place.  The destination operand is regD_low rather than regD.
+instruct convI2D_reg_reg(iRegI src, regD_low dst) %{
+ match(Set dst (ConvI2D src));
+ ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
+ size(8);
+ // The FSITOD line now has the operand-separating comma, as in convI2F_reg_reg.
+ format %{ "FMSR $dst,$src \n\t"
+ "FSITOD $dst, $dst"%}
+ ins_encode %{
+ __ fmsr($dst$$FloatRegister, $src$$Register);
+ __ fsitod($dst$$FloatRegister, $dst$$FloatRegister);
+ %}
+ ins_pipe(fcvtI2D);
+%}
+#endif
+
+// int -> float (ConvI2F).  AArch64 uses a single SCVTF_sw; 32-bit ARM moves
+// the integer bits into the destination FP register with FMSR and converts
+// them in place with FSITOS.
+instruct convI2F_reg_reg( regF dst, iRegI src ) %{
+ match(Set dst (ConvI2F src));
+ ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
+#ifdef AARCH64
+ size(4);
+ format %{ "SCVTF_sw $dst,$src" %}
+ ins_encode %{
+ __ scvtf_sw($dst$$FloatRegister, $src$$Register);
+ %}
+#else
+ size(8);
+ format %{ "FMSR $dst,$src \n\t"
+ "FSITOS $dst, $dst"%}
+ ins_encode %{
+ __ fmsr($dst$$FloatRegister, $src$$Register);
+ __ fsitos($dst$$FloatRegister, $dst$$FloatRegister);
+ %}
+#endif
+ ins_pipe(fcvtI2F);
+%}
+
+// Sign-extend int to long (ConvI2L).  AArch64 uses SXTW.  32-bit ARM copies
+// src into the first register of the dst pair and fills the successor
+// register (printed as $dst.hi) with src arithmetically shifted right by 31.
+instruct convI2L_reg(iRegL dst, iRegI src) %{
+ match(Set dst (ConvI2L src));
+#ifdef AARCH64
+ size(4);
+ format %{ "SXTW $dst,$src\t! int->long" %}
+ ins_encode %{
+ __ sxtw($dst$$Register, $src$$Register);
+ %}
+#else
+ size(8);
+ format %{ "MOV $dst.lo, $src \n\t"
+ "ASR $dst.hi,$src,31\t! int->long" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register);
+ __ mov($dst$$Register->successor(), AsmOperand($src$$Register, asr, 31));
+ %}
+#endif
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Zero-extend convert int to long
+// Matches (AndL (ConvI2L src) mask) where mask is an immL_32bits constant.
+// AArch64 uses a single mov_w; 32-bit ARM copies src into the first register
+// of the dst pair and writes 0 into the successor register.
+instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{
+ match(Set dst (AndL (ConvI2L src) mask) );
+#ifdef AARCH64
+ size(4);
+ format %{ "mov_w $dst,$src\t! zero-extend int to long" %}
+ ins_encode %{
+ __ mov_w($dst$$Register, $src$$Register);
+ %}
+#else
+ size(8);
+ format %{ "MOV $dst.lo,$src.lo\t! zero-extend int to long\n\t"
+ "MOV $dst.hi, 0"%}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register);
+ __ mov($dst$$Register->successor(), 0);
+ %}
+#endif
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Zero-extend long
+// Matches (AndL src mask) where mask is an immL_32bits constant.  AArch64
+// uses a single mov_w; 32-bit ARM copies the first register of the src pair
+// into the first register of the dst pair and writes 0 into dst's successor.
+instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{
+ match(Set dst (AndL src mask) );
+#ifdef AARCH64
+ size(4);
+ format %{ "mov_w $dst,$src\t! zero-extend long" %}
+ ins_encode %{
+ __ mov_w($dst$$Register, $src$$Register);
+ %}
+#else
+ size(8);
+ format %{ "MOV $dst.lo,$src.lo\t! zero-extend long\n\t"
+ "MOV $dst.hi, 0"%}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register);
+ __ mov($dst$$Register->successor(), 0);
+ %}
+#endif
+ ins_pipe(ialu_reg_reg);
+%}
+
+// MoveF2I between registers: a single FMRS transfers the contents of the
+// float register into the integer register.
+instruct MoveF2I_reg_reg(iRegI dst, regF src) %{
+ match(Set dst (MoveF2I src));
+ effect(DEF dst, USE src);
+ ins_cost(MEMORY_REF_COST); // FIXME
+
+ size(4);
+ format %{ "FMRS $dst,$src\t! MoveF2I" %}
+ ins_encode %{
+ __ fmrs($dst$$Register, $src$$FloatRegister);
+ %}
+ ins_pipe(iload_mem); // FIXME
+%}
+
+// MoveI2F between registers: a single FMSR transfers the contents of the
+// integer register into the float register.
+instruct MoveI2F_reg_reg(regF dst, iRegI src) %{
+ match(Set dst (MoveI2F src));
+ ins_cost(MEMORY_REF_COST); // FIXME
+
+ size(4);
+ format %{ "FMSR $dst,$src\t! MoveI2F" %}
+ ins_encode %{
+ __ fmsr($dst$$FloatRegister, $src$$Register);
+ %}
+ ins_pipe(iload_mem); // FIXME
+%}
+
+// MoveD2L between registers.  AArch64 uses FMOV_xd; 32-bit ARM uses FMRRD to
+// move the double register into the dst register and its successor.
+instruct MoveD2L_reg_reg(iRegL dst, regD src) %{
+ match(Set dst (MoveD2L src));
+ effect(DEF dst, USE src);
+ ins_cost(MEMORY_REF_COST); // FIXME
+
+ size(4);
+#ifdef AARCH64
+ format %{ "FMOV_xd $dst,$src\t! MoveD2L" %}
+ ins_encode %{
+ __ fmov_xd($dst$$Register, $src$$FloatRegister);
+ %}
+#else
+ format %{ "FMRRD $dst,$src\t! MoveD2L" %}
+ ins_encode %{
+ __ fmrrd($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister);
+ %}
+#endif
+ ins_pipe(iload_mem); // FIXME
+%}
+
+// MoveL2D between registers.  AArch64 uses FMOV_dx; 32-bit ARM uses FMDRR to
+// move the src register and its successor into the double register.
+instruct MoveL2D_reg_reg(regD dst, iRegL src) %{
+ match(Set dst (MoveL2D src));
+ effect(DEF dst, USE src);
+ ins_cost(MEMORY_REF_COST); // FIXME
+
+ size(4);
+#ifdef AARCH64
+ format %{ "FMOV_dx $dst,$src\t! MoveL2D" %}
+ ins_encode %{
+ __ fmov_dx($dst$$FloatRegister, $src$$Register);
+ %}
+#else
+ format %{ "FMDRR $dst,$src\t! MoveL2D" %}
+ ins_encode %{
+ __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register->successor());
+ %}
+#endif
+ ins_pipe(ialu_reg_reg); // FIXME
+%}
+
+//-----------
+// Long to Double conversion
+
+#ifdef AARCH64
+// AArch64: long -> double (ConvL2D) with one SCVTF_dx from an integer
+// register into an FP register.
+instruct convL2D(regD dst, iRegL src) %{
+ match(Set dst (ConvL2D src));
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ size(4);
+ format %{ "SCVTF_dx $dst, $src" %}
+ ins_encode %{
+ __ scvtf_dx($dst$$FloatRegister, $src$$Register);
+ %}
+ ins_pipe(fcvtL2D);
+%}
+
+// AArch64: long -> float (ConvL2F) with one SCVTF_sx from an integer
+// register into an FP register.
+instruct convL2F(regF dst, iRegL src) %{
+ match(Set dst (ConvL2F src));
+ ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+ size(4);
+ format %{ "SCVTF_sx $dst, $src" %}
+ ins_encode %{
+ __ scvtf_sx($dst$$FloatRegister, $src$$Register);
+ %}
+ ins_pipe(fcvtL2F);
+%}
+#else
+// Magic constant, 0x43300000
+// Helper without a match rule: materialises 0x43300000 in an int register
+// via mov_slow.  convL2D_reg_slow_fxtof passes it as the second register
+// argument of regI_regI_to_regD (with 0 as the first) to build the double
+// the format string labels 2^52.
+instruct loadConI_x43300000(iRegI dst) %{
+ effect(DEF dst);
+ size(8);
+ format %{ "MOV_SLOW $dst,0x43300000\t! 2^52" %}
+ ins_encode %{
+ __ mov_slow($dst$$Register, 0x43300000);
+ %}
+ ins_pipe(ialu_none);
+%}
+
+// Magic constant, 0x41f00000
+// Helper without a match rule: materialises 0x41f00000 in an int register
+// via mov_slow.  convL2D_reg_slow_fxtof passes it as the second register
+// argument of regI_regI_to_regD (with 0 as the first) to build the double
+// the format string labels 2^32.
+instruct loadConI_x41f00000(iRegI dst) %{
+ effect(DEF dst);
+ size(8);
+ format %{ "MOV_SLOW $dst, 0x41f00000\t! 2^32" %}
+ ins_encode %{
+ __ mov_slow($dst$$Register, 0x41f00000);
+ %}
+ ins_pipe(ialu_none);
+%}
+
+// Helper without a match rule: loads the constant 0 into an int register.
+// convL2D_reg_slow_fxtof uses it as the first register argument when building
+// its double constants with regI_regI_to_regD.
+instruct loadConI_x0(iRegI dst) %{
+ effect(DEF dst);
+ size(4);
+ format %{ "MOV $dst, 0x0\t! 0" %}
+ ins_encode %{
+ __ mov($dst$$Register, 0);
+ %}
+ ins_pipe(ialu_none);
+%}
+
+// Construct a double from two float halves
+// Helper without a match rule.  The successor float register of dst (printed
+// as $dst.hi) is copied from src1's successor, and dst's own float register
+// (printed as $dst.lo) is copied from src2.  All three operands are regD_low.
+instruct regDHi_regDLo_to_regD(regD_low dst, regD_low src1, regD_low src2) %{
+ effect(DEF dst, USE src1, USE src2);
+ size(8);
+ format %{ "FCPYS $dst.hi,$src1.hi\n\t"
+ "FCPYS $dst.lo,$src2.lo" %}
+ ins_encode %{
+ __ fcpys($dst$$FloatRegister->successor(), $src1$$FloatRegister->successor());
+ __ fcpys($dst$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(faddD_reg_reg);
+%}
+
+// NOTE: this guard sits inside the #else branch of the enclosing
+// "#ifdef AARCH64" block, so it is already implied there.
+#ifndef AARCH64
+// Convert integer in high half of a double register (in the lower half of
+// the double register file) to double
+// Helper without a match rule: FSITOD reads the successor float register of
+// src and writes the converted double to dst.
+instruct convI2D_regDHi_regD(regD dst, regD_low src) %{
+ effect(DEF dst, USE src);
+ size(4);
+ format %{ "FSITOD $dst,$src" %}
+ ins_encode %{
+ __ fsitod($dst$$FloatRegister, $src$$FloatRegister->successor());
+ %}
+ ins_pipe(fcvtLHi2D);
+%}
+#endif
+
+// Add float double precision
+// Helper without a match rule, so it is only reachable from expand blocks
+// such as convL2D_reg_slow_fxtof: dst = src1 + src2 via add_double.
+instruct addD_regD_regD(regD dst, regD src1, regD src2) %{
+ effect(DEF dst, USE src1, USE src2);
+ size(4);
+ format %{ "FADDD $dst,$src1,$src2" %}
+ ins_encode %{
+ __ add_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(faddD_reg_reg);
+%}
+
+// Sub float double precision
+// Helper without a match rule, so it is only reachable from expand blocks
+// such as convL2D_reg_slow_fxtof: dst = src1 - src2 via sub_double.
+instruct subD_regD_regD(regD dst, regD src1, regD src2) %{
+ effect(DEF dst, USE src1, USE src2);
+ size(4);
+ format %{ "FSUBD $dst,$src1,$src2" %}
+ ins_encode %{
+ __ sub_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(faddD_reg_reg);
+%}
+
+// Mul float double precision
+// Helper without a match rule, so it is only reachable from expand blocks
+// such as convL2D_reg_slow_fxtof: dst = src1 * src2 via mul_double.
+instruct mulD_regD_regD(regD dst, regD src1, regD src2) %{
+ effect(DEF dst, USE src1, USE src2);
+ size(4);
+ format %{ "FMULD $dst,$src1,$src2" %}
+ ins_encode %{
+ __ mul_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(fmulD_reg_reg);
+%}
+
+// Raw transfer of a long register pair into a double register with FMDRR
+// (src and its successor), used as the first step of convL2D_reg_slow_fxtof.
+// The encoding is the same call as the 32-bit branch of MoveL2D_reg_reg.
+instruct regL_to_regD(regD dst, iRegL src) %{
+ // No match rule to avoid chain rule match.
+ effect(DEF dst, USE src);
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "FMDRR $dst,$src\t! regL to regD" %}
+ ins_encode %{
+ __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register->successor());
+ %}
+ ins_pipe(ialu_reg_reg); // FIXME
+%}
+
+// Build a double register from two independent int registers with FMDRR;
+// src1 takes the argument position that regL_to_regD fills with the first
+// register of a long pair, src2 the position of its successor.
+instruct regI_regI_to_regD(regD dst, iRegI src1, iRegI src2) %{
+ // No match rule to avoid chain rule match.
+ effect(DEF dst, USE src1, USE src2);
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "FMDRR $dst,$src1,$src2\t! regI,regI to regD" %}
+ ins_encode %{
+ __ fmdrr($dst$$FloatRegister, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg_reg); // FIXME
+%}
+
+// 32-bit ARM long -> double (ConvL2D), expanded into helper instructs
+// instead of a single encoding.  Reading the expansion step by step:
+//   tmpsrc     = raw bits of src moved into a double register
+//   dx43300000 = double built from int words (0, 0x43300000), labelled 2^52
+//   dx41f00000 = double built from int words (0, 0x41f00000), labelled 2^32
+//   tmp1       = FSITOD of the successor (".hi") float register of tmpsrc
+//   tmp2       = double whose ".hi" half comes from dx43300000 and whose
+//                ".lo" half comes from tmpsrc
+//   tmp3       = tmp2 - dx43300000
+//   tmp4       = tmp1 * dx41f00000
+//   dst        = tmp3 + tmp4
+// NOTE(review): this looks like the usual "2^52 bias" trick, where tmp3 is
+// the low word as an unsigned value and tmp4 the signed high word scaled by
+// 2^32 - confirm against the assembler's register-half conventions.
+instruct convL2D_reg_slow_fxtof(regD dst, iRegL src) %{
+ match(Set dst (ConvL2D src));
+ ins_cost(DEFAULT_COST*8 + MEMORY_REF_COST*6); // FIXME
+
+ expand %{
+ regD_low tmpsrc;
+ iRegI ix43300000;
+ iRegI ix41f00000;
+ iRegI ix0;
+ regD_low dx43300000;
+ regD dx41f00000;
+ regD tmp1;
+ regD_low tmp2;
+ regD tmp3;
+ regD tmp4;
+
+ regL_to_regD(tmpsrc, src);
+
+ loadConI_x43300000(ix43300000);
+ loadConI_x41f00000(ix41f00000);
+ loadConI_x0(ix0);
+
+ regI_regI_to_regD(dx43300000, ix0, ix43300000);
+ regI_regI_to_regD(dx41f00000, ix0, ix41f00000);
+
+ convI2D_regDHi_regD(tmp1, tmpsrc);
+ regDHi_regDLo_to_regD(tmp2, dx43300000, tmpsrc);
+ subD_regD_regD(tmp3, tmp2, dx43300000);
+ mulD_regD_regD(tmp4, tmp1, dx41f00000);
+ addD_regD_regD(dst, tmp3, tmp4);
+ %}
+%}
+#endif // !AARCH64
+
+// Truncate long to int (ConvL2I).  AArch64 uses mov_w; 32-bit ARM copies the
+// first register of the src pair (printed as $src.lo) into dst.
+instruct convL2I_reg(iRegI dst, iRegL src) %{
+ match(Set dst (ConvL2I src));
+ size(4);
+#ifdef AARCH64
+ format %{ "MOV_w $dst,$src\t! long->int" %}
+ ins_encode %{
+ __ mov_w($dst$$Register, $src$$Register);
+ %}
+#else
+ format %{ "MOV $dst,$src.lo\t! long->int" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register);
+ %}
+#endif
+ ins_pipe(ialu_move_reg_I_to_L);
+%}
+
+#ifndef AARCH64
+// Long arithmetic shift right by an immI_32_63 constant, truncated to int.
+// Only the successor ("hi") register of the src pair is read: it is copied
+// as-is when the count is exactly 32, otherwise it is shifted right
+// arithmetically by (count - 32).
+instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{
+ match(Set dst (ConvL2I (RShiftL src cnt)));
+ size(4);
+ format %{ "ASR $dst,$src.hi,($cnt - 32)\t! long->int or mov if $cnt==32" %}
+ ins_encode %{
+ Register result = $dst$$Register;
+ Register src_hi = $src$$Register->successor();
+ if ($cnt$$constant != 32) {
+ __ mov(result, AsmOperand(src_hi, asr, $cnt$$constant - 32));
+ } else {
+ // shift amount would be zero: plain register move
+ __ mov(result, src_hi);
+ }
+ %}
+ ins_pipe(ialu_reg_imm);
+%}
+#endif
+
+
+//----------Control Flow Instructions------------------------------------------
+// Compare Instructions
+// Compare Integers
+// Signed int compare of two registers (CmpI): one cmp_32 that defines the
+// flags operand icc.  Also used by the minI_eReg / maxI_eReg expansions.
+instruct compI_iReg(flagsReg icc, iRegI op1, iRegI op2) %{
+ match(Set icc (CmpI op1 op2));
+ effect( DEF icc, USE op1, USE op2 );
+
+ size(4);
+ format %{ "cmp_32 $op1,$op2\t! int" %}
+ ins_encode %{
+ __ cmp_32($op1$$Register, $op2$$Register);
+ %}
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+
+#ifdef _LP64
+// Compare compressed pointers
+// CmpN of two iRegN registers with one cmp_32, producing a flagsRegU result.
+// NOTE(review): the format string still says "! int" although the operands
+// are compressed pointers.
+instruct compN_reg2(flagsRegU icc, iRegN op1, iRegN op2) %{
+ match(Set icc (CmpN op1 op2));
+ effect( DEF icc, USE op1, USE op2 );
+
+ size(4);
+ format %{ "cmp_32 $op1,$op2\t! int" %}
+ ins_encode %{
+ __ cmp_32($op1$$Register, $op2$$Register);
+ %}
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+#endif
+
+// Unsigned int compare of two registers (CmpU): the same cmp_32 as the
+// signed form, but the result is typed flagsRegU.
+instruct compU_iReg(flagsRegU icc, iRegI op1, iRegI op2) %{
+ match(Set icc (CmpU op1 op2));
+
+ size(4);
+ format %{ "cmp_32 $op1,$op2\t! unsigned int" %}
+ ins_encode %{
+ __ cmp_32($op1$$Register, $op2$$Register);
+ %}
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// Signed int compare against an aimmIneg constant: emitted as cmn_32 with
+// the negated constant instead of a cmp_32 with the constant itself.
+instruct compI_iReg_immneg(flagsReg icc, iRegI op1, aimmIneg op2) %{
+ match(Set icc (CmpI op1 op2));
+ effect( DEF icc, USE op1 );
+
+ size(4);
+ format %{ "cmn_32 $op1,-$op2\t! int" %}
+ ins_encode %{
+ __ cmn_32($op1$$Register, -$op2$$constant);
+ %}
+ ins_pipe(ialu_cconly_reg_imm);
+%}
+
+// Signed int compare of a register against an aimmI constant with one cmp_32.
+instruct compI_iReg_imm(flagsReg icc, iRegI op1, aimmI op2) %{
+ match(Set icc (CmpI op1 op2));
+ effect( DEF icc, USE op1 );
+
+ size(4);
+ format %{ "cmp_32 $op1,$op2\t! int" %}
+ ins_encode %{
+ __ cmp_32($op1$$Register, $op2$$constant);
+ %}
+ ins_pipe(ialu_cconly_reg_imm);
+%}
+
+// Folds (CmpI (AndI op1 op2) 0) into one tst_32, so the AND result is never
+// materialised.  The flags operand is flagsReg_EQNELTGE.  The format prints
+// the operands in the opposite order from the encoding; for a plain AND of
+// two registers that does not change the meaning.
+instruct testI_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immI0 zero ) %{
+ match(Set icc (CmpI (AndI op1 op2) zero));
+ size(4);
+ format %{ "tst_32 $op2,$op1" %}
+
+ ins_encode %{
+ __ tst_32($op1$$Register, $op2$$Register);
+ %}
+ ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+#ifndef AARCH64
+// Folds (CmpI (AndI op1 (LShiftI op2 op3)) 0) into a single TST whose second
+// operand is op2 shifted left by the register op3.  32-bit ARM only.
+// The format now names the operands as the encoding uses them: op1 is the
+// plain operand and op2 is the one being shifted (it previously printed them
+// swapped).
+instruct testshlI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
+ match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero));
+ size(4);
+ format %{ "TST $op1,$op2<<$op3" %}
+
+ ins_encode %{
+ __ tst($op1$$Register, AsmOperand($op2$$Register, lsl, $op3$$Register));
+ %}
+ ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+#endif
+
+// Folds (CmpI (AndI op1 (LShiftI op2 op3)) 0) into a single tst_32 whose
+// second operand is op2 shifted left by the immU5 constant op3.
+// The format now names the operands as the encoding uses them: op1 is the
+// plain operand and op2 is the one being shifted (it previously printed them
+// swapped).
+instruct testshlI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
+ match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero));
+ size(4);
+ format %{ "tst_32 $op1,$op2<<$op3" %}
+
+ ins_encode %{
+ __ tst_32($op1$$Register, AsmOperand($op2$$Register, lsl, $op3$$constant));
+ %}
+ ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+#ifndef AARCH64
+// Folds (CmpI (AndI op1 (RShiftI op2 op3)) 0) into a single TST whose second
+// operand is op2 arithmetically shifted right (asr) by the register op3.
+// 32-bit ARM only.
+// The format previously printed "<<" with swapped operands, copied from the
+// left-shift variant; it now shows the arithmetic right shift of op2 that
+// the encoding performs.
+instruct testsarI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
+ match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero));
+ size(4);
+ format %{ "TST $op1,$op2>>$op3" %}
+
+ ins_encode %{
+ __ tst($op1$$Register, AsmOperand($op2$$Register, asr, $op3$$Register));
+ %}
+ ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+#endif
+
+// Folds (CmpI (AndI op1 (RShiftI op2 op3)) 0) into a single tst_32 whose
+// second operand is op2 arithmetically shifted right (asr) by the immU5
+// constant op3.
+// The format previously printed "<<" with swapped operands, copied from the
+// left-shift variant; it now shows the arithmetic right shift of op2 that
+// the encoding performs.
+instruct testsarI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
+ match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero));
+ size(4);
+ format %{ "tst_32 $op1,$op2>>$op3" %}
+
+ ins_encode %{
+ __ tst_32($op1$$Register, AsmOperand($op2$$Register, asr, $op3$$constant));
+ %}
+ ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+#ifndef AARCH64
+// Folds (CmpI (AndI op1 (URShiftI op2 op3)) 0) into a single TST whose second
+// operand is op2 logically shifted right (lsr) by the register op3.
+// 32-bit ARM only.
+// The format previously printed "<<" with swapped operands, copied from the
+// left-shift variant; it now shows the unsigned right shift (">>>") of op2
+// that the encoding performs.
+instruct testshrI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
+ match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero));
+ size(4);
+ format %{ "TST $op1,$op2>>>$op3" %}
+
+ ins_encode %{
+ __ tst($op1$$Register, AsmOperand($op2$$Register, lsr, $op3$$Register));
+ %}
+ ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+#endif
+
+// Folds (CmpI (AndI op1 (URShiftI op2 op3)) 0) into a single tst_32 whose
+// second operand is op2 logically shifted right (lsr) by the immU5 constant
+// op3.
+// The format previously printed "<<" with swapped operands, copied from the
+// left-shift variant; it now shows the unsigned right shift (">>>") of op2
+// that the encoding performs.
+instruct testshrI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
+ match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero));
+ size(4);
+ format %{ "tst_32 $op1,$op2>>>$op3" %}
+
+ ins_encode %{
+ __ tst_32($op1$$Register, AsmOperand($op2$$Register, lsr, $op3$$constant));
+ %}
+ ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+// Folds (CmpI (AndI op1 op2) 0) with a limmI constant op2 into one tst_32.
+// The format prints the constant first; the encoding passes the register
+// first and the constant second.
+instruct testI_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, limmI op2, immI0 zero ) %{
+ match(Set icc (CmpI (AndI op1 op2) zero));
+ size(4);
+ format %{ "tst_32 $op2,$op1" %}
+
+ ins_encode %{
+ __ tst_32($op1$$Register, $op2$$constant);
+ %}
+ ins_pipe(ialu_cconly_reg_imm_zero);
+%}
+
+#ifdef AARCH64
+// AArch64: long compare of two registers (CmpL) with a single CMP that
+// defines the flags operand xcc.
+instruct compL_reg_reg(flagsReg xcc, iRegL op1, iRegL op2)
+%{
+ match(Set xcc (CmpL op1 op2));
+ effect( DEF xcc, USE op1, USE op2 );
+
+ size(4);
+ format %{ "CMP $op1,$op2\t! long" %}
+ ins_encode %{
+ __ cmp($op1$$Register, $op2$$Register);
+ %}
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+#else
+// 32-bit ARM long compare producing flags typed flagsRegL_LTGE.  The
+// comparison is a 64-bit subtract op1 - op2 done as SUBS on the first
+// registers of each pair followed by SBCS on their successors; the
+// difference goes into the TEMP pair tmp and only the flags are the result.
+instruct compL_reg_reg_LTGE(flagsRegL_LTGE xcc, iRegL op1, iRegL op2, iRegL tmp) %{
+ match(Set xcc (CmpL op1 op2));
+ effect( DEF xcc, USE op1, USE op2, TEMP tmp );
+
+ size(8);
+ format %{ "SUBS $tmp,$op1.low,$op2.low\t\t! long\n\t"
+ "SBCS $tmp,$op1.hi,$op2.hi" %}
+ ins_encode %{
+ Register scratch = $tmp$$Register;
+ Register lhs = $op1$$Register;
+ Register rhs = $op2$$Register;
+ __ subs(scratch, lhs, rhs);
+ __ sbcs(scratch->successor(), lhs->successor(), rhs->successor());
+ %}
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+#endif
+
+#ifdef AARCH64
+// AArch64: long compare of a register against an aimmL constant (CmpL).
+// NOTE(review): size(8) is declared although the encoding is a single cmp
+// call and compL_reg_reg declares size(4); confirm whether cmp with this
+// constant class can expand to two instructions.
+instruct compL_reg_con(flagsReg xcc, iRegL op1, aimmL con) %{
+ match(Set xcc (CmpL op1 con));
+ effect( DEF xcc, USE op1, USE con );
+
+ size(8);
+ format %{ "CMP $op1,$con\t\t! long" %}
+ ins_encode %{
+ __ cmp($op1$$Register, $con$$constant);
+ %}
+
+ ins_pipe(ialu_cconly_reg_imm);
+%}
+#else
+// 32-bit ARM long compare producing flags typed flagsRegL_EQNE.  The
+// successor ("hi") registers are tested first with TEQ; the first ("lo")
+// registers are tested with a second TEQ that is conditional on eq.  No
+// scratch register is needed.
+instruct compL_reg_reg_EQNE(flagsRegL_EQNE xcc, iRegL op1, iRegL op2) %{
+ match(Set xcc (CmpL op1 op2));
+ effect( DEF xcc, USE op1, USE op2 );
+
+ size(8);
+ format %{ "TEQ $op1.hi,$op2.hi\t\t! long\n\t"
+ "TEQ.eq $op1.lo,$op2.lo" %}
+ ins_encode %{
+ Register lhs = $op1$$Register;
+ Register rhs = $op2$$Register;
+ __ teq(lhs->successor(), rhs->successor());
+ __ teq(lhs, rhs, eq);
+ %}
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// 32-bit ARM long compare producing flags typed flagsRegL_LEGT.  Same
+// SUBS/SBCS pair as compL_reg_reg_LTGE but with the operands reversed, so
+// the subtraction computed into the TEMP pair tmp is op2 - op1.
+instruct compL_reg_reg_LEGT(flagsRegL_LEGT xcc, iRegL op1, iRegL op2, iRegL tmp) %{
+ match(Set xcc (CmpL op1 op2));
+ effect( DEF xcc, USE op1, USE op2, TEMP tmp );
+
+ size(8);
+ format %{ "SUBS $tmp,$op2.low,$op1.low\t\t! long\n\t"
+ "SBCS $tmp,$op2.hi,$op1.hi" %}
+ ins_encode %{
+ Register scratch = $tmp$$Register;
+ Register lhs = $op1$$Register;
+ Register rhs = $op2$$Register;
+ // reversed subtraction: rhs - lhs
+ __ subs(scratch, rhs, lhs);
+ __ sbcs(scratch->successor(), rhs->successor(), lhs->successor());
+ %}
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+// 32-bit ARM long compare against an immLlowRot constant, flags typed
+// flagsRegL_LTGE: SUBS subtracts the constant from the first register of
+// op1, SBCS subtracts 0 from its successor; the difference goes into the
+// TEMP pair tmp.
+instruct compL_reg_con_LTGE(flagsRegL_LTGE xcc, iRegL op1, immLlowRot con, iRegL tmp) %{
+ match(Set xcc (CmpL op1 con));
+ effect( DEF xcc, USE op1, USE con, TEMP tmp );
+
+ size(8);
+ format %{ "SUBS $tmp,$op1.low,$con\t\t! long\n\t"
+ "SBCS $tmp,$op1.hi,0" %}
+ ins_encode %{
+ Register scratch = $tmp$$Register;
+ Register lhs = $op1$$Register;
+ __ subs(scratch, lhs, $con$$constant);
+ __ sbcs(scratch->successor(), lhs->successor(), 0);
+ %}
+
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+// 32-bit ARM long compare against an immLlowRot constant, flags typed
+// flagsRegL_EQNE: TEQ tests the successor ("hi") register of op1 against 0,
+// then a TEQ conditional on eq tests the first register against the constant.
+instruct compL_reg_con_EQNE(flagsRegL_EQNE xcc, iRegL op1, immLlowRot con) %{
+ match(Set xcc (CmpL op1 con));
+ effect( DEF xcc, USE op1, USE con );
+
+ size(8);
+ format %{ "TEQ $op1.hi,0\t\t! long\n\t"
+ "TEQ.eq $op1.lo,$con" %}
+ ins_encode %{
+ Register lhs = $op1$$Register;
+ __ teq(lhs->successor(), 0);
+ __ teq(lhs, $con$$constant, eq);
+ %}
+
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+// 32-bit ARM long compare against an immLlowRot constant, flags typed
+// flagsRegL_LEGT: uses the reverse-subtract pair RSBS / RSCS on the first
+// register of op1 (with the constant) and its successor (with 0); the
+// result goes into the TEMP pair tmp.
+instruct compL_reg_con_LEGT(flagsRegL_LEGT xcc, iRegL op1, immLlowRot con, iRegL tmp) %{
+ match(Set xcc (CmpL op1 con));
+ effect( DEF xcc, USE op1, USE con, TEMP tmp );
+
+ size(8);
+ format %{ "RSBS $tmp,$op1.low,$con\t\t! long\n\t"
+ "RSCS $tmp,$op1.hi,0" %}
+ ins_encode %{
+ Register scratch = $tmp$$Register;
+ Register lhs = $op1$$Register;
+ __ rsbs(scratch, lhs, $con$$constant);
+ __ rscs(scratch->successor(), lhs->successor(), 0);
+ %}
+
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+#endif
+
+/* instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{ */
+/* match(Set xcc (CmpL (AndL op1 op2) zero)); */
+/* ins_encode %{ */
+/* __ stop("testL_reg_reg unimplemented"); */
+/* %} */
+/* ins_pipe(ialu_cconly_reg_reg); */
+/* %} */
+
+/* // useful for checking the alignment of a pointer: */
+/* instruct testL_reg_con(flagsRegL xcc, iRegL op1, immLlowRot con, immL0 zero) %{ */
+/* match(Set xcc (CmpL (AndL op1 con) zero)); */
+/* ins_encode %{ */
+/* __ stop("testL_reg_con unimplemented"); */
+/* %} */
+/* ins_pipe(ialu_cconly_reg_reg); */
+/* %} */
+
+// Unsigned int compare of a register against an aimmU31 constant (CmpU)
+// with one cmp_32; the result is typed flagsRegU.
+instruct compU_iReg_imm(flagsRegU icc, iRegI op1, aimmU31 op2 ) %{
+ match(Set icc (CmpU op1 op2));
+
+ size(4);
+ format %{ "cmp_32 $op1,$op2\t! unsigned" %}
+ ins_encode %{
+ __ cmp_32($op1$$Register, $op2$$constant);
+ %}
+ ins_pipe(ialu_cconly_reg_imm);
+%}
+
+// Compare Pointers
+// Pointer compare of two registers (CmpP) with a single full-width cmp;
+// the result is typed flagsRegP.
+instruct compP_iRegP(flagsRegP pcc, iRegP op1, iRegP op2 ) %{
+ match(Set pcc (CmpP op1 op2));
+
+ size(4);
+ format %{ "CMP $op1,$op2\t! ptr" %}
+ ins_encode %{
+ __ cmp($op1$$Register, $op2$$Register);
+ %}
+ ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// Pointer compare of a register against an aimmP constant (CmpP).
+instruct compP_iRegP_imm(flagsRegP pcc, iRegP op1, aimmP op2 ) %{
+ match(Set pcc (CmpP op1 op2));
+
+ size(4);
+ format %{ "CMP $op1,$op2\t! ptr" %}
+ ins_encode %{
+ // The constant is embedded directly in the instruction, so it must be
+ // either 0 or carry no relocation.
+ assert($op2$$constant == 0 || _opnds[2]->constant_reloc() == relocInfo::none, "reloc in cmp?");
+ __ cmp($op1$$Register, $op2$$constant);
+ %}
+ ins_pipe(ialu_cconly_reg_imm);
+%}
+
+//----------Max and Min--------------------------------------------------------
+// Min Instructions
+// Conditional move for min
+// Helper without a match rule, used by the minI_eReg expansion: op2 is both
+// read and written (USE_DEF) and is overwritten with op1 only on "lt".
+instruct cmovI_reg_lt( iRegI op2, iRegI op1, flagsReg icc ) %{
+ effect( USE_DEF op2, USE op1, USE icc );
+
+ size(4);
+ format %{ "MOV.lt $op2,$op1\t! min" %}
+ ins_encode %{
+ __ mov($op2$$Register, $op1$$Register, lt);
+ %}
+ ins_pipe(ialu_reg_flags);
+%}
+
+// Min Register with Register.
+// MinI whose result replaces op2.  Expands to compI_iReg(op1, op2) followed
+// by cmovI_reg_lt, so op2 is overwritten with op1 only when the compare sets
+// "lt"; otherwise op2 is left as it was.
+instruct minI_eReg(iRegI op1, iRegI op2) %{
+ match(Set op2 (MinI op1 op2));
+ ins_cost(DEFAULT_COST*2);
+ expand %{
+ flagsReg icc;
+ compI_iReg(icc,op1,op2);
+ cmovI_reg_lt(op2,op1,icc);
+ %}
+%}
+
+// Max Instructions
+// Conditional move for max
+// Helper without a match rule, used by the maxI_eReg expansion: op2 is both
+// read and written (USE_DEF) and is overwritten with op1 only on "gt".
+// size(4) is declared to match cmovI_reg_lt, whose encoding differs only in
+// the condition code.
+instruct cmovI_reg_gt( iRegI op2, iRegI op1, flagsReg icc ) %{
+ effect( USE_DEF op2, USE op1, USE icc );
+
+ size(4);
+ format %{ "MOV.gt $op2,$op1\t! max" %}
+ ins_encode %{
+ __ mov($op2$$Register, $op1$$Register, gt);
+ %}
+ ins_pipe(ialu_reg_flags);
+%}
+
+// Max Register with Register
+// MaxI whose result replaces op2.  Expands to compI_iReg(op1, op2) followed
+// by cmovI_reg_gt, so op2 is overwritten with op1 only when the compare sets
+// "gt"; otherwise op2 is left as it was.
+instruct maxI_eReg(iRegI op1, iRegI op2) %{
+ match(Set op2 (MaxI op1 op2));
+ ins_cost(DEFAULT_COST*2);
+ expand %{
+ flagsReg icc;
+ compI_iReg(icc,op1,op2);
+ cmovI_reg_gt(op2,op1,icc);
+ %}
+%}
+
+
+//----------Float Compares----------------------------------------------------
+// Compare floating, generate condition code
+// CmpF of two float registers with the result in the integer flags operand
+// icc; the FP flags operand fcc is declared KILLed.  AArch64 needs a single
+// FCMP_s.  32-bit ARM emits FCMPs followed by FMSTAT (presumably to copy the
+// FP status flags into the integer flags - confirm against the assembler).
+instruct cmpF_cc(flagsRegF fcc, flagsReg icc, regF src1, regF src2) %{
+ match(Set icc (CmpF src1 src2));
+ effect(KILL fcc);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "FCMP_s $src1,$src2" %}
+ ins_encode %{
+ __ fcmp_s($src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+#else
+ size(8);
+ format %{ "FCMPs $src1,$src2\n\t"
+ "FMSTAT" %}
+ ins_encode %{
+ __ fcmps($src1$$FloatRegister, $src2$$FloatRegister);
+ __ fmstat();
+ %}
+#endif
+ ins_pipe(faddF_fcc_reg_reg_zero);
+%}
+
+// CmpF of a float register against the immF0 constant, with the result in
+// the integer flags operand icc; fcc is declared KILLed.  AArch64 uses
+// FCMP0_s.  32-bit ARM emits the compare-with-zero form (fcmpzs) followed by
+// FMSTAT; its format now prints FCMPZs to match that encoding, in line with
+// the FCMPZd printed by cmpD0_cc.
+instruct cmpF0_cc(flagsRegF fcc, flagsReg icc, regF src1, immF0 src2) %{
+ match(Set icc (CmpF src1 src2));
+ effect(KILL fcc);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "FCMP0_s $src1" %}
+ ins_encode %{
+ __ fcmp0_s($src1$$FloatRegister);
+ %}
+#else
+ size(8);
+ format %{ "FCMPZs $src1,$src2\n\t"
+ "FMSTAT" %}
+ ins_encode %{
+ __ fcmpzs($src1$$FloatRegister);
+ __ fmstat();
+ %}
+#endif
+ ins_pipe(faddF_fcc_reg_reg_zero);
+%}
+
+// CmpD of two double registers with the result in the integer flags operand
+// icc; the FP flags operand fcc is declared KILLed.  AArch64 needs a single
+// FCMP_d; 32-bit ARM emits FCMPd followed by FMSTAT.
+instruct cmpD_cc(flagsRegF fcc, flagsReg icc, regD src1, regD src2) %{
+ match(Set icc (CmpD src1 src2));
+ effect(KILL fcc);
+
+#ifdef AARCH64
+ size(4);
+ format %{ "FCMP_d $src1,$src2" %}
+ ins_encode %{
+ __ fcmp_d($src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+#else
+ size(8);
+ format %{ "FCMPd $src1,$src2 \n\t"
+ "FMSTAT" %}
+ ins_encode %{
+ __ fcmpd($src1$$FloatRegister, $src2$$FloatRegister);
+ __ fmstat();
+ %}
+#endif
+ ins_pipe(faddD_fcc_reg_reg_zero);
+%}
+
+// CmpD of a double register against the immD0 constant, with the result in
+// the integer flags operand icc; fcc is declared KILLed.  AArch64 uses a
+// single FCMP0_d; 32-bit ARM emits the compare-with-zero form (fcmpzd)
+// followed by FMSTAT.
+instruct cmpD0_cc(flagsRegF fcc, flagsReg icc, regD src1, immD0 src2) %{
+ match(Set icc (CmpD src1 src2));
+ effect(KILL fcc);
+
+#ifdef AARCH64
+ // One instruction, as in cmpF0_cc; cmpD0_reg's size(12) for
+ // fcmp0_d + cset + csinv confirms fcmp0_d occupies a single slot.
+ size(4);
+ format %{ "FCMP0_d $src1" %}
+ ins_encode %{
+ __ fcmp0_d($src1$$FloatRegister);
+ %}
+#else
+ size(8);
+ format %{ "FCMPZd $src1,$src2 \n\t"
+ "FMSTAT" %}
+ ins_encode %{
+ __ fcmpzd($src1$$FloatRegister);
+ __ fmstat();
+ %}
+#endif
+ ins_pipe(faddD_fcc_reg_reg_zero);
+%}
+
+#ifdef AARCH64
+// Compare floating, generate -1,0,1
+// AArch64 CmpF3 of two float registers.  After FCMP_s, CSET writes 1 or 0
+// for "gt", and CSINV then keeps that value on "ge" or replaces it with the
+// inverse of ZR (-1) otherwise.  The integer flags operand icc is KILLed.
+instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsReg icc) %{
+ match(Set dst (CmpF3 src1 src2));
+ // effect(KILL fcc); // nobody cares if flagsRegF is killed
+ effect(KILL icc);
+ ins_cost(DEFAULT_COST*3); // FIXME
+ size(12);
+ format %{ "FCMP_s $src1,$src2\n\t"
+ "CSET $dst, gt\n\t"
+ "CSINV $dst, $dst, ZR, ge" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ fcmp_s($src1$$FloatRegister, $src2$$FloatRegister);
+ __ cset(dst, gt); // 1 if '>', else 0
+ __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
+ %}
+ ins_pipe( floating_cmp ); // FIXME
+%}
+
+// Compare floating, generate -1,0,1
+// AArch64 CmpD3 of two double registers.  After FCMP_d, CSET writes 1 or 0
+// for "gt", and CSINV then keeps that value on "ge" or replaces it with the
+// inverse of ZR (-1) otherwise.  The integer flags operand icc is KILLed.
+instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsReg icc) %{
+ match(Set dst (CmpD3 src1 src2));
+ // effect(KILL fcc); // nobody cares if flagsRegF is killed
+ effect(KILL icc);
+ ins_cost(DEFAULT_COST*3); // FIXME
+ size(12);
+ format %{ "FCMP_d $src1,$src2\n\t"
+ "CSET $dst, gt\n\t"
+ "CSINV $dst, $dst, ZR, ge" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ fcmp_d($src1$$FloatRegister, $src2$$FloatRegister);
+ __ cset(dst, gt); // 1 if '>', else 0
+ __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
+ %}
+ ins_pipe( floating_cmp ); // FIXME
+%}
+
+// Compare floating, generate -1,0,1
+// AArch64 CmpF3 of a float register against the immF0 constant: same
+// CSET / CSINV tail as cmpF_reg, with FCMP0_s as the compare.
+instruct cmpF0_reg(iRegI dst, regF src1, immF0 src2, flagsReg icc) %{
+ match(Set dst (CmpF3 src1 src2));
+ // effect(KILL fcc); // nobody cares if flagsRegF is killed
+ effect(KILL icc);
+ ins_cost(DEFAULT_COST*3); // FIXME
+ size(12);
+ format %{ "FCMP0_s $src1\n\t"
+ "CSET $dst, gt\n\t"
+ "CSINV $dst, $dst, ZR, ge" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ fcmp0_s($src1$$FloatRegister);
+ __ cset(dst, gt); // 1 if '>', else 0
+ __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
+ %}
+ ins_pipe( floating_cmp ); // FIXME
+%}
+
+// Compare floating, generate -1,0,1
+// AArch64 CmpD3 of a double register against the immD0 constant: same
+// CSET / CSINV tail as cmpD_reg, with FCMP0_d as the compare.
+instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsReg icc) %{
+ match(Set dst (CmpD3 src1 src2));
+ // effect(KILL fcc); // nobody cares if flagsRegF is killed
+ effect(KILL icc);
+ ins_cost(DEFAULT_COST*3); // FIXME
+ size(12);
+ format %{ "FCMP0_d $src1\n\t"
+ "CSET $dst, gt\n\t"
+ "CSINV $dst, $dst, ZR, ge" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ fcmp0_d($src1$$FloatRegister);
+ __ cset(dst, gt); // 1 if '>', else 0
+ __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
+ %}
+ ins_pipe( floating_cmp ); // FIXME
+%}
+#else
+// Compare floating, generate -1,0,1
+// 32-bit ARM CmpF3 of two float registers.  FCMPs is followed by the
+// floating_cmp() macro-assembler helper, which produces the integer result
+// in dst; the format lists the VMRS/OR/EOR/MOV sequence it stands for.
+// Only the FP flags operand fcc is KILLed.
+// NOTE(review): the format prints $dst as an operand of the compare although
+// the encoding passes only src1 and src2 to fcmps.
+instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF fcc) %{
+ match(Set dst (CmpF3 src1 src2));
+ effect(KILL fcc);
+ ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+ size(20);
+ // same number of instructions as code using conditional moves but
+ // doesn't kill integer condition register
+ format %{ "FCMPs $dst,$src1,$src2 \n\t"
+ "VMRS $dst, FPSCR \n\t"
+ "OR $dst, $dst, 0x08000000 \n\t"
+ "EOR $dst, $dst, $dst << 3 \n\t"
+ "MOV $dst, $dst >> 30" %}
+ ins_encode %{
+ __ fcmps($src1$$FloatRegister, $src2$$FloatRegister);
+ __ floating_cmp($dst$$Register);
+ %}
+ ins_pipe( floating_cmp );
+%}
+
+// 32-bit ARM CmpF3 of a float register against the immF0 constant: the
+// compare-with-zero form (fcmpzs) followed by the floating_cmp() helper that
+// produces the integer result in dst.  Only fcc is KILLed.
+// NOTE(review): the format prints $dst and $src2 as operands of the compare
+// although the encoding passes only src1 to fcmpzs.
+instruct cmpF0_reg(iRegI dst, regF src1, immF0 src2, flagsRegF fcc) %{
+ match(Set dst (CmpF3 src1 src2));
+ effect(KILL fcc);
+ ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+ size(20);
+ // same number of instructions as code using conditional moves but
+ // doesn't kill integer condition register
+ format %{ "FCMPZs $dst,$src1,$src2 \n\t"
+ "VMRS $dst, FPSCR \n\t"
+ "OR $dst, $dst, 0x08000000 \n\t"
+ "EOR $dst, $dst, $dst << 3 \n\t"
+ "MOV $dst, $dst >> 30" %}
+ ins_encode %{
+ __ fcmpzs($src1$$FloatRegister);
+ __ floating_cmp($dst$$Register);
+ %}
+ ins_pipe( floating_cmp );
+%}
+
+// 32-bit ARM CmpD3 of two double registers: FCMPd followed by the
+// floating_cmp() helper that produces the integer result in dst.  Only fcc
+// is KILLed.
+// NOTE(review): the format prints $dst as an operand of the compare although
+// the encoding passes only src1 and src2 to fcmpd.
+instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsRegF fcc) %{
+ match(Set dst (CmpD3 src1 src2));
+ effect(KILL fcc);
+ ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+ size(20);
+ // same number of instructions as code using conditional moves but
+ // doesn't kill integer condition register
+ format %{ "FCMPd $dst,$src1,$src2 \n\t"
+ "VMRS $dst, FPSCR \n\t"
+ "OR $dst, $dst, 0x08000000 \n\t"
+ "EOR $dst, $dst, $dst << 3 \n\t"
+ "MOV $dst, $dst >> 30" %}
+ ins_encode %{
+ __ fcmpd($src1$$FloatRegister, $src2$$FloatRegister);
+ __ floating_cmp($dst$$Register);
+ %}
+ ins_pipe( floating_cmp );
+%}
+
+// 32-bit ARM CmpD3 of a double register against the immD0 constant: the
+// compare-with-zero form (fcmpzd) followed by the floating_cmp() helper that
+// produces the integer result in dst.  Only fcc is KILLed.
+// NOTE(review): the format prints $dst and $src2 as operands of the compare
+// although the encoding passes only src1 to fcmpzd.
+instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsRegF fcc) %{
+ match(Set dst (CmpD3 src1 src2));
+ effect(KILL fcc);
+ ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+ size(20);
+ // same number of instructions as code using conditional moves but
+ // doesn't kill integer condition register
+ format %{ "FCMPZd $dst,$src1,$src2 \n\t"
+ "VMRS $dst, FPSCR \n\t"
+ "OR $dst, $dst, 0x08000000 \n\t"
+ "EOR $dst, $dst, $dst << 3 \n\t"
+ "MOV $dst, $dst >> 30" %}
+ ins_encode %{
+ __ fcmpzd($src1$$FloatRegister);
+ __ floating_cmp($dst$$Register);
+ %}
+ ins_pipe( floating_cmp );
+%}
+#endif // !AARCH64
+
+//----------Branches---------------------------------------------------------
+// Jump
+// (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
+// FIXME
+// Table jump: loads a target address from the constant table, indexed by
+// switch_val, and jumps to it.  tmp is a TEMP pointer register that holds
+// the loaded target and, on some paths, the intermediate table address.
+// Three address-formation paths are emitted depending on the table offset:
+//   - offset 0:                      LDR tmp, [table base + switch_val]
+//   - offset accepted by is_memoryP: ADD tmp, base, switch_val
+//                                    LDR tmp, [tmp + offset]
+//   - any other offset:              MOV_SLOW tmp, offset
+//                                    ADD tmp, base, tmp
+//                                    LDR tmp, [tmp + switch_val]
+// The format string only shows the second of these; size(20) is the single
+// declared size for all three.
+instruct jumpXtnd(iRegX switch_val, iRegP tmp) %{
+ match(Jump switch_val);
+ effect(TEMP tmp);
+ ins_cost(350);
+ format %{ "ADD $tmp, $constanttablebase, $switch_val\n\t"
+ "LDR $tmp,[$tmp + $constantoffset]\n\t"
+ "BX $tmp" %}
+ size(20);
+ ins_encode %{
+ Register table_reg;
+ Register label_reg = $tmp$$Register;
+ if (constant_offset() == 0) {
+ // Table starts at the constant table base: index it directly.
+ table_reg = $constanttablebase;
+ __ ldr(label_reg, Address(table_reg, $switch_val$$Register));
+ } else {
+ // tmp doubles as the table address register from here on.
+ table_reg = $tmp$$Register;
+ int offset = $constantoffset;
+ if (is_memoryP(offset)) {
+ __ add(table_reg, $constanttablebase, $switch_val$$Register);
+ __ ldr(label_reg, Address(table_reg, offset));
+ } else {
+ // Offset is not accepted by is_memoryP: materialise it first.
+ __ mov_slow(table_reg, $constantoffset);
+ __ add(table_reg, $constanttablebase, table_reg);
+ __ ldr(label_reg, Address(table_reg, $switch_val$$Register));
+ }
+ }
+ __ jump(label_reg); // ldr + b better than ldr to PC for branch predictor?
+ // __ ldr(PC, Address($table$$Register, $switch_val$$Register));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Direct Branch.
+instruct branch(label labl) %{
+ match(Goto);
+ effect(USE labl);
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B $labl" %}
+ ins_encode %{
+ __ b(*($labl$$label)); // unconditional branch to the label
+ %}
+ ins_pipe(br);
+%}
+
+// Conditional Direct Branch on the integer flags (flagsReg).
+instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{
+ match(If cmp icc);
+ effect(USE labl);
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B$cmp $icc,$labl" %}
+ ins_encode %{
+ __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); // condition code comes from the cmp operand
+ %}
+ ins_pipe(br_cc);
+%}
+
+#ifdef ARM
+instruct branchCon_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, label labl) %{
+ match(If cmp icc); // same shape as branchCon, but for the flagsReg_EQNELTGE flags class
+ effect(USE labl);
+ predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B$cmp $icc,$labl" %}
+ ins_encode %{
+ __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); // predicate admits only eq, ne, lt and ge tests
+ %}
+ ins_pipe(br_cc);
+%}
+#endif
+
+#ifdef AARCH64
+instruct cbzI(cmpOp cmp, iRegI op1, immI0 op2, label labl) %{
+ match(If cmp (CmpI op1 op2)); // int compare against the immI0 constant fused with the branch
+ effect(USE labl);
+ predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
+ size(4); // one compare-and-branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "CB{N}Z $op1, $labl\t! int $cmp" %}
+ ins_encode %{
+ if ($cmp$$cmpcode == eq) { // eq selects CBZ; the only other case the predicate admits is ne
+ __ cbz_w($op1$$Register, *($labl$$label));
+ } else {
+ __ cbnz_w($op1$$Register, *($labl$$label));
+ }
+ %}
+ ins_pipe(br_cc); // FIXME
+%}
+
+instruct cbzP(cmpOpP cmp, iRegP op1, immP0 op2, label labl) %{
+ match(If cmp (CmpP op1 op2)); // pointer compare against the immP0 constant fused with the branch
+ effect(USE labl);
+ predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
+ size(4); // one compare-and-branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "CB{N}Z $op1, $labl\t! ptr $cmp" %}
+ ins_encode %{
+ if ($cmp$$cmpcode == eq) { // eq selects CBZ; the only other case the predicate admits is ne
+ __ cbz($op1$$Register, *($labl$$label));
+ } else {
+ __ cbnz($op1$$Register, *($labl$$label));
+ }
+ %}
+ ins_pipe(br_cc); // FIXME
+%}
+
+instruct cbzL(cmpOpL cmp, iRegL op1, immL0 op2, label labl) %{
+ match(If cmp (CmpL op1 op2)); // long compare against the immL0 constant fused with the branch
+ effect(USE labl);
+ predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
+ size(4); // one compare-and-branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "CB{N}Z $op1, $labl\t! long $cmp" %}
+ ins_encode %{
+ if ($cmp$$cmpcode == eq) { // eq selects CBZ; the only other case the predicate admits is ne
+ __ cbz($op1$$Register, *($labl$$label));
+ } else {
+ __ cbnz($op1$$Register, *($labl$$label));
+ }
+ %}
+ ins_pipe(br_cc); // FIXME
+%}
+#endif
+
+instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
+ match(If cmp icc); // conditional branch on the flagsRegU flags class with a cmpOpU condition
+ effect(USE labl);
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B$cmp $icc,$labl" %}
+ ins_encode %{
+ __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); // condition code comes from the cmp operand
+ %}
+ ins_pipe(br_cc);
+%}
+
+instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
+ match(If cmp pcc); // conditional branch on the flagsRegP flags class with a cmpOpP condition
+ effect(USE labl);
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B$cmp $pcc,$labl" %}
+ ins_encode %{
+ __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); // condition code comes from the cmp operand
+ %}
+ ins_pipe(br_cc);
+%}
+
+#ifndef AARCH64
+instruct branchConL_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, label labl) %{
+ match(If cmp xcc); // branch on a long-compare result held in the LTGE flags class
+ effect(USE labl);
+ predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B$cmp $xcc,$labl" %}
+ ins_encode %{
+ __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); // predicate admits only lt and ge tests
+ %}
+ ins_pipe(br_cc);
+%}
+
+instruct branchConL_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, label labl) %{
+ match(If cmp xcc); // branch on a long-compare result held in the EQNE flags class
+ effect(USE labl);
+ predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B$cmp $xcc,$labl" %}
+ ins_encode %{
+ __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); // predicate admits only eq and ne tests
+ %}
+ ins_pipe(br_cc);
+%}
+
+instruct branchConL_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, label labl) %{
+ match(If cmp xcc); // branch on a long-compare result held in the LEGT flags class
+ effect(USE labl);
+ predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B$cmp $xcc,$labl" %}
+ ins_encode %{
+ __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); // gt/le only; cmpOpL_commute presumably supplies the commuted condition -- confirm against the operand definition
+ %}
+ ins_pipe(br_cc);
+%}
+#endif
+
+instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
+ match(CountedLoopEnd cmp icc); // back-branch of a counted loop; encoded like branchCon
+ effect(USE labl);
+
+ size(4); // exactly one branch instruction
+ ins_cost(BRANCH_COST);
+ format %{ "B$cmp $icc,$labl\t! Loop end" %}
+ ins_encode %{
+ __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); // condition code comes from the cmp operand
+ %}
+ ins_pipe(br_cc);
+%}
+
+// instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
+// match(CountedLoopEnd cmp icc);
+// ins_pipe(br_cc);
+// %}
+
+// ============================================================================
+// Long Compare
+//
+// Currently we hold longs in 2 registers. Comparing such values efficiently
+// is tricky. The flavor of compare used depends on whether we are testing
+// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
+// The GE test is the negated LT test. The LE test can be had by commuting
+// the operands (yielding a GE test) and then negating; negate again for the
+// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
+// NE test is negated from that.
+
+// Due to a shortcoming in the ADLC, it mixes up expressions like:
+// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
+// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
+// are collapsed internally in the ADLC's dfa-gen code. The match for
+// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
+// foo match ends up with the wrong leaf. One fix is to not match both
+// reg-reg and reg-zero forms of long-compare. This is unfortunate because
+// both forms beat the trinary form of long-compare and both are very useful
+// on Intel which has so few registers.
+
+// instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{
+// match(If cmp xcc);
+// ins_pipe(br_cc);
+// %}
+
+// Manifest a CmpL3 result in an integer register. Very painful.
+// This is the test to avoid.
+#ifdef AARCH64
+instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr) %{
+ match(Set dst (CmpL3 src1 src2)); // three-way long compare: dst becomes 1, 0 or -1 (see encode comments)
+ // effect(KILL fcc); // nobody cares if flagsRegF is killed
+ effect(KILL ccr); // the CMP below overwrites the integer flags
+ ins_cost(DEFAULT_COST*3); // FIXME
+ size(12); // three instructions
+ format %{ "CMP $src1,$src2\n\t"
+ "CSET $dst, gt\n\t"
+ "CSINV $dst, $dst, ZR, ge" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+ __ cmp($src1$$Register, $src2$$Register);
+ __ cset(dst, gt); // 1 if '>', else 0
+ __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
+ %}
+ ins_pipe( ialu_cconly_reg_reg ); // FIXME
+%}
+// TODO cmpL3_reg_imm
+#else
+instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{
+ match(Set dst (CmpL3 src1 src2) ); // three-way long compare on a register pair: dst becomes 1 or -1, or the low-word difference (0) when equal
+ effect( KILL ccr ); // CMP and SUBS overwrite the integer flags
+ ins_cost(6*DEFAULT_COST); // FIXME
+ size(32);
+ format %{
+ "CMP $src1.hi, $src2.hi\t\t! long\n"
+ "\tMOV.gt $dst, 1\n"
+ "\tmvn.lt $dst, 0\n"
+ "\tB.ne done\n"
+ "\tSUBS $dst, $src1.lo, $src2.lo\n"
+ "\tMOV.hi $dst, 1\n"
+ "\tmvn.lo $dst, 0\n"
+ "done:" %}
+ ins_encode %{
+ Label done;
+ __ cmp($src1$$Register->successor(), $src2$$Register->successor()); // successor() is the high word (the .hi operands in format)
+ __ mov($dst$$Register, 1, gt); // high words decide with signed conditions
+ __ mvn($dst$$Register, 0, lt); // MVN of 0 yields -1
+ __ b(done, ne); // high words differ: result is final
+ __ subs($dst$$Register, $src1$$Register, $src2$$Register); // low words: dst is 0 when they are equal
+ __ mov($dst$$Register, 1, hi); // low words decide with unsigned conditions
+ __ mvn($dst$$Register, 0, lo);
+ __ bind(done);
+ %}
+ ins_pipe(cmpL_reg);
+%}
+#endif
+
+#ifndef AARCH64
+// Conditional move
+instruct cmovLL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, iRegL src) %{
+ match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditional long move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+ ins_cost(150);
+ size(8); // two conditional MOVs, one per 32-bit half
+ format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t"
+ "MOV$cmp $dst.hi,$src.hi" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // low half
+ __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); // high half is the successor register
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct cmovLL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, iRegL src) %{
+ match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditional long move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+ ins_cost(150);
+ size(8); // two conditional MOVs, one per 32-bit half
+ format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t"
+ "MOV$cmp $dst.hi,$src.hi" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // low half
+ __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); // high half is the successor register
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct cmovLL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, iRegL src) %{
+ match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditional long move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+ ins_cost(150);
+ size(8); // two conditional MOVs, one per 32-bit half
+ format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t"
+ "MOV$cmp $dst.hi,$src.hi" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // low half
+ __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); // high half is the successor register
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct cmovLL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, immL0 src) %{
+ match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditionally zero a long, keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+ ins_cost(140);
+ size(8); // two conditional MOVs, one per 32-bit half
+ format %{ "MOV$cmp $dst.lo,0\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ __ mov($dst$$Register, 0, (AsmCondition)($cmp$$cmpcode)); // low half
+ __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); // high half is the successor register
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+instruct cmovLL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, immL0 src) %{
+ match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditionally zero a long, keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+ ins_cost(140);
+ size(8); // two conditional MOVs, one per 32-bit half
+ format %{ "MOV$cmp $dst.lo,0\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ __ mov($dst$$Register, 0, (AsmCondition)($cmp$$cmpcode)); // low half
+ __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); // high half is the successor register
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+instruct cmovLL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, immL0 src) %{
+ match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditionally zero a long, keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+ ins_cost(140);
+ size(8); // two conditional MOVs, one per 32-bit half
+ format %{ "MOV$cmp $dst.lo,0\t! long\n\t"
+ "MOV$cmp $dst.hi,0" %}
+ ins_encode %{
+ __ mov($dst$$Register, 0, (AsmCondition)($cmp$$cmpcode)); // low half
+ __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); // high half is the successor register
+ %}
+ ins_pipe(ialu_imm);
+%}
+#endif // !AARCH64
+
+#ifndef AARCH64
+instruct cmovIL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, iRegI src) %{
+ match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional int move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+ ins_cost(150);
+ size(4); // one conditional MOV
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // predicate admits only lt and ge tests
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct cmovIL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, iRegI src) %{
+ match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional int move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+ ins_cost(150);
+ size(4); // one conditional MOV
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // predicate admits only eq and ne tests
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct cmovIL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, iRegI src) %{
+ match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional int move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+ ins_cost(150);
+ size(4); // one conditional MOV
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // predicate admits only le and gt tests
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif // !AARCH64
+
+#ifndef AARCH64
+instruct cmovIL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, immI16 src) %{
+ match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional load of an immI16 constant keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+ ins_cost(140); // no size() is declared for this instruct
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); // predicate admits only lt and ge tests
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+instruct cmovIL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, immI16 src) %{
+ match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional load of an immI16 constant keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+ ins_cost(140); // no size() is declared for this instruct
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); // predicate admits only eq and ne tests
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+instruct cmovIL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, immI16 src) %{
+ match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional load of an immI16 constant keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+ ins_cost(140); // no size() is declared for this instruct
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); // predicate admits only le and gt tests
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+instruct cmovPL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, iRegP src) %{
+ match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); // conditional pointer move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+ ins_cost(150);
+ size(4); // one conditional MOV
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // predicate admits only lt and ge tests
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct cmovPL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, iRegP src) %{
+ match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); // conditional pointer move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+ ins_cost(150);
+ size(4); // one conditional MOV
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // predicate admits only eq and ne tests
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct cmovPL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, iRegP src) %{
+ match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); // conditional pointer move keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+ ins_cost(150);
+ size(4); // one conditional MOV
+ format %{ "MOV$cmp $dst,$src" %}
+ ins_encode %{
+ __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); // predicate admits only le and gt tests
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct cmovPL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, immP0 src) %{
+ match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); // conditional load of the immP0 constant (presumably null -- confirm) keyed on a long compare
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+ ins_cost(140); // no size() is declared for this instruct
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); // predicate admits only lt and ge tests
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+instruct cmovPL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, immP0 src) %{
+ match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); // conditional load of the immP0 constant (presumably null -- confirm) keyed on a long compare
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+ ins_cost(140); // no size() is declared for this instruct
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); // predicate admits only eq and ne tests
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+instruct cmovPL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, immP0 src) %{
+ match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); // conditional load of the immP0 constant (presumably null -- confirm) keyed on a long compare
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+ ins_cost(140); // no size() is declared for this instruct
+ format %{ "MOVW$cmp $dst,$src" %}
+ ins_encode %{
+ __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); // predicate admits only le and gt tests
+ %}
+ ins_pipe(ialu_imm);
+%}
+
+instruct cmovFL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regF dst, regF src) %{
+ match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); // conditional float register copy keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+ ins_cost(150);
+ size(4); // one conditional FCPYS
+ format %{ "FCPYS$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); // predicate admits only lt and ge tests
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovFL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regF dst, regF src) %{
+ match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); // conditional float register copy keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+ ins_cost(150);
+ size(4); // one conditional FCPYS
+ format %{ "FCPYS$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); // predicate admits only eq and ne tests
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovFL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regF dst, regF src) %{
+ match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); // conditional float register copy keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+ ins_cost(150);
+ size(4); // one conditional FCPYS
+ format %{ "FCPYS$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); // predicate admits only le and gt tests
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovDL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regD dst, regD src) %{
+ match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); // conditional double register copy keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+ ins_cost(150);
+ size(4); // one conditional FCPYD
+ format %{ "FCPYD$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); // predicate admits only lt and ge tests
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovDL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regD dst, regD src) %{
+ match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); // conditional double register copy keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+ ins_cost(150);
+ size(4); // one conditional FCPYD
+ format %{ "FCPYD$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); // predicate admits only eq and ne tests
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovDL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regD dst, regD src) %{
+ match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); // conditional double register copy keyed on a long-compare result
+ predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+ ins_cost(150);
+ size(4); // one conditional FCPYD
+ format %{ "FCPYD$cmp $dst,$src" %}
+ ins_encode %{
+ __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); // predicate admits only le and gt tests
+ %}
+ ins_pipe(int_conditional_float_move);
+%}
+#endif // !AARCH64
+
+// ============================================================================
+// Safepoint Instruction
+#ifdef AARCH64
+instruct safePoint_poll(iRegP poll, flagsReg icc, RtempRegP tmp) %{
+ match(SafePoint poll); // safepoint poll: a load from the address held in poll
+ // The handler stub kills Rtemp
+ effect(USE poll, KILL tmp, KILL icc);
+
+ size(4); // a single load
+ format %{ "LDR ZR,[$poll]\t! Safepoint: poll for GC" %}
+ ins_encode %{
+ __ relocate(relocInfo::poll_type); // tag the load with a poll relocation
+ __ ldr(ZR, Address($poll$$Register)); // destination is ZR: only the memory access matters
+ %}
+ ins_pipe(loadPollP);
+%}
+#else
+// Rather than KILL R12, it would be better to use any reg as
+// TEMP. Can't do that at this point because it crashes the compiler.
+instruct safePoint_poll(iRegP poll, R12RegI tmp, flagsReg icc) %{
+ match(SafePoint poll); // safepoint poll: a load from the address held in poll
+ effect(USE poll, KILL tmp, KILL icc);
+
+ size(4); // a single load
+ format %{ "LDR $tmp,[$poll]\t! Safepoint: poll for GC" %}
+ ins_encode %{
+ __ relocate(relocInfo::poll_type); // tag the load with a poll relocation
+ __ ldr($tmp$$Register, Address($poll$$Register)); // loaded value lands in the killed tmp register
+ %}
+ ins_pipe(loadPollP);
+%}
+#endif
+
+
+// ============================================================================
+// Call Instructions
+// Call Java Static Instruction
+instruct CallStaticJavaDirect( method meth ) %{
+ match(CallStaticJava);
+ predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke()); // method-handle invokes are matched by CallStaticJavaHandle instead
+ effect(USE meth);
+
+ ins_cost(CALL_COST);
+ format %{ "CALL,static ==> " %}
+ ins_encode( Java_Static_Call( meth ), call_epilog ); // encodings are defined elsewhere in this file
+ ins_pipe(simple_call);
+%}
+
+// Call Java Static Instruction (method handle version)
+instruct CallStaticJavaHandle( method meth ) %{
+ match(CallStaticJava);
+ predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); // only method-handle invokes
+ effect(USE meth);
+ // FP is saved by all callees (for interpreter stack correction).
+ // We use it here for a similar purpose, in {preserve,restore}_SP.
+
+ ins_cost(CALL_COST);
+ format %{ "CALL,static/MethodHandle ==> " %}
+ ins_encode( preserve_SP, Java_Static_Call( meth ), restore_SP, call_epilog ); // the call is bracketed by the preserve_SP/restore_SP encodings
+ ins_pipe(simple_call);
+%}
+
+// Call Java Dynamic Instruction
+instruct CallDynamicJavaDirect( method meth ) %{
+ match(CallDynamicJava);
+ effect(USE meth);
+
+ ins_cost(CALL_COST);
+ format %{ "MOV_OOP (empty),R_R8\n\t"
+ "CALL,dynamic ; NOP ==> " %}
+ ins_encode( Java_Dynamic_Call( meth ), call_epilog ); // encodings are defined elsewhere in this file
+ ins_pipe(call);
+%}
+
+// Call Runtime Instruction
+instruct CallRuntimeDirect(method meth) %{
+ match(CallRuntime);
+ effect(USE meth);
+ ins_cost(CALL_COST); // the AARCH64 variant below additionally runs save_last_PC before the call
+ format %{ "CALL,runtime" %}
+#ifdef AARCH64
+ ins_encode( save_last_PC, Java_To_Runtime( meth ),
+ call_epilog );
+#else
+ ins_encode( Java_To_Runtime( meth ),
+ call_epilog );
+#endif
+ ins_pipe(simple_call);
+%}
+
+// Call runtime without safepoint - same encoding as the non-AARCH64 CallRuntime
+instruct CallLeafDirect(method meth) %{
+ match(CallLeaf);
+ effect(USE meth);
+ ins_cost(CALL_COST);
+ format %{ "CALL,runtime leaf" %}
+ // TODO: need save_last_PC here?
+ ins_encode( Java_To_Runtime( meth ),
+ call_epilog );
+ ins_pipe(simple_call);
+%}
+
+// Call runtime without safepoint - same encoding as CallLeaf
+instruct CallLeafNoFPDirect(method meth) %{
+ match(CallLeafNoFP);
+ effect(USE meth);
+ ins_cost(CALL_COST);
+ format %{ "CALL,runtime leaf nofp" %}
+ // TODO: need save_last_PC here?
+ ins_encode( Java_To_Runtime( meth ),
+ call_epilog );
+ ins_pipe(simple_call);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(IPRegP jump_target, inline_cache_regP method_oop) %{
+ match(TailCall jump_target method_oop ); // method_oop is not touched by the encoding; it is only pinned to its register class
+
+ ins_cost(CALL_COST);
+ format %{ "MOV Rexception_pc, LR\n\t"
+ "jump $jump_target \t! $method_oop holds method oop" %}
+ ins_encode %{
+ __ mov(Rexception_pc, LR); // this is used only to call
+ // StubRoutines::forward_exception_entry()
+ // which expects PC of exception in
+ // R5. FIXME?
+ __ jump($jump_target$$Register); // plain jump: LR is left as it was
+ %}
+ ins_pipe(tail_call);
+%}
+
+
+// Return Instruction
+instruct Ret() %{
+ match(Return); // method return through the link register
+
+ format %{ "ret LR" %}
+
+ ins_encode %{
+ __ ret(LR); // return to the address held in LR
+ %}
+
+ ins_pipe(br);
+%}
+
+
+// Tail Jump; remove the return address; jump to target.
+// TailCall above leaves the return address around.
+// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
+// ex_oop (Exception Oop) is pinned to the RExceptionRegP operand class, so no
+// move is emitted for it here. NOTE(review): the earlier %o0/%i0 and "restore"
+// wording looked inherited from the SPARC port -- confirm nothing else is needed.
+instruct tailjmpInd(IPRegP jump_target, RExceptionRegP ex_oop) %{
+ match( TailJump jump_target ex_oop );
+ ins_cost(CALL_COST);
+ format %{ "MOV Rexception_pc, LR\n\t"
+ "jump $jump_target \t! $ex_oop holds exc. oop" %}
+ ins_encode %{
+ __ mov(Rexception_pc, LR); // copy the return address into Rexception_pc before jumping
+ __ jump($jump_target$$Register);
+ %}
+ ins_pipe(tail_call);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+instruct CreateException( RExceptionRegP ex_oop )
+%{
+ match(Set ex_oop (CreateEx));
+ ins_cost(0);
+
+ size(0); // nothing is emitted: the encoding below is empty
+ // use the following format syntax
+ format %{ "! exception oop is in Rexception_obj; no code emitted" %}
+ ins_encode();
+ ins_pipe(empty);
+%}
+
+
+// Rethrow exception:
+// The exception oop will come in the first argument position.
+// Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException()
+%{
+ match(Rethrow);
+ ins_cost(CALL_COST);
+
+ // use the following format syntax
+ format %{ "b rethrow_stub" %}
+ ins_encode %{
+ Register scratch = R1_tmp;
+ assert_different_registers(scratch, c_rarg0, LR); // scratch must not alias the first argument register or LR
+ __ jump(OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type, scratch); // jump (not call) to the rethrow stub
+ %}
+ ins_pipe(tail_call);
+%}
+
+
+// Die now
+instruct ShouldNotReachHere( )
+%{
+ match(Halt);
+ ins_cost(CALL_COST);
+
+ size(4); // a single breakpoint instruction
+ // Use the following format syntax
+ format %{ "breakpoint ; ShouldNotReachHere" %}
+ ins_encode %{
+ __ breakpoint(); // trap if control ever gets here
+ %}
+ ins_pipe(tail_call);
+%}
+
+// ============================================================================
+// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
+// array for an instance of the superklass. Set a hidden internal cache on a
+// hit (cache is checked with exposed code in gen_subtype_check()). Return
+// not zero for a miss or zero for a hit. The encoding ALSO sets flags.
+instruct partialSubtypeCheck( R0RegP index, R1RegP sub, R2RegP super, flagsRegP pcc, LRRegP lr ) %{
+ match(Set index (PartialSubtypeCheck sub super)); // operands are pinned to R0/R1/R2 register classes for the stub call
+ effect( KILL pcc, KILL lr ); // the stub call clobbers the flags and LR
+ ins_cost(DEFAULT_COST*10);
+ format %{ "CALL PartialSubtypeCheck" %}
+ ins_encode %{
+ __ call(StubRoutines::Arm::partial_subtype_check(), relocInfo::runtime_call_type); // all the work happens in the shared stub
+ %}
+ ins_pipe(partial_subtype_check_pipe);
+%}
+
+/* instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{ */
+/* match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero)); */
+/* ins_pipe(partial_subtype_check_pipe); */
+/* %} */
+
+
+// ============================================================================
+// inlined locking and unlocking
+
+#ifdef AARCH64
+instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch, iRegP scratch3 )
+#else
+instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch )
+#endif
+%{
+ match(Set pcc (FastLock object box)); // inlined monitor enter; the outcome is reported through the pcc flags
+
+#ifdef AARCH64
+ effect(TEMP scratch, TEMP scratch2, TEMP scratch3);
+#else
+ effect(TEMP scratch, TEMP scratch2);
+#endif
+ ins_cost(100); // the AARCH64 variant takes one extra scratch register; otherwise the two variants are the same
+
+#ifdef AARCH64
+ format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2, $scratch3" %}
+ ins_encode %{
+ __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register);
+ %}
+#else
+ format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2" %}
+ ins_encode %{
+ __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register);
+ %}
+#endif
+ ins_pipe(long_memory_op);
+%}
+
+
+#ifdef AARCH64
+instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch, iRegP scratch3 ) %{
+ match(Set pcc (FastUnlock object box)); // inlined monitor exit; the outcome is reported through the pcc flags
+ effect(TEMP scratch, TEMP scratch2, TEMP scratch3); // three scratch registers in the AARCH64 variant
+ ins_cost(100);
+
+ format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $scratch3" %}
+ ins_encode %{
+ __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register); // all the work happens in the macro-assembler helper
+ %}
+ ins_pipe(long_memory_op);
+%}
+#else
+instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) %{
+ match(Set pcc (FastUnlock object box)); // inlined monitor exit; the outcome is reported through the pcc flags
+ effect(TEMP scratch, TEMP scratch2); // two scratch registers in the non-AARCH64 variant
+ ins_cost(100);
+
+ format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2" %}
+ ins_encode %{
+ __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register); // all the work happens in the macro-assembler helper
+ %}
+ ins_pipe(long_memory_op);
+%}
+#endif
+
+#ifdef AARCH64
+// TODO: add version that takes immI cnt?
+instruct clear_array(iRegX cnt, iRegP base, iRegP ptr, iRegX temp, Universe dummy, flagsReg cpsr) %{
+ match(Set dummy (ClearArray cnt base)); // zero cnt bytes starting at base (cnt is counted in bytes per the format text)
+ effect(TEMP temp, TEMP ptr, KILL cpsr); // cnt and base are left unmodified; SUBS/ADDS clobber the flags
+ ins_cost(300);
+ format %{
+ " MOV $temp,$cnt\n"
+ " ADD $ptr,$base,$cnt\n"
+ " SUBS $temp,$temp,16\t! Count down dword pair in bytes\n"
+ " B.lt done16\n"
+ "loop: STP ZR,ZR,[$ptr,-16]!\n"
+ " SUBS $temp,$temp,16\t! Count down dword pair in bytes\n"
+ " B.ge loop\t! Clearing loop\n"
+ "done16: ADDS $temp,$temp,8\t! Room for 1 more long?\n"
+ " B.lt done\n"
+ " STR ZR,[$base+$temp]\n"
+ "done:"
+ %}
+ ins_encode %{
+ // TODO: preload?
+ __ mov($temp$$Register, $cnt$$Register); // temp = bytes still to clear
+ __ add($ptr$$Register, $base$$Register, $cnt$$Register); // ptr starts one past the end; the array is cleared back to front
+ Label loop, done, done16;
+ __ subs($temp$$Register, $temp$$Register, 16);
+ __ b(done16, lt); // fewer than 16 bytes: skip the pair loop
+ __ bind(loop);
+ __ stp(ZR, ZR, Address($ptr$$Register, -16, pre_indexed)); // pre-decrement ptr by 16, then store two zero dwords
+ __ subs($temp$$Register, $temp$$Register, 16);
+ __ b(loop, ge);
+ __ bind(done16);
+ __ adds($temp$$Register, $temp$$Register, 8); // temp is in [-16,-1] here; non-negative after +8 means 8 bytes remain
+ __ b(done, lt);
+ // $temp should be 0 here
+ __ str(ZR, Address($base$$Register, $temp$$Register)); // clear the one remaining dword at the start of the array
+ __ bind(done);
+ %}
+ ins_pipe(long_memory_op);
+%}
+#else
+// Zero cnt bytes starting at base, one 4-byte word per iteration, from the end
+// toward base. cnt and base are only read; temp and zero are scratch (TEMP).
+instruct clear_array(iRegX cnt, iRegP base, iRegI temp, iRegX zero, Universe dummy, flagsReg cpsr) %{
+ match(Set dummy (ClearArray cnt base));
+ effect(TEMP temp, TEMP zero, KILL cpsr); // SUBS in the loop clobbers the flags
+ ins_cost(300);
+ format %{ "MOV $zero,0\n"
+ " MOV $temp,$cnt\n"
+ "loop: SUBS $temp,$temp,4\t! Count down a word of bytes\n"
+ " STR.ge $zero,[$base+$temp]\t! Store zero while count >= 0\n"
+ " B.gt loop\t\t! Clearing loop" %}
+ ins_encode %{
+ __ mov($zero$$Register, 0);
+ __ mov($temp$$Register, $cnt$$Register); // temp = byte offset of the next word to clear, plus 4
+ Label loop;
+ __ bind(loop);
+ __ subs($temp$$Register, $temp$$Register, 4);
+ __ str($zero$$Register, Address($base$$Register, $temp$$Register), ge); // conditional store: skipped once temp goes negative
+ __ b(loop, gt); // temp reached 0: the first word was just cleared, so stop
+ %}
+ ins_pipe(long_memory_op);
+%}
+#endif
+
+#ifdef XXX
+// StrComp rule for non-compact strings. This definition sits inside the
+// surrounding "#ifdef XXX" region.
+// All four inputs are bound to fixed registers and declared USE_KILL;
+// tmp1/tmp2 are scratch and the flags are killed. The code itself comes
+// from the enc_String_Compare encoding class, defined elsewhere.
+// FIXME: Why R0/R1/R2/R3?
+instruct string_compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
+ iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+ predicate(!CompactStrings);
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, TEMP tmp1, TEMP tmp2);
+ ins_cost(300);
+ format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // TEMP $tmp1, $tmp2" %}
+ ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2) );
+
+ ins_pipe(long_memory_op);
+%}
+
+// StrEquals rule for non-compact strings. This definition sits inside the
+// surrounding "#ifdef XXX" region.
+// Inputs are bound to fixed registers and declared USE_KILL; result is
+// also marked TEMP. The code comes from the enc_String_Equals encoding
+// class, defined elsewhere.
+// FIXME: Why R0/R1/R2?
+instruct string_equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2,
+ flagsReg ccr) %{
+ predicate(!CompactStrings);
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp1, TEMP tmp2, TEMP result, KILL ccr);
+
+ ins_cost(300);
+ format %{ "String Equals $str1,$str2,$cnt -> $result // TEMP $tmp1, $tmp2" %}
+ ins_encode( enc_String_Equals(str1, str2, cnt, result, tmp1, tmp2) );
+ ins_pipe(long_memory_op);
+%}
+
+// AryEq rule, selected only when the node's encoding is StrIntrinsicNode::UU.
+// This definition sits inside the surrounding "#ifdef XXX" region.
+// Both array inputs are bound to fixed registers and declared USE_KILL.
+// The code comes from the enc_Array_Equals encoding class, defined elsewhere.
+// FIXME: Why R0/R1?
+instruct array_equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result,
+ flagsReg ccr) %{
+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (AryEq ary1 ary2));
+ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP result, KILL ccr);
+
+ ins_cost(300);
+ format %{ "Array Equals $ary1,$ary2 -> $result // TEMP $tmp1,$tmp2,$tmp3" %}
+ ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, result));
+ ins_pipe(long_memory_op);
+%}
+#endif
+
+//---------- Zeros Count Instructions ------------------------------------------
+
+// CountLeadingZerosI: a single 32-bit CLZ from $src into $dst.
+instruct countLeadingZerosI(iRegI dst, iRegI src) %{
+ match(Set dst (CountLeadingZerosI src));
+ format %{ "CLZ_32 $dst,$src" %}
+ size(4);
+ ins_pipe(ialu_reg);
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ __ clz_32(Rdst, Rsrc);
+ %}
+%}
+
+#ifdef AARCH64
+// CountLeadingZerosL on AArch64: a single CLZ on the long source register.
+instruct countLeadingZerosL(iRegI dst, iRegL src) %{
+ match(Set dst (CountLeadingZerosL src));
+ format %{ "CLZ $dst,$src" %}
+ size(4);
+ ins_pipe(ialu_reg);
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ __ clz(Rdst, Rsrc);
+ %}
+%}
+#else
+// CountLeadingZerosL on 32-bit ARM, where $src is a lo/hi register pair.
+// The high word is counted first. Only when that count equals 32 is the
+// low word counted and added in, using conditionally executed instructions.
+instruct countLeadingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
+ match(Set dst (CountLeadingZerosL src));
+ // dst is written before src.lo is read, hence TEMP dst; TEQ sets the flags.
+ effect(TEMP tmp, TEMP dst, KILL ccr);
+ size(16);
+ format %{ "CLZ $dst,$src.hi\n\t"
+ "TEQ $dst,32\n\t"
+ "CLZ.eq $tmp,$src.lo\n\t"
+ "ADD.eq $dst, $dst, $tmp\n\t" %}
+ ins_encode %{
+ // successor() of the pair's first register is the high word
+ __ clz($dst$$Register, $src$$Register->successor());
+ __ teq($dst$$Register, 32);
+ // Executed only when the TEQ above compared equal
+ __ clz($tmp$$Register, $src$$Register, eq);
+ __ add($dst$$Register, $dst$$Register, $tmp$$Register, eq);
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif
+
+// CountTrailingZerosI: bit-reverse $src into $tmp, then count leading zeros.
+instruct countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp) %{
+ match(Set dst (CountTrailingZerosI src));
+ effect(TEMP tmp);
+ format %{ "RBIT_32 $tmp, $src\n\t"
+ "CLZ_32 $dst,$tmp" %}
+ size(8);
+ ins_pipe(ialu_reg);
+ ins_encode %{
+ Register Rrev = $tmp$$Register;
+ __ rbit_32(Rrev, $src$$Register);
+ __ clz_32($dst$$Register, Rrev);
+ %}
+%}
+
+#ifdef AARCH64
+// CountTrailingZerosL on AArch64: bit-reverse the long into $tmp, then CLZ.
+instruct countTrailingZerosL(iRegI dst, iRegL src, iRegL tmp) %{
+ match(Set dst (CountTrailingZerosL src));
+ effect(TEMP tmp);
+ format %{ "RBIT $tmp, $src\n\t"
+ "CLZ $dst,$tmp" %}
+ size(8);
+ ins_pipe(ialu_reg);
+ ins_encode %{
+ Register Rrev = $tmp$$Register;
+ __ rbit(Rrev, $src$$Register);
+ __ clz($dst$$Register, Rrev);
+ %}
+%}
+#else
+// CountTrailingZerosL on 32-bit ARM, where $src is a lo/hi register pair.
+// The low word is bit-reversed and counted first. Only when that count
+// equals 32 is the count of the bit-reversed high word added in.
+instruct countTrailingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
+ match(Set dst (CountTrailingZerosL src));
+ // dst is written before src.hi is read, hence TEMP dst; TEQ sets the flags.
+ effect(TEMP tmp, TEMP dst, KILL ccr);
+ size(24);
+ format %{ "RBIT $tmp,$src.lo\n\t"
+ "CLZ $dst,$tmp\n\t"
+ "TEQ $dst,32\n\t"
+ "RBIT $tmp,$src.hi\n\t"
+ "CLZ.eq $tmp,$tmp\n\t"
+ "ADD.eq $dst,$dst,$tmp\n\t" %}
+ ins_encode %{
+ __ rbit($tmp$$Register, $src$$Register);
+ __ clz($dst$$Register, $tmp$$Register);
+ __ teq($dst$$Register, 32);
+ // Unconditional: tmp is free again here; successor() is the high word
+ __ rbit($tmp$$Register, $src$$Register->successor());
+ // Executed only when the TEQ above compared equal
+ __ clz($tmp$$Register, $tmp$$Register, eq);
+ __ add($dst$$Register, $dst$$Register, $tmp$$Register, eq);
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif
+
+
+//---------- Population Count Instructions -------------------------------------
+
+#ifdef AARCH64
+// PopCountI on AArch64, enabled by UsePopCountInstruction. The 32-bit value
+// is moved into a SIMD register, VCNT counts bits per byte, ADDV sums the
+// byte counts, and the result is moved back to $dst.
+instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountI src));
+ effect(TEMP tmp);
+
+ format %{ "MOV_W $dst,$src\n\t"
+ "FMOV_dx $tmp,$dst\n\t"
+ "VCNT $tmp.8B,$tmp.8B\n\t"
+ "ADDV $tmp.B,$tmp.8B\n\t"
+ "FMRS $dst,$tmp" %}
+ size(20);
+
+ ins_encode %{
+ int d_form = 0; // quad flag
+ int byte_elems = 0; // VELEM_SIZE_8
+ FloatRegister Vtmp = $tmp$$FloatRegister;
+ __ mov_w($dst$$Register, $src$$Register);
+ __ fmov_dx(Vtmp, $dst$$Register);
+ __ vcnt(Vtmp, Vtmp, d_form, byte_elems);
+ __ addv(Vtmp, Vtmp, d_form, byte_elems);
+ __ fmrs($dst$$Register, Vtmp);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#else
+// PopCountI on 32-bit ARM, enabled by UsePopCountInstruction. VCNT counts
+// bits per byte; two pairwise widening adds (U8, then U16) fold the byte
+// counts into one value that is moved back to $dst.
+instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountI src));
+ effect(TEMP tmp);
+ size(20);
+
+ format %{ "FMSR $tmp,$src\n\t"
+ "VCNT.8 $tmp,$tmp\n\t"
+ "VPADDL.U8 $tmp,$tmp\n\t"
+ "VPADDL.U16 $tmp,$tmp\n\t"
+ "FMRS $dst,$tmp" %}
+
+ ins_encode %{
+ FloatRegister Vtmp = $tmp$$FloatRegister;
+ __ fmsr(Vtmp, $src$$Register);
+ __ vcnt(Vtmp, Vtmp);
+ __ vpaddl(Vtmp, Vtmp, 8, 0);
+ __ vpaddl(Vtmp, Vtmp, 16, 0);
+ __ fmrs($dst$$Register, Vtmp);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#endif
+
+#ifdef AARCH64
+// PopCountL on AArch64, enabled by UsePopCountInstruction. The long is moved
+// into a SIMD register, VCNT counts bits per byte, ADDV sums the byte
+// counts, and the int result is moved back to $dst.
+instruct popCountL(iRegI dst, iRegL src, regD tmp) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountL src));
+ effect(TEMP tmp);
+
+ format %{ "FMOV_dx $tmp,$src\n\t"
+ "VCNT $tmp.8B,$tmp.8B\n\t"
+ "ADDV $tmp.B,$tmp.8B\n\t"
+ "FMOV_ws $dst,$tmp" %}
+ size(16);
+
+ ins_encode %{
+ int d_form = 0; // quad flag
+ int byte_elems = 0; // element size passed to vcnt/addv
+ FloatRegister Vtmp = $tmp$$FloatRegister;
+ __ fmov_dx(Vtmp, $src$$Register);
+ __ vcnt(Vtmp, Vtmp, d_form, byte_elems);
+ __ addv(Vtmp, Vtmp, d_form, byte_elems);
+ __ fmov_ws($dst$$Register, Vtmp);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#else
+// PopCountL on 32-bit ARM, enabled by UsePopCountInstruction. Both halves
+// of the long are moved into one double register, VCNT counts bits per
+// byte, and three pairwise widening adds (U8, U16, U32) fold the counts
+// into the value that is moved to $dst.
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(iRegI dst, iRegL src, regD_low tmp) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountL src));
+ effect(TEMP tmp);
+
+ format %{ "FMDRR $tmp,$src.lo,$src.hi\n\t"
+ "VCNT.8 $tmp,$tmp\n\t"
+ "VPADDL.U8 $tmp,$tmp\n\t"
+ "VPADDL.U16 $tmp,$tmp\n\t"
+ "VPADDL.U32 $tmp,$tmp\n\t"
+ "FMRS $dst,$tmp" %}
+
+ // NOTE(review): six instructions are emitted below; if each is 4 bytes the
+ // exact size would be 24, not 32 -- confirm whether the slack is intended.
+ size(32);
+
+ ins_encode %{
+ // successor() of the pair's first register is the high word
+ __ fmdrr($tmp$$FloatRegister, $src$$Register, $src$$Register->successor());
+ __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister);
+ __ vpaddl($tmp$$FloatRegister, $tmp$$FloatRegister, 8, 0);
+ __ vpaddl($tmp$$FloatRegister, $tmp$$FloatRegister, 16, 0);
+ __ vpaddl($tmp$$FloatRegister, $tmp$$FloatRegister, 32, 0);
+ __ fmrs($dst$$Register, $tmp$$FloatRegister);
+ %}
+ ins_pipe(ialu_reg);
+%}
+#endif
+
+
+// ============================================================================
+//------------Bytes reverse--------------------------------------------------
+
+// ReverseBytesI: one byte-reverse instruction (rev_w on AArch64, rev on
+// 32-bit ARM).
+instruct bytes_reverse_int(iRegI dst, iRegI src) %{
+ match(Set dst (ReverseBytesI src));
+
+ format %{ "REV32 $dst,$src" %}
+ size(4);
+ ins_pipe( iload_mem ); // FIXME
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+#ifdef AARCH64
+ // high 32 bits zeroed, not sign extended
+ __ rev_w(Rdst, Rsrc);
+#else
+ __ rev(Rdst, Rsrc);
+#endif
+ %}
+%}
+
+// ReverseBytesL. AArch64 uses a single REV on the 64-bit register.
+// 32-bit ARM byte-reverses each word and swaps the halves:
+// dst.lo = REV(src.hi), dst.hi = REV(src.lo).
+instruct bytes_reverse_long(iRegL dst, iRegL src) %{
+ match(Set dst (ReverseBytesL src));
+#ifdef AARCH64
+//size(4);
+ format %{ "REV $dst,$src" %}
+ ins_encode %{
+ __ rev($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg_reg); // FIXME
+#else
+ // dst.lo is written before src.lo is read, so dst must not overlap src.
+ effect(TEMP dst);
+ size(8);
+ // The format now mirrors the encoding, which crosses the halves; it
+ // previously printed lo<-lo / hi<-hi.
+ format %{ "REV $dst.lo,$src.hi\n\t"
+ "REV $dst.hi,$src.lo" %}
+ ins_encode %{
+ // successor() of a pair's first register is the high word
+ __ rev($dst$$Register, $src$$Register->successor());
+ __ rev($dst$$Register->successor(), $src$$Register);
+ %}
+ ins_pipe( iload_mem ); // FIXME
+#endif
+%}
+
+// ReverseBytesUS: one REV16-style instruction on either architecture.
+instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{
+ match(Set dst (ReverseBytesUS src));
+ size(4);
+ ins_pipe( iload_mem ); // FIXME
+#ifdef AARCH64
+ format %{ "REV16_W $dst,$src" %}
+ ins_encode %{
+ // high 32 bits zeroed
+ __ rev16_w($dst$$Register, $src$$Register);
+ %}
+#else
+ format %{ "REV16 $dst,$src" %}
+ ins_encode %{
+ __ rev16($dst$$Register, $src$$Register);
+ %}
+#endif
+%}
+
+// ReverseBytesS: AArch64 uses rev16_w followed by a 16-bit sign extension;
+// 32-bit ARM uses a single REVSH.
+instruct bytes_reverse_short(iRegI dst, iRegI src) %{
+ match(Set dst (ReverseBytesS src));
+ ins_pipe( iload_mem ); // FIXME
+#ifdef AARCH64
+ format %{ "REV16_W $dst,$src\n\t"
+ "SIGN_EXT16 $dst" %}
+ size(8);
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ __ rev16_w(Rdst, $src$$Register);
+ __ sign_extend(Rdst, Rdst, 16);
+ %}
+#else
+ format %{ "REVSH $dst,$src" %}
+ size(4);
+ ins_encode %{
+ __ revsh($dst$$Register, $src$$Register);
+ %}
+#endif
+%}
+
+
+// ====================VECTOR INSTRUCTIONS=====================================
+
+// 8-byte LoadVector: loaded into a Double register with ldr_double.
+instruct loadV8(vecD dst, memoryD mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 8);
+ match(Set dst (LoadVector mem));
+ size(4);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "FLDD $mem,$dst\t! load vector (8 bytes)" %}
+ ins_pipe(floadD_mem);
+ ins_encode %{
+ FloatRegister Vdst = $dst$$FloatRegister;
+ __ ldr_double(Vdst, $mem$$Address);
+ %}
+%}
+
+// 16-byte LoadVector: loaded into a Double register pair with VLD1.
+instruct loadV16(vecX dst, memoryvld mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 16);
+ match(Set dst (LoadVector mem));
+ size(4);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "VLD1 $mem,$dst.Q\t! load vector (16 bytes)" %}
+ ins_pipe(floadD_mem); // FIXME
+ ins_encode %{
+ FloatRegister Vdst = $dst$$FloatRegister;
+ __ vld1(Vdst, $mem$$Address, MacroAssembler::VELEM_SIZE_16, 128);
+ %}
+%}
+
+// 8-byte StoreVector: the Double register is written with str_double.
+instruct storeV8(memoryD mem, vecD src) %{
+ predicate(n->as_StoreVector()->memory_size() == 8);
+ match(Set mem (StoreVector mem src));
+ size(4);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "FSTD $src,$mem\t! store vector (8 bytes)" %}
+ ins_pipe(fstoreD_mem_reg);
+ ins_encode %{
+ FloatRegister Vsrc = $src$$FloatRegister;
+ __ str_double(Vsrc, $mem$$Address);
+ %}
+%}
+
+// 16-byte StoreVector: the Double register pair is written with VST1.
+instruct storeV16(memoryvld mem, vecX src) %{
+ predicate(n->as_StoreVector()->memory_size() == 16);
+ match(Set mem (StoreVector mem src));
+ size(4);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "VST1 $src,$mem\t! store vector (16 bytes)" %}
+ ins_pipe(fstoreD_mem_reg); // FIXME
+ ins_encode %{
+ FloatRegister Vsrc = $src$$FloatRegister;
+ __ vst1(Vsrc, $mem$$Address, MacroAssembler::VELEM_SIZE_16, 128);
+ %}
+%}
+
+#ifndef AARCH64
+// Replicate scalar to packed byte values in Double register.
+// The byte is shifted to the top of $tmp and OR-ed down over the other three
+// byte positions; FMDRR then copies $tmp into both halves of $dst.
+instruct Repl8B_reg(vecD dst, iRegI src, iRegI tmp) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateB src));
+ effect(TEMP tmp);
+ size(16);
+ ins_cost(DEFAULT_COST*4);
+
+ // FIXME: could use PKH instruction instead?
+ format %{ "LSL $tmp, $src, 24 \n\t"
+ "OR $tmp, $tmp, ($tmp >> 8) \n\t"
+ "OR $tmp, $tmp, ($tmp >> 16) \n\t"
+ "FMDRR $dst,$tmp,$tmp\t" %}
+ ins_encode %{
+ Register Rtmp = $tmp$$Register;
+ __ mov(Rtmp, AsmOperand($src$$Register, lsl, 24));
+ __ orr(Rtmp, Rtmp, AsmOperand(Rtmp, lsr, 8));
+ __ orr(Rtmp, Rtmp, AsmOperand(Rtmp, lsr, 16));
+ __ fmdrr($dst$$FloatRegister, Rtmp, Rtmp);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#endif /* !AARCH64 */
+
+// Replicate scalar to packed byte values in Double register (VDUP.8),
+// selected when VM_Version::has_simd().
+instruct Repl8B_reg_simd(vecD dst, iRegI src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+ match(Set dst (ReplicateB src));
+
+ format %{ "VDUP.8 $dst,$src\t" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vdupI($dst$$FloatRegister, $src$$Register,
+ MacroAssembler::VELEM_SIZE_8, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar to packed byte values in Double register pair (VDUP.8,
+// quad form).
+instruct Repl16B_reg(vecX dst, iRegI src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (ReplicateB src));
+
+ format %{ "VDUP.8 $dst.Q,$src\t" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vdupI($dst$$FloatRegister, $src$$Register,
+ MacroAssembler::VELEM_SIZE_8, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+#ifndef AARCH64
+// Replicate scalar constant to packed byte values in Double register.
+// The replicated constant is materialized in $tmp and copied into both
+// halves of $dst; code comes from the LdReplImmI encoding class (defined
+// elsewhere), called here with arguments (4) and (1).
+instruct Repl8B_immI(vecD dst, immI src, iRegI tmp) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateB src));
+ ins_cost(DEFAULT_COST*2);
+ effect(TEMP tmp);
+ size(12);
+
+ // Removed an unbalanced ')' after Repl4($src) in the listing text.
+ format %{ "MOV $tmp, Repl4($src)\n\t"
+ "FMDRR $dst,$tmp,$tmp\t" %}
+ ins_encode( LdReplImmI(src, dst, tmp, (4), (1)) );
+ ins_pipe(loadConFD); // FIXME
+%}
+#endif /* !AARCH64 */
+
+// Replicate an unsigned 8-bit constant to packed byte values in Double
+// register (VMOV.U8), selected when VM_Version::has_simd().
+// TODO: support negative constants with MVNI?
+instruct Repl8B_immU8(vecD dst, immU8 src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+ match(Set dst (ReplicateB src));
+
+ format %{ "VMOV.U8 $dst,$src" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vmovI($dst$$FloatRegister, $src$$constant,
+ MacroAssembler::VELEM_SIZE_8, use_quad);
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+// Replicate an unsigned 8-bit constant to packed byte values in Double
+// register pair (VMOV.U8, quad form), selected when VM_Version::has_simd().
+instruct Repl16B_immU8(vecX dst, immU8 src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd());
+ match(Set dst (ReplicateB src));
+
+ format %{ "VMOV.U8 $dst.Q,$src" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vmovI($dst$$FloatRegister, $src$$constant,
+ MacroAssembler::VELEM_SIZE_8, use_quad);
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+#ifndef AARCH64
+// Replicate scalar to packed short/char values into Double register.
+// The halfword is shifted to the top of $tmp and OR-ed into the bottom half;
+// FMDRR then copies $tmp into both halves of $dst.
+instruct Repl4S_reg(vecD dst, iRegI src, iRegI tmp) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateS src));
+ effect(TEMP tmp);
+ size(12);
+ ins_cost(DEFAULT_COST*3);
+
+ // FIXME: could use PKH instruction instead?
+ format %{ "LSL $tmp, $src, 16 \n\t"
+ "OR $tmp, $tmp, ($tmp >> 16) \n\t"
+ "FMDRR $dst,$tmp,$tmp\t" %}
+ ins_encode %{
+ Register Rtmp = $tmp$$Register;
+ __ mov(Rtmp, AsmOperand($src$$Register, lsl, 16));
+ __ orr(Rtmp, Rtmp, AsmOperand(Rtmp, lsr, 16));
+ __ fmdrr($dst$$FloatRegister, Rtmp, Rtmp);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#endif /* !AARCH64 */
+
+// Replicate scalar to packed short/char values in Double register
+// (VDUP.16), selected when VM_Version::has_simd().
+instruct Repl4S_reg_simd(vecD dst, iRegI src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+ match(Set dst (ReplicateS src));
+
+ format %{ "VDUP.16 $dst,$src\t" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vdupI($dst$$FloatRegister, $src$$Register,
+ MacroAssembler::VELEM_SIZE_16, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar to packed short/char values in Double register pair
+// (VDUP.16, quad form), selected when VM_Version::has_simd().
+instruct Repl8S_reg(vecX dst, iRegI src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd());
+ match(Set dst (ReplicateS src));
+
+ format %{ "VDUP.16 $dst.Q,$src\t" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vdupI($dst$$FloatRegister, $src$$Register,
+ MacroAssembler::VELEM_SIZE_16, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+
+#ifndef AARCH64
+// Replicate scalar constant to packed short/char values in Double register.
+// The replicated constant is materialized in $tmp and copied into both
+// halves of $dst; code comes from the LdReplImmI encoding class (defined
+// elsewhere), called here with arguments (2) and (2).
+// NOTE(review): tmp is declared iRegP here while the sibling Repl8B_immI and
+// Repl2I_immI rules use iRegI -- confirm whether that is intentional.
+instruct Repl4S_immI(vecD dst, immI src, iRegP tmp) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateS src));
+ effect(TEMP tmp);
+ size(12);
+ ins_cost(DEFAULT_COST*4); // FIXME
+
+ // Removed an unbalanced ')' after Repl2($src) in the listing text.
+ format %{ "MOV $tmp, Repl2($src)\n\t"
+ "FMDRR $dst,$tmp,$tmp\t" %}
+ ins_encode( LdReplImmI(src, dst, tmp, (2), (2)) );
+ ins_pipe(loadConFD); // FIXME
+%}
+#endif /* !AARCH64 */
+
+// Replicate an unsigned 8-bit constant to packed short/char values in Double
+// register (VMOV.U16), selected when VM_Version::has_simd().
+instruct Repl4S_immU8(vecD dst, immU8 src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+ match(Set dst (ReplicateS src));
+
+ format %{ "VMOV.U16 $dst,$src" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vmovI($dst$$FloatRegister, $src$$constant,
+ MacroAssembler::VELEM_SIZE_16, use_quad);
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+// Replicate an unsigned 8-bit constant to packed short/char values in Double
+// register pair (VMOV.U16, quad form), selected when VM_Version::has_simd().
+instruct Repl8S_immU8(vecX dst, immU8 src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd());
+ match(Set dst (ReplicateS src));
+
+ format %{ "VMOV.U16 $dst.Q,$src" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vmovI($dst$$FloatRegister, $src$$constant,
+ MacroAssembler::VELEM_SIZE_16, use_quad);
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+#ifndef AARCH64
+// Replicate scalar to packed int values in Double register: one FMDRR with
+// $src supplied for both words.
+instruct Repl2I_reg(vecD dst, iRegI src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateI src));
+
+ format %{ "FMDRR $dst,$src,$src\t" %}
+ size(4);
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ __ fmdrr($dst$$FloatRegister, Rsrc, Rsrc);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar to packed int values in Double register pair: one FMDRR
+// per double register of the pair.
+instruct Repl4I_reg(vecX dst, iRegI src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateI src));
+ size(8);
+ ins_cost(DEFAULT_COST*2);
+
+ format %{ "FMDRR $dst.lo,$src,$src\n\t"
+ "FMDRR $dst.hi,$src,$src" %}
+
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ FloatRegister Vlo = $dst$$FloatRegister;
+ // two successor() steps reach the second double register of the pair
+ FloatRegister Vhi = Vlo->successor()->successor();
+ __ fmdrr(Vlo, Rsrc, Rsrc);
+ __ fmdrr(Vhi, Rsrc, Rsrc);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#endif /* !AARCH64 */
+
+// Replicate scalar to packed int values in Double register (VDUP.32),
+// selected when VM_Version::has_simd().
+instruct Repl2I_reg_simd(vecD dst, iRegI src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+ match(Set dst (ReplicateI src));
+
+ format %{ "VDUP.32 $dst.D,$src\t" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vdupI($dst$$FloatRegister, $src$$Register,
+ MacroAssembler::VELEM_SIZE_32, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar to packed int values in Double register pair (VDUP.32,
+// quad form), selected when VM_Version::has_simd().
+instruct Repl4I_reg_simd(vecX dst, iRegI src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd());
+ match(Set dst (ReplicateI src));
+
+ format %{ "VDUP.32 $dst.Q,$src\t" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vdupI($dst$$FloatRegister, $src$$Register,
+ MacroAssembler::VELEM_SIZE_32, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+
+#ifndef AARCH64
+// Replicate scalar constant to packed int values in Double register.
+// (The operand is a general immI, not only zero.) The constant is
+// materialized in $tmp and copied into both halves of $dst; code comes from
+// the LdReplImmI encoding class (defined elsewhere), called here with
+// arguments (1) and (4).
+instruct Repl2I_immI(vecD dst, immI src, iRegI tmp) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateI src));
+ effect(TEMP tmp);
+ size(12);
+ ins_cost(DEFAULT_COST*4); // FIXME
+
+ // Removed an unbalanced ')' after Repl1($src) in the listing text.
+ format %{ "MOV $tmp, Repl1($src)\n\t"
+ "FMDRR $dst,$tmp,$tmp\t" %}
+ ins_encode( LdReplImmI(src, dst, tmp, (1), (4)) );
+ ins_pipe(loadConFD); // FIXME
+%}
+#endif /* !AARCH64 */
+
+// Replicate an unsigned 8-bit constant to packed int values in Double
+// register (VMOV.I32), selected when VM_Version::has_simd().
+instruct Repl2I_immU8(vecD dst, immU8 src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+ match(Set dst (ReplicateI src));
+
+ format %{ "VMOV.I32 $dst.D,$src" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vmovI($dst$$FloatRegister, $src$$constant,
+ MacroAssembler::VELEM_SIZE_32, use_quad);
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+// Replicate an unsigned 8-bit constant to packed int values in Double
+// register pair (VMOV.I32, quad form), selected when VM_Version::has_simd().
+instruct Repl4I_immU8(vecX dst, immU8 src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd());
+ match(Set dst (ReplicateI src));
+
+ format %{ "VMOV.I32 $dst.Q,$src" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vmovI($dst$$FloatRegister, $src$$constant,
+ MacroAssembler::VELEM_SIZE_32, use_quad);
+ %}
+ ins_pipe(loadConFD); // FIXME
+%}
+
+#ifdef AARCH64
+// Replicate scalar to packed long values in Double register pair (AArch64:
+// one VDUP with 64-bit elements, quad form).
+instruct Repl2L_reg(vecX dst, iRegL src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateL src));
+ ins_cost(DEFAULT_COST*1); // FIXME
+ size(4*1);
+
+ format %{ "VDUP.2D $dst.Q,$src\t" %}
+ ins_encode %{
+ bool use_quad = true;
+ __ vdupI($dst$$FloatRegister, $src$$Register,
+ MacroAssembler::VELEM_SIZE_64, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#else /* !AARCH64 */
+// Replicate scalar to packed long values in Double register pair (32-bit
+// ARM): the lo/hi source pair is copied with FMDRR into each of the two
+// double registers of $dst.
+instruct Repl2L_reg(vecX dst, iRegL src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateL src));
+ size(8);
+ ins_cost(DEFAULT_COST*2); // FIXME
+
+ // Line separator is "\n\t" as in the other multi-line formats (was "\t\n").
+ format %{ "FMDRR $dst.D,$src.lo,$src.hi\n\t"
+ "FMDRR $dst.D.next,$src.lo,$src.hi" %}
+ ins_encode %{
+ // successor() of the source pair's first register is the high word
+ __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register->successor());
+ // two successor() steps reach the second double register of $dst
+ __ fmdrr($dst$$FloatRegister->successor()->successor(),
+ $src$$Register, $src$$Register->successor());
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+
+// Replicate scalar to packed float values in Double register, with the
+// float bits supplied in a core register: one FMDRR with $src for both words.
+instruct Repl2F_regI(vecD dst, iRegI src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateF src));
+
+ format %{ "FMDRR $dst.D,$src,$src\t" %}
+ size(4);
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ __ fmdrr($dst$$FloatRegister, Rsrc, Rsrc);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar to packed float values in Double register. Expands into
+// MoveF2I_reg_reg (float register -> core register) followed by Repl2F_regI.
+instruct Repl2F_reg_vfp(vecD dst, regF src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateF src));
+ ins_cost(DEFAULT_COST*2); // FIXME
+ size(4*2);
+
+ expand %{
+ iRegI tmp;
+ MoveF2I_reg_reg(tmp, src);
+ Repl2F_regI(dst,tmp);
+ %}
+%}
+#endif /* !AARCH64 */
+
+// Replicate scalar to packed float values in Double register (VDUP.32 from a
+// float register), selected when VM_Version::has_simd().
+instruct Repl2F_reg_simd(vecD dst, regF src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+ match(Set dst (ReplicateF src));
+ ins_cost(DEFAULT_COST); // FIXME
+ size(4);
+
+ format %{ "VDUP.32 $dst.D,$src.D\t" %}
+ ins_encode %{
+ bool use_quad = false;
+ __ vdupF($dst$$FloatRegister, $src$$FloatRegister, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+#ifndef AARCH64
+// Replicate scalar to packed float values in Double register pair. The float
+// bits are moved to $tmp with FMRS, then FMDRR fills each double register of
+// the pair.
+instruct Repl4F_reg(vecX dst, regF src, iRegI tmp) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateF src));
+ effect(TEMP tmp);
+ ins_cost(DEFAULT_COST*3); // FIXME
+ size(4*3);
+
+ format %{ "FMRS $tmp,$src\n\t"
+ "FMDRR $dst.D,$tmp,$tmp\n\t"
+ "FMDRR $dst.D.next,$tmp,$tmp\t" %}
+ ins_encode %{
+ Register Rbits = $tmp$$Register;
+ FloatRegister Vlo = $dst$$FloatRegister;
+ // two successor() steps reach the second double register of the pair
+ FloatRegister Vhi = Vlo->successor()->successor();
+ __ fmrs(Rbits, $src$$FloatRegister);
+ __ fmdrr(Vlo, Rbits, Rbits);
+ __ fmdrr(Vhi, Rbits, Rbits);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+#endif /* !AARCH64 */
+
+// Replicate scalar to packed float values in Double register pair (VDUP.32,
+// quad form), selected when VM_Version::has_simd().
+instruct Repl4F_reg_simd(vecX dst, regF src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd());
+ match(Set dst (ReplicateF src));
+ ins_cost(DEFAULT_COST); // FIXME
+ size(4);
+
+ format %{ "VDUP.32 $dst.Q,$src.D\t" %}
+ ins_encode %{
+ bool use_quad = true;
+ __ vdupF($dst$$FloatRegister, $src$$FloatRegister, use_quad);
+ %}
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+#ifndef AARCH64
+// Replicate scalar float constant to packed float values in Double register.
+// (The operand is a general immF, not only zero.) The constant is
+// materialized in $tmp and copied into both halves of $dst; code comes from
+// the LdReplImmF encoding class, defined elsewhere.
+instruct Repl2F_immI(vecD dst, immF src, iRegI tmp) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateF src));
+ effect(TEMP tmp);
+ size(12);
+ ins_cost(DEFAULT_COST*4); // FIXME
+
+ // Removed an unbalanced ')' after Repl1($src) in the listing text.
+ format %{ "MOV $tmp, Repl1($src)\n\t"
+ "FMDRR $dst,$tmp,$tmp\t" %}
+ ins_encode( LdReplImmF(src, dst, tmp) );
+ ins_pipe(loadConFD); // FIXME
+%}
+#endif /* !AARCH64 */
+
+// Replicate scalar to packed double float values in Double register pair.
+// AArch64 uses one VDUP (quad form, requires has_simd()); 32-bit ARM copies
+// $src into each double register of the pair with FCPYD.
+instruct Repl2D_reg(vecX dst, regD src) %{
+ match(Set dst (ReplicateD src));
+#ifdef AARCH64
+ predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
+ ins_cost(DEFAULT_COST*1); // FIXME
+ size(4*1);
+
+ format %{ "VDUP $dst.2D,$src\t" %}
+ ins_encode %{
+ bool use_quad = true;
+ __ vdupD($dst$$FloatRegister, $src$$FloatRegister, use_quad);
+ %}
+#else
+ predicate(n->as_Vector()->length() == 2);
+ ins_cost(DEFAULT_COST*2); // FIXME
+ size(4*2);
+
+ format %{ "FCPYD $dst.D.a,$src\n\t"
+ "FCPYD $dst.D.b,$src\t" %}
+ ins_encode %{
+ FloatRegister Vsrc = $src$$FloatRegister;
+ FloatRegister Vfirst = $dst$$FloatRegister;
+ // two successor() steps reach the second double register of the pair
+ FloatRegister Vsecond = Vfirst->successor()->successor();
+ __ fcpyd(Vfirst, Vsrc);
+ __ fcpyd(Vsecond, Vsrc);
+ %}
+#endif
+ ins_pipe(ialu_reg); // FIXME
+%}
+
+// ====================VECTOR ARITHMETIC=======================================
+
+// --------------------------------- ADD --------------------------------------
+
+// Bytes vector add, 8 elements in Double registers (VADD.I8)
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (AddVB src1 src2));
+ size(4);
+ format %{ "VADD.I8 $dst,$src1,$src2\t! add packed8B" %}
+ ins_encode %{
+ bool use_quad = false;
+ __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_8, use_quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Bytes vector add, 16 elements in Double register pairs (VADD.I8, quad form)
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (AddVB src1 src2));
+ format %{ "VADD.I8 $dst.Q,$src1.Q,$src2.Q\t! add packed16B" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_8, use_quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Chars vector add, 4 elements in Double registers (VADD.I16)
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVS src1 src2));
+ format %{ "VADD.I16 $dst,$src1,$src2\t! add packed4S" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_16, use_quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Chars vector add, 8 elements in Double register pairs (VADD.I16,
+// quad form)
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (AddVS src1 src2));
+ format %{ "VADD.I16 $dst.Q,$src1.Q,$src2.Q\t! add packed8S" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_16, use_quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector add, 2 elements in Double registers (VADD.I32)
+instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVI src1 src2));
+ format %{ "VADD.I32 $dst.D,$src1.D,$src2.D\t! add packed2I" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_32, use_quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector add, 4 elements in Double register pairs (VADD.I32, quad
+// form)
+instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVI src1 src2));
+ format %{ "VADD.I32 $dst.Q,$src1.Q,$src2.Q\t! add packed4I" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_32, use_quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector add, 2 elements in Double register pairs (VADD.I64, quad form)
+instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVL src1 src2));
+ format %{ "VADD.I64 $dst.Q,$src1.Q,$src2.Q\t! add packed2L" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_64, use_quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Floats vector add, 2 elements in Double registers (VADD.F32). Selected only
+// when VM_Version::simd_math_is_compliant().
+instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant());
+ match(Set dst (AddVF src1 src2));
+ format %{ "VADD.F32 $dst,$src1,$src2\t! add packed2F" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = false;
+ __ vaddF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VFA_SIZE_F32, use_quad);
+ %}
+ ins_pipe( faddD_reg_reg ); // FIXME
+%}
+
+#ifndef AARCH64
+// Floats vector add, 2 elements, used when SIMD math is not compliant: each
+// lane is added separately with add_float.
+instruct vadd2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
+ predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant());
+ match(Set dst (AddVF src1 src2));
+ size(4*2);
+ ins_cost(DEFAULT_COST*2); // FIXME
+
+ format %{ "FADDS $dst.a,$src1.a,$src2.a\n\t"
+ "FADDS $dst.b,$src1.b,$src2.b" %}
+ ins_encode %{
+ FloatRegister sum_a = $dst$$FloatRegister;
+ FloatRegister lhs_a = $src1$$FloatRegister;
+ FloatRegister rhs_a = $src2$$FloatRegister;
+ __ add_float(sum_a, lhs_a, rhs_a);
+ // the second lane lives in the successor register of each operand
+ __ add_float(sum_a->successor(), lhs_a->successor(), rhs_a->successor());
+ %}
+
+ ins_pipe(faddF_reg_reg); // FIXME
+%}
+#endif
+
+// Floats vector add, 4 elements in Double register pairs (VADD.F32, quad
+// form). Selected only when VM_Version::simd_math_is_compliant().
+instruct vadd4F_reg_simd(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant());
+ match(Set dst (AddVF src1 src2));
+ format %{ "VADD.F32 $dst.Q,$src1.Q,$src2.Q\t! add packed4F" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vaddF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VFA_SIZE_F32, use_quad);
+ %}
+ ins_pipe( faddD_reg_reg ); // FIXME
+%}
+
+#ifdef AARCH64
+// Doubles vector add, 2 elements (AArch64, VADD.F64 quad form). Selected only
+// when VM_Version::simd_math_is_compliant().
+instruct vadd2D_reg_simd(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant());
+ match(Set dst (AddVD src1 src2));
+ format %{ "VADD.F64 $dst.Q,$src1.Q,$src2.Q\t! add packed2D" %}
+ size(4);
+ ins_encode %{
+ bool use_quad = true;
+ __ vaddF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VFA_SIZE_F64, use_quad);
+ %}
+ ins_pipe( faddD_reg_reg ); // FIXME
+%}
+#else
+// Floats vector add, 4 elements, used when SIMD math is not compliant: the
+// four lanes occupy consecutive float registers and are added one by one
+// with add_float.
+instruct vadd4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant());
+ match(Set dst (AddVF src1 src2));
+ ins_cost(DEFAULT_COST*4); // FIXME
+ size(4*4);
+
+ format %{ "FADDS $dst.a,$src1.a,$src2.a\n\t"
+ "FADDS $dst.b,$src1.b,$src2.b\n\t"
+ "FADDS $dst.c,$src1.c,$src2.c\n\t"
+ "FADDS $dst.d,$src1.d,$src2.d" %}
+
+ ins_encode %{
+ // Lane registers, derived by walking successor() from the first register
+ FloatRegister sum0 = $dst$$FloatRegister;
+ FloatRegister sum1 = sum0->successor();
+ FloatRegister sum2 = sum1->successor();
+ FloatRegister sum3 = sum2->successor();
+ FloatRegister lhs0 = $src1$$FloatRegister;
+ FloatRegister lhs1 = lhs0->successor();
+ FloatRegister lhs2 = lhs1->successor();
+ FloatRegister lhs3 = lhs2->successor();
+ FloatRegister rhs0 = $src2$$FloatRegister;
+ FloatRegister rhs1 = rhs0->successor();
+ FloatRegister rhs2 = rhs1->successor();
+ FloatRegister rhs3 = rhs2->successor();
+ __ add_float(sum0, lhs0, rhs0);
+ __ add_float(sum1, lhs1, rhs1);
+ __ add_float(sum2, lhs2, rhs2);
+ __ add_float(sum3, lhs3, rhs3);
+ %}
+
+ ins_pipe(faddF_reg_reg); // FIXME
+%}
+
+// Doubles vector add, 2 elements (32-bit ARM): each double lane is added
+// separately with add_double.
+instruct vadd2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVD src1 src2));
+ ins_cost(DEFAULT_COST*2); // FIXME
+ size(4*2);
+
+ format %{ "FADDD $dst.a,$src1.a,$src2.a\n\t"
+ "FADDD $dst.b,$src1.b,$src2.b" %}
+
+ ins_encode %{
+ // The second double lane is two successor() steps after the first
+ FloatRegister sum0 = $dst$$FloatRegister;
+ FloatRegister sum1 = sum0->successor()->successor();
+ FloatRegister lhs0 = $src1$$FloatRegister;
+ FloatRegister lhs1 = lhs0->successor()->successor();
+ FloatRegister rhs0 = $src2$$FloatRegister;
+ FloatRegister rhs1 = rhs0->successor()->successor();
+ __ add_double(sum0, lhs0, rhs0);
+ __ add_double(sum1, lhs1, rhs1);
+ %}
+
+ ins_pipe(faddF_reg_reg); // FIXME
+%}
+#endif
+
+
+// Bytes vector sub
+// Matches SubVB for vectors of length 8; emits vsubI with VELEM_SIZE_8 in
+// the non-quad form.
+instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src1 src2));
+  size(4);
+  format %{ "VSUB.I8 $dst,$src1,$src2\t! sub packed8B" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = false;
+    __ vsubI(result, lhs, rhs, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches SubVB for vectors of length 16; emits vsubI with VELEM_SIZE_8 in
+// the quad form.
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src1 src2));
+  size(4);
+  format %{ "VSUB.I8 $dst.Q,$src1.Q,$src2.Q\t! sub packed16B" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = true;
+    __ vsubI(result, lhs, rhs, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Chars vector sub
+// Matches SubVS for vectors of length 4; emits vsubI with VELEM_SIZE_16 in
+// the non-quad form.
+instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src1 src2));
+  size(4);
+  format %{ "VSUB.I16 $dst,$src1,$src2\t! sub packed4S" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = false;
+    __ vsubI(result, lhs, rhs, MacroAssembler::VELEM_SIZE_16, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches SubVS for vectors of length 8; emits vsubI with VELEM_SIZE_16 in
+// the quad form.
+// NOTE(review): despite the 16S in the rule name, the predicate and the
+// format string both describe 8 shorts; the name is kept unchanged here.
+instruct vsub16S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src1 src2));
+  size(4);
+  format %{ "VSUB.I16 $dst.Q,$src1.Q,$src2.Q\t! sub packed8S" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = true;
+    __ vsubI(result, lhs, rhs, MacroAssembler::VELEM_SIZE_16, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector sub
+// Matches SubVI for vectors of length 2; emits vsubI with VELEM_SIZE_32 in
+// the non-quad form.
+instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVI src1 src2));
+  size(4);
+  format %{ "VSUB.I32 $dst,$src1,$src2\t! sub packed2I" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = false;
+    __ vsubI(result, lhs, rhs, MacroAssembler::VELEM_SIZE_32, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches SubVI for vectors of length 4; emits vsubI with VELEM_SIZE_32 in
+// the quad form.
+instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVI src1 src2));
+  size(4);
+  format %{ "VSUB.I32 $dst.Q,$src1.Q,$src2.Q\t! sub packed4I" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = true;
+    __ vsubI(result, lhs, rhs, MacroAssembler::VELEM_SIZE_32, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector sub
+// Matches SubVL for vectors of length 2; emits vsubI with VELEM_SIZE_64 in
+// the quad form.
+instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVL src1 src2));
+  size(4);
+  format %{ "VSUB.I64 $dst.Q,$src1.Q,$src2.Q\t! sub packed2L" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = true;
+    __ vsubI(result, lhs, rhs, MacroAssembler::VELEM_SIZE_64, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Floats vector sub
+// Matches SubVF for vectors of length 2 when
+// VM_Version::simd_math_is_compliant() holds; emits vsubF with VFA_SIZE_F32
+// in the non-quad form.
+instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant());
+  match(Set dst (SubVF src1 src2));
+  size(4);
+  format %{ "VSUB.F32 $dst,$src1,$src2\t! sub packed2F" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = false;
+    __ vsubF(result, lhs, rhs, MacroAssembler::VFA_SIZE_F32, is_quad);
+  %}
+  ins_pipe( faddF_reg_reg ); // FIXME
+%}
+
+#ifndef AARCH64
+// SubVF on two float lanes, selected when
+// VM_Version::simd_math_is_compliant() is false: one scalar sub_float per
+// lane.
+instruct vsub2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant());
+  match(Set dst (SubVF src1 src2));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  format %{ "FSUBS $dst.a,$src1.a,$src2.a\n\t"
+            "FSUBS $dst.b,$src1.b,$src2.b" %}
+
+  ins_encode %{
+    FloatRegister diff = $dst$$FloatRegister;
+    FloatRegister lhs  = $src1$$FloatRegister;
+    FloatRegister rhs  = $src2$$FloatRegister;
+    // Lane a, then lane b in the successor registers.
+    __ sub_float(diff, lhs, rhs);
+    __ sub_float(diff->successor(), lhs->successor(), rhs->successor());
+  %}
+
+  ins_pipe(faddF_reg_reg); // FIXME
+%}
+#endif
+
+
+// Matches SubVF for vectors of length 4 when
+// VM_Version::simd_math_is_compliant() holds; emits vsubF with VFA_SIZE_F32
+// in the quad form.
+instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant());
+  match(Set dst (SubVF src1 src2));
+  size(4);
+  format %{ "VSUB.F32 $dst.Q,$src1.Q,$src2.Q\t! sub packed4F" %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister lhs    = $src1$$FloatRegister;
+    FloatRegister rhs    = $src2$$FloatRegister;
+    bool is_quad = true;
+    __ vsubF(result, lhs, rhs, MacroAssembler::VFA_SIZE_F32, is_quad);
+  %}
+  ins_pipe( faddF_reg_reg ); // FIXME
+%}
+
+#ifdef AARCH64
+// Matches SubVD for vectors of length 2 when
+// VM_Version::simd_math_is_compliant() holds; emits vsubF with VFA_SIZE_F64
+// in the quad form.
+instruct vsub2D_reg_simd(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant());
+ match(Set dst (SubVD src1 src2));
+ size(4);
+ format %{ "VSUB.F64 $dst.Q,$src1.Q,$src2.Q\t! sub packed2D" %}
+ ins_encode %{
+ bool quad = true;
+ __ vsubF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ MacroAssembler::VFA_SIZE_F64, quad);
+ %}
+ ins_pipe( faddD_reg_reg ); // FIXME
+%}
+#else
+// SubVF on four float lanes, selected when
+// VM_Version::simd_math_is_compliant() is false: one scalar sub_float per
+// lane, walking successive registers.
+instruct vsub4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant());
+  match(Set dst (SubVF src1 src2));
+  size(4*4);
+  ins_cost(DEFAULT_COST*4); // FIXME
+
+  format %{ "FSUBS $dst.a,$src1.a,$src2.a\n\t"
+            "FSUBS $dst.b,$src1.b,$src2.b\n\t"
+            "FSUBS $dst.c,$src1.c,$src2.c\n\t"
+            "FSUBS $dst.d,$src1.d,$src2.d" %}
+
+  ins_encode %{
+    FloatRegister diff = $dst$$FloatRegister;
+    FloatRegister lhs  = $src1$$FloatRegister;
+    FloatRegister rhs  = $src2$$FloatRegister;
+    // Lanes a..d: step to the successor registers before every lane except
+    // the first, so successor() is taken exactly three times per operand.
+    for (int lane = 0; lane < 4; lane++) {
+      if (lane != 0) {
+        diff = diff->successor();
+        lhs  = lhs->successor();
+        rhs  = rhs->successor();
+      }
+      __ sub_float(diff, lhs, rhs);
+    }
+  %}
+
+  ins_pipe(faddF_reg_reg); // FIXME
+%}
+
+// SubVD on two double lanes using scalar VFP subtracts: one sub_double on
+// the first register of each operand and one on the register two successors
+// further on.
+instruct vsub2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src1 src2));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  format %{ "FSUBD $dst.a,$src1.a,$src2.a\n\t"
+            "FSUBD $dst.b,$src1.b,$src2.b" %}
+
+  ins_encode %{
+    FloatRegister dsta = $dst$$FloatRegister;
+    FloatRegister src1a = $src1$$FloatRegister;
+    FloatRegister src2a = $src2$$FloatRegister;
+    __ sub_double(dsta, src1a, src2a);
+    // The second lane starts two successor() steps after the first.
+    FloatRegister dstb = dsta->successor()->successor();
+    FloatRegister src1b = src1a->successor()->successor();
+    FloatRegister src2b = src2a->successor()->successor();
+    __ sub_double(dstb, src1b, src2b);
+  %}
+
+  // Double-precision subtract: use the D pipe class, matching how the 2D VFP
+  // mul/div rules pick fmulD_reg_reg/fdivD_reg_reg.
+  ins_pipe(faddD_reg_reg); // FIXME
+%}
+#endif
+
+// Shorts/Chars vector mul
+// Matches MulVS for vectors of length 4; emits vmulI with VELEM_SIZE_16 and
+// a quad argument of 0.
+instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src1 src2));
+  size(4);
+  format %{ "VMUL.I16 $dst,$src1,$src2\t! mul packed4S" %}
+  ins_encode %{
+    FloatRegister product = $dst$$FloatRegister;
+    FloatRegister lhs     = $src1$$FloatRegister;
+    FloatRegister rhs     = $src2$$FloatRegister;
+    int quad = 0;
+    __ vmulI(product, lhs, rhs, MacroAssembler::VELEM_SIZE_16, quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches MulVS for vectors of length 8; emits vmulI with VELEM_SIZE_16 and
+// a quad argument of 1.
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src1 src2));
+  size(4);
+  format %{ "VMUL.I16 $dst.Q,$src1.Q,$src2.Q\t! mul packed8S" %}
+  ins_encode %{
+    FloatRegister product = $dst$$FloatRegister;
+    FloatRegister lhs     = $src1$$FloatRegister;
+    FloatRegister rhs     = $src2$$FloatRegister;
+    int quad = 1;
+    __ vmulI(product, lhs, rhs, MacroAssembler::VELEM_SIZE_16, quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector mul
+// Matches MulVI for vectors of length 2; emits vmulI with VELEM_SIZE_32 and
+// a quad argument of 0.
+instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVI src1 src2));
+  size(4);
+  format %{ "VMUL.I32 $dst,$src1,$src2\t! mul packed2I" %}
+  ins_encode %{
+    FloatRegister product = $dst$$FloatRegister;
+    FloatRegister lhs     = $src1$$FloatRegister;
+    FloatRegister rhs     = $src2$$FloatRegister;
+    int quad = 0;
+    __ vmulI(product, lhs, rhs, MacroAssembler::VELEM_SIZE_32, quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches MulVI for vectors of length 4; emits vmulI with VELEM_SIZE_32 and
+// a quad argument of 1.
+instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src1 src2));
+  size(4);
+  format %{ "VMUL.I32 $dst.Q,$src1.Q,$src2.Q\t! mul packed4I" %}
+  ins_encode %{
+    FloatRegister product = $dst$$FloatRegister;
+    FloatRegister lhs     = $src1$$FloatRegister;
+    FloatRegister rhs     = $src2$$FloatRegister;
+    int quad = 1;
+    __ vmulI(product, lhs, rhs, MacroAssembler::VELEM_SIZE_32, quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Floats vector mul
+// Matches MulVF for vectors of length 2 when
+// VM_Version::simd_math_is_compliant() holds; emits vmulF with VFA_SIZE_F32
+// and a quad argument of 0.
+instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant());
+  match(Set dst (MulVF src1 src2));
+  size(4);
+  format %{ "VMUL.F32 $dst,$src1,$src2\t! mul packed2F" %}
+  ins_encode %{
+    FloatRegister product = $dst$$FloatRegister;
+    FloatRegister lhs     = $src1$$FloatRegister;
+    FloatRegister rhs     = $src2$$FloatRegister;
+    int quad = 0;
+    __ vmulF(product, lhs, rhs, MacroAssembler::VFA_SIZE_F32, quad);
+  %}
+  ins_pipe( fmulF_reg_reg ); // FIXME
+%}
+
+#ifndef AARCH64
+// MulVF on two float lanes, selected when
+// VM_Version::simd_math_is_compliant() is false: one scalar mul_float per
+// lane.
+instruct vmul2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant());
+  match(Set dst (MulVF src1 src2));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  format %{ "FMULS $dst.a,$src1.a,$src2.a\n\t"
+            "FMULS $dst.b,$src1.b,$src2.b" %}
+  ins_encode %{
+    FloatRegister prod_a = $dst$$FloatRegister;
+    FloatRegister lhs_a  = $src1$$FloatRegister;
+    FloatRegister rhs_a  = $src2$$FloatRegister;
+    __ mul_float(prod_a, lhs_a, rhs_a);
+    // Lane b lives in the successor of each lane-a register.
+    FloatRegister prod_b = prod_a->successor();
+    FloatRegister lhs_b  = lhs_a->successor();
+    FloatRegister rhs_b  = rhs_a->successor();
+    __ mul_float(prod_b, lhs_b, rhs_b);
+  %}
+
+  ins_pipe(fmulF_reg_reg); // FIXME
+%}
+#endif
+
+// Matches MulVF for vectors of length 4 when
+// VM_Version::simd_math_is_compliant() holds; emits vmulF with VFA_SIZE_F32
+// and a quad argument of 1.
+instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant());
+  match(Set dst (MulVF src1 src2));
+  size(4);
+  format %{ "VMUL.F32 $dst.Q,$src1.Q,$src2.Q\t! mul packed4F" %}
+  ins_encode %{
+    FloatRegister product = $dst$$FloatRegister;
+    FloatRegister lhs     = $src1$$FloatRegister;
+    FloatRegister rhs     = $src2$$FloatRegister;
+    int quad = 1;
+    __ vmulF(product, lhs, rhs, MacroAssembler::VFA_SIZE_F32, quad);
+  %}
+  ins_pipe( fmulF_reg_reg ); // FIXME
+%}
+
+#ifndef AARCH64
+// MulVF on four float lanes, selected when
+// VM_Version::simd_math_is_compliant() is false: one scalar mul_float per
+// lane, walking successive registers.
+instruct vmul4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant());
+  match(Set dst (MulVF src1 src2));
+  size(4*4);
+  ins_cost(DEFAULT_COST*4); // FIXME
+
+  format %{ "FMULS $dst.a,$src1.a,$src2.a\n\t"
+            "FMULS $dst.b,$src1.b,$src2.b\n\t"
+            "FMULS $dst.c,$src1.c,$src2.c\n\t"
+            "FMULS $dst.d,$src1.d,$src2.d" %}
+
+  ins_encode %{
+    FloatRegister prod = $dst$$FloatRegister;
+    FloatRegister lhs  = $src1$$FloatRegister;
+    FloatRegister rhs  = $src2$$FloatRegister;
+    // Lanes a..d: step to the successor registers before every lane except
+    // the first, so successor() is taken exactly three times per operand.
+    for (int lane = 0; lane < 4; lane++) {
+      if (lane != 0) {
+        prod = prod->successor();
+        lhs  = lhs->successor();
+        rhs  = rhs->successor();
+      }
+      __ mul_float(prod, lhs, rhs);
+    }
+  %}
+
+  ins_pipe(fmulF_reg_reg); // FIXME
+%}
+#endif
+
+#ifdef AARCH64
+// Matches MulVD for vectors of length 2 when VM_Version::has_simd() holds;
+// emits vmulF with VFA_SIZE_F64 in the quad form.
+instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
+  match(Set dst (MulVD src1 src2));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+
+  format %{ "FMUL.2D $dst,$src1,$src2\t! double[2]" %}
+  ins_encode %{
+    int quad = 1;
+    __ vmulF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+             MacroAssembler::VFA_SIZE_F64, quad);
+  %}
+
+  // This is a double multiply, so schedule it with the double multiply pipe
+  // class (as vmul2D_reg_vfp does) rather than the float divide class.
+  ins_pipe(fmulD_reg_reg); // FIXME
+%}
+#else
+// MulVD on two double lanes using scalar VFP multiplies: one mul_double on
+// the first register of each operand and one on the register two successors
+// further on.
+instruct vmul2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src1 src2));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  format %{ "FMULD $dst.D.a,$src1.D.a,$src2.D.a\n\t"
+            "FMULD $dst.D.b,$src1.D.b,$src2.D.b" %}
+  ins_encode %{
+    FloatRegister prod = $dst$$FloatRegister;
+    FloatRegister lhs  = $src1$$FloatRegister;
+    FloatRegister rhs  = $src2$$FloatRegister;
+    __ mul_double(prod, lhs, rhs);
+    // The second lane starts two successor() steps after the first.
+    __ mul_double(prod->successor()->successor(),
+                  lhs->successor()->successor(),
+                  rhs->successor()->successor());
+  %}
+
+  ins_pipe(fmulD_reg_reg); // FIXME
+%}
+#endif
+
+
+// Floats vector div
+// DivVF on two float lanes. AARCH64 builds emit a single vdivF; other
+// builds emit one scalar div_float per lane.
+instruct vdiv2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVF src1 src2));
+#ifdef AARCH64
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+
+  format %{ "FDIV.2S $dst,$src1,$src2\t! float[2]" %}
+  ins_encode %{
+    FloatRegister quot     = $dst$$FloatRegister;
+    FloatRegister dividend = $src1$$FloatRegister;
+    FloatRegister divisor  = $src2$$FloatRegister;
+    int quad = 0;
+    __ vdivF(quot, dividend, divisor, MacroAssembler::VFA_SIZE_F32, quad);
+  %}
+
+  ins_pipe(fdivF_reg_reg); // FIXME
+#else
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  format %{ "FDIVS $dst.a,$src1.a,$src2.a\n\t"
+            "FDIVS $dst.b,$src1.b,$src2.b" %}
+  ins_encode %{
+    FloatRegister quot_a     = $dst$$FloatRegister;
+    FloatRegister dividend_a = $src1$$FloatRegister;
+    FloatRegister divisor_a  = $src2$$FloatRegister;
+    __ div_float(quot_a, dividend_a, divisor_a);
+    // Lane b lives in the successor of each lane-a register.
+    FloatRegister quot_b     = quot_a->successor();
+    FloatRegister dividend_b = dividend_a->successor();
+    FloatRegister divisor_b  = divisor_a->successor();
+    __ div_float(quot_b, dividend_b, divisor_b);
+  %}
+
+  ins_pipe(fdivF_reg_reg); // FIXME
+#endif
+%}
+
+// DivVF on four float lanes. AARCH64 builds emit a single quad vdivF; other
+// builds emit one scalar div_float per lane, walking successive registers.
+instruct vdiv4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src1 src2));
+#ifdef AARCH64
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+
+  format %{ "FDIV.4S $dst,$src1,$src2\t! float[4]" %}
+  ins_encode %{
+    FloatRegister quot     = $dst$$FloatRegister;
+    FloatRegister dividend = $src1$$FloatRegister;
+    FloatRegister divisor  = $src2$$FloatRegister;
+    int quad = 1;
+    __ vdivF(quot, dividend, divisor, MacroAssembler::VFA_SIZE_F32, quad);
+  %}
+
+  ins_pipe(fdivF_reg_reg); // FIXME
+#else
+  size(4*4);
+  ins_cost(DEFAULT_COST*4); // FIXME
+
+  format %{ "FDIVS $dst.a,$src1.a,$src2.a\n\t"
+            "FDIVS $dst.b,$src1.b,$src2.b\n\t"
+            "FDIVS $dst.c,$src1.c,$src2.c\n\t"
+            "FDIVS $dst.d,$src1.d,$src2.d" %}
+
+  ins_encode %{
+    FloatRegister quot     = $dst$$FloatRegister;
+    FloatRegister dividend = $src1$$FloatRegister;
+    FloatRegister divisor  = $src2$$FloatRegister;
+    // Lanes a..d: step to the successor registers before every lane except
+    // the first, so successor() is taken exactly three times per operand.
+    for (int lane = 0; lane < 4; lane++) {
+      if (lane != 0) {
+        quot     = quot->successor();
+        dividend = dividend->successor();
+        divisor  = divisor->successor();
+      }
+      __ div_float(quot, dividend, divisor);
+    }
+  %}
+
+  ins_pipe(fdivF_reg_reg); // FIXME
+#endif
+%}
+
+#ifdef AARCH64
+// Matches DivVD for vectors of length 2 when VM_Version::has_simd() holds;
+// emits vdivF with VFA_SIZE_F64 in the quad form.
+instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
+  match(Set dst (DivVD src1 src2));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+
+  format %{ "FDIV.2D $dst,$src1,$src2\t! double[2]" %}
+  ins_encode %{
+    int quad = 1;
+    __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+             MacroAssembler::VFA_SIZE_F64, quad);
+  %}
+
+  // Double-precision divide: use the D pipe class, as vdiv2D_reg_vfp does.
+  ins_pipe(fdivD_reg_reg); // FIXME
+%}
+#else
+// DivVD on two double lanes using scalar VFP divides: one div_double on the
+// first register of each operand and one on the register two successors
+// further on.
+instruct vdiv2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src1 src2));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  format %{ "FDIVD $dst.D.a,$src1.D.a,$src2.D.a\n\t"
+            "FDIVD $dst.D.b,$src1.D.b,$src2.D.b" %}
+  ins_encode %{
+    FloatRegister quot     = $dst$$FloatRegister;
+    FloatRegister dividend = $src1$$FloatRegister;
+    FloatRegister divisor  = $src2$$FloatRegister;
+    __ div_double(quot, dividend, divisor);
+    // The second lane starts two successor() steps after the first.
+    __ div_double(quot->successor()->successor(),
+                  dividend->successor()->successor(),
+                  divisor->successor()->successor());
+  %}
+
+  ins_pipe(fdivD_reg_reg); // FIXME
+%}
+#endif
+
+// --------------------------------- NEG --------------------------------------
+
+// Byte-wise negate for vectors of 8 bytes. This rule has no match clause,
+// only an effect declaration (dst defined, src used).
+instruct vneg8B_reg(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  effect(DEF dst, USE src);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{ "VNEG.S8 $dst.D,$src.D\t! neg packed8B" %}
+  ins_encode %{
+    FloatRegister negated = $dst$$FloatRegister;
+    FloatRegister input   = $src$$FloatRegister;
+    bool is_quad = false;
+    __ vnegI(negated, input, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Byte-wise negate for vectors of 16 bytes. This rule has no match clause,
+// only an effect declaration (dst defined, src used).
+instruct vneg16B_reg(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  effect(DEF dst, USE src);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{ "VNEG.S8 $dst.Q,$src.Q\t! neg packed16B" %}
+  ins_encode %{
+    // Quad form of the same vnegI call that vneg8B_reg makes.
+    bool quad = true;
+    __ vnegI($dst$$FloatRegister, $src$$FloatRegister,
+             MacroAssembler::VELEM_SIZE_8, quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// ------------------------------ Shift ---------------------------------------
+
+// Left-shift count for vectors of 8 bytes (needs VM_Version::has_simd()):
+// matches LShiftCntV and expands to Repl8B_reg_simd(dst, cnt).
+instruct vslcntD(vecD dst, iRegI cnt) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+ match(Set dst (LShiftCntV cnt));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ Repl8B_reg_simd(dst, cnt);
+ %}
+%}
+
+// Left-shift count for vectors of 16 bytes (needs VM_Version::has_simd()):
+// matches LShiftCntV and expands to Repl16B_reg(dst, cnt).
+instruct vslcntX(vecX dst, iRegI cnt) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd());
+ match(Set dst (LShiftCntV cnt));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ Repl16B_reg(dst, cnt);
+ %}
+%}
+
+// Low bits of vector "shift" elements are used, so it
+// doesn't matter if we treat it as ints or bytes here.
+// Right-shift count for vectors of 8 bytes (needs VM_Version::has_simd()):
+// duplicates cnt into every byte of dst and then negates dst in place.
+instruct vsrcntD(vecD dst, iRegI cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd());
+  match(Set dst (RShiftCntV cnt));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  format %{ "VDUP.8 $dst.D,$cnt\n\t"
+            "VNEG.S8 $dst.D,$dst.D\t! neg packed8B" %}
+  ins_encode %{
+    FloatRegister counts = $dst$$FloatRegister;
+    bool is_quad = false;
+    __ vdupI(counts, $cnt$$Register, MacroAssembler::VELEM_SIZE_8, is_quad);
+    __ vnegI(counts, counts, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Right-shift count for vectors of 16 bytes (needs VM_Version::has_simd()):
+// duplicates cnt into every byte of dst and then negates dst in place.
+instruct vsrcntX(vecX dst, iRegI cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd());
+  match(Set dst (RShiftCntV cnt));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+  format %{ "VDUP.8 $dst.Q,$cnt\n\t"
+            "VNEG.S8 $dst.Q,$dst.Q\t! neg packed16B" %}
+  ins_encode %{
+    FloatRegister counts = $dst$$FloatRegister;
+    bool is_quad = true;
+    __ vdupI(counts, $cnt$$Register, MacroAssembler::VELEM_SIZE_8, is_quad);
+    __ vnegI(counts, counts, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Byte vector logical left/right shift based on sign
+// Helper rule without a match clause (effects only); vsl8B_reg expands to
+// it. Emits vshlUI with VELEM_SIZE_8 in the non-quad form.
+instruct vsh8B_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U8 $dst.D,$src.D,$shift.D\t! logical left/right shift packed8B"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlUI(result, counts, value, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule without a match clause (effects only); vsl16B_reg expands to
+// it. Emits vshlUI with VELEM_SIZE_8 in the quad form.
+instruct vsh16B_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U8 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed16B"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlUI(result, counts, value, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Char vector logical left/right shift based on sign
+// Helper rule without a match clause (effects only); vsl4S_reg expands to
+// it. Emits vshlUI with VELEM_SIZE_16 in the non-quad form.
+instruct vsh4S_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U16 $dst.D,$src.D,$shift.D\t! logical left/right shift packed4S"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlUI(result, counts, value, MacroAssembler::VELEM_SIZE_16, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule without a match clause (effects only); vsl8S_reg expands to
+// it. Emits vshlUI with VELEM_SIZE_16 in the quad form.
+instruct vsh8S_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U16 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed8S"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlUI(result, counts, value, MacroAssembler::VELEM_SIZE_16, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector logical left/right shift based on sign
+// Helper rule without a match clause (effects only); vsl2I_reg expands to
+// it. Emits vshlUI with VELEM_SIZE_32 in the non-quad form.
+instruct vsh2I_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U32 $dst.D,$src.D,$shift.D\t! logical left/right shift packed2I"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlUI(result, counts, value, MacroAssembler::VELEM_SIZE_32, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule without a match clause (effects only); vsl4I_reg expands to
+// it. Emits vshlUI with VELEM_SIZE_32 in the quad form.
+instruct vsh4I_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U32 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed4I"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlUI(result, counts, value, MacroAssembler::VELEM_SIZE_32, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector logical left/right shift based on sign
+// Helper rule without a match clause (effects only); vsl2L_reg expands to
+// it. Emits vshlUI with VELEM_SIZE_64 in the quad form.
+instruct vsh2L_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U64 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed2L"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlUI(result, counts, value, MacroAssembler::VELEM_SIZE_64, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// ------------------------------ LeftShift -----------------------------------
+
+// Byte vector left shift
+// Matches LShiftVB with a vector shift operand for vectors of length 8 and
+// expands to vsh8B_reg(dst, src, shift).
+instruct vsl8B_reg(vecD dst, vecD src, vecD shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVB src shift));
+ size(4*1);
+ ins_cost(DEFAULT_COST*1); // FIXME
+ expand %{
+ vsh8B_reg(dst, src, shift);
+ %}
+%}
+
+// Matches LShiftVB with a vector shift operand for vectors of length 16 and
+// expands to vsh16B_reg(dst, src, shift).
+instruct vsl16B_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVB src shift));
+ size(4*1);
+ ins_cost(DEFAULT_COST*1); // FIXME
+ expand %{
+ vsh16B_reg(dst, src, shift);
+ %}
+%}
+
+// Matches LShiftVB with an immediate shift for vectors of length 8; emits
+// vshli with an element size of 8 in the non-quad form.
+instruct vsl8B_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVB src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I8 $dst.D,$src.D,$shift\t! logical left shift packed8B"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    __ vshli(result, value, 8, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches LShiftVB with an immediate shift for vectors of length 16; emits
+// vshli with an element size of 8 in the quad form.
+instruct vsl16B_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVB src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I8 $dst.Q,$src.Q,$shift\t! logical left shift packed16B"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    __ vshli(result, value, 8, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Chars vector logical left/right shift
+// Matches both LShiftVS and URShiftVS with a vector shift operand for
+// vectors of length 4 and expands to vsh4S_reg(dst, src, shift).
+instruct vsl4S_reg(vecD dst, vecD src, vecD shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS src shift));
+ match(Set dst (URShiftVS src shift));
+ size(4*1);
+ ins_cost(DEFAULT_COST*1); // FIXME
+ expand %{
+ vsh4S_reg(dst, src, shift);
+ %}
+%}
+
+// Matches both LShiftVS and URShiftVS with a vector shift operand for
+// vectors of length 8 and expands to vsh8S_reg(dst, src, shift).
+instruct vsl8S_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS src shift));
+ match(Set dst (URShiftVS src shift));
+ size(4*1);
+ ins_cost(DEFAULT_COST*1); // FIXME
+ expand %{
+ vsh8S_reg(dst, src, shift);
+ %}
+%}
+
+// Matches LShiftVS with an immediate shift for vectors of length 4; emits
+// vshli with an element size of 16 in the non-quad form.
+instruct vsl4S_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I16 $dst.D,$src.D,$shift\t! logical left shift packed4S"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    __ vshli(result, value, 16, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches LShiftVS with an immediate shift for vectors of length 8; emits
+// vshli with an element size of 16 in the quad form.
+instruct vsl8S_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I16 $dst.Q,$src.Q,$shift\t! logical left shift packed8S"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    __ vshli(result, value, 16, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector logical left/right shift
+// Matches both LShiftVI and URShiftVI with a vector shift operand for
+// vectors of length 2 (needs VM_Version::has_simd()) and expands to
+// vsh2I_reg(dst, src, shift).
+instruct vsl2I_reg(vecD dst, vecD src, vecD shift) %{
+ predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
+ match(Set dst (LShiftVI src shift));
+ match(Set dst (URShiftVI src shift));
+ size(4*1);
+ ins_cost(DEFAULT_COST*1); // FIXME
+ expand %{
+ vsh2I_reg(dst, src, shift);
+ %}
+%}
+
+// Matches both LShiftVI and URShiftVI with a vector shift operand for
+// vectors of length 4 (needs VM_Version::has_simd()) and expands to
+// vsh4I_reg(dst, src, shift).
+instruct vsl4I_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 4 && VM_Version::has_simd());
+ match(Set dst (LShiftVI src shift));
+ match(Set dst (URShiftVI src shift));
+ size(4*1);
+ ins_cost(DEFAULT_COST*1); // FIXME
+ expand %{
+ vsh4I_reg(dst, src, shift);
+ %}
+%}
+
+// Matches LShiftVI with an immediate shift for vectors of length 2 (needs
+// VM_Version::has_simd()); emits vshli with an element size of 32 in the
+// non-quad form.
+instruct vsl2I_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
+  match(Set dst (LShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I32 $dst.D,$src.D,$shift\t! logical left shift packed2I"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    __ vshli(result, value, 32, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches LShiftVI with an immediate shift for vectors of length 4 (needs
+// VM_Version::has_simd()); emits vshli with an element size of 32 in the
+// quad form.
+instruct vsl4I_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::has_simd());
+  match(Set dst (LShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I32 $dst.Q,$src.Q,$shift\t! logical left shift packed4I"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    __ vshli(result, value, 32, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector logical left/right shift
+// Matches both LShiftVL and URShiftVL with a vector shift operand for
+// vectors of length 2 and expands to vsh2L_reg(dst, src, shift).
+instruct vsl2L_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVL src shift));
+ match(Set dst (URShiftVL src shift));
+ size(4*1);
+ ins_cost(DEFAULT_COST*1); // FIXME
+ expand %{
+ vsh2L_reg(dst, src, shift);
+ %}
+%}
+
+// Matches LShiftVL with an immediate shift for vectors of length 2; emits
+// vshli with an element size of 64 in the quad form.
+instruct vsl2L_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I64 $dst.Q,$src.Q,$shift\t! logical left shift packed2L"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    __ vshli(result, value, 64, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// ----------------------- LogicalRightShift -----------------------------------
+
+// A Bytes/Shorts vector logical right shift produces an incorrect Java result
+// for negative data, because Java code converts a short value into an int
+// with sign extension before the shift.
+
+// Chars vector logical right shift
+// Matches URShiftVS with an immediate shift for vectors of length 4; emits
+// vshrUI with an element size of 16 in the non-quad form.
+instruct vsrl4S_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U16 $dst.D,$src.D,$shift\t! logical right shift packed4S"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    __ vshrUI(result, value, 16, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches URShiftVS with an immediate shift for vectors of length 8; emits
+// vshrUI with an element size of 16 in the quad form.
+instruct vsrl8S_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U16 $dst.Q,$src.Q,$shift\t! logical right shift packed8S"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    __ vshrUI(result, value, 16, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector logical right shift
+// Matches URShiftVI with an immediate shift for vectors of length 2 (needs
+// VM_Version::has_simd()); emits vshrUI with an element size of 32 in the
+// non-quad form.
+instruct vsrl2I_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
+  match(Set dst (URShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U32 $dst.D,$src.D,$shift\t! logical right shift packed2I"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    __ vshrUI(result, value, 32, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches URShiftVI with an immediate shift for vectors of length 4 (needs
+// VM_Version::has_simd()); emits vshrUI with an element size of 32 in the
+// quad form.
+instruct vsrl4I_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::has_simd());
+  match(Set dst (URShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U32 $dst.Q,$src.Q,$shift\t! logical right shift packed4I"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    __ vshrUI(result, value, 32, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector logical right shift
+// Matches URShiftVL with an immediate shift for vectors of length 2; emits
+// vshrUI with an element size of 64 in the quad form.
+instruct vsrl2L_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U64 $dst.Q,$src.Q,$shift\t! logical right shift packed2L"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    __ vshrUI(result, value, 64, $shift$$constant, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// ------------------- ArithmeticRightShift -----------------------------------
+
+// Bytes vector arithmetic left/right shift based on sign
+// Helper rule without a match clause (effects only), presumably the target
+// of expand rules elsewhere in the file. Emits vshlSI with VELEM_SIZE_8 in
+// the non-quad form.
+instruct vsha8B_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S8 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed8B"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlSI(result, counts, value, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule without a match clause (effects only), presumably the target
+// of expand rules elsewhere in the file. Emits vshlSI with VELEM_SIZE_8 in
+// the quad form.
+instruct vsha16B_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S8 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed16B"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlSI(result, counts, value, MacroAssembler::VELEM_SIZE_8, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts vector arithmetic left/right shift based on sign
+// Helper rule without a match clause (effects only), presumably the target
+// of expand rules elsewhere in the file. Emits vshlSI with VELEM_SIZE_16 in
+// the non-quad form.
+instruct vsha4S_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S16 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed4S"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = false;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlSI(result, counts, value, MacroAssembler::VELEM_SIZE_16, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule without a match clause (effects only), presumably the target
+// of expand rules elsewhere in the file. Emits vshlSI with VELEM_SIZE_16 in
+// the quad form.
+instruct vsha8S_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S16 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed8S"
+  %}
+  ins_encode %{
+    FloatRegister result = $dst$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    FloatRegister value  = $src$$FloatRegister;
+    bool is_quad = true;
+    // The call takes the shift-count register ahead of the source register.
+    __ vshlSI(result, counts, value, MacroAssembler::VELEM_SIZE_16, is_quad);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector arithmetic left/right shift based on sign
+// Match-less helper rule used by the expand block of vsra2I_reg.
+// Emits a signed VSHL by register on 32-bit lanes of a vecD operand.
+instruct vsha2I_reg(vecD dst, vecD src, vecD shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ effect(DEF dst, USE src, USE shift);
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{ "VSHL.S32 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed2I" %}
+ ins_encode %{
+ // Final argument is the quad flag: false for the vecD form.
+ __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_32, false);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Match-less helper rule used by the expand block of vsra4I_reg.
+// Emits a signed VSHL by register on 32-bit lanes of a vecX operand.
+instruct vsha4I_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ effect(DEF dst, USE src, USE shift);
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{ "VSHL.S32 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed4I" %}
+ ins_encode %{
+ // Final argument is the quad flag: true for the vecX form.
+ __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_32, true);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector arithmetic left/right shift based on sign
+// Match-less helper rule used by the expand block of vsra2L_reg.
+// Emits a signed VSHL by register on 64-bit lanes of a vecX operand.
+instruct vsha2L_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ effect(DEF dst, USE src, USE shift);
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{ "VSHL.S64 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed2L" %}
+ ins_encode %{
+ // Final argument is the quad flag: true for the vecX form.
+ __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister,
+ MacroAssembler::VELEM_SIZE_64, true);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Byte vector arithmetic right shift
+
+// Matches RShiftVB on an 8-element vecD with the shift count in a vector
+// register. Emits nothing itself: it expands to vsha8B_reg, which carries
+// the encoding.
+instruct vsra8B_reg(vecD dst, vecD src, vecD shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVB src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ vsha8B_reg(dst, src, shift);
+ %}
+%}
+
+// Matches RShiftVB on a 16-element vecX with the shift count in a vector
+// register. Emits nothing itself: it expands to vsha16B_reg, which carries
+// the encoding.
+// NOTE(review): the name says "vsrl" although this is the arithmetic
+// (RShiftVB) rule and its 8-element sibling is vsra8B_reg; the name is left
+// unchanged here.
+instruct vsrl16B_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (RShiftVB src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ vsha16B_reg(dst, src, shift);
+ %}
+%}
+
+// Arithmetic right shift of an 8-element byte vector (vecD) by an immediate
+// count: matches RShiftVB and emits the signed immediate shift via vshrSI.
+// NOTE(review): the name says "vsrl" although the rule is arithmetic; the
+// name is left unchanged here.
+instruct vsrl8B_immI(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVB src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{
+ "VSHR.S8 $dst.D,$src.D,$shift\t! arithmetic right shift packed8B"
+ %}
+ ins_encode %{
+ bool quad = false;
+ __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 8, $shift$$constant,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Arithmetic right shift of a 16-element byte vector (vecX) by an immediate
+// count: matches RShiftVB and emits the signed immediate shift via vshrSI.
+// NOTE(review): the name says "vsrl" although the rule is arithmetic; the
+// name is left unchanged here.
+instruct vsrl16B_immI(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (RShiftVB src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{
+ "VSHR.S8 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed16B"
+ %}
+ ins_encode %{
+ bool quad = true;
+ __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 8, $shift$$constant,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts vector arithmetic right shift
+// Matches RShiftVS on a 4-element vecD with the shift count in a vector
+// register. Emits nothing itself: it expands to vsha4S_reg.
+instruct vsra4S_reg(vecD dst, vecD src, vecD shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVS src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ vsha4S_reg(dst, src, shift);
+ %}
+%}
+
+// Matches RShiftVS on an 8-element vecX with the shift count in a vector
+// register. Emits nothing itself: it expands to vsha8S_reg.
+instruct vsra8S_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVS src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ vsha8S_reg(dst, src, shift);
+ %}
+%}
+
+// Arithmetic right shift of a 4-element short vector (vecD) by an immediate
+// count: matches RShiftVS and emits the signed immediate shift via vshrSI.
+instruct vsra4S_immI(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVS src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{
+ "VSHR.S16 $dst.D,$src.D,$shift\t! arithmetic right shift packed4S"
+ %}
+ ins_encode %{
+ bool quad = false;
+ __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 16, $shift$$constant,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Arithmetic right shift of an 8-element short vector (vecX) by an immediate
+// count: matches RShiftVS and emits the signed immediate shift via vshrSI.
+instruct vsra8S_immI(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVS src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{
+ "VSHR.S16 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed8S"
+ %}
+ ins_encode %{
+ bool quad = true;
+ __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 16, $shift$$constant,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector arithmetic right shift
+// Matches RShiftVI on a 2-element vecD with the shift count in a vector
+// register. Emits nothing itself: it expands to vsha2I_reg.
+instruct vsra2I_reg(vecD dst, vecD src, vecD shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVI src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ vsha2I_reg(dst, src, shift);
+ %}
+%}
+
+// Matches RShiftVI on a 4-element vecX with the shift count in a vector
+// register. Emits nothing itself: it expands to vsha4I_reg.
+instruct vsra4I_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVI src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ vsha4I_reg(dst, src, shift);
+ %}
+%}
+
+// Arithmetic right shift of a 2-element int vector (vecD) by an immediate
+// count: matches RShiftVI and emits the signed immediate shift via vshrSI.
+instruct vsra2I_immI(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVI src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{
+ "VSHR.S32 $dst.D,$src.D,$shift\t! arithmetic right shift packed2I"
+ %}
+ ins_encode %{
+ bool quad = false;
+ __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 32, $shift$$constant,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Arithmetic right shift of a 4-element int vector (vecX) by an immediate
+// count: matches RShiftVI and emits the signed immediate shift via vshrSI.
+instruct vsra4I_immI(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVI src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{
+ "VSHR.S32 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed4I"
+ %}
+ ins_encode %{
+ bool quad = true;
+ __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 32, $shift$$constant,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector arithmetic right shift
+// Matches RShiftVL on a 2-element vecX with the shift count in a vector
+// register. Emits nothing itself: it expands to vsha2L_reg.
+instruct vsra2L_reg(vecX dst, vecX src, vecX shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVL src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ expand %{
+ vsha2L_reg(dst, src, shift);
+ %}
+%}
+
+// Arithmetic right shift of a 2-element long vector (vecX) by an immediate
+// count: matches RShiftVL and emits the signed immediate shift via vshrSI.
+instruct vsra2L_immI(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVL src shift));
+ size(4);
+ ins_cost(DEFAULT_COST); // FIXME
+ format %{
+ "VSHR.S64 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed2L"
+ %}
+ ins_encode %{
+ bool quad = true;
+ __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 64, $shift$$constant,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// --------------------------------- AND --------------------------------------
+
+// Bitwise AND of two 8-byte vectors; selected by vector size in bytes, not
+// by element count.
+instruct vandD(vecD dst, vecD src1, vecD src2) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (AndV src1 src2));
+ format %{ "VAND $dst.D,$src1.D,$src2.D\t! and vectors (8 bytes)" %}
+ ins_encode %{
+ // Final argument is the quad flag: false for the vecD form.
+ __ vandI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ false);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Bitwise AND of two 16-byte vectors; selected by vector size in bytes, not
+// by element count.
+instruct vandX(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (AndV src1 src2));
+ format %{ "VAND $dst.Q,$src1.Q,$src2.Q\t! and vectors (16 bytes)" %}
+ ins_encode %{
+ // Final argument is the quad flag: true for the vecX form.
+ __ vandI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ true);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// --------------------------------- OR ---------------------------------------
+
+// Bitwise OR of two 8-byte vectors; selected by vector size in bytes, not
+// by element count.
+instruct vorD(vecD dst, vecD src1, vecD src2) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (OrV src1 src2));
+ format %{ "VOR $dst.D,$src1.D,$src2.D\t! or vectors (8 bytes)" %}
+ ins_encode %{
+ bool quad = false;
+ __ vorI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Bitwise OR of two 16-byte vectors; selected by vector size in bytes, not
+// by element count.
+instruct vorX(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (OrV src1 src2));
+ format %{ "VOR $dst.Q,$src1.Q,$src2.Q\t! or vectors (16 bytes)" %}
+ ins_encode %{
+ bool quad = true;
+ __ vorI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// --------------------------------- XOR --------------------------------------
+
+// Bitwise XOR of two 8-byte vectors; selected by vector size in bytes, not
+// by element count.
+instruct vxorD(vecD dst, vecD src1, vecD src2) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (XorV src1 src2));
+ format %{ "VXOR $dst.D,$src1.D,$src2.D\t! xor vectors (8 bytes)" %}
+ ins_encode %{
+ bool quad = false;
+ __ vxorI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Bitwise XOR of two 16-byte vectors; selected by vector size in bytes, not
+// by element count.
+instruct vxorX(vecX dst, vecX src1, vecX src2) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (XorV src1 src2));
+ format %{ "VXOR $dst.Q,$src1.Q,$src2.Q\t! xor vectors (16 bytes)" %}
+ ins_encode %{
+ bool quad = true;
+ __ vxorI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+ quad);
+ %}
+ ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch ( root_instr_name [preceding_instruction]* );
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+// [, ...] );
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser. An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == EAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(eRegI dst, eRegI src) %{
+// match(Set dst (CopyI src));
+// %}
+//
+// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+// match(Set dst (AddI dst src));
+// effect(KILL cr);
+// %}
+//
+// // Change (inc mov) to lea
+// peephole %{
+// // increment preceded by register-register move
+// peepmatch ( incI_eReg movI );
+// // require that the destination register of the increment
+// // match the destination register of the move
+// peepconstraint ( 0.dst == 1.dst );
+// // construct a replacement instruction that sets
+// // the destination to ( move's source register + one )
+// peepreplace ( incI_eReg_immI1( 0.dst 1.src 0.src ) );
+// %}
+//
+
+// // Change load of spilled value to only a spill
+// instruct storeI(memory mem, eRegI src) %{
+// match(Set mem (StoreI mem src));
+// %}
+//
+// instruct loadI(eRegI dst, memory mem) %{
+// match(Set dst (LoadI mem));
+// %}
+//
+// peephole %{
+// peepmatch ( loadI storeI );
+// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
+// peepreplace ( storeI( 1.mem 1.mem 1.src ) );
+// %}
+
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// ARM will probably not have any of these rules due to RISC instruction set.
+
+//----------PIPELINE-----------------------------------------------------------
+// Rules which define the behavior of the target architectures pipeline.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/arm_32.ad Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,586 @@
+//
+// Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+// ARM Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def" name ( register save type, C convention save type,
+// ideal register type, encoding, vm name );
+// Register Save Types:
+//
+// NS = No-Save: The register allocator assumes that these registers
+// can be used without saving upon entry to the method, &
+// that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call: The register allocator assumes that these registers
+// can be used without saving upon entry to the method,
+// but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, but they do not need to be saved at call
+// sites.
+//
+// AS = Always-Save: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+
+// ----------------------------
+// Integer/Long Registers
+// ----------------------------
+
+// Core registers R0-R15; the encoding equals the register number.
+// Columns: Java save type, C save type, ideal type, encoding, VMReg.
+// R10 and R11 are NS for Java but SOE for C; R13 and R15 are NS in both.
+reg_def R_R0 (SOC, SOC, Op_RegI, 0, R(0)->as_VMReg());
+reg_def R_R1 (SOC, SOC, Op_RegI, 1, R(1)->as_VMReg());
+reg_def R_R2 (SOC, SOC, Op_RegI, 2, R(2)->as_VMReg());
+reg_def R_R3 (SOC, SOC, Op_RegI, 3, R(3)->as_VMReg());
+reg_def R_R4 (SOC, SOE, Op_RegI, 4, R(4)->as_VMReg());
+reg_def R_R5 (SOC, SOE, Op_RegI, 5, R(5)->as_VMReg());
+reg_def R_R6 (SOC, SOE, Op_RegI, 6, R(6)->as_VMReg());
+reg_def R_R7 (SOC, SOE, Op_RegI, 7, R(7)->as_VMReg());
+reg_def R_R8 (SOC, SOE, Op_RegI, 8, R(8)->as_VMReg());
+reg_def R_R9 (SOC, SOE, Op_RegI, 9, R(9)->as_VMReg());
+reg_def R_R10(NS, SOE, Op_RegI, 10, R(10)->as_VMReg());
+reg_def R_R11(NS, SOE, Op_RegI, 11, R(11)->as_VMReg());
+reg_def R_R12(SOC, SOC, Op_RegI, 12, R(12)->as_VMReg());
+reg_def R_R13(NS, NS, Op_RegI, 13, R(13)->as_VMReg());
+reg_def R_R14(SOC, SOC, Op_RegI, 14, R(14)->as_VMReg());
+reg_def R_R15(NS, NS, Op_RegI, 15, R(15)->as_VMReg());
+
+// ----------------------------
+// Float/Double Registers
+// ----------------------------
+
+// Float Registers
+
+// Single-precision registers S0-S31, encodings 0-31.
+// S0-S15 are SOC in both conventions; S16-S31 are SOC for Java, SOE for C.
+// NOTE(review): S1-S6 are referenced through the S1_reg..S6_reg names while
+// the others use plain Sn; presumably the plain names are unavailable for
+// those six - confirm against the register declarations.
+reg_def R_S0 ( SOC, SOC, Op_RegF, 0, S0->as_VMReg());
+reg_def R_S1 ( SOC, SOC, Op_RegF, 1, S1_reg->as_VMReg());
+reg_def R_S2 ( SOC, SOC, Op_RegF, 2, S2_reg->as_VMReg());
+reg_def R_S3 ( SOC, SOC, Op_RegF, 3, S3_reg->as_VMReg());
+reg_def R_S4 ( SOC, SOC, Op_RegF, 4, S4_reg->as_VMReg());
+reg_def R_S5 ( SOC, SOC, Op_RegF, 5, S5_reg->as_VMReg());
+reg_def R_S6 ( SOC, SOC, Op_RegF, 6, S6_reg->as_VMReg());
+reg_def R_S7 ( SOC, SOC, Op_RegF, 7, S7->as_VMReg());
+reg_def R_S8 ( SOC, SOC, Op_RegF, 8, S8->as_VMReg());
+reg_def R_S9 ( SOC, SOC, Op_RegF, 9, S9->as_VMReg());
+reg_def R_S10( SOC, SOC, Op_RegF, 10,S10->as_VMReg());
+reg_def R_S11( SOC, SOC, Op_RegF, 11,S11->as_VMReg());
+reg_def R_S12( SOC, SOC, Op_RegF, 12,S12->as_VMReg());
+reg_def R_S13( SOC, SOC, Op_RegF, 13,S13->as_VMReg());
+reg_def R_S14( SOC, SOC, Op_RegF, 14,S14->as_VMReg());
+reg_def R_S15( SOC, SOC, Op_RegF, 15,S15->as_VMReg());
+reg_def R_S16( SOC, SOE, Op_RegF, 16,S16->as_VMReg());
+reg_def R_S17( SOC, SOE, Op_RegF, 17,S17->as_VMReg());
+reg_def R_S18( SOC, SOE, Op_RegF, 18,S18->as_VMReg());
+reg_def R_S19( SOC, SOE, Op_RegF, 19,S19->as_VMReg());
+reg_def R_S20( SOC, SOE, Op_RegF, 20,S20->as_VMReg());
+reg_def R_S21( SOC, SOE, Op_RegF, 21,S21->as_VMReg());
+reg_def R_S22( SOC, SOE, Op_RegF, 22,S22->as_VMReg());
+reg_def R_S23( SOC, SOE, Op_RegF, 23,S23->as_VMReg());
+reg_def R_S24( SOC, SOE, Op_RegF, 24,S24->as_VMReg());
+reg_def R_S25( SOC, SOE, Op_RegF, 25,S25->as_VMReg());
+reg_def R_S26( SOC, SOE, Op_RegF, 26,S26->as_VMReg());
+reg_def R_S27( SOC, SOE, Op_RegF, 27,S27->as_VMReg());
+reg_def R_S28( SOC, SOE, Op_RegF, 28,S28->as_VMReg());
+reg_def R_S29( SOC, SOE, Op_RegF, 29,S29->as_VMReg());
+reg_def R_S30( SOC, SOE, Op_RegF, 30,S30->as_VMReg());
+reg_def R_S31( SOC, SOE, Op_RegF, 31,S31->as_VMReg());
+
+// Double Registers
+// The rules of ADL require that double registers be defined in pairs.
+// Each pair must be two 32-bit values, but not necessarily a pair of
+// single float registers. In each pair, ADLC-assigned register numbers
+// must be adjacent, with the lower number even. Finally, when the
+// CPU stores such a register pair to memory, the word associated with
+// the lower ADLC-assigned number must be stored to the lower address.
+
+// D16-D31, each defined as a pair of 32-bit halves. The first half carries
+// encoding 2*N (32..62); the second half (x suffix) carries the placeholder
+// encoding 255 and is the VMReg next() of the first. All are SOC in both
+// conventions.
+reg_def R_D16 (SOC, SOC, Op_RegD, 32, D16->as_VMReg());
+reg_def R_D16x(SOC, SOC, Op_RegD,255, D16->as_VMReg()->next());
+reg_def R_D17 (SOC, SOC, Op_RegD, 34, D17->as_VMReg());
+reg_def R_D17x(SOC, SOC, Op_RegD,255, D17->as_VMReg()->next());
+reg_def R_D18 (SOC, SOC, Op_RegD, 36, D18->as_VMReg());
+reg_def R_D18x(SOC, SOC, Op_RegD,255, D18->as_VMReg()->next());
+reg_def R_D19 (SOC, SOC, Op_RegD, 38, D19->as_VMReg());
+reg_def R_D19x(SOC, SOC, Op_RegD,255, D19->as_VMReg()->next());
+reg_def R_D20 (SOC, SOC, Op_RegD, 40, D20->as_VMReg());
+reg_def R_D20x(SOC, SOC, Op_RegD,255, D20->as_VMReg()->next());
+reg_def R_D21 (SOC, SOC, Op_RegD, 42, D21->as_VMReg());
+reg_def R_D21x(SOC, SOC, Op_RegD,255, D21->as_VMReg()->next());
+reg_def R_D22 (SOC, SOC, Op_RegD, 44, D22->as_VMReg());
+reg_def R_D22x(SOC, SOC, Op_RegD,255, D22->as_VMReg()->next());
+reg_def R_D23 (SOC, SOC, Op_RegD, 46, D23->as_VMReg());
+reg_def R_D23x(SOC, SOC, Op_RegD,255, D23->as_VMReg()->next());
+reg_def R_D24 (SOC, SOC, Op_RegD, 48, D24->as_VMReg());
+reg_def R_D24x(SOC, SOC, Op_RegD,255, D24->as_VMReg()->next());
+reg_def R_D25 (SOC, SOC, Op_RegD, 50, D25->as_VMReg());
+reg_def R_D25x(SOC, SOC, Op_RegD,255, D25->as_VMReg()->next());
+reg_def R_D26 (SOC, SOC, Op_RegD, 52, D26->as_VMReg());
+reg_def R_D26x(SOC, SOC, Op_RegD,255, D26->as_VMReg()->next());
+reg_def R_D27 (SOC, SOC, Op_RegD, 54, D27->as_VMReg());
+reg_def R_D27x(SOC, SOC, Op_RegD,255, D27->as_VMReg()->next());
+reg_def R_D28 (SOC, SOC, Op_RegD, 56, D28->as_VMReg());
+reg_def R_D28x(SOC, SOC, Op_RegD,255, D28->as_VMReg()->next());
+reg_def R_D29 (SOC, SOC, Op_RegD, 58, D29->as_VMReg());
+reg_def R_D29x(SOC, SOC, Op_RegD,255, D29->as_VMReg()->next());
+reg_def R_D30 (SOC, SOC, Op_RegD, 60, D30->as_VMReg());
+reg_def R_D30x(SOC, SOC, Op_RegD,255, D30->as_VMReg()->next());
+reg_def R_D31 (SOC, SOC, Op_RegD, 62, D31->as_VMReg());
+reg_def R_D31x(SOC, SOC, Op_RegD,255, D31->as_VMReg()->next());
+
+// ----------------------------
+// Special Registers
+// Condition Codes Flag Registers
+// Integer (APSR) and floating-point (FPSCR) flag registers. Both use
+// encoding 0 and have no VMReg (VMRegImpl::Bad()).
+reg_def APSR (SOC, SOC, Op_RegFlags, 0, VMRegImpl::Bad());
+reg_def FPSCR(SOC, SOC, Op_RegFlags, 0, VMRegImpl::Bad());
+
+// ----------------------------
+// Specify the enum values for the registers. These enums are only used by the
+// OptoReg "class". We can convert these enum values at will to VMReg when needed
+// for visibility to the rest of the vm. The order of this enum influences the
+// register allocator so having the freedom to set this order and not be stuck
+// with the order that is natural for the rest of the vm is worth it.
+
+// chunk0: core registers. R4-R9 are listed first, then R11/R12 are kept
+// adjacent so that they form an aligned pair that can be used for longs;
+// R0-R3 are listed last.
+alloc_class chunk0(
+ R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R10, R_R13, R_R14, R_R15, R_R0, R_R1, R_R2, R_R3);
+
+// Note that a register is not allocatable unless it is also mentioned
+// in a widely-used reg_class below.
+
+// chunk1: floating-point registers. S16-S31 are listed before S0-S15,
+// followed by the D16-D31 pairs.
+alloc_class chunk1(
+ R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23,
+ R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31,
+ R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7,
+ R_S8, R_S9, R_S10, R_S11, R_S12, R_S13, R_S14, R_S15,
+ R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x,
+ R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x,
+ R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x,
+ R_D28, R_D28x,R_D29, R_D29x,R_D30, R_D30x,R_D31, R_D31x
+);
+
+// chunk2: the two flag registers.
+alloc_class chunk2(APSR, FPSCR);
+
+//----------Architecture Description Register Classes--------------------------
+// Several register classes are automatically defined based upon information in
+// this architecture description.
+// 1) reg_class inline_cache_reg ( as defined in frame section )
+// 2) reg_class interpreter_method_oop_reg ( as defined in frame section )
+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+//
+
+// ----------------------------
+// Integer Register Classes
+// ----------------------------
+// Exclusions from int_reg:
+// SP (R13), PC (R15)
+// R10: reserved by HotSpot to the TLS register (invariant within Java)
+reg_class int_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14);
+
+// Single-register integer classes.
+reg_class R0_regI(R_R0);
+reg_class R1_regI(R_R1);
+reg_class R2_regI(R_R2);
+reg_class R3_regI(R_R3);
+reg_class R12_regI(R_R12);
+
+// ----------------------------
+// Pointer Register Classes
+// ----------------------------
+// General pointer class: the same register set as int_reg.
+reg_class ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14);
+// Special class for storeP instructions, which can store SP or RPC to TLS.
+// It is also used for memory addressing, allowing direct TLS addressing.
+reg_class sp_ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14, R_R10 /* TLS*/, R_R13 /* SP*/);
+
+// Symbolic names for registers with a fixed role; used by the classes below.
+#define R_Ricklass R_R8
+#define R_Rmethod R_R9
+#define R_Rthread R_R10
+#define R_Rexception_obj R_R4
+
+// Other special pointer regs
+reg_class R0_regP(R_R0);
+reg_class R1_regP(R_R1);
+reg_class R2_regP(R_R2);
+reg_class R4_regP(R_R4);
+reg_class Rexception_regP(R_Rexception_obj);
+reg_class Ricklass_regP(R_Ricklass);
+reg_class Rmethod_regP(R_Rmethod);
+reg_class Rthread_regP(R_Rthread);
+reg_class IP_regP(R_R12);
+reg_class LR_regP(R_R14);
+
+reg_class FP_regP(R_R11);
+
+// ----------------------------
+// Long Register Classes
+// ----------------------------
+// Register pairs usable for longs: R0/R1 .. R8/R9 plus R11/R12.
+reg_class long_reg ( R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9, R_R11,R_R12);
+// for ldrexd, strexd: first reg of pair must be even
+// (so the R11/R12 pair is left out here)
+reg_class long_reg_align ( R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9);
+
+// Fixed long pairs.
+reg_class R0R1_regL(R_R0,R_R1);
+reg_class R2R3_regL(R_R2,R_R3);
+
+// ----------------------------
+// Special Class for Condition Code Flags Register
+// int_flags holds APSR; float_flags holds FPSCR.
+reg_class int_flags(APSR);
+reg_class float_flags(FPSCR);
+
+
+// ----------------------------
+// Float Point Register Classes
+// ----------------------------
+// Skip S14/S15, they are reserved for mem-mem copies
+reg_class sflt_reg(R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, R_S8, R_S9, R_S10, R_S11, R_S12, R_S13,
+ R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23, R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31);
+
+// Paired floating point registers--they show up in the same order as the floats,
+// but they are used with the "Op_RegD" type, and always occur in even/odd pairs.
+// dflt_reg additionally contains the D16-D31 pairs.
+reg_class dflt_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13,
+ R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31,
+ R_D16,R_D16x, R_D17,R_D17x, R_D18,R_D18x, R_D19,R_D19x, R_D20,R_D20x, R_D21,R_D21x, R_D22,R_D22x,
+ R_D23,R_D23x, R_D24,R_D24x, R_D25,R_D25x, R_D26,R_D26x, R_D27,R_D27x, R_D28,R_D28x, R_D29,R_D29x,
+ R_D30,R_D30x, R_D31,R_D31x);
+
+// Same pairs as dflt_reg but without D16-D31.
+reg_class dflt_low_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13,
+ R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31);
+
+
+// Dynamic class: the full double set (including D16-D31) when
+// VM_Version::has_vfp3_32() reports true, otherwise only the low set.
+reg_class actual_dflt_reg %{
+ return VM_Version::has_vfp3_32() ? DFLT_REG_mask() : DFLT_LOW_REG_mask();
+%}
+
+// Single-register classes. D0-D7 are expressed as pairs of S registers
+// (D7 maps onto S14/S15); D16-D31 use their R_Dn/R_Dnx pairs.
+reg_class S0_regF(R_S0);
+reg_class D0_regD(R_S0,R_S1);
+reg_class D1_regD(R_S2,R_S3);
+reg_class D2_regD(R_S4,R_S5);
+reg_class D3_regD(R_S6,R_S7);
+reg_class D4_regD(R_S8,R_S9);
+reg_class D5_regD(R_S10,R_S11);
+reg_class D6_regD(R_S12,R_S13);
+reg_class D7_regD(R_S14,R_S15);
+
+reg_class D16_regD(R_D16,R_D16x);
+reg_class D17_regD(R_D17,R_D17x);
+reg_class D18_regD(R_D18,R_D18x);
+reg_class D19_regD(R_D19,R_D19x);
+reg_class D20_regD(R_D20,R_D20x);
+reg_class D21_regD(R_D21,R_D21x);
+reg_class D22_regD(R_D22,R_D22x);
+reg_class D23_regD(R_D23,R_D23x);
+reg_class D24_regD(R_D24,R_D24x);
+reg_class D25_regD(R_D25,R_D25x);
+reg_class D26_regD(R_D26,R_D26x);
+reg_class D27_regD(R_D27,R_D27x);
+reg_class D28_regD(R_D28,R_D28x);
+reg_class D29_regD(R_D29,R_D29x);
+reg_class D30_regD(R_D30,R_D30x);
+reg_class D31_regD(R_D31,R_D31x);
+
+// Registers for 16-byte vectors, listed in groups of four 32-bit slots.
+// NOTE(review): S12 and S13 are omitted together with the reserved S14/S15,
+// presumably because the four-slot group containing them cannot be used as
+// a whole - confirm.
+reg_class vectorx_reg(R_S0,R_S1,R_S2,R_S3, R_S4,R_S5,R_S6,R_S7,
+ R_S8,R_S9,R_S10,R_S11, /* skip S14/S15 */
+ R_S16,R_S17,R_S18,R_S19, R_S20,R_S21,R_S22,R_S23,
+ R_S24,R_S25,R_S26,R_S27, R_S28,R_S29,R_S30,R_S31,
+ R_D16,R_D16x,R_D17,R_D17x, R_D18,R_D18x,R_D19,R_D19x,
+ R_D20,R_D20x,R_D21,R_D21x, R_D22,R_D22x,R_D23,R_D23x,
+ R_D24,R_D24x,R_D25,R_D25x, R_D26,R_D26x,R_D27,R_D27x,
+ R_D28,R_D28x,R_D29,R_D29x, R_D30,R_D30x,R_D31,R_D31x);
+
+%}
+
+source_hpp %{
+// FIXME
+// Register numbers with a fixed role on the 32-bit port.
+// S14/S15 (and S14 as Rmemcopy) are the pair kept out of the float register
+// classes for mem-mem copies.
+const MachRegisterNumbers R_mem_copy_lo_num = R_S14_num;
+const MachRegisterNumbers R_mem_copy_hi_num = R_S15_num;
+const FloatRegister Rmemcopy = S14;
+// S0/S1: used by c2::return_value() for the hard-float (__ABI_HARD__) case.
+const MachRegisterNumbers R_hf_ret_lo_num = R_S0_num;
+const MachRegisterNumbers R_hf_ret_hi_num = R_S1_num;
+
+// Numeric counterparts of the R_Ricklass / R_Rmethod register aliases.
+const MachRegisterNumbers R_Ricklass_num = R_R8_num;
+const MachRegisterNumbers R_Rmethod_num = R_R9_num;
+
+// Mnemonic strings for the 32-bit port.
+// NOTE(review): presumably consumed by format strings in the shared arm.ad
+// so that the same rule text can print 32-bit or 64-bit mnemonics - the
+// users are not visible in this file.
+#define LDR_DOUBLE "FLDD"
+#define LDR_FLOAT "FLDS"
+#define STR_DOUBLE "FSTD"
+#define STR_FLOAT "FSTS"
+#define LDR_64 "LDRD"
+#define STR_64 "STRD"
+#define LDR_32 "LDR"
+#define STR_32 "STR"
+#define MOV_DOUBLE "FCPYD"
+#define MOV_FLOAT "FCPYS"
+#define FMSR "FMSR"
+#define FMRS "FMRS"
+#define LDREX "ldrex "
+#define STREX "strex "
+
+// Size-generic names mapped to the 32-bit assembler method names.
+// ldrex and strex map to themselves on this port.
+#define str_64 strd
+#define ldr_64 ldrd
+#define ldr_32 ldr
+#define ldrex ldrex
+#define strex strex
+
+// True when offset lies strictly between -1024 and 1024.
+static inline bool is_memoryD(int offset) {
+ return -1024 < offset && offset < 1024;
+}
+
+// True when offset lies strictly between -1024 and 1024.
+static inline bool is_memoryfp(int offset) {
+ return -1024 < offset && offset < 1024;
+}
+
+// True when offset lies strictly between -4096 and 4096.
+static inline bool is_memoryI(int offset) {
+ return -4096 < offset && offset < 4096;
+}
+
+// True when offset lies strictly between -4096 and 4096.
+static inline bool is_memoryP(int offset) {
+ return -4096 < offset && offset < 4096;
+}
+
+// True when offset lies strictly between -256 and 256.
+static inline bool is_memoryHD(int offset) {
+ return -256 < offset && offset < 256;
+}
+
+// True when imm is accepted by AsmOperand::is_rotated_imm().
+static inline bool is_aimm(int imm) {
+ return AsmOperand::is_rotated_imm(imm);
+}
+
+// True when imm is accepted by AsmOperand::is_rotated_imm().
+static inline bool is_limmI(jint imm) {
+ return AsmOperand::is_rotated_imm(imm);
+}
+
+// True when either imm masked with right_n_bits(n), or imm itself,
+// satisfies is_limmI().
+static inline bool is_limmI_low(jint imm, int n) {
+ const int low_bits = imm & right_n_bits(n);
+ if (is_limmI(low_bits)) {
+ return true;
+ }
+ return is_limmI(imm);
+}
+
+// Returns imm masked with right_n_bits(n) when that masked value satisfies
+// is_limmI(); otherwise returns imm unchanged.
+static inline int limmI_low(jint imm, int n) {
+ const int low_bits = imm & right_n_bits(n);
+ if (is_limmI(low_bits)) {
+ return low_bits;
+ }
+ return imm;
+}
+
+%}
+
+source %{
+
+// Given a register encoding, produce an integer Register object.
+// The assert checks that the encodings of R0 and R15 agree with the ADL
+// encodings R_R0_enc and R_R15_enc before converting via as_Register().
+static Register reg_to_register_object(int register_encoding) {
+ assert(R0->encoding() == R_R0_enc && R15->encoding() == R_R15_enc, "right coding");
+ return as_Register(register_encoding);
+}
+
+// Given a register encoding, produce a single-precision Float Register object.
+// The assert checks that the encodings of S0 and S31 agree with the ADL
+// encodings R_S0_enc and R_S31_enc before converting via as_FloatRegister().
+static FloatRegister reg_to_FloatRegister_object(int register_encoding) {
+ assert(S0->encoding() == R_S0_enc && S31->encoding() == R_S31_enc, "right coding");
+ return as_FloatRegister(register_encoding);
+}
+
+// No work is done here on this port.
+void Compile::pd_compiler2_init() {
+ // Unimplemented
+}
+
+// Location of compiled Java return values. Same as C
+// lo[] and hi[] are indexed by ideal register type and give the low and
+// high halves of the returned OptoRegPair.
+// Without __ABI_HARD__ every value comes back in R0, with R1 as the high
+// half for the last two table entries. With __ABI_HARD__ two entries use
+// the R_hf_ret_* (S0/S1) registers instead.
+// NOTE(review): the tables assume the ideal register types from Op_RegI to
+// Op_RegL occupy consecutive indices ending at 7, with the float and double
+// types at indices 5 and 6 - confirm against the opcode enumeration.
+OptoRegPair c2::return_value(int ideal_reg) {
+ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
+#ifndef __ABI_HARD__
+ static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num, R_R0_num, R_R0_num, R_R0_num, R_R0_num };
+ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_R1_num, R_R1_num };
+#else
+ static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num, R_R0_num, R_hf_ret_lo_num, R_hf_ret_lo_num, R_R0_num };
+ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_hf_ret_hi_num, R_R1_num };
+#endif
+ return OptoRegPair( hi[ideal_reg], lo[ideal_reg]);
+}
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+// from the start of the call to the point where the return address
+// will point.
+
+// Byte offset from the start of the call sequence to its return address.
+int MachCallStaticJavaNode::ret_addr_offset() {
+ // With no _method the decision is delegated to maybe_far_call();
+ // otherwise the call is far whenever cache_reachable() is false.
+ bool far;
+ if (_method == NULL) {
+ far = maybe_far_call(this);
+ } else {
+ far = !cache_reachable();
+ }
+ // A far call counts as 3 instructions, a near call as 1; a method handle
+ // invoke adds one more.
+ int words = far ? 3 : 1;
+ if (_method_handle_invoke) {
+ words += 1;
+ }
+ return words * NativeInstruction::instruction_size;
+}
+
+// Byte offset from the start of the call sequence to its return address.
+int MachCallDynamicJavaNode::ret_addr_offset() {
+ // mov_oop is always 2 words; the call adds 1 word when the code cache is
+ // reachable and 3 words otherwise.
+ const int call_words = cache_reachable() ? 1 : 3;
+ return (2 + call_words) * NativeInstruction::instruction_size;
+}
+
+// Byte offset from the start of the call sequence to its return address.
+int MachCallRuntimeNode::ret_addr_offset() {
+ // Near call: bl (1 word). Far call: movw; movt; blx (3 words).
+ const int call_words = maybe_far_call(this) ? 3 : 1;
+ return call_words * NativeInstruction::instruction_size;
+}
+%}
+
+// The intptr_t operand types, defined by textual substitution.
+// (Cf. opto/type.hpp. This lets us avoid many, many other ifdefs.)
+// In this 32-bit file every X (intptr_t-sized) name maps to its int variant.
+#define immX immI
+#define immXRot immIRot
+#define iRegX iRegI
+#define aimmX aimmI
+#define limmX limmI
+#define immX10x2 immI10x2
+#define LShiftX LShiftI
+#define shimmX immU5
+
+// Compatibility interface
+// Alternate operand names mapped onto the rotated-immediate operands.
+#define aimmP immPRot
+#define immIMov immIRot
+
+// store_* operand names mapped onto the plain register operands.
+#define store_RegL iRegL
+#define store_RegLd iRegLd
+#define store_RegI iRegI
+#define store_ptr_RegP iRegP
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Operand Attributes-------------------------------------------------
+// Declares op_cost as an operand attribute with the value 1; the immediate
+// operands in this file set it to 0 explicitly.
+op_attrib op_cost(1); // Required cost attribute
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+// Immediate Operands
+
+// Integer constant that AsmOperand::is_rotated_imm() accepts.
+operand immIRot() %{
+ predicate(AsmOperand::is_rotated_imm(n->get_int()));
+ match(ConI);
+
+ op_cost(0);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Nonzero integer constant whose bitwise complement is accepted by
+// AsmOperand::is_rotated_imm().
+operand immIRotn() %{
+ predicate(n->get_int() != 0 && AsmOperand::is_rotated_imm(~n->get_int()));
+ match(ConI);
+
+ op_cost(0);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer constant that is not itself a rotated immediate but whose
+// negation is.
+operand immIRotneg() %{
+ // if AsmOperand::is_rotated_imm() is true for this constant, it is
+ // a immIRot and an optimal instruction combination exists to handle the
+ // constant as an immIRot
+ predicate(!AsmOperand::is_rotated_imm(n->get_int()) && AsmOperand::is_rotated_imm(-n->get_int()));
+ match(ConI);
+
+ op_cost(0);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Rotated-immediate integer constant that is also non-negative, so its
+// expanded value fits in 31 bits.
+operand immU31Rot() %{
+ predicate(n->get_int() >= 0 && AsmOperand::is_rotated_imm(n->get_int()));
+ match(ConI);
+
+ op_cost(0);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer constant that is either null, or a rotated immediate whose type
+// carries no relocation (relocInfo::none).
+operand immPRot() %{
+ predicate(n->get_ptr() == 0 || (AsmOperand::is_rotated_imm(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none));
+
+ match(ConP);
+
+ op_cost(0);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long constant whose upper 32 bits are zero and whose low word is a
+// rotated immediate.
+operand immLlowRot() %{
+ predicate(n->get_long() >> 32 == 0 && AsmOperand::is_rotated_imm((int)n->get_long()));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long constant whose high word and low word are each a rotated immediate.
+operand immLRot2() %{
+ predicate(AsmOperand::is_rotated_imm((int)(n->get_long() >> 32)) &&
+ AsmOperand::is_rotated_imm((int)(n->get_long())));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer immediate for addressing modes: a 12-bit magnitude, i.e. strictly
+// between -4096 and 4096.
+operand immI12() %{
+ predicate(n->get_int() > -4096 && n->get_int() < 4096);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer immediate for addressing a float pair: both disp and disp+4 must
+// stay inside the 10-bit range, so the upper bound is lowered by 4.
+operand immI10x2() %{
+ predicate(n->get_int() > -1024 && n->get_int() < 1024 - 4);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer immediate for addressing a word pair: both disp and disp+4 must
+// stay inside the 12-bit range, so the upper bound is lowered by 4.
+operand immI12x2() %{
+ predicate(n->get_int() > -4096 && n->get_int() < 4096 - 4);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/arm_64.ad Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,998 @@
+//
+// Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+// ARM Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def" name ( register save type, C convention save type,
+// ideal register type, encoding, vm name );
+// Register Save Types:
+//
+// NS = No-Save: The register allocator assumes that these registers
+// can be used without saving upon entry to the method, &
+// that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call: The register allocator assumes that these registers
+// can be used without saving upon entry to the method,
+// but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, but they do not need to be saved at call
+// sites.
+//
+// AS = Always-Save: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
+// FIXME: above comment seems wrong. Spill done through MachSpillCopyNode
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+
+// ----------------------------
+// Integer/Long Registers
+// ----------------------------
+
+// Each general register is described by two reg_defs: R_Rn carries the
+// register number as its encoding and the register's own VMReg, while R_Rnx
+// names the following VMReg slot (->next()) and is given encoding 255.
+//
+// TODO: would be nice to keep track of high-word state:
+// zeroRegI --> RegL
+// signedRegI --> RegL
+// junkRegI --> RegL
+// how to tell C2 to treat RegI as RegL, or RegL as RegI?
+// R0-R7: SOC in both the register save type and the C convention save type.
+reg_def R_R0 (SOC, SOC, Op_RegI, 0, R0->as_VMReg());
+reg_def R_R0x (SOC, SOC, Op_RegI, 255, R0->as_VMReg()->next());
+reg_def R_R1 (SOC, SOC, Op_RegI, 1, R1->as_VMReg());
+reg_def R_R1x (SOC, SOC, Op_RegI, 255, R1->as_VMReg()->next());
+reg_def R_R2 (SOC, SOC, Op_RegI, 2, R2->as_VMReg());
+reg_def R_R2x (SOC, SOC, Op_RegI, 255, R2->as_VMReg()->next());
+reg_def R_R3 (SOC, SOC, Op_RegI, 3, R3->as_VMReg());
+reg_def R_R3x (SOC, SOC, Op_RegI, 255, R3->as_VMReg()->next());
+reg_def R_R4 (SOC, SOC, Op_RegI, 4, R4->as_VMReg());
+reg_def R_R4x (SOC, SOC, Op_RegI, 255, R4->as_VMReg()->next());
+reg_def R_R5 (SOC, SOC, Op_RegI, 5, R5->as_VMReg());
+reg_def R_R5x (SOC, SOC, Op_RegI, 255, R5->as_VMReg()->next());
+reg_def R_R6 (SOC, SOC, Op_RegI, 6, R6->as_VMReg());
+reg_def R_R6x (SOC, SOC, Op_RegI, 255, R6->as_VMReg()->next());
+reg_def R_R7 (SOC, SOC, Op_RegI, 7, R7->as_VMReg());
+reg_def R_R7x (SOC, SOC, Op_RegI, 255, R7->as_VMReg()->next());
+
+// R8-R15: same save types as R0-R7.
+reg_def R_R8 (SOC, SOC, Op_RegI, 8, R8->as_VMReg());
+reg_def R_R8x (SOC, SOC, Op_RegI, 255, R8->as_VMReg()->next());
+reg_def R_R9 (SOC, SOC, Op_RegI, 9, R9->as_VMReg());
+reg_def R_R9x (SOC, SOC, Op_RegI, 255, R9->as_VMReg()->next());
+reg_def R_R10 (SOC, SOC, Op_RegI, 10, R10->as_VMReg());
+reg_def R_R10x(SOC, SOC, Op_RegI, 255, R10->as_VMReg()->next());
+reg_def R_R11 (SOC, SOC, Op_RegI, 11, R11->as_VMReg());
+reg_def R_R11x(SOC, SOC, Op_RegI, 255, R11->as_VMReg()->next());
+reg_def R_R12 (SOC, SOC, Op_RegI, 12, R12->as_VMReg());
+reg_def R_R12x(SOC, SOC, Op_RegI, 255, R12->as_VMReg()->next());
+reg_def R_R13 (SOC, SOC, Op_RegI, 13, R13->as_VMReg());
+reg_def R_R13x(SOC, SOC, Op_RegI, 255, R13->as_VMReg()->next());
+reg_def R_R14 (SOC, SOC, Op_RegI, 14, R14->as_VMReg());
+reg_def R_R14x(SOC, SOC, Op_RegI, 255, R14->as_VMReg()->next());
+reg_def R_R15 (SOC, SOC, Op_RegI, 15, R15->as_VMReg());
+reg_def R_R15x(SOC, SOC, Op_RegI, 255, R15->as_VMReg()->next());
+
+// R16 (IP0), R17 (IP1) and R18 (platform register): SOC in both columns,
+// each followed by its x half with encoding 255.
+reg_def R_R16 (SOC, SOC, Op_RegI, 16, R16->as_VMReg()); // IP0
+reg_def R_R16x(SOC, SOC, Op_RegI, 255, R16->as_VMReg()->next());
+reg_def R_R17 (SOC, SOC, Op_RegI, 17, R17->as_VMReg()); // IP1
+reg_def R_R17x(SOC, SOC, Op_RegI, 255, R17->as_VMReg()->next());
+reg_def R_R18 (SOC, SOC, Op_RegI, 18, R18->as_VMReg()); // Platform Register
+reg_def R_R18x(SOC, SOC, Op_RegI, 255, R18->as_VMReg()->next());
+
+// R19-R27: register save type SOC, C convention save type SOE.
+reg_def R_R19 (SOC, SOE, Op_RegI, 19, R19->as_VMReg());
+reg_def R_R19x(SOC, SOE, Op_RegI, 255, R19->as_VMReg()->next());
+reg_def R_R20 (SOC, SOE, Op_RegI, 20, R20->as_VMReg());
+reg_def R_R20x(SOC, SOE, Op_RegI, 255, R20->as_VMReg()->next());
+reg_def R_R21 (SOC, SOE, Op_RegI, 21, R21->as_VMReg());
+reg_def R_R21x(SOC, SOE, Op_RegI, 255, R21->as_VMReg()->next());
+reg_def R_R22 (SOC, SOE, Op_RegI, 22, R22->as_VMReg());
+reg_def R_R22x(SOC, SOE, Op_RegI, 255, R22->as_VMReg()->next());
+reg_def R_R23 (SOC, SOE, Op_RegI, 23, R23->as_VMReg());
+reg_def R_R23x(SOC, SOE, Op_RegI, 255, R23->as_VMReg()->next());
+reg_def R_R24 (SOC, SOE, Op_RegI, 24, R24->as_VMReg());
+reg_def R_R24x(SOC, SOE, Op_RegI, 255, R24->as_VMReg()->next());
+reg_def R_R25 (SOC, SOE, Op_RegI, 25, R25->as_VMReg());
+reg_def R_R25x(SOC, SOE, Op_RegI, 255, R25->as_VMReg()->next());
+reg_def R_R26 (SOC, SOE, Op_RegI, 26, R26->as_VMReg());
+reg_def R_R26x(SOC, SOE, Op_RegI, 255, R26->as_VMReg()->next());
+reg_def R_R27 (SOC, SOE, Op_RegI, 27, R27->as_VMReg()); // Rheap_base
+reg_def R_R27x(SOC, SOE, Op_RegI, 255, R27->as_VMReg()->next()); // Rheap_base
+// R28 (TLS) and R29 (FP): register save type NS, C convention save type SOE.
+reg_def R_R28 ( NS, SOE, Op_RegI, 28, R28->as_VMReg()); // TLS
+reg_def R_R28x( NS, SOE, Op_RegI, 255, R28->as_VMReg()->next()); // TLS
+
+reg_def R_R29 ( NS, SOE, Op_RegI, 29, R29->as_VMReg()); // FP
+reg_def R_R29x( NS, SOE, Op_RegI, 255, R29->as_VMReg()->next()); // FP
+// R30 (LR): SOC in both columns.
+reg_def R_R30 (SOC, SOC, Op_RegI, 30, R30->as_VMReg()); // LR
+reg_def R_R30x(SOC, SOC, Op_RegI, 255, R30->as_VMReg()->next()); // LR
+
+// ZR: NS in both columns, encoding 31.
+reg_def R_ZR ( NS, NS, Op_RegI, 31, ZR->as_VMReg()); // ZR
+reg_def R_ZRx( NS, NS, Op_RegI, 255, ZR->as_VMReg()->next()); // ZR
+
+// FIXME
+// SP is defined with ideal type Op_RegI and encoding 32 (one past ZR's 31);
+// the commented-out lines are the Op_RegP variants of the same definitions.
+//reg_def R_SP ( NS, NS, Op_RegP, 32, SP->as_VMReg());
+reg_def R_SP ( NS, NS, Op_RegI, 32, SP->as_VMReg());
+//reg_def R_SPx( NS, NS, Op_RegP, 255, SP->as_VMReg()->next());
+reg_def R_SPx( NS, NS, Op_RegI, 255, SP->as_VMReg()->next());
+
+// ----------------------------
+// Float/Double/Vector Registers
+// ----------------------------
+
+// First slot of each vector register V0-V31: ideal type Op_RegF, encoding
+// equal to the register number, VMReg taken directly from Vn->as_VMReg().
+// The 'b', 'c' and 'd' tables that follow describe VMReg slots next(1),
+// next(2) and next(3) of the same registers.
+reg_def R_V0(SOC, SOC, Op_RegF, 0, V0->as_VMReg());
+reg_def R_V1(SOC, SOC, Op_RegF, 1, V1->as_VMReg());
+reg_def R_V2(SOC, SOC, Op_RegF, 2, V2->as_VMReg());
+reg_def R_V3(SOC, SOC, Op_RegF, 3, V3->as_VMReg());
+reg_def R_V4(SOC, SOC, Op_RegF, 4, V4->as_VMReg());
+reg_def R_V5(SOC, SOC, Op_RegF, 5, V5->as_VMReg());
+reg_def R_V6(SOC, SOC, Op_RegF, 6, V6->as_VMReg());
+reg_def R_V7(SOC, SOC, Op_RegF, 7, V7->as_VMReg());
+reg_def R_V8(SOC, SOC, Op_RegF, 8, V8->as_VMReg());
+reg_def R_V9(SOC, SOC, Op_RegF, 9, V9->as_VMReg());
+reg_def R_V10(SOC, SOC, Op_RegF, 10, V10->as_VMReg());
+reg_def R_V11(SOC, SOC, Op_RegF, 11, V11->as_VMReg());
+reg_def R_V12(SOC, SOC, Op_RegF, 12, V12->as_VMReg());
+reg_def R_V13(SOC, SOC, Op_RegF, 13, V13->as_VMReg());
+reg_def R_V14(SOC, SOC, Op_RegF, 14, V14->as_VMReg());
+reg_def R_V15(SOC, SOC, Op_RegF, 15, V15->as_VMReg());
+reg_def R_V16(SOC, SOC, Op_RegF, 16, V16->as_VMReg());
+reg_def R_V17(SOC, SOC, Op_RegF, 17, V17->as_VMReg());
+reg_def R_V18(SOC, SOC, Op_RegF, 18, V18->as_VMReg());
+reg_def R_V19(SOC, SOC, Op_RegF, 19, V19->as_VMReg());
+reg_def R_V20(SOC, SOC, Op_RegF, 20, V20->as_VMReg());
+reg_def R_V21(SOC, SOC, Op_RegF, 21, V21->as_VMReg());
+reg_def R_V22(SOC, SOC, Op_RegF, 22, V22->as_VMReg());
+reg_def R_V23(SOC, SOC, Op_RegF, 23, V23->as_VMReg());
+reg_def R_V24(SOC, SOC, Op_RegF, 24, V24->as_VMReg());
+reg_def R_V25(SOC, SOC, Op_RegF, 25, V25->as_VMReg());
+reg_def R_V26(SOC, SOC, Op_RegF, 26, V26->as_VMReg());
+reg_def R_V27(SOC, SOC, Op_RegF, 27, V27->as_VMReg());
+reg_def R_V28(SOC, SOC, Op_RegF, 28, V28->as_VMReg());
+reg_def R_V29(SOC, SOC, Op_RegF, 29, V29->as_VMReg());
+reg_def R_V30(SOC, SOC, Op_RegF, 30, V30->as_VMReg());
+reg_def R_V31(SOC, SOC, Op_RegF, 31, V31->as_VMReg());
+
+// Second slot (VMReg next(1)) of each vector register V0-V31.
+// Every slot is typed Op_RegF, the same ideal type used by the first-slot,
+// 'c' and 'd' tables; R_V30b previously stood out as the only Op_RegD entry.
+// NOTE(review): R_V0b, R_V1b, R_V2b and R_V8b use encoding 255 while the other
+// 'b' slots repeat the register number -- confirm which form is intended
+// (the R_Rnx halves of the general registers all use 255).
+reg_def R_V0b(SOC, SOC, Op_RegF, 255, V0->as_VMReg()->next(1));
+reg_def R_V1b(SOC, SOC, Op_RegF, 255, V1->as_VMReg()->next(1));
+reg_def R_V2b(SOC, SOC, Op_RegF, 255, V2->as_VMReg()->next(1));
+reg_def R_V3b(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(1));
+reg_def R_V4b(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(1));
+reg_def R_V5b(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(1));
+reg_def R_V6b(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(1));
+reg_def R_V7b(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(1));
+reg_def R_V8b(SOC, SOC, Op_RegF, 255, V8->as_VMReg()->next(1));
+reg_def R_V9b(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(1));
+reg_def R_V10b(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(1));
+reg_def R_V11b(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(1));
+reg_def R_V12b(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(1));
+reg_def R_V13b(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(1));
+reg_def R_V14b(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(1));
+reg_def R_V15b(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(1));
+reg_def R_V16b(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(1));
+reg_def R_V17b(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(1));
+reg_def R_V18b(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(1));
+reg_def R_V19b(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(1));
+reg_def R_V20b(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(1));
+reg_def R_V21b(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(1));
+reg_def R_V22b(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(1));
+reg_def R_V23b(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(1));
+reg_def R_V24b(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(1));
+reg_def R_V25b(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(1));
+reg_def R_V26b(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(1));
+reg_def R_V27b(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(1));
+reg_def R_V28b(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(1));
+reg_def R_V29b(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(1));
+reg_def R_V30b(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(1));
+reg_def R_V31b(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(1));
+
+// Third slot (VMReg next(2)) of each vector register V0-V31; ideal type
+// Op_RegF, encoding equal to the register number.
+reg_def R_V0c(SOC, SOC, Op_RegF, 0, V0->as_VMReg()->next(2));
+reg_def R_V1c(SOC, SOC, Op_RegF, 1, V1->as_VMReg()->next(2));
+reg_def R_V2c(SOC, SOC, Op_RegF, 2, V2->as_VMReg()->next(2));
+reg_def R_V3c(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(2));
+reg_def R_V4c(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(2));
+reg_def R_V5c(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(2));
+reg_def R_V6c(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(2));
+reg_def R_V7c(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(2));
+reg_def R_V8c(SOC, SOC, Op_RegF, 8, V8->as_VMReg()->next(2));
+reg_def R_V9c(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(2));
+reg_def R_V10c(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(2));
+reg_def R_V11c(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(2));
+reg_def R_V12c(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(2));
+reg_def R_V13c(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(2));
+reg_def R_V14c(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(2));
+reg_def R_V15c(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(2));
+reg_def R_V16c(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(2));
+reg_def R_V17c(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(2));
+reg_def R_V18c(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(2));
+reg_def R_V19c(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(2));
+reg_def R_V20c(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(2));
+reg_def R_V21c(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(2));
+reg_def R_V22c(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(2));
+reg_def R_V23c(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(2));
+reg_def R_V24c(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(2));
+reg_def R_V25c(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(2));
+reg_def R_V26c(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(2));
+reg_def R_V27c(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(2));
+reg_def R_V28c(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(2));
+reg_def R_V29c(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(2));
+reg_def R_V30c(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(2));
+reg_def R_V31c(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(2));
+
+// Fourth slot (VMReg next(3)) of each vector register V0-V31; ideal type
+// Op_RegF, encoding equal to the register number.
+reg_def R_V0d(SOC, SOC, Op_RegF, 0, V0->as_VMReg()->next(3));
+reg_def R_V1d(SOC, SOC, Op_RegF, 1, V1->as_VMReg()->next(3));
+reg_def R_V2d(SOC, SOC, Op_RegF, 2, V2->as_VMReg()->next(3));
+reg_def R_V3d(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(3));
+reg_def R_V4d(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(3));
+reg_def R_V5d(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(3));
+reg_def R_V6d(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(3));
+reg_def R_V7d(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(3));
+reg_def R_V8d(SOC, SOC, Op_RegF, 8, V8->as_VMReg()->next(3));
+reg_def R_V9d(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(3));
+reg_def R_V10d(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(3));
+reg_def R_V11d(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(3));
+reg_def R_V12d(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(3));
+reg_def R_V13d(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(3));
+reg_def R_V14d(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(3));
+reg_def R_V15d(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(3));
+reg_def R_V16d(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(3));
+reg_def R_V17d(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(3));
+reg_def R_V18d(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(3));
+reg_def R_V19d(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(3));
+reg_def R_V20d(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(3));
+reg_def R_V21d(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(3));
+reg_def R_V22d(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(3));
+reg_def R_V23d(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(3));
+reg_def R_V24d(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(3));
+reg_def R_V25d(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(3));
+reg_def R_V26d(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(3));
+reg_def R_V27d(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(3));
+reg_def R_V28d(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(3));
+reg_def R_V29d(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(3));
+reg_def R_V30d(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(3));
+reg_def R_V31d(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(3));
+
+// ----------------------------
+// Special Registers
+// Condition Codes Flag Registers
+// Both flag registers have ideal type Op_RegFlags, encoding 255 and no
+// backing VMReg (VMRegImpl::Bad()).
+reg_def APSR (SOC, SOC, Op_RegFlags, 255, VMRegImpl::Bad());
+reg_def FPSCR(SOC, SOC, Op_RegFlags, 255, VMRegImpl::Bad());
+
+// ----------------------------
+// Specify the enum values for the registers. These enums are only used by the
+// OptoReg "class". We can convert these enum values at will to VMReg when needed
+// for visibility to the rest of the vm. The order of this enum influences the
+// register allocator so having the freedom to set this order and not be stuck
+// with the order that is natural for the rest of the vm is worth it.
+
+// Quad vector must be aligned here, so list them first.
+// Listing order: V8-V31 first, then V0-V7; every register contributes its
+// four slots (base, b, c, d) consecutively.
+alloc_class fprs(
+ R_V8, R_V8b, R_V8c, R_V8d, R_V9, R_V9b, R_V9c, R_V9d,
+ R_V10, R_V10b, R_V10c, R_V10d, R_V11, R_V11b, R_V11c, R_V11d,
+ R_V12, R_V12b, R_V12c, R_V12d, R_V13, R_V13b, R_V13c, R_V13d,
+ R_V14, R_V14b, R_V14c, R_V14d, R_V15, R_V15b, R_V15c, R_V15d,
+ R_V16, R_V16b, R_V16c, R_V16d, R_V17, R_V17b, R_V17c, R_V17d,
+ R_V18, R_V18b, R_V18c, R_V18d, R_V19, R_V19b, R_V19c, R_V19d,
+ R_V20, R_V20b, R_V20c, R_V20d, R_V21, R_V21b, R_V21c, R_V21d,
+ R_V22, R_V22b, R_V22c, R_V22d, R_V23, R_V23b, R_V23c, R_V23d,
+ R_V24, R_V24b, R_V24c, R_V24d, R_V25, R_V25b, R_V25c, R_V25d,
+ R_V26, R_V26b, R_V26c, R_V26d, R_V27, R_V27b, R_V27c, R_V27d,
+ R_V28, R_V28b, R_V28c, R_V28d, R_V29, R_V29b, R_V29c, R_V29d,
+ R_V30, R_V30b, R_V30c, R_V30d, R_V31, R_V31b, R_V31c, R_V31d,
+ R_V0, R_V0b, R_V0c, R_V0d, R_V1, R_V1b, R_V1c, R_V1d,
+ R_V2, R_V2b, R_V2c, R_V2d, R_V3, R_V3b, R_V3c, R_V3d,
+ R_V4, R_V4b, R_V4c, R_V4d, R_V5, R_V5b, R_V5c, R_V5d,
+ R_V6, R_V6b, R_V6c, R_V6d, R_V7, R_V7b, R_V7c, R_V7d
+);
+
+// Need double-register alignment here.
+// We are already quad-register aligned because of vectors above.
+// Lists R0-R30 in ascending order, each immediately followed by its x half.
+// R_ZR and R_SP are not part of this class.
+alloc_class gprs(
+ R_R0, R_R0x, R_R1, R_R1x, R_R2, R_R2x, R_R3, R_R3x,
+ R_R4, R_R4x, R_R5, R_R5x, R_R6, R_R6x, R_R7, R_R7x,
+ R_R8, R_R8x, R_R9, R_R9x, R_R10, R_R10x, R_R11, R_R11x,
+ R_R12, R_R12x, R_R13, R_R13x, R_R14, R_R14x, R_R15, R_R15x,
+ R_R16, R_R16x, R_R17, R_R17x, R_R18, R_R18x, R_R19, R_R19x,
+ R_R20, R_R20x, R_R21, R_R21x, R_R22, R_R22x, R_R23, R_R23x,
+ R_R24, R_R24x, R_R25, R_R25x, R_R26, R_R26x, R_R27, R_R27x,
+ R_R28, R_R28x, R_R29, R_R29x, R_R30, R_R30x
+);
+// Continuing with double-register alignment...
+// Each remaining class holds exactly two entries: the two flag registers,
+// then SP with its x half, then ZR with its x half.
+alloc_class chunk2(APSR, FPSCR);
+alloc_class chunk3(R_SP, R_SPx);
+alloc_class chunk4(R_ZR, R_ZRx);
+
+//----------Architecture Description Register Classes--------------------------
+// Several register classes are automatically defined based upon information in
+// this architecture description.
+// 1) reg_class inline_cache_reg ( as defined in frame section )
+// 2) reg_class interpreter_method_oop_reg ( as defined in frame section )
+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+//
+
+// ----------------------------
+// Integer Register Classes
+// ----------------------------
+// All of R0-R30, first halves only (no x halves, no ZR, no SP).
+// Compile::pd_compiler2_init() copies this class's mask (_INT_REG_ALL_mask)
+// as the starting point for _INT_REG_mask.
+reg_class int_reg_all(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7,
+ R_R8, R_R9, R_R10, R_R11, R_R12, R_R13, R_R14, R_R15,
+ R_R16, R_R17, R_R18, R_R19, R_R20, R_R21, R_R22, R_R23,
+ R_R24, R_R25, R_R26, R_R27, R_R28, R_R29, R_R30
+);
+
+// Exclusions from i_reg:
+// SP (R31)
+// Rthread/R28: reserved by HotSpot to the TLS register (invariant within Java)
+// The three classes below return RegMask globals instead of listing
+// registers; those globals are filled in by Compile::pd_compiler2_init().
+reg_class int_reg %{
+ return _INT_REG_mask;
+%}
+reg_class ptr_reg %{
+ return _PTR_REG_mask;
+%}
+reg_class vectorx_reg %{
+ return _VECTORX_REG_mask;
+%}
+
+// Single-register integer classes for R0-R3 (first half only).
+// The R12 variant is left commented out.
+reg_class R0_regI(R_R0);
+reg_class R1_regI(R_R1);
+reg_class R2_regI(R_R2);
+reg_class R3_regI(R_R3);
+//reg_class R12_regI(R_R12);
+
+// ----------------------------
+// Pointer Register Classes
+// ----------------------------
+
+// Special class for storeP instructions, which can store SP or RPC to TLS.
+// It is also used for memory addressing, allowing direct TLS addressing.
+
+// All four classes below return RegMask globals that are computed in
+// Compile::pd_compiler2_init(); the comments there give their composition
+// (STR_REG = INT_REG+ZR, SPILLP_REG = INT_REG+SP, SP_PTR_REG = INT_REG+SP+TLS).
+reg_class sp_ptr_reg %{
+ return _SP_PTR_REG_mask;
+%}
+
+reg_class store_reg %{
+ return _STR_REG_mask;
+%}
+
+reg_class store_ptr_reg %{
+ return _STR_PTR_REG_mask;
+%}
+
+reg_class spillP_reg %{
+ return _SPILLP_REG_mask;
+%}
+
+// Other special pointer regs
+// Each pointer class names one register together with its x half.
+reg_class R0_regP(R_R0, R_R0x);
+reg_class R1_regP(R_R1, R_R1x);
+reg_class R2_regP(R_R2, R_R2x);
+reg_class Rexception_regP(R_R19, R_R19x); // R19
+reg_class Ricklass_regP(R_R8, R_R8x); // R8
+reg_class Rmethod_regP(R_R27, R_R27x); // R27
+
+reg_class Rthread_regP(R_R28, R_R28x); // R28
+reg_class IP_regP(R_R16, R_R16x); // R16
+// RtempRegP is a textual alias for IPRegP.
+#define RtempRegP IPRegP
+reg_class LR_regP(R_R30, R_R30x); // R30
+
+reg_class SP_regP(R_SP, R_SPx);
+reg_class FP_regP(R_R29, R_R29x); // R29
+
+// ZR as a pointer-sized pair, and as a single int slot.
+reg_class ZR_regP(R_ZR, R_ZRx);
+reg_class ZR_regI(R_ZR);
+
+// ----------------------------
+// Long Register Classes
+// ----------------------------
+// long_reg returns the same runtime-built mask as ptr_reg (_PTR_REG_mask).
+reg_class long_reg %{ return _PTR_REG_mask; %}
+// for ldrexd, strexd: first reg of pair must be even
+// NOTE(review): LONG_REG_mask() is presumably the generated accessor for the
+// long_reg class above, which would make this class equal to long_reg --
+// confirm against the ADLC output.
+reg_class long_reg_align %{ return LONG_REG_mask(); %}
+
+reg_class R0_regL(R_R0,R_R0x); // arg 1 or return value
+
+// ----------------------------
+// Special Class for Condition Code Flags Register
+// int_flags contains only APSR; float_flags contains only FPSCR.
+reg_class int_flags(APSR);
+reg_class float_flags(FPSCR);
+
+
+// ----------------------------
+// Float Point Register Classes
+// ----------------------------
+// First slots of V0-V31. Compile::pd_compiler2_init() copies this class's
+// mask (_SFLT_REG_0_mask) and subtracts the memcopy register to form
+// _SFLT_REG_mask.
+reg_class sflt_reg_0(
+ R_V0, R_V1, R_V2, R_V3, R_V4, R_V5, R_V6, R_V7,
+ R_V8, R_V9, R_V10, R_V11, R_V12, R_V13, R_V14, R_V15,
+ R_V16, R_V17, R_V18, R_V19, R_V20, R_V21, R_V22, R_V23,
+ R_V24, R_V25, R_V26, R_V27, R_V28, R_V29, R_V30, R_V31);
+
+// Float classes backed by RegMask globals computed in
+// Compile::pd_compiler2_init().
+reg_class sflt_reg %{
+ return _SFLT_REG_mask;
+%}
+
+// dflt_low_reg and actual_dflt_reg both return the same mask.
+reg_class dflt_low_reg %{
+ return _DFLT_REG_mask;
+%}
+
+reg_class actual_dflt_reg %{
+ return _DFLT_REG_mask;
+%}
+
+// All four slots (base, b, c, d) of V0-V30; every V31 slot is commented out.
+// Compile::pd_compiler2_init() asserts that the mask it builds for
+// vectorx_reg equals this class's mask (_VECTORX_REG_0_mask).
+reg_class vectorx_reg_0(
+ R_V0, R_V1, R_V2, R_V3, R_V4, R_V5, R_V6, R_V7,
+ R_V8, R_V9, R_V10, R_V11, R_V12, R_V13, R_V14, R_V15,
+ R_V16, R_V17, R_V18, R_V19, R_V20, R_V21, R_V22, R_V23,
+ R_V24, R_V25, R_V26, R_V27, R_V28, R_V29, R_V30, /*R_V31,*/
+ R_V0b, R_V1b, R_V2b, R_V3b, R_V4b, R_V5b, R_V6b, R_V7b,
+ R_V8b, R_V9b, R_V10b, R_V11b, R_V12b, R_V13b, R_V14b, R_V15b,
+ R_V16b, R_V17b, R_V18b, R_V19b, R_V20b, R_V21b, R_V22b, R_V23b,
+ R_V24b, R_V25b, R_V26b, R_V27b, R_V28b, R_V29b, R_V30b, /*R_V31b,*/
+ R_V0c, R_V1c, R_V2c, R_V3c, R_V4c, R_V5c, R_V6c, R_V7c,
+ R_V8c, R_V9c, R_V10c, R_V11c, R_V12c, R_V13c, R_V14c, R_V15c,
+ R_V16c, R_V17c, R_V18c, R_V19c, R_V20c, R_V21c, R_V22c, R_V23c,
+ R_V24c, R_V25c, R_V26c, R_V27c, R_V28c, R_V29c, R_V30c, /*R_V31c,*/
+ R_V0d, R_V1d, R_V2d, R_V3d, R_V4d, R_V5d, R_V6d, R_V7d,
+ R_V8d, R_V9d, R_V10d, R_V11d, R_V12d, R_V13d, R_V14d, R_V15d,
+ R_V16d, R_V17d, R_V18d, R_V19d, R_V20d, R_V21d, R_V22d, R_V23d,
+ R_V24d, R_V25d, R_V26d, R_V27d, R_V28d, R_V29d, R_V30d, /*R_V31d*/);
+
+// Class for the memcopy register; Compile::pd_compiler2_init() sets the mask
+// to RegMask(R_mem_copy_lo_num).
+reg_class Rmemcopy_reg %{
+ return _RMEMCOPY_REG_mask;
+%}
+
+%}
+
+source_hpp %{
+
+// Memcopy register: V31, with its first two allocator slots named lo/hi.
+const MachRegisterNumbers R_mem_copy_lo_num = R_V31_num;
+const MachRegisterNumbers R_mem_copy_hi_num = R_V31b_num;
+const FloatRegister Rmemcopy = V31;
+
+// Register referenced by c2::return_value() for its Op_RegF/Op_RegD table
+// entries: V0, with its first two allocator slots named lo/hi.
+const MachRegisterNumbers R_hf_ret_lo_num = R_V0_num;
+const MachRegisterNumbers R_hf_ret_hi_num = R_V0b_num;
+const FloatRegister Rhfret = V0;
+
+// OptoReg names of special registers; defined in the source section and
+// assigned by Compile::pd_compiler2_init().
+extern OptoReg::Name R_Ricklass_num;
+extern OptoReg::Name R_Rmethod_num;
+extern OptoReg::Name R_tls_num;
+extern OptoReg::Name R_Rheap_base_num;
+
+// Register masks returned by the dynamic reg_class definitions; defined in
+// the source section and computed by Compile::pd_compiler2_init().
+extern RegMask _INT_REG_mask;
+extern RegMask _PTR_REG_mask;
+extern RegMask _SFLT_REG_mask;
+extern RegMask _DFLT_REG_mask;
+extern RegMask _VECTORX_REG_mask;
+extern RegMask _RMEMCOPY_REG_mask;
+extern RegMask _SP_PTR_REG_mask;
+extern RegMask _SPILLP_REG_mask;
+extern RegMask _STR_REG_mask;
+extern RegMask _STR_PTR_REG_mask;
+
+// Mnemonic strings behind generic names.
+// NOTE(review): presumably consumed by instruct format strings shared with
+// the 32-bit description -- the uses are outside this chunk; confirm.
+#define LDR_DOUBLE "LDR_D"
+#define LDR_FLOAT "LDR_S"
+#define STR_DOUBLE "STR_D"
+#define STR_FLOAT "STR_S"
+#define STR_64 "STR"
+#define LDR_64 "LDR"
+#define STR_32 "STR_W"
+#define LDR_32 "LDR_W"
+#define MOV_DOUBLE "FMOV_D"
+#define MOV_FLOAT "FMOV_S"
+#define FMSR "FMOV_SW"
+#define FMRS "FMOV_WS"
+// Unlike the entries above, these two are lower case and end in a space.
+#define LDREX "ldxr "
+#define STREX "stxr "
+
+// Identifier aliases: each generic name on the left expands to the
+// identifier on the right.
+// NOTE(review): the right-hand names are presumably MacroAssembler methods
+// for this port -- confirm against the assembler headers.
+#define str_64 str
+#define ldr_64 ldr
+#define ldr_32 ldr_w
+#define ldrex ldxr
+#define strex stxr
+
+#define fmsr fmov_sw
+#define fmrs fmov_ws
+#define fconsts fmov_s
+#define fconstd fmov_d
+
+// Returns what Assembler::is_unsigned_imm_in_range() reports for 'imm' with
+// a fixed width of 12 bits; 'shift' is passed through unchanged as the third
+// argument (the callers in this file pass a log2 access size).
+static inline bool is_uimm12(jlong imm, int shift) {
+  const int width_in_bits = 12;
+  return Assembler::is_unsigned_imm_in_range(imm, width_in_bits, shift);
+}
+
+// Offset check for double accesses: is_uimm12() at scale 3.
+static inline bool is_memoryD(int offset) {
+  return is_uimm12(offset, 3 /* LogBytesPerDouble */);
+}
+
+// Offset check for floating-point accesses: is_uimm12() at the int scale,
+// so that 32-bit word accesses are included.
+static inline bool is_memoryfp(int offset) {
+  return is_uimm12(offset, LogBytesPerInt);
+}
+
+// Offset check for int accesses: is_uimm12() at the int scale.
+static inline bool is_memoryI(int offset) {
+  return is_uimm12(offset, LogBytesPerInt);
+}
+
+// Offset check for pointer accesses: is_uimm12() at the word scale.
+static inline bool is_memoryP(int offset) {
+  return is_uimm12(offset, LogBytesPerWord);
+}
+
+// Offset check for "HD" accesses: is_uimm12() at the int scale, so that
+// 32-bit word accesses are included.
+static inline bool is_memoryHD(int offset) {
+  return is_uimm12(offset, LogBytesPerInt);
+}
+
+// Defined in the source section; declared here so the inline helpers below
+// can call it.
+uintx limmL_low(uintx imm, int n);
+
+// int flavour of is_aimm(): reports whether Assembler::ArithmeticImmediate
+// considers 'imm' encoded.
+static inline bool Xis_aimm(int imm) {
+  Assembler::ArithmeticImmediate arith_imm(imm);
+  return arith_imm.is_encoded();
+}
+
+// Reports whether Assembler::ArithmeticImmediate considers 'imm' encoded.
+static inline bool is_aimm(intptr_t imm) {
+  Assembler::ArithmeticImmediate arith_imm(imm);
+  return arith_imm.is_encoded();
+}
+
+// Reports whether Assembler::LogicalImmediate considers 'imm' encoded.
+static inline bool is_limmL(uintptr_t imm) {
+  Assembler::LogicalImmediate logical_imm(imm);
+  return logical_imm.is_encoded();
+}
+
+// True when the value chosen by limmL_low(imm, n) passes is_limmL().
+static inline bool is_limmL_low(intptr_t imm, int n) {
+  const uintx candidate = limmL_low(imm, n);
+  return is_limmL(candidate);
+}
+
+// Reports whether Assembler::LogicalImmediate, constructed with 'true' as
+// its second argument, considers 'imm' encoded.
+static inline bool is_limmI(jint imm) {
+  Assembler::LogicalImmediate logical_imm(imm, true);
+  return logical_imm.is_encoded();
+}
+
+// jint front end for limmL_low(): the argument is converted to uintx, exactly
+// as the implicit conversion at a direct call would do, and forwarded.
+static inline uintx limmI_low(jint imm, int n) {
+  const uintx widened = imm;
+  return limmL_low(widened, n);
+}
+
+// jint front end for is_limmL_low(): the argument is converted to intptr_t,
+// exactly as the implicit conversion at a direct call would do, and forwarded.
+static inline bool is_limmI_low(jint imm, int n) {
+  const intptr_t widened = imm;
+  return is_limmL_low(widened, n);
+}
+
+%}
+
+source %{
+
+// Maps an ADL register encoding to the corresponding integer Register.
+// The asserts check that the assembler's encodings for R0, R30, Rthread and
+// SP agree with the generated R_*_enc constants.
+static Register reg_to_register_object(int register_encoding) {
+  assert(R0->encoding() == R_R0_enc && R30->encoding() == R_R30_enc, "right coding");
+  assert(Rthread->encoding() == R_R28_enc, "right coding");
+  assert(SP->encoding() == R_SP_enc, "right coding");
+  Register result = as_Register(register_encoding);
+  return result;
+}
+
+// Maps an ADL register encoding to the corresponding FloatRegister.
+// The assert checks that the assembler's encodings for V0 and V31 agree with
+// the generated R_V0_enc / R_V31_enc constants.
+static FloatRegister reg_to_FloatRegister_object(int register_encoding) {
+  assert(V0->encoding() == R_V0_enc && V31->encoding() == R_V31_enc, "right coding");
+  FloatRegister result = as_FloatRegister(register_encoding);
+  return result;
+}
+
+// Storage for the register masks returned by the dynamic reg_class
+// definitions. They are default-constructed here and given their contents in
+// Compile::pd_compiler2_init().
+RegMask _INT_REG_mask;
+RegMask _PTR_REG_mask;
+RegMask _SFLT_REG_mask;
+RegMask _DFLT_REG_mask;
+RegMask _VECTORX_REG_mask;
+RegMask _RMEMCOPY_REG_mask;
+RegMask _SP_PTR_REG_mask;
+RegMask _SPILLP_REG_mask;
+RegMask _STR_REG_mask;
+RegMask _STR_PTR_REG_mask;
+
+// OptoReg names of special registers; -1 until Compile::pd_compiler2_init()
+// assigns them.
+OptoReg::Name R_Ricklass_num = -1;
+OptoReg::Name R_Rmethod_num = -1;
+OptoReg::Name R_tls_num = -1;
+OptoReg::Name R_Rtemp_num = -1;
+OptoReg::Name R_Rheap_base_num = -1;
+
+// Instruction count added to the call size in
+// MachCallDynamicJavaNode::ret_addr_offset(); -1 until
+// Compile::pd_compiler2_init() sets it to 4 or 1.
+static int mov_oop_size = -1;
+
+#ifdef ASSERT
+// Debug-only helper: two masks are the same when neither has a register the
+// other lacks, i.e. both set differences are empty.
+static bool same_mask(const RegMask &a, const RegMask &b) {
+  RegMask only_in_a = a;
+  only_in_a.SUBTRACT(b);
+  if (only_in_a.Size() != 0) {
+    return false;
+  }
+  RegMask only_in_b = b;
+  only_in_b.SUBTRACT(a);
+  return only_in_b.Size() == 0;
+}
+#endif
+
+// Platform-dependent C2 initialization: resolves the OptoReg names of the
+// special registers, builds the RegMask globals returned by the dynamic
+// reg_class definitions, and picks mov_oop_size.
+// The mask statements depend on their order: several masks are copied from
+// another mask before or after specific registers are inserted.
+void Compile::pd_compiler2_init() {
+
+ // Resolve the OptoReg names of the special registers.
+ R_Ricklass_num = OptoReg::as_OptoReg(Ricklass->as_VMReg());
+ R_Rmethod_num = OptoReg::as_OptoReg(Rmethod->as_VMReg());
+ R_tls_num = OptoReg::as_OptoReg(Rthread->as_VMReg());
+ R_Rtemp_num = OptoReg::as_OptoReg(Rtemp->as_VMReg());
+ R_Rheap_base_num = OptoReg::as_OptoReg(Rheap_base->as_VMReg());
+
+ // INT_REG = int_reg_all minus TLS, SP, Rtemp and (with compressed oops)
+ // Rheap_base.
+ _INT_REG_mask = _INT_REG_ALL_mask;
+ _INT_REG_mask.Remove(R_tls_num);
+ _INT_REG_mask.Remove(R_SP_num);
+ if (UseCompressedOops) {
+ _INT_REG_mask.Remove(R_Rheap_base_num);
+ }
+ // Remove Rtemp because safepoint poll can trash it
+ // (see SharedRuntime::generate_handler_blob)
+ _INT_REG_mask.Remove(R_Rtemp_num);
+
+ // PTR_REG = INT_REG widened with smear_to_sets(2).
+ _PTR_REG_mask = _INT_REG_mask;
+ _PTR_REG_mask.smear_to_sets(2);
+
+ // STR_REG = INT_REG+ZR
+ // SPILLP_REG = INT_REG+SP
+ // SP_PTR_REG = INT_REG+SP+TLS
+ _STR_REG_mask = _INT_REG_mask;
+ // Copied before ZR is inserted, so SP_PTR_REG never contains ZR.
+ _SP_PTR_REG_mask = _STR_REG_mask;
+ _STR_REG_mask.Insert(R_ZR_num);
+ _SP_PTR_REG_mask.Insert(R_SP_num);
+ // Copied before TLS is inserted, so SPILLP_REG never contains TLS.
+ _SPILLP_REG_mask = _SP_PTR_REG_mask;
+ _SP_PTR_REG_mask.Insert(R_tls_num);
+ _STR_PTR_REG_mask = _STR_REG_mask;
+ // The pointer-flavoured masks are widened with smear_to_sets(2).
+ _STR_PTR_REG_mask.smear_to_sets(2);
+ _SP_PTR_REG_mask.smear_to_sets(2);
+ _SPILLP_REG_mask.smear_to_sets(2);
+
+ // RMEMCOPY_REG holds only the first slot of the memcopy register.
+ _RMEMCOPY_REG_mask = RegMask(R_mem_copy_lo_num);
+assert(OptoReg::as_OptoReg(Rmemcopy->as_VMReg()) == R_mem_copy_lo_num, "!");
+
+ // SFLT_REG = sflt_reg_0 minus the memcopy register; DFLT_REG and
+ // VECTORX_REG are SFLT_REG widened with smear_to_sets(2) and
+ // smear_to_sets(4) respectively.
+ _SFLT_REG_mask = _SFLT_REG_0_mask;
+ _SFLT_REG_mask.SUBTRACT(_RMEMCOPY_REG_mask);
+ _DFLT_REG_mask = _SFLT_REG_mask;
+ _DFLT_REG_mask.smear_to_sets(2);
+ _VECTORX_REG_mask = _SFLT_REG_mask;
+ _VECTORX_REG_mask.smear_to_sets(4);
+ // The computed vector mask must equal the statically listed vectorx_reg_0.
+ assert(same_mask(_VECTORX_REG_mask, _VECTORX_REG_0_mask), "!");
+
+#ifdef ASSERT
+ // Cross-check: smearing SFLT_REG_mask() to pairs must reproduce DFLT_REG.
+ RegMask r((RegMask *)&SFLT_REG_mask());
+ r.smear_to_sets(2);
+ assert(same_mask(r, _DFLT_REG_mask), "!");
+#endif
+
+ // mov_oop_size is an instruction count: 4 when moves are preferred over a
+ // literal load, otherwise 1.
+ if (VM_Version::prefer_moves_over_load_literal()) {
+ mov_oop_size = 4;
+ } else {
+ mov_oop_size = 1;
+ }
+
+ assert(Matcher::interpreter_method_oop_reg_encode() == Rmethod->encoding(), "should be");
+}
+
+// Picks a value for 'imm' that passes is_limmL(), if one of the candidates
+// below does. Candidates are tried in this order and the first hit is
+// returned:
+//   1. imm itself;
+//   2. the low n bits of imm with every higher bit cleared;
+//   3. the low n bits of imm with the top (BitsPerWord - n) bits set.
+// When no candidate passes, imm is returned unchanged, so callers must still
+// test the result with is_limmL().
+//
+// TODO: a fourth strategy -- replicating the low bits across the word at a
+// power-of-2 field width -- was sketched here under '#if 0'. The sketch did
+// not compile (it used an undeclared 'immx' and a misspelled 'intrptr_t') and
+// has been dropped; reimplement it if the extra coverage is needed.
+uintx limmL_low(uintx imm, int n) {
+  // 1: try as is
+  if (is_limmL(imm)) {
+    return imm;
+  }
+  // 2: try low bits + all 0's
+  uintx imm0 = imm & right_n_bits(n);
+  if (is_limmL(imm0)) {
+    return imm0;
+  }
+  // 3: try low bits + all 1's
+  uintx imm1 = imm0 | left_n_bits(BitsPerWord - n);
+  if (is_limmL(imm1)) {
+    return imm1;
+  }
+  // No candidate passed: hand back the original value.
+  return imm;
+}
+
+// Builds an Address from the raw (base, index, scale, disp) encoding.
+//   base  == 0xff        selects SP as the base register
+//   index == 0xff        means "no index register": base + disp, scale must be 0
+//   disp  == 0x7fffffff  with an index register: the index is sign-extended
+//                        (ex_sxtw); otherwise disp must be 0 and ex_lsl is used
+// A relocation spec is attached when disp_reloc is not relocInfo::none.
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
+  RelocationHolder rspec;
+  if (disp_reloc != relocInfo::none) {
+    rspec = Relocation::spec_simple(disp_reloc);
+  }
+
+  Register base_reg = (base == 0xff) ? SP : as_Register(base);
+
+  // No index register: plain base + displacement.
+  if (index == 0xff) {
+    assert(scale == 0, "not supported");
+    Address plain(base_reg, disp);
+    plain._rspec = rspec;
+    return plain;
+  }
+
+  Register index_reg = as_Register(index);
+
+  // Indexed form with a sign-extended index.
+  if (disp == 0x7fffffff) { // special value to indicate sign-extend
+    Address extended(base_reg, index_reg, ex_sxtw, scale);
+    extended._rspec = rspec;
+    return extended;
+  }
+
+  // Indexed form with a shifted index; no displacement is allowed.
+  assert(disp == 0, "unsupported");
+  Address shifted(base_reg, index_reg, ex_lsl, scale);
+  shifted._rspec = rspec;
+  return shifted;
+}
+
+// Location of compiled Java return values. Same as C
+// Both tables are indexed by the ideal register opcode and must stay in step.
+// NOTE(review): the eight slots presumably correspond to opcodes 0, 1,
+// Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL -- confirm against the
+// opcode numbering.
+OptoRegPair c2::return_value(int ideal_reg) {
+  assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
+  static int low_half[Op_RegL+1] = {
+    0,
+    0,
+    OptoReg::Bad,
+    R_R0_num,
+    R_R0_num,
+    R_hf_ret_lo_num,
+    R_hf_ret_lo_num,
+    R_R0_num
+  };
+  static int high_half[Op_RegL+1] = {
+    0,
+    0,
+    OptoReg::Bad,
+    OptoReg::Bad,
+    R_R0x_num,
+    OptoReg::Bad,
+    R_hf_ret_hi_num,
+    R_R0x_num
+  };
+  const int hi_slot = high_half[ideal_reg];
+  const int lo_slot = low_half[ideal_reg];
+  return OptoRegPair( hi_slot, lo_slot);
+}
+
+// !!!!! Special hack to get all type of calls to specify the byte offset
+// from the start of the call to the point where the return address
+// will point.
+
+// Static Java call. With a target method the call is patchable and is far
+// exactly when the code cache is not reachable; without one, farness comes
+// from maybe_far_call(). A method-handle invoke adds one instruction.
+int MachCallStaticJavaNode::ret_addr_offset() {
+  const bool has_method = (_method != NULL);
+  bool far;
+  if (has_method) {
+    far = !cache_reachable();
+  } else {
+    far = maybe_far_call(this);
+  }
+  int instructions = MacroAssembler::call_size(entry_point(), far, has_method);
+  if (_method_handle_invoke) {
+    instructions += 1;
+  }
+  return instructions * NativeInstruction::instruction_size;
+}
+
+// Dynamic Java call: mov_oop_size instructions followed by a patchable call
+// that is far exactly when the code cache is not reachable.
+int MachCallDynamicJavaNode::ret_addr_offset() {
+  const bool needs_far_call = !cache_reachable();
+  const int call_instructions = MacroAssembler::call_size(entry_point(), needs_far_call, true);
+  const int total_instructions = mov_oop_size + call_instructions;
+  return total_instructions * NativeInstruction::instruction_size;
+}
+
+// Runtime call: non-leaf calls are preceded by two instructions,
+//   adr $temp, ret_addr
+//   str $temp, [SP + last_java_pc]
+// followed by the non-patchable call itself (bl or mov_slow; blr).
+// TODO: check if Leaf nodes also need this
+int MachCallRuntimeNode::ret_addr_offset() {
+  const int prologue_instructions = is_MachCallLeaf() ? 0 : 2;
+  const bool far = maybe_far_call(this);
+  const int call_instructions = MacroAssembler::call_size(entry_point(), far, false);
+  return (prologue_instructions + call_instructions) * NativeInstruction::instruction_size;
+}
+
+%}
+
+// The intptr_t operand types, defined by textual substitution.
+// (Cf. opto/type.hpp. This lets us avoid many, many other ifdefs.)
+// In this description every "X" name maps to its long ("L") counterpart,
+// except shimmX, which maps to immU6.
+#define immX immL
+#define iRegX iRegL
+#define aimmX aimmL
+#define limmX limmL
+#define immX9 immL9
+#define LShiftX LShiftL
+#define shimmX immU6
+
+// store_RegLd is an alias for store_RegL.
+#define store_RegLd store_RegL
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Operand Attributes-------------------------------------------------
+// NOTE(review): the value 1 is presumably the cost used when an operand does
+// not specify op_cost itself -- confirm against the ADLC documentation.
+op_attrib op_cost(1); // Required cost attribute
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+// Immediate Operands
+
+// Integer Immediate: 9-bit (including sign bit), so same as immI8?
+// FIXME: simm9 allows -256, but immI8 doesn't...
+// Accepts ConI values for which Assembler::is_imm_in_range(value, 9, 0) holds.
+operand simm9() %{
+ predicate(Assembler::is_imm_in_range(n->get_int(), 9, 0));
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+
+// Integer immediate for which
+// Assembler::is_unsigned_imm_in_range(value, 12, 0) holds.
+operand uimm12() %{
+ predicate(Assembler::is_unsigned_imm_in_range(n->get_int(), 12, 0));
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer immediate usable as an arithmetic immediate: either the null
+// pointer, or a value accepted by is_aimm() that carries no relocation
+// (reloc() == relocInfo::none).
+operand aimmP() %{
+ predicate(n->get_ptr() == 0 || (is_aimm(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none));
+ match(ConP);
+
+ op_cost(0);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: 12-bit - for addressing mode
+// Accepts ConL values strictly between -4096 and 4096.
+operand immL12() %{
+ predicate((-4096 < n->get_long()) && (n->get_long() < 4096));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: 9-bit - for addressing mode
+// Accepts ConL values from -256 (inclusive) up to 256 (exclusive).
+operand immL9() %{
+ predicate((-256 <= n->get_long()) && (n->get_long() < 256));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer immediate for which get_int() >> 16 is zero, i.e. nothing is set
+// above the low 16 bits.
+operand immIMov() %{
+ predicate(n->get_int() >> 16 == 0);
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long immediate for which get_long() >> 16 is zero, i.e. nothing is set
+// above the low 16 bits.
+operand immLMov() %{
+ predicate(n->get_long() >> 16 == 0);
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long immediate accepted by is_uimm12() with shift 0.
+operand immUL12() %{
+ predicate(is_uimm12(n->get_long(), 0));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long immediate accepted by is_uimm12() with shift 1 (the "x2" variant).
+operand immUL12x2() %{
+ predicate(is_uimm12(n->get_long(), 1));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long immediate accepted by is_uimm12() with shift 2 (the "x4" variant).
+operand immUL12x4() %{
+ predicate(is_uimm12(n->get_long(), 2));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long immediate accepted by is_uimm12() with shift 3 (the "x8" variant).
+operand immUL12x8() %{
+ predicate(is_uimm12(n->get_long(), 3));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long immediate accepted by is_uimm12() with shift 4 (the "x16" variant).
+operand immUL12x16() %{
+ predicate(is_uimm12(n->get_long(), 4));
+ match(ConL);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Used for long shift
+// Accepts ConI values from 0 to 63 inclusive.
+operand immU6() %{
+ predicate(0 <= n->get_int() && (n->get_int() <= 63));
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Used for register extended shift
+// Accepts ConI values from 0 to 4 inclusive.
+operand immI_0_4() %{
+ predicate(0 <= n->get_int() && (n->get_int() <= 4));
+ match(ConI);
+ op_cost(0);
+
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Compressed Pointer Register
+// Allocated from the int_reg class; matches RegN and also the ZRRegN operand.
+operand iRegN() %{
+ constraint(ALLOC_IN_RC(int_reg));
+ match(RegN);
+ match(ZRRegN);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer register operand restricted to the SP_regP class (R_SP, R_SPx).
+operand SPRegP() %{
+ constraint(ALLOC_IN_RC(SP_regP));
+ match(RegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer register operand restricted to the ZR_regP class (R_ZR, R_ZRx).
+operand ZRRegP() %{
+ constraint(ALLOC_IN_RC(ZR_regP));
+ match(RegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Long register operand restricted to ZR; it reuses the two-slot ZR_regP
+// class (R_ZR, R_ZRx).
+operand ZRRegL() %{
+ constraint(ALLOC_IN_RC(ZR_regP));
+ match(RegL);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Int register operand restricted to the single-slot ZR_regI class (R_ZR).
+operand ZRRegI() %{
+ constraint(ALLOC_IN_RC(ZR_regI));
+ match(RegI);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Compressed-pointer register operand restricted to ZR; it reuses the
+// single-slot ZR_regI class (R_ZR).
+operand ZRRegN() %{
+ constraint(ALLOC_IN_RC(ZR_regI));
+ match(RegN);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/assembler_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "ci/ciEnv.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jvm_misc.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/hashtable.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif // INCLUDE_ALL_GCS
+
+// Returns the byte used to fill unused code space. Four consecutive fill
+// bytes form the word 0xffffffff, which is an illegal instruction.
+int AbstractAssembler::code_fill_byte() {
+  const int illegal_instruction_byte = 0xff;
+  return illegal_instruction_byte;
+}
+
+#ifdef ASSERT
+// Platform-dependent instruction mark check, compiled only into ASSERT
+// builds. This port always answers false.
+bool AbstractAssembler::pd_check_instruction_mark() {
+  return false;
+}
+#endif // ASSERT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/assembler_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_ASSEMBLER_ARM_HPP
+#define CPU_ARM_VM_ASSEMBLER_ARM_HPP
+
+#include "utilities/macros.hpp"
+
+// Condition codes. The sixteen named conditions are numbered implicitly from
+// 0 (eq) to 15 (nv) in declaration order, so number_of_conditions is 16.
+// The 32-bit assembler places these values in the top bits of an instruction
+// word (cond << 28), and Assembler::inverse() relies on each condition and
+// its opposite differing only in the lowest bit.
+enum AsmCondition {
+ eq, ne, cs, cc, mi, pl, vs, vc,
+ hi, ls, ge, lt, gt, le, al, nv,
+ number_of_conditions,
+ // alternative names
+ hs = cs,
+ lo = cc
+};
+
+// Shift kinds, numbered 0..3 in declaration order (lsl == 0). The numeric
+// value is inserted directly into instruction words (shift << 5) by the
+// 32-bit operand and address encoders.
+enum AsmShift {
+ lsl, lsr, asr, ror
+};
+
+#ifdef AARCH64
+// Register extend operations (AArch64 only), numbered 0..7 in declaration
+// order. ex_lsl is an alias for ex_uxtx and is the default extend used by
+// Address.
+enum AsmExtendOp {
+ ex_uxtb, ex_uxth, ex_uxtw, ex_uxtx,
+ ex_sxtb, ex_sxth, ex_sxtw, ex_sxtx,
+
+ ex_lsl = ex_uxtx
+};
+#endif
+
+// Addressing mode selector stored in Address::_mode.
+// On AArch64 the values are small 2-bit codes. On 32-bit ARM they are
+// pre-positioned instruction bits (bit 24, and bit 21 for pre_indexed) that
+// Address::encoding2()/encoding3() OR directly into the instruction word.
+// NOTE(review): bits 24 and 21 are presumably the ARM P and W bits -- confirm
+// against the ARM Architecture Reference Manual.
+enum AsmOffset {
+#ifdef AARCH64
+ basic_offset = 0b00,
+ pre_indexed = 0b11,
+ post_indexed = 0b01
+#else
+ basic_offset = 1 << 24,
+ pre_indexed = 1 << 24 | 1 << 21,
+ post_indexed = 0
+#endif
+};
+
+
+#ifndef AARCH64
+// Writeback selector for the load/store multiple emitters (32-bit ARM only).
+// The numeric value (0 or 1) is shifted into bit 21 of the instruction word.
+enum AsmWriteback {
+ no_writeback,
+ writeback
+};
+
+// Selects whether a register index is subtracted from or added to the base
+// (32-bit ARM only). Address::encoding2()/encoding3() shift the value into
+// bit 23 of the instruction word.
+enum AsmOffsetOp {
+ sub_offset = 0,
+ add_offset = 1
+};
+#endif
+
+
+// ARM Addressing Modes 2 and 3 - Load and store
+//
+// Describes a memory operand as a base register combined with either an
+// immediate displacement or an index register, together with the addressing
+// mode (basic, pre-indexed or post-indexed). The encoding*() methods turn
+// that description into the addressing bits of one instruction class.
+class Address VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _base;            // base register; noreg for a default-constructed address
+  Register _index;           // index register, or noreg when a displacement is used
+  int _disp;                 // immediate displacement; the register-index constructors set it to 0
+  AsmOffset _mode;           // basic_offset, pre_indexed or post_indexed
+  RelocationHolder _rspec;   // relocation spec reported by rtype()/rspec()
+  int _shift_imm;            // shift amount applied to the index register
+#ifdef AARCH64
+  AsmExtendOp _extend;       // extend operation applied to the index register
+#else
+  AsmShift _shift;           // shift kind applied to the index register
+  AsmOffsetOp _offset_op;    // whether the index register is added or subtracted
+
+  // Magnitude of x, used for the offset fields of the 32-bit encodings.
+  static inline int abs(int x) { return x < 0 ? -x : x; }
+  // Direction bit of the 32-bit encodings: 1 for a non-negative x, else 0.
+  static inline int up (int x) { return x < 0 ?  0 : 1; }
+#endif
+
+  // Platform-neutral spelling of "plain left shift" used by the indexed_*()
+  // factories below.
+#ifdef AARCH64
+  static const AsmExtendOp LSL = ex_lsl;
+#else
+  static const AsmShift LSL = lsl;
+#endif
+
+ public:
+  // Creates an address without a base register. The remaining fields get the
+  // same defaults as Address(Register, int, AsmOffset), so that copying or
+  // inspecting a default-constructed Address never reads indeterminate values.
+  Address() : _base(noreg) {
+    _index = noreg;
+    _disp = 0;
+    _mode = basic_offset;
+    _shift_imm = 0;
+#ifdef AARCH64
+    _extend = ex_lsl;
+#else
+    _shift = lsl;
+    _offset_op = add_offset;
+#endif
+  }
+
+  // [rn + offset], with the given addressing mode.
+  Address(Register rn, int offset = 0, AsmOffset mode = basic_offset) {
+    _base = rn;
+    _index = noreg;
+    _disp = offset;
+    _mode = mode;
+    _shift_imm = 0;
+#ifdef AARCH64
+    _extend = ex_lsl;
+#else
+    _shift = lsl;
+    _offset_op = add_offset;
+#endif
+  }
+
+#ifdef ASSERT
+  // Same as the constructor above, for a ByteSize offset. Only declared in
+  // ASSERT builds.
+  Address(Register rn, ByteSize offset, AsmOffset mode = basic_offset) {
+    _base = rn;
+    _index = noreg;
+    _disp = in_bytes(offset);
+    _mode = mode;
+    _shift_imm = 0;
+#ifdef AARCH64
+    _extend = ex_lsl;
+#else
+    _shift = lsl;
+    _offset_op = add_offset;
+#endif
+  }
+#endif
+
+#ifdef AARCH64
+  // [rn + extend(rm) << shift_imm]. Only uxtw, lsl (uxtx), sxtw and sxtx are
+  // accepted as extend, and the shift amount must be in [0, 4].
+  Address(Register rn, Register rm, AsmExtendOp extend = ex_lsl, int shift_imm = 0) {
+    assert ((extend == ex_uxtw) || (extend == ex_lsl) || (extend == ex_sxtw) || (extend == ex_sxtx), "invalid extend for address mode");
+    assert ((0 <= shift_imm) && (shift_imm <= 4), "shift amount is out of range");
+    _base = rn;
+    _index = rm;
+    _disp = 0;
+    _mode = basic_offset;
+    _extend = extend;
+    _shift_imm = shift_imm;
+  }
+#else
+  // [rn +/- (rm shifted by shift_imm)], with the given addressing mode.
+  Address(Register rn, Register rm, AsmShift shift = lsl,
+          int shift_imm = 0, AsmOffset mode = basic_offset,
+          AsmOffsetOp offset_op = add_offset) {
+    _base = rn;
+    _index = rm;
+    _disp = 0;
+    _shift = shift;
+    _shift_imm = shift_imm;
+    _mode = mode;
+    _offset_op = offset_op;
+  }
+
+  // [rn + offset] where offset is either a register or a constant. A constant
+  // is pre-shifted here and stored as a plain displacement; only lsl is
+  // supported for constants.
+  Address(Register rn, RegisterOrConstant offset, AsmShift shift = lsl,
+          int shift_imm = 0) {
+    _base = rn;
+    if (offset.is_constant()) {
+      _index = noreg;
+      {
+        int off = (int) offset.as_constant();
+        if (shift_imm != 0) {
+          assert(shift == lsl,"shift not yet encoded");
+          off = off << shift_imm;
+        }
+        _disp = off;
+      }
+      _shift = lsl;
+      _shift_imm = 0;
+    } else {
+      _index = offset.as_register();
+      _disp = 0;
+      _shift = shift;
+      _shift_imm = shift_imm;
+    }
+    _mode = basic_offset;
+    _offset_op = add_offset;
+  }
+#endif // AARCH64
+
+  // [base + index * wordSize]
+  static Address indexed_ptr(Register base, Register index) {
+    return Address(base, index, LSL, LogBytesPerWord);
+  }
+
+  // [base + index * BytesPerInt]
+  static Address indexed_32(Register base, Register index) {
+    return Address(base, index, LSL, LogBytesPerInt);
+  }
+
+  // [base + index * BytesPerHeapOop]
+  static Address indexed_oop(Register base, Register index) {
+    return Address(base, index, LSL, LogBytesPerHeapOop);
+  }
+
+  // Returns a copy of this address with disp added to the displacement.
+  // A non-zero disp is only allowed when there is no index register.
+  Address plus_disp(int disp) const {
+    assert((disp == 0) || (_index == noreg),"can't apply an offset to a register indexed address");
+    Address a = (*this);
+    a._disp += disp;
+    return a;
+  }
+
+  // Returns a copy of this address with the base register replaced.
+  Address rebase(Register new_base) const {
+    Address a = (*this);
+    a._base = new_base;
+    return a;
+  }
+
+#ifdef AARCH64
+  // Addressing bits for SIMD load/store: the index selector in bits 16 and up,
+  // the base register (SP allowed) in bits 5 and up. A post-indexed access
+  // without an index register uses index value 31.
+  int encoding_simd() const {
+    assert(_index != SP, "encoding constraint");
+    assert(_disp == 0 || _mode == post_indexed, "encoding constraint");
+    assert(_index == noreg || _mode == basic_offset, "encoding constraint");
+    assert(_mode == basic_offset || _mode == post_indexed, "encoding constraint");
+    assert(_extend == ex_lsl, "encoding constraint");
+    int index;
+    if (_index == noreg) {
+      if (_mode == post_indexed)
+        index = 0b100 << 5 | 31;
+      else
+        index = 0;
+    } else {
+      index = 0b100 << 5 | _index->encoding();
+    }
+    return index << 16 | _base->encoding_with_sp() << 5;
+  }
+#else /* !AARCH64 */
+  // Addressing bits used by the ldr/ldrb/str/strb and pld/pldw emitters.
+  // Immediate form: mode bits, direction bit (bit 23), base register
+  // (bits 16 and up) and the displacement magnitude, which must be below 4096.
+  // Register form: bit 25 set, add/subtract selector in bit 23, shift amount
+  // (bits 7 and up), shift kind (bits 5 and up) and index register.
+  int encoding2() const {
+    assert(_mode == basic_offset || _base != PC, "unpredictable instruction");
+    if (_index == noreg) {
+      assert(-4096 < _disp && _disp < 4096, "encoding constraint");
+      return _mode | up(_disp) << 23 | _base->encoding() << 16 | abs(_disp);
+    } else {
+      assert(_index != PC && (_mode == basic_offset || _index != _base), "unpredictable instruction");
+      assert(_disp == 0 && (_shift_imm >> 5) == 0, "encoding constraint");
+      return 1 << 25 | _offset_op << 23 | _mode | _base->encoding() << 16 |
+             _shift_imm << 7 | _shift << 5 | _index->encoding();
+    }
+  }
+
+  // Addressing bits used by the halfword/signed/doubleword emitters.
+  // Immediate form: bit 22 set and the displacement magnitude (below 256)
+  // split into a high nibble at bits 8..11 and a low nibble at bits 0..3.
+  // Register form: unshifted index register only.
+  int encoding3() const {
+    assert(_mode == basic_offset || _base != PC, "unpredictable instruction");
+    if (_index == noreg) {
+      assert(-256 < _disp && _disp < 256, "encoding constraint");
+      // The low nibble is parenthesized explicitly; '&' already binds tighter
+      // than '|', so this is the same value the unparenthesized form produced.
+      return _mode | up(_disp) << 23 | 1 << 22 | _base->encoding() << 16 |
+             (abs(_disp) & 0xf0) << 4 | (abs(_disp) & 0x0f);
+    } else {
+      assert(_index != PC && (_mode == basic_offset || _index != _base), "unpredictable instruction");
+      assert(_disp == 0 && _shift == lsl && _shift_imm == 0, "encoding constraint");
+      return _mode | _offset_op << 23 | _base->encoding() << 16 | _index->encoding();
+    }
+  }
+
+  // Addressing bits for the exclusive load/store emitters: a plain base
+  // register (not PC), no index and no displacement.
+  int encoding_ex() const {
+    assert(_index == noreg && _disp == 0 && _mode == basic_offset &&
+           _base != PC, "encoding constraint");
+    return _base->encoding() << 16;
+  }
+
+  // Addressing bits for VFP load/store: base register, direction bit (bit 23)
+  // and a word-aligned displacement below 1024 bytes, stored divided by 4.
+  int encoding_vfp() const {
+    assert(_index == noreg && _mode == basic_offset, "encoding constraint");
+    assert(-1024 < _disp && _disp < 1024 && (_disp & 3) == 0, "encoding constraint");
+    return _base->encoding() << 16 | up(_disp) << 23 | abs(_disp) >> 2;
+  }
+
+  // Addressing bits for SIMD load/store: base register in bits 16 and up and
+  // an index selector in the low bits. Without an index register the selector
+  // is 13 for post-indexed and 15 for basic addressing.
+  int encoding_simd() const {
+    assert(_base != PC, "encoding constraint");
+    assert(_index != PC && _index != SP, "encoding constraint");
+    assert(_disp == 0, "encoding constraint");
+    assert(_shift == 0, "encoding constraint");
+    assert(_index == noreg || _mode == basic_offset, "encoding constraint");
+    assert(_mode == basic_offset || _mode == post_indexed, "encoding constraint");
+    int index;
+    if (_index == noreg) {
+      if (_mode == post_indexed)
+        index = 13;
+      else
+        index = 15;
+    } else {
+      index = _index->encoding();
+    }
+
+    return _base->encoding() << 16 | index;
+  }
+#endif // !AARCH64
+
+  Register base() const {
+    return _base;
+  }
+
+  Register index() const {
+    return _index;
+  }
+
+  int disp() const {
+    return _disp;
+  }
+
+  AsmOffset mode() const {
+    return _mode;
+  }
+
+  int shift_imm() const {
+    return _shift_imm;
+  }
+
+#ifdef AARCH64
+  AsmExtendOp extend() const {
+    return _extend;
+  }
+#else
+  AsmShift shift() const {
+    return _shift;
+  }
+
+  AsmOffsetOp offset_op() const {
+    return _offset_op;
+  }
+#endif
+
+  // True when reg is the base or the index register of this address.
+  bool uses(Register reg) const { return _base == reg || _index == reg; }
+
+  // Relocation accessors.
+  // NOTE(review): these are not const-qualified, so they cannot be called on
+  // a const Address; left unchanged to keep the interface as is.
+  const relocInfo::relocType rtype() { return _rspec.type(); }
+  const RelocationHolder&    rspec() { return _rspec; }
+
+  // Convert the raw encoding form into the form expected by the
+  // constructor for Address.
+  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
+};
+
+#ifdef COMPILER2
+// Helper classes to detect whether a floating point constant can be
+// encoded in a fconstd or fconsts instruction, and to compute the 8-bit
+// immediate for it.
+// The conversion from the imm8, 8 bit constant, to the floating
+// point value encoding is done with either:
+// for single precision: imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5):imm8<5:0>:Zeros(19)
+// or
+// for double precision: imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,8):imm8<5:0>:Zeros(48)
+class VFP VALUE_OBJ_CLASS_SPEC {
+ private:
+  // Common view of a floating point number as sign, exponent and fraction.
+  class fpnum {
+   public:
+    virtual unsigned int f_hi4() const = 0;      // top four fraction bits
+    virtual bool f_lo_is_null() const = 0;       // true when all remaining fraction bits are zero
+    virtual int e() const = 0;                   // exponent with the bias removed
+    virtual unsigned int s() const = 0;          // sign bit
+
+    // Encodable when the exponent is in [-3, 4] and only the top four
+    // fraction bits are in use.
+    inline bool can_be_imm8() const {
+      if (e() < -3) return false;
+      if (e() > 4) return false;
+      return f_lo_is_null();
+    }
+
+    // Packs sign (bit 7), three exponent bits (bits 4..6) and the top four
+    // fraction bits (bits 0..3) into the 8-bit immediate.
+    inline unsigned char imm8() const {
+      const unsigned int sign_bits = s() << 7;
+      const int exponent_bits = ((e() - 1) & 0x7) << 4;
+      int v = sign_bits | exponent_bits | f_hi4();
+      assert((v >> 8) == 0, "overflow");
+      return v;
+    }
+  };
+
+ public:
+  // Single precision: 1 sign bit, 8 exponent bits (bias 127), 23 fraction bits.
+  class float_num : public fpnum {
+   public:
+    float_num(float v) {
+      _num.val = v;
+    }
+
+    virtual unsigned int f_hi4() const { return (_num.bits >> 19) & 0xf; }
+    virtual bool f_lo_is_null() const { return (_num.bits & 0x7ffff) == 0; }
+    virtual int e() const { return ((_num.bits >> 23) & 0xff) - 127; }
+    virtual unsigned int s() const { return _num.bits >> 31; }
+
+   private:
+    // Gives access to the bit pattern of the float value.
+    union {
+      float val;
+      unsigned int bits;
+    } _num;
+  };
+
+  // Double precision: 1 sign bit, 11 exponent bits (bias 1023), 52 fraction bits.
+  class double_num : public fpnum {
+   public:
+    double_num(double v) {
+      _num.val = v;
+    }
+
+    virtual unsigned int f_hi4() const { return (_num.bits >> 48) & 0xf; }
+    virtual bool f_lo_is_null() const { return (_num.bits & 0xffffffffffffULL) == 0; }
+    virtual int e() const { return ((_num.bits >> 52) & 0x7ff) - 1023; }
+    virtual unsigned int s() const { return _num.bits >> 63; }
+
+   private:
+    // Gives access to the bit pattern of the double value.
+    union {
+      double val;
+      unsigned long long bits;
+    } _num;
+  };
+};
+#endif
+
+#ifdef AARCH64
+#include "assembler_arm_64.hpp"
+#else
+#include "assembler_arm_32.hpp"
+#endif
+
+
+#endif // CPU_ARM_VM_ASSEMBLER_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/assembler_arm.inline.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_ASSEMBLER_ARM_INLINE_HPP
+#define CPU_ARM_VM_ASSEMBLER_ARM_INLINE_HPP
+
+
+#endif // CPU_ARM_VM_ASSEMBLER_ARM_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/assembler_arm_32.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "ci/ciEnv.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jvm_misc.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/hashtable.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif // INCLUDE_ALL_GCS
+
+#ifdef COMPILER2
+// Convert the raw encoding form into the form expected by the
+// constructor for Address.
+//
+// An index equal to PC selects the [base + disp] form, for which scale must
+// be 0. Any other index selects [base + index << scale], for which disp must
+// be 0. A relocation spec is attached when disp_reloc is not
+// relocInfo::none.
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
+  RelocationHolder reloc_spec;
+  if (disp_reloc != relocInfo::none) {
+    reloc_spec = Relocation::spec_simple(disp_reloc);
+  }
+
+  const Register index_reg = as_Register(index);
+
+  if (index_reg == PC) {
+    // displacement form
+    assert(scale == 0, "not supported");
+    Address result(as_Register(base), disp);
+    result._rspec = reloc_spec;
+    return result;
+  }
+
+  // scaled register index form
+  assert(disp == 0, "unsupported");
+  Address result(as_Register(base), index_reg, lsl, scale);
+  result._rspec = reloc_spec;
+  return result;
+}
+#endif
+
+// Encodes imm as an 8-bit value plus an even rotation and stores the result,
+// with the immediate flag (bit 25) set, in _encoding.
+//
+// The loop handles an 8-bit window located at an even bit position from 2 to
+// 24. The fallback handles the single wrap-around layout in which only the
+// top four and bottom four bits of imm may be set. Any other value trips the
+// assert; callers are expected to check is_rotated_imm() first.
+void AsmOperand::initialize_rotated_imm(unsigned int imm) {
+  for (int shift = 2; shift <= 24; shift += 2) {
+    // The mask is built from an unsigned literal: 0xff << 24 does not fit in
+    // a signed int, so the signed form of this shift overflows.
+    if ((imm & ~(0xffu << shift)) == 0) {
+      // (32 - shift) << 7 places (32 - shift) / 2 in the rotation field at bit 8.
+      _encoding = 1 << 25 | (32 - shift) << 7 | imm >> shift;
+      return;
+    }
+  }
+  assert((imm & 0x0ffffff0) == 0, "too complicated constant: %d (%x)", imm, imm);
+  // Rotation by 4: top nibble of imm goes to bits 0..3, bottom nibble to bits 4..7.
+  _encoding = 1 << 25 | 4 << 7 | imm >> 28 | imm << 4;
+}
+
+// Returns true when imm has one of the layouts this assembler can encode as
+// an immediate operand:
+//  - a plain 8-bit value,
+//  - an 8-bit window located at an even bit position from 2 to 24, or
+//  - only the top four and/or bottom four bits set.
+bool AsmOperand::is_rotated_imm(unsigned int imm) {
+  if ((imm >> 8) == 0) {
+    return true;
+  }
+  for (int shift = 2; shift <= 24; shift += 2) {
+    // The mask is built from an unsigned literal: 0xff << 24 does not fit in
+    // a signed int, so the signed form of this shift overflows.
+    if ((imm & ~(0xffu << shift)) == 0) {
+      return true;
+    }
+  }
+  return (imm & 0x0ffffff0) == 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/assembler_arm_32.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,1245 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_ASSEMBLER_ARM_32_HPP
+#define CPU_ARM_VM_ASSEMBLER_ARM_32_HPP
+
+// ARM Addressing Mode 1 - Data processing operands
+//
+// Holds the pre-encoded operand bits of a data processing instruction: a
+// register, a register shifted by an immediate or by another register, or an
+// immediate. Immediates are marked by bit 25 of the encoding.
+class AsmOperand VALUE_OBJ_CLASS_SPEC {
+ private:
+  int _encoding;
+
+  // Out-of-line encoder for immediates that do not fit in 8 bits.
+  void initialize_rotated_imm(unsigned int imm);
+
+  // Encodes an immediate value.
+  void encode(int imm_8) {
+    const bool needs_rotation = (imm_8 >> 8) != 0;
+    if (needs_rotation) {
+      initialize_rotated_imm((unsigned int)imm_8);  // slow case
+      return;
+    }
+    _encoding = 1 << 25 | imm_8;  // the most common case
+  }
+
+  // Encodes a register shifted by a 5-bit immediate amount.
+  void encode(Register rm, AsmShift shift, int shift_imm) {
+    assert((shift_imm >> 5) == 0, "encoding constraint");
+    const int shift_bits = shift_imm << 7 | shift << 5;
+    _encoding = shift_bits | rm->encoding();
+  }
+
+ public:
+
+  // Plain register operand.
+  AsmOperand(Register reg) {
+    _encoding = reg->encoding();
+  }
+
+  // Immediate operand.
+  AsmOperand(int imm_8) {
+    encode(imm_8);
+  }
+
+#ifdef ASSERT
+  // Immediate operand given as a ByteSize. Only declared in ASSERT builds.
+  AsmOperand(ByteSize bytesize_8) {
+    encode(in_bytes(bytesize_8));
+  }
+#endif // ASSERT
+
+  // Register shifted by an immediate amount.
+  AsmOperand(Register rm, AsmShift shift, int shift_imm) {
+    encode(rm, shift, shift_imm);
+  }
+
+  // Register shifted by the amount held in register rs.
+  AsmOperand(Register rm, AsmShift shift, Register rs) {
+    assert(rm != PC && rs != PC, "unpredictable instruction");
+    const int shift_bits = rs->encoding() << 8 | shift << 5 | 1 << 4;
+    _encoding = shift_bits | rm->encoding();
+  }
+
+  // Register-or-constant operand. A constant is pre-shifted left by
+  // shift_imm and encoded as an immediate; only lsl is supported for it.
+  AsmOperand(RegisterOrConstant offset, AsmShift shift = lsl, int shift_imm = 0) {
+    if (!offset.is_register()) {
+      assert(shift == lsl,"shift type not yet encoded");
+      const int imm_8 = ((int)offset.as_constant()) << shift_imm;
+      encode(imm_8);
+      return;
+    }
+    encode(offset.as_register(), shift, shift_imm);
+  }
+
+  int encoding() const {
+    return _encoding;
+  }
+
+  // True when this operand was encoded as an immediate (bit 25 set).
+  bool is_immediate() const {
+    return (_encoding & (1 << 25)) != 0;
+  }
+
+  // Register held in the low four bits; only valid for non-immediates.
+  Register base_register() const {
+    assert(!is_immediate(), "is_immediate, no base reg");
+    const int reg_number = _encoding & 15;
+    return as_Register(reg_number);
+  }
+
+  static bool is_rotated_imm(unsigned int imm);
+};
+
+
+// ARM Addressing Mode 4 - Load and store multiple
+//
+// A set of registers kept as a bit mask in which bit N stands for the
+// register whose encoding is N.
+class RegisterSet VALUE_OBJ_CLASS_SPEC {
+ private:
+  int _encoding;
+
+  // Wraps an already computed bit mask.
+  RegisterSet(int encoding) {
+    _encoding = encoding;
+  }
+
+ public:
+
+  // Set holding the single register reg.
+  RegisterSet(Register reg) {
+    _encoding = 1 << reg->encoding();
+  }
+
+  // Empty set.
+  RegisterSet() {
+    _encoding = 0;
+  }
+
+  // Set holding every register from first to last inclusive.
+  RegisterSet(Register first, Register last) {
+    assert(first < last, "encoding constraint");
+    const int up_to_last   = 1 << (last->encoding() + 1);
+    const int before_first = 1 << first->encoding();
+    _encoding = up_to_last - before_first;
+  }
+
+  // Union of two sets, which must not overlap.
+  friend RegisterSet operator | (const RegisterSet set1, const RegisterSet set2) {
+    assert((set1._encoding & set2._encoding) == 0,
+           "encoding constraint");
+    const int merged = set1._encoding | set2._encoding;
+    return RegisterSet(merged);
+  }
+
+  int encoding() const {
+    return _encoding;
+  }
+
+  bool contains(Register reg) const {
+    const int reg_bit = 1 << reg->encoding();
+    return (_encoding & reg_bit) != 0;
+  }
+
+  // number of registers in the set
+  int size() const {
+    int count = 0;
+    // each iteration clears the lowest bit that is still set
+    for (unsigned int bits = (unsigned int) _encoding; bits != 0; bits &= bits - 1) {
+      count++;
+    }
+    return count;
+  }
+};
+
+// R9ifScratched expands to a RegisterSet holding R9 when R9_IS_SCRATCHED is
+// non-zero and to an empty RegisterSet otherwise, so it can be OR-ed into
+// other register sets unconditionally.
+#if R9_IS_SCRATCHED
+#define R9ifScratched RegisterSet(R9)
+#else
+#define R9ifScratched RegisterSet()
+#endif
+
+// ARM Addressing Mode 5 - Load and store multiple VFP registers
+//
+// A run of consecutive VFP registers described by its first register and a
+// count, kept in pre-encoded form.
+class FloatRegisterSet VALUE_OBJ_CLASS_SPEC {
+ private:
+  int _encoding;
+
+  // Packs the first register and the register count: hi_bits() goes to
+  // bits 12 and up, the count to the low bits, and bit 22 receives lo_bit()
+  // or, when hi_bit() is set, hi_bit().
+  static int pack(FloatRegister first, int count) {
+    if (first->hi_bit() == 0) {
+      return first->hi_bits() << 12 | first->lo_bit() << 22 | count;
+    }
+    assert (first->lo_bit() == 0, "impossible encoding");
+    return first->hi_bits() << 12 | first->hi_bit() << 22 | count;
+  }
+
+ public:
+
+  // Set holding the single register reg.
+  FloatRegisterSet(FloatRegister reg) {
+    _encoding = pack(reg, 1);
+  }
+
+  // Set holding count registers starting at first.
+  FloatRegisterSet(FloatRegister first, int count) {
+    assert(count >= 1, "encoding constraint");
+    _encoding = pack(first, count);
+  }
+
+  // Encoding for single precision transfers.
+  int encoding_s() const {
+    return _encoding;
+  }
+
+  // Encoding for double precision transfers: same upper bits, with the count
+  // in the low byte doubled.
+  int encoding_d() const {
+    const int count = _encoding & 0xFF;
+    assert(count <= 16, "no more than 16 double registers" );
+    return (_encoding & 0xFFFFFF00) | (count << 1);
+  }
+
+};
+
+
+class Assembler : public AbstractAssembler {
+
+ public:
+
+ // Instruction size in bytes (1 << 2 == 4) and its base-2 logarithm.
+ static const int LogInstructionSize = 2;
+ static const int InstructionSize = 1 << LogInstructionSize;
+
+  // Returns the opposite of cond, obtained by flipping the lowest bit of the
+  // condition code (eq <-> ne, cs <-> cc, ...). al and nv have no opposite.
+  static inline AsmCondition inverse(AsmCondition cond) {
+    assert ((cond != al) && (cond != nv), "AL and NV conditions cannot be inversed");
+    const int flipped = (int)cond ^ 1;
+    return (AsmCondition)flipped;
+  }
+
+  // Tells whether value can be used as the immediate of an arithmetic
+  // (add/sub/cmp/cmn) instruction. The decision is delegated to
+  // AsmOperand::is_rotated_imm().
+  static inline bool is_arith_imm_in_range(intx value) {
+    const bool encodable = AsmOperand::is_rotated_imm(value);
+    return encodable;
+  }
+
+ // Arithmetic instructions
+
+ // Generates two emitters per mnemonic, `mnemonic` and `mnemonic##s`.
+ // Both pack cond (bits 28 and up), opcode (bits 21 and up), rn (bits 16 and
+ // up), rd (bits 12 and up) and the pre-encoded operand into one word; the
+ // `s` form additionally sets bit 20.
+ // NOTE(review): bit 20 is presumably the ARM "S" (update flags) bit --
+ // confirm against the ARM Architecture Reference Manual.
+#define F(mnemonic, opcode) \
+ void mnemonic(Register rd, Register rn, AsmOperand operand, AsmCondition cond = al) { \
+ emit_int32(cond << 28 | opcode << 21 | rn->encoding() << 16 | \
+ rd->encoding() << 12 | operand.encoding()); \
+ } \
+ void mnemonic##s(Register rd, Register rn, AsmOperand operand, AsmCondition cond = al) { \
+ emit_int32(cond << 28 | opcode << 21 | 1 << 20 | rn->encoding() << 16 | \
+ rd->encoding() << 12 | operand.encoding()); \
+ }
+
+ F(andr, 0)
+ F(eor, 1)
+ F(sub, 2)
+ F(rsb, 3)
+ F(add, 4)
+ F(adc, 5)
+ F(sbc, 6)
+ F(rsc, 7)
+ F(orr, 12)
+ F(bic, 14)
+#undef F
+
+ // Generates one emitter per mnemonic for the instructions that take a
+ // register and an operand but no destination register. Bit 20 is always
+ // set and no rd field is written.
+#define F(mnemonic, opcode) \
+ void mnemonic(Register rn, AsmOperand operand, AsmCondition cond = al) { \
+ emit_int32(cond << 28 | opcode << 21 | 1 << 20 | rn->encoding() << 16 | \
+ operand.encoding()); \
+ }
+
+ F(tst, 8)
+ F(teq, 9)
+ F(cmp, 10)
+ F(cmn, 11)
+#undef F
+
+ // Generates `mnemonic` and `mnemonic##s` for the instructions that take a
+ // destination register and an operand but no rn. The `s` form additionally
+ // sets bit 20.
+#define F(mnemonic, opcode) \
+ void mnemonic(Register rd, AsmOperand operand, AsmCondition cond = al) { \
+ emit_int32(cond << 28 | opcode << 21 | rd->encoding() << 12 | \
+ operand.encoding()); \
+ } \
+ void mnemonic##s(Register rd, AsmOperand operand, AsmCondition cond = al) { \
+ emit_int32(cond << 28 | opcode << 21 | 1 << 20 | rd->encoding() << 12 | \
+ operand.encoding()); \
+ }
+
+ F(mov, 13)
+ F(mvn, 15)
+#undef F
+
+  // Emits MSR with the given field selector (see the CPSR_*/SPSR_* values
+  // declared in this class). The operand must be an immediate or a plain,
+  // unshifted register.
+  void msr(uint fields, AsmOperand operand, AsmCondition cond = al) {
+    const int operand_bits = operand.encoding();
+    assert((operand_bits & (1<<25)) || ((operand_bits & 0xff0) == 0), "invalid addressing mode");
+    emit_int32(cond << 28 | 1 << 24 | 1 << 21 | fields << 16 | 0xf << 12 | operand_bits);
+  }
+
+  // Emits MRS into Rd. The low four bits of the field selector are always
+  // forced to ones, so only the higher bits of `fields` (e.g. the 0x40 of the
+  // SPSR values) make a difference.
+  void mrs(uint fields, Register Rd, AsmCondition cond = al) {
+    const uint selector_bits = (fields|0xf) << 16;
+    emit_int32(cond << 28 | 1 << 24 | selector_bits | (Rd->encoding() << 12));
+  }
+
+
+  // Field selectors accepted by msr()/mrs(). The low four bits form a mask
+  // built from c = 0x1, x = 0x2, s = 0x4 and f = 0x8; the SPSR_* values are
+  // the matching CPSR_* values with 0x40 added.
+  enum {
+    CPSR      = 0x00,
+    CPSR_c    = 0x01,
+    CPSR_x    = 0x02,
+    CPSR_xc   = 0x03,
+    CPSR_s    = 0x04,
+    CPSR_sc   = 0x05,
+    CPSR_sx   = 0x06,
+    CPSR_sxc  = 0x07,
+    CPSR_f    = 0x08,
+    CPSR_fc   = 0x09,
+    CPSR_fx   = 0x0a,
+    CPSR_fxc  = 0x0b,
+    CPSR_fs   = 0x0c,
+    CPSR_fsc  = 0x0d,
+    CPSR_fsx  = 0x0e,
+    CPSR_fsxc = 0x0f,
+    SPSR      = 0x40,
+    SPSR_c    = 0x41,
+    SPSR_x    = 0x42,
+    SPSR_xc   = 0x43,
+    SPSR_s    = 0x44,
+    SPSR_sc   = 0x45,
+    SPSR_sx   = 0x46,
+    SPSR_sxc  = 0x47,
+    SPSR_f    = 0x48,
+    SPSR_fc   = 0x49,
+    SPSR_fx   = 0x4a,
+    SPSR_fxc  = 0x4b,
+    SPSR_fs   = 0x4c,
+    SPSR_fsc  = 0x4d,
+    SPSR_fsx  = 0x4e,
+    SPSR_fsxc = 0x4f
+  };
+
+ // Generates `mnemonic` and `mnemonic##s` for the long multiply family.
+ // The word holds cond, opcode (bits 21 and up), rdhi (bits 16 and up), rdlo
+ // (bits 12 and up), rs (bits 8 and up), the fixed pattern 0x9 in bits 4..7
+ // and rm in the low bits. The `s` form additionally sets bit 20.
+#define F(mnemonic, opcode) \
+ void mnemonic(Register rdlo, Register rdhi, Register rm, Register rs, \
+ AsmCondition cond = al) { \
+ emit_int32(cond << 28 | opcode << 21 | rdhi->encoding() << 16 | \
+ rdlo->encoding() << 12 | rs->encoding() << 8 | 0x9 << 4 | rm->encoding()); \
+ } \
+ void mnemonic##s(Register rdlo, Register rdhi, Register rm, Register rs, \
+ AsmCondition cond = al) { \
+ emit_int32(cond << 28 | opcode << 21 | 1 << 20 | rdhi->encoding() << 16 | \
+ rdlo->encoding() << 12 | rs->encoding() << 8 | 0x9 << 4 | rm->encoding()); \
+ }
+
+ F(umull, 4)
+ F(umlal, 5)
+ F(smull, 6)
+ F(smlal, 7)
+#undef F
+
+  // Emits MUL: rd in bits 16 and up, rs in bits 8 and up, the fixed pattern
+  // 0x9 in bits 4..7 and rm in the low bits.
+  void mul(Register rd, Register rm, Register rs, AsmCondition cond = al) {
+    const int multiply_pattern = 0x9 << 4;
+    emit_int32(cond << 28 | rd->encoding() << 16 | rs->encoding() << 8 |
+               multiply_pattern | rm->encoding());
+  }
+
+  // Same encoding as mul() with bit 20 additionally set.
+  void muls(Register rd, Register rm, Register rs, AsmCondition cond = al) {
+    const int multiply_pattern = 1 << 20 | 0x9 << 4;
+    emit_int32(cond << 28 | rd->encoding() << 16 | rs->encoding() << 8 |
+               multiply_pattern | rm->encoding());
+  }
+
+  // Emits MLA: like mul() with bit 21 set and the extra register rn placed in
+  // bits 12 and up.
+  void mla(Register rd, Register rm, Register rs, Register rn, AsmCondition cond = al) {
+    const int multiply_accumulate_pattern = 1 << 21 | 0x9 << 4;
+    emit_int32(cond << 28 | multiply_accumulate_pattern | rd->encoding() << 16 |
+               rn->encoding() << 12 | rs->encoding() << 8 | rm->encoding());
+  }
+
+  // Same encoding as mla() with bit 20 additionally set.
+  void mlas(Register rd, Register rm, Register rs, Register rn, AsmCondition cond = al) {
+    const int multiply_accumulate_pattern = 1 << 21 | 1 << 20 | 0x9 << 4;
+    emit_int32(cond << 28 | multiply_accumulate_pattern | rd->encoding() << 16 |
+               rn->encoding() << 12 | rs->encoding() << 8 | rm->encoding());
+  }
+
+ // Loads and stores
+
+  // Generates the word and byte load/store emitters. `l` goes to bit 20
+  // (1 for the loads, 0 for the stores) and `b` to bit 22 (1 for the byte
+  // forms). The addressing bits come from Address::encoding2().
+#define F(mnemonic, l, b) \
+  void mnemonic(Register rd, Address addr, AsmCondition cond = al) { \
+    emit_int32(cond << 28 | 1 << 26 | b << 22 | l << 20 | \
+               rd->encoding() << 12 | addr.encoding2()); \
+  }
+
+  F(ldr,  1, 0)
+  F(ldrb, 1, 1)
+  F(str,  0, 0)
+  F(strb, 0, 1)
+#undef F
+
+ // Generates the halfword, signed byte/halfword and doubleword store
+ // emitters. `l` goes to bit 20, `sh` to bits 5..6, and bits 7 and 4 are
+ // always set. When `even` is true the register number of rd must be even.
+ // The addressing bits come from Address::encoding3().
+#define F(mnemonic, l, sh, even) \
+ void mnemonic(Register rd, Address addr, AsmCondition cond = al) { \
+ assert(!even || (rd->encoding() & 1) == 0, "must be even"); \
+ emit_int32(cond << 28 | l << 20 | rd->encoding() << 12 | \
+ 1 << 7 | sh << 5 | 1 << 4 | addr.encoding3()); \
+ }
+
+ F(strh, 0, 1, false)
+ F(ldrh, 1, 1, false)
+ F(ldrsb, 1, 2, false)
+ F(ldrsh, 1, 3, false)
+ F(strd, 0, 3, true)
+
+#undef F
+
+  // Emits LDRD into the register pair starting at rd. rd must have an even
+  // register number, and an index register, if present, must be neither rd
+  // nor the register following rd.
+  void ldrd(Register rd, Address addr, AsmCondition cond = al) {
+    assert((rd->encoding() & 1) == 0, "must be even");
+    assert(!addr.index()->is_valid() ||
+           (addr.index()->encoding() != rd->encoding() &&
+            addr.index()->encoding() != (rd->encoding()+1)), "encoding constraint");
+    const int load_double_pattern = 0xD /* 0b1101 */ << 4;
+    emit_int32(cond << 28 | rd->encoding() << 12 | load_double_pattern | addr.encoding3());
+  }
+
+ // Generates the load/store multiple emitters. `l` goes to bit 20 (1 for
+ // ldm*, 0 for stm*), `pu` to bits 23..24 and the writeback flag to bit 21.
+ // The register set must not be empty and, with writeback, must not contain
+ // the base register rn. Each row below defines two names that share one
+ // encoding.
+#define F(mnemonic, l, pu) \
+ void mnemonic(Register rn, RegisterSet reg_set, \
+ AsmWriteback w = no_writeback, AsmCondition cond = al) { \
+ assert(reg_set.encoding() != 0 && (w == no_writeback || \
+ (reg_set.encoding() & (1 << rn->encoding())) == 0), \
+ "unpredictable instruction"); \
+ emit_int32(cond << 28 | 4 << 25 | pu << 23 | w << 21 | l << 20 | \
+ rn->encoding() << 16 | reg_set.encoding()); \
+ }
+
+ F(ldmda, 1, 0) F(ldmfa, 1, 0)
+ F(ldmia, 1, 1) F(ldmfd, 1, 1)
+ F(ldmdb, 1, 2) F(ldmea, 1, 2)
+ F(ldmib, 1, 3) F(ldmed, 1, 3)
+ F(stmda, 0, 0) F(stmed, 0, 0)
+ F(stmia, 0, 1) F(stmea, 0, 1)
+ F(stmdb, 0, 2) F(stmfd, 0, 2)
+ F(stmib, 0, 3) F(stmfa, 0, 3)
+#undef F
+
+  // Emits LDREX into rd from a plain base-register address (see
+  // Address::encoding_ex()). rd must not be PC.
+  void ldrex(Register rd, Address addr, AsmCondition cond = al) {
+    assert(rd != PC, "unpredictable instruction");
+    const int opcode_bits = 0x19 << 20 | 0xf9f;
+    emit_int32(cond << 28 | opcode_bits | addr.encoding_ex() | rd->encoding() << 12);
+  }
+
+  // Emits STREX. rs is placed in bits 12..15 and rd in bits 0..3.
+  // NOTE(review): presumably rs receives the status result and rd holds the
+  // value to store -- confirm against the ARM Architecture Reference Manual.
+  // Neither register may be PC, and rs must differ from rd and from the base.
+  void strex(Register rs, Register rd, Address addr, AsmCondition cond = al) {
+    assert(rd != PC && rs != PC &&
+           rs != rd && rs != addr.base(), "unpredictable instruction");
+    const int opcode_bits = 0x18 << 20 | 0xf90;
+    emit_int32(cond << 28 | opcode_bits | addr.encoding_ex() |
+               rs->encoding() << 12 | rd->encoding());
+  }
+
+  // Emits LDREXD into rd from a plain base-register address (see
+  // Address::encoding_ex()). rd must not be PC.
+  void ldrexd(Register rd, Address addr, AsmCondition cond = al) {
+    assert(rd != PC, "unpredictable instruction");
+    const int opcode_bits = 0x1B << 20 | 0xf9f;
+    emit_int32(cond << 28 | opcode_bits | addr.encoding_ex() | rd->encoding() << 12);
+  }
+
+  // Emits STREXD. rs is placed in bits 12..15 and rd in bits 0..3, with the
+  // same register constraints as strex().
+  void strexd(Register rs, Register rd, Address addr, AsmCondition cond = al) {
+    assert(rd != PC && rs != PC &&
+           rs != rd && rs != addr.base(), "unpredictable instruction");
+    const int opcode_bits = 0x1A << 20 | 0xf90;
+    emit_int32(cond << 28 | opcode_bits | addr.encoding_ex() |
+               rs->encoding() << 12 | rd->encoding());
+  }
+
+  // Emits CLREX. The instruction takes no operands and no condition; the
+  // emitted word is 0xF57FF01F.
+  void clrex() {
+    const int opcode_bits = 0x57 << 20 | 0xFF << 12 | 0x01f;
+    emit_int32(0xF << 28 | opcode_bits);
+  }
+
+ // Miscellaneous instructions
+
+  // Emits CLZ with rd in bits 12 and up and rm in the low bits.
+  void clz(Register rd, Register rm, AsmCondition cond = al) {
+    const int clz_opcode = 0x016f0f10;
+    emit_int32(cond << 28 | clz_opcode | rd->encoding() << 12 | rm->encoding());
+  }
+
+  // Emits REV with rd in bits 12 and up and rm in the low bits.
+  void rev(Register rd, Register rm, AsmCondition cond = al) {
+    const int rev_opcode = 0x06bf0f30;
+    emit_int32(cond << 28 | rev_opcode | rd->encoding() << 12 | rm->encoding());
+  }
+
+  // Emits REV16 with rd in bits 12 and up and rm in the low bits.
+  void rev16(Register rd, Register rm, AsmCondition cond = al) {
+    const int rev16_opcode = 0x06bf0fb0;
+    emit_int32(cond << 28 | rev16_opcode | rd->encoding() << 12 | rm->encoding());
+  }
+
+  // Emits REVSH with rd in bits 12 and up and rm in the low bits.
+  void revsh(Register rd, Register rm, AsmCondition cond = al) {
+    const int revsh_opcode = 0x06ff0fb0;
+    emit_int32(cond << 28 | revsh_opcode | rd->encoding() << 12 | rm->encoding());
+  }
+
+  // Emits RBIT with rd in bits 12 and up and rm in the low bits.
+  void rbit(Register rd, Register rm, AsmCondition cond = al) {
+    const int rbit_opcode = 0x06ff0f30;
+    emit_int32(cond << 28 | rbit_opcode | rd->encoding() << 12 | rm->encoding());
+  }
+
+  // Emits PLD for addr. The instruction is unconditional; the addressing
+  // bits come from Address::encoding2().
+  void pld(Address addr) {
+    const unsigned int pld_opcode = 0xf550f000;
+    emit_int32(pld_opcode | addr.encoding2());
+  }
+
+  // Emits PLDW for addr. Only allowed on ARMv7 or later multiprocessor
+  // systems; the addressing bits come from Address::encoding2().
+  void pldw(Address addr) {
+    assert(VM_Version::arm_arch() >= 7 && os::is_MP(), "no pldw on this processor");
+    const unsigned int pldw_opcode = 0xf510f000;
+    emit_int32(pldw_opcode | addr.encoding2());
+  }
+
+  // Emits SVC with a 24-bit immediate in the low bits of the word.
+  void svc(int imm_24, AsmCondition cond = al) {
+    assert((imm_24 >> 24) == 0, "encoding constraint");
+    const int svc_opcode = 0xf << 24;
+    emit_int32(cond << 28 | svc_opcode | imm_24);
+  }
+
+  // Emits UBFX (ARMv7 and later): extracts `width` bits of rn starting at bit
+  // `lsb` into rd. The word holds width - 1 in bits 16..20 and lsb in
+  // bits 7..11.
+  void ubfx(Register rd, Register rn, unsigned int lsb, unsigned int width, AsmCondition cond = al) {
+    assert(VM_Version::arm_arch() >= 7, "no ubfx on this processor");
+    assert(width > 0, "must be");
+    assert(lsb < 32, "must be");
+    // The bit field has to end within the 32-bit register. Without this check
+    // an oversized width would make (width - 1) spill out of its 5-bit field
+    // into the opcode bits at bit 21 and above.
+    assert(width <= 32 - lsb, "bit field must fit in 32 bits");
+    emit_int32(cond << 28 | 0x3f << 21 | (width - 1) << 16 | rd->encoding() << 12 |
+               lsb << 7 | 0x5 << 4 | rn->encoding());
+  }
+
+  // Emits UXTB from rm into rd. `rotation` must be 0, 8, 16 or 24 and is
+  // stored divided by 8 in bits 10..11. The emitter requires ARMv7 or later.
+  void uxtb(Register rd, Register rm, unsigned int rotation = 0, AsmCondition cond = al) {
+    assert(VM_Version::arm_arch() >= 7, "no uxtb on this processor");
+    assert((rotation % 8) == 0 && (rotation <= 24), "encoding constraint");
+    const unsigned int rotate_bits = (rotation >> 3) << 10;
+    emit_int32(cond << 28 | 0x6e << 20 | 0xf << 16 | rd->encoding() << 12 |
+               rotate_bits | 0x7 << 4 | rm->encoding());
+  }
+
+ // ARM Memory Barriers
+ //
+ // There are two types of memory barriers defined for the ARM processor
+ // DataSynchronizationBarrier and DataMemoryBarrier
+ //
+ // The Linux kernel uses the DataMemoryBarrier for all of its
+ // memory barrier operations (smp_mb, smp_rmb, smp_wmb)
+ //
+ // There are two forms of each barrier instruction.
+ // The mcr forms are supported on armv5 and newer architectures
+ //
+ // The dmb, dsb instructions were added in armv7
+ // architectures and are compatible with their mcr
+ // predecessors.
+ //
+ // Here are the encodings for future reference:
+ //
+ // DataSynchronizationBarrier (dsb)
+ // on ARMv7 - emit_int32(0xF57FF04F)
+ //
+ // on ARMv5+ - mcr p15, 0, Rtmp, c7, c10, 4 on earlier processors
+ // emit_int32(0xe << 28 | 0xe << 24 | 0x7 << 16 | Rtmp->encoding() << 12 |
+ // 0xf << 8 | 0x9 << 4 | 0xa);
+ //
+ // DataMemoryBarrier (dmb)
+ // on ARMv7 - emit_int32(0xF57FF05F)
+ //
+ // on ARMv5+ - mcr p15, 0, Rtmp, c7, c10, 5 on earlier processors
+ // emit_int32(0xe << 28 | 0xe << 24 | 0x7 << 16 | Rtmp->encoding() << 12 |
+ // 0xf << 8 | 0xb << 4 | 0xa);
+ //
+
+ enum DMB_Opt {
+ DMB_all = 0xf,
+ DMB_st = 0xe,
+ };
+
+ void dmb(DMB_Opt opt, Register reg) { // data memory barrier; reg is a scratch register for the pre-ARMv7 form
+ if (VM_Version::arm_arch() >= 7) {
+ emit_int32(0xF57FF050 | opt); // ARMv7 dmb; opt supplies the low four option bits
+ } else {
+ bool preserve_tmp = (reg == noreg); // no scratch register supplied by the caller
+ if(preserve_tmp) {
+ reg = Rtemp;
+ str(reg, Address(SP, -wordSize, pre_indexed)); // save Rtemp on the stack
+ }
+ mov(reg, 0);
+ // DataMemoryBarrier: mcr p15, 0, reg, c7, c10, 5 (opt is not used on this path)
+ emit_int32(0xe << 28 |
+ 0xe << 24 |
+ 0x7 << 16 |
+ reg->encoding() << 12 |
+ 0xf << 8 |
+ 0xb << 4 |
+ 0xa);
+ if(preserve_tmp) {
+ ldr(reg, Address(SP, wordSize, post_indexed)); // restore Rtemp
+ }
+ }
+ }
+
+ void dsb(Register reg) { // data synchronization barrier; reg is a scratch register for the pre-ARMv7 form
+ if (VM_Version::arm_arch() >= 7) {
+ emit_int32(0xF57FF04F); // ARMv7 dsb
+ } else {
+ bool preserve_tmp = (reg == noreg); // no scratch register supplied by the caller
+ if(preserve_tmp) {
+ reg = Rtemp;
+ str(reg, Address(SP, -wordSize, pre_indexed)); // save Rtemp on the stack
+ }
+ mov(reg, 0);
+ // DataSynchronizationBarrier: mcr p15, 0, reg, c7, c10, 4
+ emit_int32(0xe << 28 |
+ 0xe << 24 |
+ 0x7 << 16 |
+ reg->encoding() << 12 |
+ 0xf << 8 |
+ 0x9 << 4 |
+ 0xa);
+ if(preserve_tmp) {
+ ldr(reg, Address(SP, wordSize, post_indexed)); // restore Rtemp
+ }
+ }
+ }
+
+
+#define F(mnemonic, b) \
+ void mnemonic(Register rd, Register rm, Register rn, AsmCondition cond = al) { \
+ assert(rn != rm && rn != rd, "unpredictable instruction"); \
+ emit_int32(cond << 28 | 0x2 << 23 | b << 22 | rn->encoding() << 16 | \
+ rd->encoding() << 12 | 9 << 4 | rm->encoding()); \
+ }
+
+ F(swp, 0)
+ F(swpb, 1)
+#undef F
+
+ // Branches
+
+#define F(mnemonic, l) \
+ void mnemonic(Register rm, AsmCondition cond = al) { \
+ emit_int32(cond << 28 | 0x012fff10 | l << 5 | rm->encoding()); \
+ }
+
+ F(bx, 0)
+ F(blx, 1)
+#undef F
+
+#define F(mnemonic, l) \
+ void mnemonic(address target, AsmCondition cond = al) { \
+ unsigned int offset = (unsigned int)(target - pc() - 8); \
+ assert((offset & 3) == 0, "bad alignment"); \
+ assert((offset >> 25) == 0 || ((int)offset >> 25) == -1, "offset is too large"); \
+ emit_int32(cond << 28 | l << 24 | offset << 6 >> 8); \
+ }
+
+ F(b, 0xa)
+ F(bl, 0xb)
+#undef F
+
+ // ARMv7 instructions
+
+#define F(mnemonic, wt) \
+ void mnemonic(Register rd, int imm_16, AsmCondition cond = al) { \
+ assert((imm_16 >> 16) == 0, "encoding constraint"); \
+ emit_int32(cond << 28 | wt << 20 | rd->encoding() << 12 | \
+ (imm_16 & 0xf000) << 4 | (imm_16 & 0xfff)); \
+ }
+
+ F(movw, 0x30)
+ F(movt, 0x34)
+#undef F
+
+ // VFP Support
+
+// Checks that VFP instructions are not used in SOFTFP mode.
+#ifdef __SOFTFP__
+#define CHECK_VFP_PRESENT ShouldNotReachHere()
+#else
+#define CHECK_VFP_PRESENT
+#endif // __SOFTFP__
+
+ static const int single_cp_num = 0xa00;
+ static const int double_cp_num = 0xb00;
+
+ // Bits P, Q, R, S collectively form the opcode
+#define F(mnemonic, P, Q, R, S) \
+ void mnemonic##d(FloatRegister fd, FloatRegister fn, FloatRegister fm, \
+ AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fn->lo_bit() == 0 && fd->lo_bit() == 0 && fm->lo_bit() == 0, "single precision register?"); \
+ emit_int32(cond << 28 | 0x7 << 25 | double_cp_num | \
+ P << 23 | Q << 21 | R << 20 | S << 6 | \
+ fn->hi_bits() << 16 | fn->hi_bit() << 7 | \
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 | \
+ fm->hi_bits() | fm->hi_bit() << 5); \
+ } \
+ void mnemonic##s(FloatRegister fd, FloatRegister fn, FloatRegister fm, \
+ AsmCondition cond = al) { \
+ assert(fn->hi_bit() == 0 && fd->hi_bit() == 0 && fm->hi_bit() == 0, "double precision register?"); \
+ CHECK_VFP_PRESENT; \
+ emit_int32(cond << 28 | 0x7 << 25 | single_cp_num | \
+ P << 23 | Q << 21 | R << 20 | S << 6 | \
+ fn->hi_bits() << 16 | fn->lo_bit() << 7 | \
+ fd->hi_bits() << 12 | fd->lo_bit() << 22 | \
+ fm->hi_bits() | fm->lo_bit() << 5); \
+ }
+
+ F(fmac, 0, 0, 0, 0) // Fd = Fd + (Fn * Fm)
+ F(fnmac, 0, 0, 0, 1) // Fd = Fd - (Fn * Fm)
+ F(fmsc, 0, 0, 1, 0) // Fd = -Fd + (Fn * Fm)
+ F(fnmsc, 0, 0, 1, 1) // Fd = -Fd - (Fn * Fm)
+
+ F(fmul, 0, 1, 0, 0) // Fd = Fn * Fm
+ F(fnmul, 0, 1, 0, 1) // Fd = -(Fn * Fm)
+ F(fadd, 0, 1, 1, 0) // Fd = Fn + Fm
+ F(fsub, 0, 1, 1, 1) // Fd = Fn - Fm
+ F(fdiv, 1, 0, 0, 0) // Fd = Fn / Fm
+#undef F
+
+ enum VElem_Size {
+ VELEM_SIZE_8 = 0x00,
+ VELEM_SIZE_16 = 0x01,
+ VELEM_SIZE_32 = 0x02,
+ VELEM_SIZE_64 = 0x03
+ };
+
+ enum VLD_Type {
+ VLD1_TYPE_1_REG = 0x7 /* 0b0111 */,
+ VLD1_TYPE_2_REGS = 0xA /* 0b1010 */,
+ VLD1_TYPE_3_REGS = 0x6 /* 0b0110 */,
+ VLD1_TYPE_4_REGS = 0x2 /* 0b0010 */
+ };
+
+ enum VFloat_Arith_Size {
+ VFA_SIZE_F32 = 0x0 /* 0b0 */,
+ };
+
+ // Bits P, Q, R, S collectively form the opcode
+#define F(mnemonic, P, Q, R, S) \
+ void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \
+ int size, int quad) { \
+ CHECK_VFP_PRESENT; \
+ assert(VM_Version::has_simd(), "simd instruction"); \
+ assert(fn->lo_bit() == 0 && fd->lo_bit() == 0 && fm->lo_bit() == 0, \
+ "single precision register?"); \
+ assert(!quad || ((fn->hi_bits() | fd->hi_bits() | fm->hi_bits()) & 1) == 0, \
+ "quad precision register?"); \
+ emit_int32(0xf << 28 | P << 23 | Q << 8 | R << 4 | \
+ S << 21 | size << 20 | quad << 6 | \
+ fn->hi_bits() << 16 | fn->hi_bit() << 7 | \
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 | \
+ fm->hi_bits() | fm->hi_bit() << 5); \
+ }
+
+ F(vmulI, 0x4 /* 0b0100 */, 0x9 /* 0b1001 */, 1, 0) // Vd = Vn * Vm (int)
+ F(vaddI, 0x4 /* 0b0100 */, 0x8 /* 0b1000 */, 0, 0) // Vd = Vn + Vm (int)
+ F(vsubI, 0x6 /* 0b0110 */, 0x8 /* 0b1000 */, 0, 0) // Vd = Vn - Vm (int)
+ F(vaddF, 0x4 /* 0b0100 */, 0xD /* 0b1101 */, 0, 0) // Vd = Vn + Vm (float)
+ F(vsubF, 0x4 /* 0b0100 */, 0xD /* 0b1101 */, 0, 1) // Vd = Vn - Vm (float)
+ F(vmulF, 0x6 /* 0b0110 */, 0xD /* 0b1101 */, 1, 0) // Vd = Vn * Vm (float)
+ F(vshlSI, 0x4 /* 0b0100 */, 0x4 /* 0b0100 */, 0, 0) // Vd = ashift(Vm,Vn) (int)
+ F(vshlUI, 0x6 /* 0b0110 */, 0x4 /* 0b0100 */, 0, 0) // Vd = lshift(Vm,Vn) (int)
+ F(_vandI, 0x4 /* 0b0100 */, 0x1 /* 0b0001 */, 1, 0) // Vd = Vn & Vm (int)
+ F(_vorI, 0x4 /* 0b0100 */, 0x1 /* 0b0001 */, 1, 1) // Vd = Vn | Vm (int)
+ F(_vxorI, 0x6 /* 0b0110 */, 0x1 /* 0b0001 */, 1, 0) // Vd = Vn ^ Vm (int)
+#undef F
+
+ void vandI(FloatRegister fd, FloatRegister fn, FloatRegister fm, int quad) {
+ _vandI(fd, fn, fm, 0, quad);
+ }
+ void vorI(FloatRegister fd, FloatRegister fn, FloatRegister fm, int quad) {
+ _vorI(fd, fn, fm, 0, quad);
+ }
+ void vxorI(FloatRegister fd, FloatRegister fn, FloatRegister fm, int quad) {
+ _vxorI(fd, fn, fm, 0, quad);
+ }
+
+ void vneg(FloatRegister fd, FloatRegister fm, int size, int flt, int quad) {
+ CHECK_VFP_PRESENT;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(fd->lo_bit() == 0 && fm->lo_bit() == 0,
+ "single precision register?");
+ assert(!quad || ((fd->hi_bits() | fm->hi_bits()) & 1) == 0,
+ "quad precision register?");
+ emit_int32(0xf << 28 | 0x3B /* 0b00111011 */ << 20 | 0x1 /* 0b01 */ << 16 | 0x7 /* 0b111 */ << 7 |
+ size << 18 | quad << 6 | flt << 10 |
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 |
+ fm->hi_bits() << 0 | fm->hi_bit() << 5);
+ }
+
+ void vnegI(FloatRegister fd, FloatRegister fm, int size, int quad) {
+ int flt = 0;
+ vneg(fd, fm, size, flt, quad);
+ }
+
+ void vshli(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) {
+ CHECK_VFP_PRESENT;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(fd->lo_bit() == 0 && fm->lo_bit() == 0,
+ "single precision register?");
+ assert(!quad || ((fd->hi_bits() | fm->hi_bits()) & 1) == 0,
+ "quad precision register?");
+
+ if (imm >= size) {
+ // maximum shift gives all zeroes, direction doesn't matter,
+ // but only available for shift right
+ vshri(fd, fm, size, size, true /* unsigned */, quad);
+ return;
+ }
+ assert(imm >= 0 && imm < size, "out of range");
+
+ int imm6 = 0;
+ int L = 0;
+ switch (size) {
+ case 8:
+ case 16:
+ case 32:
+ imm6 = size + imm ;
+ break;
+ case 64:
+ L = 1;
+ imm6 = imm ;
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ emit_int32(0xf << 28 | 0x5 /* 0b00101 */ << 23 | 0x51 /* 0b01010001 */ << 4 |
+ imm6 << 16 | L << 7 | quad << 6 |
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 |
+ fm->hi_bits() << 0 | fm->hi_bit() << 5);
+ }
+
+ void vshri(FloatRegister fd, FloatRegister fm, int size, int imm,
+ bool U /* unsigned */, int quad) {
+ CHECK_VFP_PRESENT;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(fd->lo_bit() == 0 && fm->lo_bit() == 0,
+ "single precision register?");
+ assert(!quad || ((fd->hi_bits() | fm->hi_bits()) & 1) == 0,
+ "quad precision register?");
+ assert(imm > 0, "out of range");
+ if (imm >= size) {
+ // maximum shift (all zeroes)
+ imm = size;
+ }
+ int imm6 = 0;
+ int L = 0;
+ switch (size) {
+ case 8:
+ case 16:
+ case 32:
+ imm6 = 2 * size - imm ;
+ break;
+ case 64:
+ L = 1;
+ imm6 = 64 - imm ;
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ emit_int32(0xf << 28 | 0x5 /* 0b00101 */ << 23 | 0x1 /* 0b00000001 */ << 4 |
+ imm6 << 16 | L << 7 | quad << 6 | U << 24 |
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 |
+ fm->hi_bits() << 0 | fm->hi_bit() << 5);
+ }
+ void vshrUI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) {
+ vshri(fd, fm, size, imm, true /* unsigned */, quad);
+ }
+ void vshrSI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) {
+ vshri(fd, fm, size, imm, false /* signed */, quad);
+ }
+
+ // Extension opcodes where P,Q,R,S = 1 opcode is in Fn
+#define F(mnemonic, N, opcode) \
+ void mnemonic##d(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->lo_bit() == 0 && fm->hi_bit() == 0, "incorrect register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ double_cp_num | \
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 | \
+ fm->hi_bits() | fm->lo_bit() << 5); \
+ } \
+ void mnemonic##s(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->hi_bit() == 0 && fm->hi_bit() == 0, "double precision register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ single_cp_num | \
+ fd->hi_bits() << 12 | fd->lo_bit() << 22 | \
+ fm->hi_bits() | fm->lo_bit() << 5); \
+ }
+
+ F(fuito, 0, 0x8) // Unsigned integer to floating point conversion
+ F(fsito, 1, 0x8) // Signed integer to floating point conversion
+#undef F
+
+#define F(mnemonic, N, opcode) \
+ void mnemonic##d(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->hi_bit() == 0 && fm->lo_bit() == 0, "incorrect register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ double_cp_num | \
+ fd->hi_bits() << 12 | fd->lo_bit() << 22 | \
+ fm->hi_bits() | fm->hi_bit() << 5); \
+ } \
+ void mnemonic##s(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->hi_bit() == 0 && fm->hi_bit() == 0, "double precision register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ single_cp_num | \
+ fd->hi_bits() << 12 | fd->lo_bit() << 22 | \
+ fm->hi_bits() | fm->lo_bit() << 5); \
+ }
+
+ F(ftoui, 0, 0xc) // Float to unsigned int conversion
+ F(ftouiz, 1, 0xc) // Float to unsigned int conversion, RZ mode
+ F(ftosi, 0, 0xd) // Float to signed int conversion
+ F(ftosiz, 1, 0xd) // Float to signed int conversion, RZ mode
+#undef F
+
+#define F(mnemonic, N, opcode) \
+ void mnemonic##d(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->hi_bit() == 0 && fm->lo_bit() == 0, "incorrect register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ double_cp_num | \
+ fd->hi_bits() << 12 | fd->lo_bit() << 22 | \
+ fm->hi_bits() | fm->hi_bit() << 5); \
+ } \
+ void mnemonic##s(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->lo_bit() == 0 && fm->hi_bit() == 0, "incorrect register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ single_cp_num | \
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 | \
+ fm->hi_bits() | fm->lo_bit() << 5); \
+ }
+
+ F(fcvtd, 1, 0x7) // Single->Double conversion
+ F(fcvts, 1, 0x7) // Double->Single conversion
+#undef F
+
+#define F(mnemonic, N, opcode) \
+ void mnemonic##d(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->lo_bit() == 0 && fm->lo_bit() == 0, "single precision register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ double_cp_num | \
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 | \
+ fm->hi_bits() | fm->hi_bit() << 5); \
+ } \
+ void mnemonic##s(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->hi_bit() == 0 && fm->hi_bit() == 0, "double precision register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ single_cp_num | \
+ fd->hi_bits() << 12 | fd->lo_bit() << 22 | \
+ fm->hi_bits() | fm->lo_bit() << 5); \
+ }
+
+ F(fcpy, 0, 0x0) // Fd = Fm
+ F(fabs, 1, 0x0) // Fd = abs(Fm)
+ F(fneg, 0, 0x1) // Fd = -Fm
+ F(fsqrt, 1, 0x1) // Fd = sqrt(Fm)
+ F(fcmp, 0, 0x4) // Compare Fd with Fm no exceptions on quiet NANs
+ F(fcmpe, 1, 0x4) // Compare Fd with Fm with exceptions on quiet NANs
+#undef F
+
+ // Opcodes with one operand only
+#define F(mnemonic, N, opcode) \
+ void mnemonic##d(FloatRegister fd, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->lo_bit() == 0, "single precision register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ double_cp_num | fd->hi_bits() << 12 | fd->hi_bit() << 22); \
+ } \
+ void mnemonic##s(FloatRegister fd, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->hi_bit() == 0, "double precision register?"); \
+ emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \
+ single_cp_num | fd->hi_bits() << 12 | fd->lo_bit() << 22); \
+ }
+
+ F(fcmpz, 0, 0x5) // Compare Fd with 0, no exceptions quiet NANs
+ F(fcmpez, 1, 0x5) // Compare Fd with 0, with exceptions quiet NANs
+#undef F
+
+ // Float loads (L==1) and stores (L==0)
+#define F(mnemonic, L) \
+ void mnemonic##d(FloatRegister fd, Address addr, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->lo_bit() == 0, "single precision register?"); \
+ emit_int32(cond << 28 | 0xd << 24 | L << 20 | \
+ fd->hi_bits() << 12 | fd->hi_bit() << 22 | \
+ double_cp_num | addr.encoding_vfp()); \
+ } \
+ void mnemonic##s(FloatRegister fd, Address addr, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(fd->hi_bit() == 0, "double precision register?"); \
+ emit_int32(cond << 28 | 0xd << 24 | L << 20 | \
+ fd->hi_bits() << 12 | fd->lo_bit() << 22 | \
+ single_cp_num | addr.encoding_vfp()); \
+ }
+
+ F(fst, 0) // Store 1 register
+ F(fld, 1) // Load 1 register
+#undef F
+
+ // Float load and store multiple
+#define F(mnemonic, l, pu) \
+ void mnemonic##d(Register rn, FloatRegisterSet reg_set, \
+ AsmWriteback w = no_writeback, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(w == no_writeback || rn != PC, "unpredictable instruction"); \
+ assert(!(w == no_writeback && pu == 2), "encoding constraint"); \
+ assert((reg_set.encoding_d() & 1) == 0, "encoding constraint"); \
+ emit_int32(cond << 28 | 6 << 25 | pu << 23 | w << 21 | l << 20 | \
+ rn->encoding() << 16 | reg_set.encoding_d() | double_cp_num); \
+ } \
+ void mnemonic##s(Register rn, FloatRegisterSet reg_set, \
+ AsmWriteback w = no_writeback, AsmCondition cond = al) { \
+ CHECK_VFP_PRESENT; \
+ assert(w == no_writeback || rn != PC, "unpredictable instruction"); \
+ assert(!(w == no_writeback && pu == 2), "encoding constraint"); \
+ emit_int32(cond << 28 | 6 << 25 | pu << 23 | w << 21 | l << 20 | \
+ rn->encoding() << 16 | reg_set.encoding_s() | single_cp_num); \
+ }
+
+ F(fldmia, 1, 1) F(fldmfd, 1, 1)
+ F(fldmdb, 1, 2) F(fldmea, 1, 2)
+ F(fstmia, 0, 1) F(fstmfd, 0, 1)
+ F(fstmdb, 0, 2) F(fstmea, 0, 2)
+#undef F
+
+ // fconst{s,d} encoding:
+ // 31 28 27 23 22 21 20 19 16 15 12 10 9 8 7 4 3 0
+ // | cond | 11101 | D | 11 | imm4H | Vd | 101 | sz | 0000 | imm4L |
+ // sz = 0 for single precision, 1 otherwise
+ // Register number is Vd:D for single precision, D:Vd otherwise
+ // immediate value is imm4H:imm4L
+
+ void fconsts(FloatRegister fd, unsigned char imm_8, AsmCondition cond = al) {
+ CHECK_VFP_PRESENT;
+ assert(fd->hi_bit() == 0, "double precision register?");
+ emit_int32(cond << 28 | 0xeb << 20 | single_cp_num |
+ fd->hi_bits() << 12 | fd->lo_bit() << 22 | (imm_8 & 0xf) | (imm_8 >> 4) << 16);
+ }
+
+ void fconstd(FloatRegister fd, unsigned char imm_8, AsmCondition cond = al) { // fconstd: imm_8 is split into imm4H:imm4L
+   CHECK_VFP_PRESENT;
+   assert(fd->lo_bit() == 0, "single precision register?"); // same check and message as the other double-precision emitters
+   emit_int32(cond << 28 | 0xeb << 20 | double_cp_num |
+              fd->hi_bits() << 12 | fd->hi_bit() << 22 | (imm_8 & 0xf) | (imm_8 >> 4) << 16); // register number is D:Vd
+ }
+
+ // GPR <-> FPR transfers
+ void fmsr(FloatRegister fd, Register rd, AsmCondition cond = al) {
+ CHECK_VFP_PRESENT;
+ assert(fd->hi_bit() == 0, "double precision register?");
+ emit_int32(cond << 28 | 0xe0 << 20 | single_cp_num | 1 << 4 |
+ fd->hi_bits() << 16 | fd->lo_bit() << 7 | rd->encoding() << 12);
+ }
+
+ void fmrs(Register rd, FloatRegister fd, AsmCondition cond = al) {
+ CHECK_VFP_PRESENT;
+ assert(fd->hi_bit() == 0, "double precision register?");
+ emit_int32(cond << 28 | 0xe1 << 20 | single_cp_num | 1 << 4 |
+ fd->hi_bits() << 16 | fd->lo_bit() << 7 | rd->encoding() << 12);
+ }
+
+ void fmdrr(FloatRegister fd, Register rd, Register rn, AsmCondition cond = al) {
+ CHECK_VFP_PRESENT;
+ assert(fd->lo_bit() == 0, "single precision register?");
+ emit_int32(cond << 28 | 0xc4 << 20 | double_cp_num | 1 << 4 |
+ fd->hi_bits() | fd->hi_bit() << 5 |
+ rn->encoding() << 16 | rd->encoding() << 12);
+ }
+
+ void fmrrd(Register rd, Register rn, FloatRegister fd, AsmCondition cond = al) {
+ CHECK_VFP_PRESENT;
+ assert(fd->lo_bit() == 0, "single precision register?");
+ emit_int32(cond << 28 | 0xc5 << 20 | double_cp_num | 1 << 4 |
+ fd->hi_bits() | fd->hi_bit() << 5 |
+ rn->encoding() << 16 | rd->encoding() << 12);
+ }
+
+ void fmstat(AsmCondition cond = al) {
+ CHECK_VFP_PRESENT;
+ emit_int32(cond << 28 | 0xef1fa10);
+ }
+
+ void vmrs(Register rt, VFPSystemRegister sr, AsmCondition cond = al) {
+ assert((sr->encoding() & (~0xf)) == 0, "what system register is that?");
+ emit_int32(cond << 28 | rt->encoding() << 12 | sr->encoding() << 16 | 0xef00a10);
+ }
+
+ void vmsr(VFPSystemRegister sr, Register rt, AsmCondition cond = al) {
+ assert((sr->encoding() & (~0xf)) == 0, "what system register is that?");
+ emit_int32(cond << 28 | rt->encoding() << 12 | sr->encoding() << 16 | 0xee00a10);
+ }
+
+ void vcnt(FloatRegister Dd, FloatRegister Dm) {
+ CHECK_VFP_PRESENT;
+ // emitted at VM startup to detect whether the instruction is available
+ assert(!VM_Version::is_initialized() || VM_Version::has_simd(), "simd instruction");
+ assert(Dd->lo_bit() == 0 && Dm->lo_bit() == 0, "single precision registers?");
+ emit_int32(0xf3b00500 | Dd->hi_bit() << 22 | Dd->hi_bits() << 12 | Dm->hi_bit() << 5 | Dm->hi_bits());
+ }
+
+ void vpaddl(FloatRegister Dd, FloatRegister Dm, int size, bool s) {
+ CHECK_VFP_PRESENT;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(Dd->lo_bit() == 0 && Dm->lo_bit() == 0, "single precision registers?");
+ assert(size == 8 || size == 16 || size == 32, "unexpected size");
+ emit_int32(0xf3b00200 | Dd->hi_bit() << 22 | (size >> 4) << 18 | Dd->hi_bits() << 12 | (s ? 0 : 1) << 7 | Dm->hi_bit() << 5 | Dm->hi_bits());
+ }
+
+ void vld1(FloatRegister Dd, Address addr, VElem_Size size, int bits) {
+ CHECK_VFP_PRESENT;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(Dd->lo_bit() == 0, "single precision registers?");
+ int align = 0;
+ assert(bits == 128, "code assumption");
+ VLD_Type type = VLD1_TYPE_2_REGS; // 2x64
+ emit_int32(0xf4200000 | Dd->hi_bit() << 22 | Dd->hi_bits() << 12 | type << 8 | size << 6 | align << 4 | addr.encoding_simd());
+ }
+
+ void vst1(FloatRegister Dd, Address addr, VElem_Size size, int bits) {
+ CHECK_VFP_PRESENT;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(Dd->lo_bit() == 0, "single precision registers?");
+ int align = 0;
+ assert(bits == 128, "code assumption");
+ VLD_Type type = VLD1_TYPE_2_REGS; // 2x64
+ emit_int32(0xf4000000 | Dd->hi_bit() << 22 | Dd->hi_bits() << 12 | type << 8 | size << 6 | align << 4 | addr.encoding_simd());
+ }
+
+ void vmovI(FloatRegister Dd, int imm8, VElem_Size size, int quad) { // vmov.i<size> Dd, #imm8 (Q register form if quad)
+   CHECK_VFP_PRESENT;
+   assert(VM_Version::has_simd(), "simd instruction");
+   assert(Dd->lo_bit() == 0, "single precision register?");
+   assert(!quad || (Dd->hi_bits() & 1) == 0, "quad precision register?");
+   assert(imm8 >= 0 && imm8 < 256, "out of range");
+   int op = 0;     // initialized so the default case below never leaves them indeterminate
+   int cmode = 0;  // cmode selects the element size of the immediate form
+   switch (size) {
+     case VELEM_SIZE_8:
+       op = 0;
+       cmode = 0xE /* 0b1110 */;
+       break;
+     case VELEM_SIZE_16:
+       op = 0;
+       cmode = 0x8 /* 0b1000 */;
+       break;
+     case VELEM_SIZE_32:
+       op = 0;
+       cmode = 0x0 /* 0b0000 */;
+       break;
+     default:
+       ShouldNotReachHere(); // VELEM_SIZE_64 has no encoding here
+   }
+   emit_int32(0xf << 28 | 0x1 << 25 | 0x1 << 23 | 0x1 << 4 |
+              (imm8 >> 7) << 24 | ((imm8 & 0x70) >> 4) << 16 | (imm8 & 0xf) | // imm8 is scattered as i:imm3:imm4
+              quad << 6 | op << 5 | cmode << 8 |
+              Dd->hi_bits() << 12 | Dd->hi_bit() << 22);
+ }
+
+ void vdupI(FloatRegister Dd, Register Rs, VElem_Size size, int quad, // vdup.<size> Dd, Rs (Q register form if quad)
+            AsmCondition cond = al) {
+   CHECK_VFP_PRESENT;
+   assert(VM_Version::has_simd(), "simd instruction");
+   assert(Dd->lo_bit() == 0, "single precision register?");
+   assert(!quad || (Dd->hi_bits() & 1) == 0, "quad precision register?");
+   int b = 0;  // initialized so the default case below never leaves them indeterminate
+   int e = 0;  // b:e together select the element size
+   switch (size) {
+     case VELEM_SIZE_8:
+       b = 1;
+       e = 0;
+       break;
+     case VELEM_SIZE_16:
+       b = 0;
+       e = 1;
+       break;
+     case VELEM_SIZE_32:
+       b = 0;
+       e = 0;
+       break;
+     default:
+       ShouldNotReachHere(); // VELEM_SIZE_64 has no encoding here
+   }
+   emit_int32(cond << 28 | 0x1D /* 0b11101 */ << 23 | 0xB /* 0b1011 */ << 8 | 0x1 << 4 |
+              quad << 21 | b << 22 | e << 5 | Rs->encoding() << 12 |
+              Dd->hi_bits() << 16 | Dd->hi_bit() << 7);
+ }
+
+ void vdup(FloatRegister Dd, FloatRegister Ds, int index, int size, int quad) { // vdup.<size> Dd, Ds[index]; size is in bits
+   CHECK_VFP_PRESENT;
+   assert(VM_Version::has_simd(), "simd instruction");
+   assert(Dd->lo_bit() == 0, "single precision register?");
+   assert(Ds->lo_bit() == 0, "single precision register?");
+   assert(!quad || (Dd->hi_bits() & 1) == 0, "quad precision register?");
+   // index picks one of the 64 / size elements of Ds (no assert-only local, so product builds have no unused variable)
+   assert(index < 64 / size, "overflow");
+   int imm4 = 0;  // initialized so the default case below never leaves it indeterminate
+   switch (size) {
+     case 8:
+       assert((index & 0x7 /* 0b111 */) == index, "overflow");
+       imm4 = index << 1 | 0x1 /* 0b0001 */;
+       break;
+     case 16:
+       assert((index & 0x3 /* 0b11 */) == index, "overflow");
+       imm4 = index << 2 | 0x2 /* 0b0010 */;
+       break;
+     case 32:
+       assert((index & 0x1 /* 0b1 */) == index, "overflow");
+       imm4 = index << 3 | 0x4 /* 0b0100 */;
+       break;
+     default:
+       ShouldNotReachHere(); // only 8, 16 and 32 bit elements are supported
+   }
+   emit_int32(0xF /* 0b1111 */ << 28 | 0x3B /* 0b00111011 */ << 20 | 0x6 /* 0b110 */ << 9 |
+              quad << 6 | imm4 << 16 |
+              Dd->hi_bits() << 12 | Dd->hi_bit() << 22 |
+              Ds->hi_bits() << 00 | Ds->hi_bit() << 5);
+ }
+
+ void vdupF(FloatRegister Dd, FloatRegister Ss, int quad) {
+ int index = 0;
+ FloatRegister Ds = as_FloatRegister(Ss->encoding() & ~1);
+ if (Ss->lo_bit() != 0) {
+ /* odd S register */
+ assert(Ds->successor() == Ss, "bad reg");
+ index = 1;
+ } else {
+ /* even S register */
+ assert(Ds == Ss, "bad reg");
+ }
+ vdup(Dd, Ds, index, 32, quad);
+ }
+
+ void vrev(FloatRegister Dd, FloatRegister Dm, int quad, int region_size, VElem_Size size) {
+ CHECK_VFP_PRESENT;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(Dd->lo_bit() == 0, "single precision register?");
+ assert(Dm->lo_bit() == 0, "single precision register?");
+ assert(!quad || ((Dd->hi_bits() | Dm->hi_bits()) & 1) == 0,
+ "quad precision register?");
+ unsigned int op = 0;
+ switch (region_size) {
+ case 16: op = 0x2; /*0b10*/ break;
+ case 32: op = 0x1; /*0b01*/ break;
+ case 64: op = 0x0; /*0b00*/ break;
+ default: assert(false, "encoding constraint");
+ }
+ emit_int32(0xf << 28 | 0x7 << 23 | Dd->hi_bit() << 22 | 0x3 << 20 |
+ size << 18 | Dd->hi_bits() << 12 | op << 7 | quad << 6 | Dm->hi_bit() << 5 |
+ Dm->hi_bits());
+ }
+
+ void veor(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, int quad) {
+ CHECK_VFP_PRESENT;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(Dd->lo_bit() == 0, "single precision register?");
+ assert(Dm->lo_bit() == 0, "single precision register?");
+ assert(Dn->lo_bit() == 0, "single precision register?");
+ assert(!quad || ((Dd->hi_bits() | Dm->hi_bits() | Dn->hi_bits()) & 1) == 0,
+ "quad precision register?");
+
+ emit_int32(0xf << 28 | 0x3 << 24 | Dd->hi_bit() << 22 | Dn->hi_bits() << 16 |
+ Dd->hi_bits() << 12 | 0x1 << 8 | Dn->hi_bit() << 7 | quad << 6 |
+ Dm->hi_bit() << 5 | 0x1 << 4 | Dm->hi_bits());
+ }
+
+
+ Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
+
+#ifdef COMPILER2
+ typedef VFP::double_num double_num;
+ typedef VFP::float_num float_num;
+#endif
+};
+
+#ifdef __SOFTFP__
+// Soft float function declarations
+extern "C" {
+extern float __aeabi_fadd(float, float);
+extern float __aeabi_fmul(float, float);
+extern float __aeabi_fsub(float, float);
+extern float __aeabi_fdiv(float, float);
+
+extern double __aeabi_dadd(double, double);
+extern double __aeabi_dmul(double, double);
+extern double __aeabi_dsub(double, double);
+extern double __aeabi_ddiv(double, double);
+
+extern double __aeabi_f2d(float);
+extern float __aeabi_d2f(double);
+extern float __aeabi_i2f(int);
+extern double __aeabi_i2d(int);
+extern int __aeabi_f2iz(float);
+
+extern int __aeabi_fcmpeq(float, float);
+extern int __aeabi_fcmplt(float, float);
+extern int __aeabi_fcmple(float, float);
+extern int __aeabi_fcmpge(float, float);
+extern int __aeabi_fcmpgt(float, float);
+
+extern int __aeabi_dcmpeq(double, double);
+extern int __aeabi_dcmplt(double, double);
+extern int __aeabi_dcmple(double, double);
+extern int __aeabi_dcmpge(double, double);
+extern int __aeabi_dcmpgt(double, double);
+
+// Imported code from glibc soft-fp bundle for
+// calculation accuracy improvement. See CR 6757269.
+extern double __aeabi_fadd_glibc(float, float);
+extern double __aeabi_fsub_glibc(float, float);
+extern double __aeabi_dadd_glibc(double, double);
+extern double __aeabi_dsub_glibc(double, double);
+};
+#endif // __SOFTFP__
+
+
+#endif // CPU_ARM_VM_ASSEMBLER_ARM_32_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/assembler_arm_64.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "ci/ciEnv.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jvm_misc.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/hashtable.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif // INCLUDE_ALL_GCS
+
+// Tests whether bit fields <0:size-1> and <size:2*size-1> of imm hold the same value.
+inline bool Assembler::LogicalImmediate::has_equal_subpatterns(uintx imm, int size) {
+  const uintx field_mask = right_n_bits(size);
+  const uintx low_field  = mask_bits(imm, field_mask);
+  const uintx high_field = mask_bits(imm >> size, field_mask);
+  return low_field == high_field;
+}
+
+// Returns the smallest power-of-two size, from 2 up to the word size, such that
+// imm consists solely of repetitions of one pattern of that size.
+inline int Assembler::LogicalImmediate::least_pattern_size(uintx imm) {
+  int size = BitsPerWord;
+  for (; size > 2; size >>= 1) {
+    if (!has_equal_subpatterns(imm, size >> 1)) break;  // the two halves differ: cannot shrink further
+  }
+  return size;
+}
+
+// Returns count of set bits in given x. Based on variable-precision SWAR algorithm (the masks are 64 bits wide).
+inline int Assembler::LogicalImmediate::population_count(uintx x) {
+ x -= ((x >> 1) & 0x5555555555555555L); // every 2-bit field now holds the number of bits that were set in it
+ x = (((x >> 2) & 0x3333333333333333L) + (x & 0x3333333333333333L)); // per-nibble counts
+ x = (((x >> 4) + x) & 0x0f0f0f0f0f0f0f0fL); // per-byte counts
+ x += (x >> 8); // fold the byte counts together ...
+ x += (x >> 16);
+ x += (x >> 32); // ... so that the total ends up in the low byte
+ return(x & 0x7f); // 7 bits are enough for a count of at most 64
+}
+
+// Sets the trailing zero bits of x: for x = <A:B>, where B is all zeroes and the least bit of A is 1, returns <A:C> with C all ones and as wide as B.
+inline uintx Assembler::LogicalImmediate::set_least_zeroes(uintx x) {
+ return x | (x - 1);
+}
+
+
+#ifdef ASSERT
+
+// Reconstructs the immediate value from the encoded _immN, _immr and _imms fields.
+uintx Assembler::LogicalImmediate::decode() {
+ assert (_encoded, "should be");
+
+ int len_code = (_immN << 6) | ((~_imms) & 0x3f); // N : NOT(imms); its highest set bit gives log2 of the element size
+ assert (len_code != 0, "should be");
+
+ int len = 6;
+ while (!is_set_nth_bit(len_code, len)) len--; // find the highest set bit of len_code
+ int esize = 1 << len; // element (repeating pattern) size in bits
+ assert (len > 0, "should be");
+ assert ((_is32bit ? 32 : 64) >= esize, "should be");
+
+ int levels = right_n_bits(len); // mask of the bits of imms/immr that are significant for this element size
+ int S = _imms & levels;
+ int R = _immr & levels;
+
+ assert (S != levels, "should be");
+
+ uintx welem = right_n_bits(S + 1); // S + 1 low bits set
+ uintx wmask = (R == 0) ? welem : ((welem >> R) | (welem << (esize - R))); // welem rotated right by R in an esize-bit element
+
+ for (int size = esize; size < 64; size <<= 1) { // replicate the element until 64 bits are filled
+ wmask |= (wmask << size);
+ }
+
+ return wmask;
+}
+
+#endif
+
+
+// Builds the encoding of imm as an immediate of the AArch64 logical instructions (AND, ANDS, EOR, ORR).
+// On success sets _immN, _immr, _imms and _encoded = true; if imm cannot be encoded only _encoded = false is recorded.
+void Assembler::LogicalImmediate::construct(uintx imm, bool is32) {
+ _is32bit = is32;
+
+ if (is32) {
+ assert(((imm >> 32) == 0) || (((intx)imm >> 31) == -1), "32-bit immediate is out of range");
+
+ // Replicate low 32 bits.
+ imm &= 0xffffffff;
+ imm |= imm << 32;
+ }
+
+ // All-zeroes and all-ones cannot be encoded.
+ if (imm != 0 && (~imm != 0)) {
+
+ // Let LPS (least pattern size) be the least size (power of two from 2 to 64) of repeating
+ // patterns in the immediate. If immediate value can be encoded, it is encoded by pattern
+ // of exactly LPS size (due to structure of valid patterns). In order to verify
+ // that immediate value can be encoded, LPS is calculated and <LPS-1:0> bits of immediate
+ // are verified to be valid pattern.
+ int lps = least_pattern_size(imm);
+ uintx lps_mask = right_n_bits(lps);
+
+ // A valid pattern has one of the following forms:
+ // | 0 x A | 1 x B | 0 x C |, where B > 0 and C > 0, or
+ // | 1 x A | 0 x B | 1 x C |, where B > 0 and C > 0.
+ // For simplicity, the second form of the pattern is inverted into the first form.
+ bool inverted = imm & 0x1; // a set low bit means the second form
+ uintx pattern = (inverted ? ~imm : imm) & lps_mask;
+
+ // | 0 x A | 1 x (B + C) |
+ uintx without_least_zeroes = set_least_zeroes(pattern);
+
+ // Pattern is valid iff without least zeroes it has the form 2^k - 1 (a single run of ones starting at bit 0).
+ if ((without_least_zeroes & (without_least_zeroes + 1)) == 0) {
+
+ // Count B as population count of pattern.
+ int bits_count = population_count(pattern);
+
+ // Count B+C as population count of pattern without least zeroes
+ int left_range = population_count(without_least_zeroes);
+
+ // S-prefix is a part of imms field which encodes LPS.
+ // LPS | S prefix
+ // 64 | not defined
+ // 32 | 0b0
+ // 16 | 0b10
+ // 8 | 0b110
+ // 4 | 0b1110
+ // 2 | 0b11110
+ int s_prefix = (lps == 64) ? 0 : ~set_least_zeroes(lps) & 0x3f;
+
+ // immN bit is set iff LPS == 64.
+ _immN = (lps == 64) ? 1 : 0;
+ assert (!is32 || (_immN == 0), "32-bit immediate should be encoded with zero N-bit");
+
+ // immr is the rotation size.
+ _immr = lps + (inverted ? 0 : bits_count) - left_range; // LPS - C for the first form, A for the inverted one
+
+ // imms is the field that encodes bits count and S-prefix.
+ _imms = ((inverted ? (lps - bits_count) : bits_count) - 1) | s_prefix;
+
+ _encoded = true;
+ assert (decode() == imm, "illegal encoding"); // round-trip check of the fields just computed
+
+ return;
+ }
+ }
+
+ _encoded = false;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/assembler_arm_64.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,1717 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_ASSEMBLER_ARM_64_HPP
+#define CPU_ARM_VM_ASSEMBLER_ARM_64_HPP
+
+enum AsmShift12 {
+ lsl0 = 0, lsl12 = 1 // shift flag of an add/sub immediate: value used as-is, or shifted left by 12
+};
+
+enum AsmPrefetchOp {
+ pldl1keep = 0b00000,
+ pldl1strm = 0b00001,
+ pldl2keep = 0b00010,
+ pldl2strm = 0b00011,
+ pldl3keep = 0b00100,
+ pldl3strm = 0b00101,
+ // Values are OR'ed unchanged into the low bits of the prfm instruction word.
+ plil1keep = 0b01000,
+ plil1strm = 0b01001,
+ plil2keep = 0b01010,
+ plil2strm = 0b01011,
+ plil3keep = 0b01100,
+ plil3strm = 0b01101,
+ // Codes 0b00110-0b00111 and 0b01110-0b01111 are intentionally left unnamed.
+ pstl1keep = 0b10000,
+ pstl1strm = 0b10001,
+ pstl2keep = 0b10010,
+ pstl2strm = 0b10011,
+ pstl3keep = 0b10100,
+ pstl3strm = 0b10101,
+};
+
+// Shifted register operand for data processing instructions.
+class AsmOperand VALUE_OBJ_CLASS_SPEC {
+ private:
+ Register _reg; // register whose value is used
+ AsmShift _shift; // kind of shift applied to _reg
+ int _shift_imm; // shift amount; asserted non-negative when given explicitly
+
+ public:
+ // Unshifted register, stored as "lsl by 0". Deliberately not explicit, so
+ // that a bare Register can be passed where an AsmOperand is expected.
+ AsmOperand(Register reg)
+ : _reg(reg), _shift(lsl), _shift_imm(0) {
+ assert(reg != SP, "SP is not allowed in shifted register operand");
+ }
+
+ // Register shifted by shift_imm. Only the sign of shift_imm is checked
+ // here; the upper bound is asserted by the instruction that uses it.
+ AsmOperand(Register reg, AsmShift shift, int shift_imm)
+ : _reg(reg), _shift(shift), _shift_imm(shift_imm) {
+ assert(reg != SP, "SP is not allowed in shifted register operand");
+ assert(shift_imm >= 0, "shift amount should be non-negative");
+ }
+
+ // Register whose value is shifted; the constructors assert that it is
+ // not SP.
+ Register reg() const { return _reg; }
+
+ // Kind of shift applied to reg(); lsl when the operand was built from a
+ // bare register.
+ AsmShift shift() const { return _shift; }
+
+ // Shift amount; 0 when the operand was built from a bare register.
+ int shift_imm() const { return _shift_imm; }
+
+};
+
+
+class Assembler : public AbstractAssembler {
+
+ public:
+
+ static const int LogInstructionSize = 2;
+ static const int InstructionSize = 1 << LogInstructionSize;
+
+ Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
+
+ static inline AsmCondition inverse(AsmCondition cond) {
+ assert ((cond != al) && (cond != nv), "AL and NV conditions cannot be inversed");
+ return static_cast<AsmCondition>(static_cast<int>(cond) ^ 1); // toggle bit 0 of the condition code
+ }
+
+ // Returns value of nzcv flags conforming to the given condition.
+ static inline int flags_for_condition(AsmCondition cond) {
+ // Result is a 4-bit N:Z:C:V value with at most one flag set.
+ if ((cond == mi) || (cond == lt)) return 0b1000; // N
+ if ((cond == eq) || (cond == le)) return 0b0100; // Z
+ if ((cond == hs) || (cond == hi)) return 0b0010; // C
+ if (cond == vs) return 0b0001; // V
+ // Every other condition gets all four flags clear.
+ return 0b0000;
+ }
+
+ // Immediate, encoded into logical instructions.
+ class LogicalImmediate { // immN/immr/imms fields computed for one immediate value
+ private:
+ bool _encoded; // written by construct(): true iff the value could be encoded
+ bool _is32bit; // compared against the instruction width by the logical-immediate emitters
+ int _immN; // emitted at bit 22 of a logical immediate instruction
+ int _immr; // emitted at bit 16
+ int _imms; // emitted at bit 10
+ // Helpers used by construct(); declared here, defined out of line.
+ static inline bool has_equal_subpatterns(uintx imm, int size);
+ static inline int least_pattern_size(uintx imm);
+ static inline int population_count(uintx x);
+ static inline uintx set_least_zeroes(uintx x);
+
+#ifdef ASSERT
+ uintx decode(); // debug builds only: construct() asserts decode() == imm after encoding
+#endif
+
+ void construct(uintx imm, bool is32); // computes the fields above; defined out of line
+
+ public:
+ LogicalImmediate(uintx imm, bool is32 = false) { construct(imm, is32); } // implicit conversion from uintx is allowed
+
+ // Returns true if given immediate can be used in AArch64 logical instruction.
+ bool is_encoded() const { return _encoded; }
+ // The field accessors below assert is_encoded(); is32bit() does not.
+ bool is32bit() const { return _is32bit; }
+ int immN() const { assert(_encoded, "should be"); return _immN; }
+ int immr() const { assert(_encoded, "should be"); return _immr; }
+ int imms() const { assert(_encoded, "should be"); return _imms; }
+ };
+
+ // Immediate, encoded into arithmetic add/sub instructions.
+ class ArithmeticImmediate {
+ private:
+ bool _encoded; // true iff the value fits one of the two forms
+ int _imm; // 12-bit field value; meaningful only when _encoded
+ AsmShift12 _shift; // shift flag emitted with _imm; meaningful only when _encoded
+
+ public:
+ // Chooses the form for x: plain 12-bit, else 12-bit shifted left by 12.
+ ArithmeticImmediate(intx x) {
+ const bool plain = is_unsigned_imm_in_range(x, 12, 0);
+ const bool shifted = !plain && is_unsigned_imm_in_range(x, 12, 12);
+ _encoded = plain || shifted;
+ if (plain) {
+ _imm = x;
+ _shift = lsl0;
+ } else if (shifted) {
+ _imm = x >> 12;
+ _shift = lsl12;
+ }
+ }
+
+ // Takes x as the 12-bit field value itself and sh as the shift flag to
+ // emit with it; x is not shifted or otherwise adjusted for sh.
+ ArithmeticImmediate(intx x, AsmShift12 sh) {
+ _encoded = is_unsigned_imm_in_range(x, 12, 0);
+ if (_encoded) {
+ _imm = x;
+ _shift = sh;
+ }
+ }
+
+ // Returns true if this immediate can be used in AArch64 arithmetic (add/sub/cmp/cmn) instructions.
+ bool is_encoded() const { return _encoded; }
+
+ int imm() const { assert(_encoded, "should be"); return _imm; }
+ AsmShift12 shift() const { assert(_encoded, "should be"); return _shift; }
+ };
+
+ static inline bool is_imm_in_range(intx value, int bits, int align_bits) {
+ const intx high_part = value >> (bits + align_bits - 1); // the field's sign bit and everything above it
+ return ((high_part == 0) || (high_part == -1)) && ((value & right_n_bits(align_bits)) == 0);
+ }
+
+ static inline int encode_imm(intx value, int bits, int align_bits, int low_bit_in_encoding) { // signed field encoder
+ assert (is_imm_in_range(value, bits, align_bits), "immediate value is out of range");
+ return ((value >> align_bits) & right_n_bits(bits)) << low_bit_in_encoding; // drop alignment bits, keep `bits` bits, move into place
+ }
+
+ static inline bool is_unsigned_imm_in_range(intx value, int bits, int align_bits) { // non-negative, aligned, fits in `bits` bits after scaling
+ return !((value < 0) || ((value & right_n_bits(align_bits)) != 0) || ((value >> (align_bits + bits)) != 0));
+ }
+
+ static inline int encode_unsigned_imm(intx value, int bits, int align_bits, int low_bit_in_encoding) { // unsigned field encoder
+ assert (is_unsigned_imm_in_range(value, bits, align_bits), "immediate value is out of range");
+ return (value >> align_bits) << low_bit_in_encoding; // no masking: the range check above is the only guard against overflow into other fields
+ }
+
+ static inline bool is_offset_in_range(intx offset, int bits) { // can a byte offset be stored in a `bits`-wide offset field?
+ assert (bits == 14 || bits == 19 || bits == 26, "wrong bits number"); // the only field widths used by callers in this class
+ return is_imm_in_range(offset, bits, 2); // signed, and the low 2 bits of offset must be zero
+ }
+
+ static inline int encode_offset(intx offset, int bits, int low_bit_in_encoding) { // byte offset -> signed field in units of 4 bytes
+ return encode_imm(offset, bits, 2, low_bit_in_encoding); // encode_imm asserts the range; bits is not re-checked against 14/19/26 here
+ }
+
+ // Returns true if given value can be used as immediate in arithmetic (add/sub/cmp/cmn) instructions.
+ static inline bool is_arith_imm_in_range(intx value) { // accepts both the plain and the shifted-by-12 form
+ return ArithmeticImmediate(value).is_encoded();
+ }
+
+
+ // Load/store instructions
+
+#define F(mnemonic, opc) /* load rd from literal_addr, addressed relative to pc() */ \
+ void mnemonic(Register rd, address literal_addr) { \
+ intx offset = literal_addr - pc(); \
+ assert (opc != 0b01 || offset == 0 || ((uintx)literal_addr & 7) == 0, "ldr target should be aligned"); \
+ assert (is_offset_in_range(offset, 19), "offset is out of range"); \
+ emit_int32(opc << 30 | 0b011 << 27 | encode_offset(offset, 19, 5) | rd->encoding_with_zr()); \
+ }
+ // opc sits in bits 31:30; the offset is a 19-bit field at bit 5, counted in 4-byte units.
+ F(ldr_w, 0b00)
+ F(ldr, 0b01) // the only variant with an alignment assert (8 bytes, skipped when offset == 0)
+ F(ldrsw, 0b10)
+#undef F
+
+#define F(mnemonic, opc) /* load FP/SIMD register rt from literal_addr, addressed relative to pc() */ \
+ void mnemonic(FloatRegister rt, address literal_addr) { \
+ intx offset = literal_addr - pc(); \
+ assert (offset == 0 || ((uintx)literal_addr & right_n_bits(2 + opc)) == 0, "ldr target should be aligned"); \
+ assert (is_offset_in_range(offset, 19), "offset is out of range"); \
+ emit_int32(opc << 30 | 0b011100 << 24 | encode_offset(offset, 19, 5) | rt->encoding()); \
+ }
+ // Asserted target alignment is 2^(2 + opc) bytes (skipped when offset == 0).
+ F(ldr_s, 0b00) // 4-byte alignment
+ F(ldr_d, 0b01) // 8-byte alignment
+ F(ldr_q, 0b10) // 16-byte alignment
+#undef F
+
+#define F(mnemonic, size, o2, L, o1, o0) /* exclusive or acquire/release access of rt at [rn], no offset */ \
+ void mnemonic(Register rt, Register rn) { \
+ emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | 0b11111 << 16 | \
+ o0 << 15 | 0b11111 << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ }
+ // Bits 20:16 and 14:10 are fixed to all ones in this form. Columns: size, o2, L, o1, o0.
+ F(ldxrb, 0b00, 0, 1, 0, 0)
+ F(ldaxrb, 0b00, 0, 1, 0, 1)
+ F(ldarb, 0b00, 1, 1, 0, 1)
+ F(ldxrh, 0b01, 0, 1, 0, 0)
+ F(ldaxrh, 0b01, 0, 1, 0, 1)
+ F(ldarh, 0b01, 1, 1, 0, 1)
+ F(ldxr_w, 0b10, 0, 1, 0, 0)
+ F(ldaxr_w, 0b10, 0, 1, 0, 1)
+ F(ldar_w, 0b10, 1, 1, 0, 1)
+ F(ldxr, 0b11, 0, 1, 0, 0)
+ F(ldaxr, 0b11, 0, 1, 0, 1)
+ F(ldar, 0b11, 1, 1, 0, 1)
+ // Stores in this form: L = 0.
+ F(stlrb, 0b00, 1, 0, 0, 1)
+ F(stlrh, 0b01, 1, 0, 0, 1)
+ F(stlr_w, 0b10, 1, 0, 0, 1)
+ F(stlr, 0b11, 1, 0, 0, 1)
+#undef F
+
+#define F(mnemonic, size, o2, L, o1, o0) /* store-exclusive of rt to [rn]; rs is encoded in bits 20:16 */ \
+ void mnemonic(Register rs, Register rt, Register rn) { \
+ assert (rs != rt, "should be different"); \
+ assert (rs != rn, "should be different"); \
+ emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | rs->encoding_with_zr() << 16 | \
+ o0 << 15 | 0b11111 << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ }
+ // rs must differ from both rt and rn (asserted). Columns: size, o2, L, o1, o0.
+ F(stxrb, 0b00, 0, 0, 0, 0)
+ F(stlxrb, 0b00, 0, 0, 0, 1)
+ F(stxrh, 0b01, 0, 0, 0, 0)
+ F(stlxrh, 0b01, 0, 0, 0, 1)
+ F(stxr_w, 0b10, 0, 0, 0, 0)
+ F(stlxr_w, 0b10, 0, 0, 0, 1)
+ F(stxr, 0b11, 0, 0, 0, 0)
+ F(stlxr, 0b11, 0, 0, 0, 1)
+#undef F
+
+#define F(mnemonic, size, o2, L, o1, o0) /* load-exclusive pair: rt and rt2 from [rn] */ \
+ void mnemonic(Register rt, Register rt2, Register rn) { \
+ assert (rt != rt2, "should be different"); \
+ emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | 0b11111 << 16 | \
+ o0 << 15 | rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ }
+ // Pair forms set o1 = 1; rt2 is encoded in bits 14:10 and bits 20:16 are all ones.
+ F(ldxp_w, 0b10, 0, 1, 1, 0)
+ F(ldaxp_w, 0b10, 0, 1, 1, 1)
+ F(ldxp, 0b11, 0, 1, 1, 0)
+ F(ldaxp, 0b11, 0, 1, 1, 1)
+#undef F
+
+#define F(mnemonic, size, o2, L, o1, o0) /* store-exclusive pair: rt and rt2 to [rn]; rs is encoded in bits 20:16 */ \
+ void mnemonic(Register rs, Register rt, Register rt2, Register rn) { \
+ assert (rs != rt, "should be different"); \
+ assert (rs != rt2, "should be different"); \
+ assert (rs != rn, "should be different"); \
+ emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | rs->encoding_with_zr() << 16 | \
+ o0 << 15 | rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ }
+ // rs must differ from rt, rt2 and rn (asserted); rt and rt2 may be the same register.
+ F(stxp_w, 0b10, 0, 0, 1, 0)
+ F(stlxp_w, 0b10, 0, 0, 1, 1)
+ F(stxp, 0b11, 0, 0, 1, 0)
+ F(stlxp, 0b11, 0, 0, 1, 1)
+#undef F
+
+#define F(mnemonic, opc, V, L) /* pair access of rt, rt2 at [rn + offset]; bits 25:23 of the word stay zero */ \
+ void mnemonic(Register rt, Register rt2, Register rn, int offset = 0) { \
+ assert (!L || rt != rt2, "should be different"); \
+ int align_bits = 2 + (opc >> 1); /* 2 for opc 0b00, 3 for opc 0b10 */ \
+ assert (is_imm_in_range(offset, 7, align_bits), "offset is out of range"); \
+ emit_int32(opc << 30 | 0b101 << 27 | V << 26 | L << 22 | encode_imm(offset, 7, align_bits, 15) | \
+ rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ }
+ // offset is a signed 7-bit field at bit 15, scaled by 2^align_bits; rt != rt2 is asserted for loads only.
+ F(stnp_w, 0b00, 0, 0)
+ F(ldnp_w, 0b00, 0, 1)
+ F(stnp, 0b10, 0, 0)
+ F(ldnp, 0b10, 0, 1)
+#undef F
+
+#define F(mnemonic, opc, V, L) /* FP/SIMD pair access of rt, rt2 at [rn + offset]; bits 25:23 stay zero */ \
+ void mnemonic(FloatRegister rt, FloatRegister rt2, Register rn, int offset = 0) { \
+ assert (!L || (rt != rt2), "should be different"); \
+ int align_bits = 2 + opc; /* 2, 3 or 4 for the _s, _d and _q variants */ \
+ assert (is_imm_in_range(offset, 7, align_bits), "offset is out of range"); \
+ emit_int32(opc << 30 | 0b101 << 27 | V << 26 | L << 22 | encode_imm(offset, 7, align_bits, 15) | \
+ rt2->encoding() << 10 | rn->encoding_with_sp() << 5 | rt->encoding()); \
+ }
+ // offset is a signed 7-bit field at bit 15, scaled by 2^align_bits; rt != rt2 is asserted for loads only.
+ F(stnp_s, 0b00, 1, 0)
+ F(stnp_d, 0b01, 1, 0)
+ F(stnp_q, 0b10, 1, 0)
+ F(ldnp_s, 0b00, 1, 1)
+ F(ldnp_d, 0b01, 1, 1)
+ F(ldnp_q, 0b10, 1, 1)
+#undef F
+
+#define F(mnemonic, size, V, opc) /* integer load/store of rt; the encoding form is chosen from addr */ \
+ void mnemonic(Register rt, Address addr) { \
+ assert((addr.mode() == basic_offset) || (rt != addr.base()), "should be different"); \
+ if (addr.index() == noreg) { \
+ if ((addr.mode() == basic_offset) && is_unsigned_imm_in_range(addr.disp(), 12, size)) { /* unsigned 12-bit offset scaled by 2^size */ \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \
+ encode_unsigned_imm(addr.disp(), 12, size, 10) | \
+ addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ } else { /* signed 9-bit byte offset at bit 12; addr.mode() is emitted in bits 11:10 */ \
+ assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \
+ addr.mode() << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ } \
+ } else { /* [base + index]: bit 12 is set when the index is shifted, which must be by exactly `size` */ \
+ assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode"); \
+ assert ((addr.shift_imm() == 0) || (addr.shift_imm() == size), "invalid shift amount"); \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \
+ addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \
+ 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ } \
+ }
+ // Columns: size (bits 31:30, also the offset scale), V (bit 26), opc (bits 23:22).
+ F(strb, 0b00, 0, 0b00)
+ F(ldrb, 0b00, 0, 0b01)
+ F(ldrsb, 0b00, 0, 0b10)
+ F(ldrsb_w, 0b00, 0, 0b11)
+
+ F(strh, 0b01, 0, 0b00)
+ F(ldrh, 0b01, 0, 0b01)
+ F(ldrsh, 0b01, 0, 0b10)
+ F(ldrsh_w, 0b01, 0, 0b11)
+
+ F(str_w, 0b10, 0, 0b00)
+ F(ldr_w, 0b10, 0, 0b01)
+ F(ldrsw, 0b10, 0, 0b10)
+
+ F(str, 0b11, 0, 0b00)
+ F(ldr, 0b11, 0, 0b01)
+#undef F
+
+#define F(mnemonic, size, V, opc) /* prefetch: same address forms as the integer loads, with prfop in the low bits instead of a register */ \
+ void mnemonic(AsmPrefetchOp prfop, Address addr) { \
+ assert (addr.mode() == basic_offset, #mnemonic " supports only basic_offset address mode"); \
+ if (addr.index() == noreg) { \
+ if (is_unsigned_imm_in_range(addr.disp(), 12, size)) { /* unsigned 12-bit offset scaled by 2^size */ \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \
+ encode_unsigned_imm(addr.disp(), 12, size, 10) | \
+ addr.base()->encoding_with_sp() << 5 | prfop); \
+ } else { /* signed 9-bit byte offset; bits 11:10 stay zero */ \
+ assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \
+ addr.base()->encoding_with_sp() << 5 | prfop); \
+ } \
+ } else { /* [base + index], index optionally shifted by `size` */ \
+ assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode"); \
+ assert ((addr.shift_imm() == 0) || (addr.shift_imm() == size), "invalid shift amount"); \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \
+ addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \
+ 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | prfop); \
+ } \
+ }
+ // Single instantiation: size 0b11, so immediate offsets are scaled by 8 bytes.
+ F(prfm, 0b11, 0, 0b10)
+#undef F
+
+#define F(mnemonic, size, V, opc) /* FP/SIMD load/store of rt; the encoding form is chosen from addr */ \
+ void mnemonic(FloatRegister rt, Address addr) { \
+ int align_bits = (((opc & 0b10) >> 1) << 2) | size; /* equals size, or 4 when bit 1 of opc is set (the _q variants) */ \
+ if (addr.index() == noreg) { \
+ if ((addr.mode() == basic_offset) && is_unsigned_imm_in_range(addr.disp(), 12, align_bits)) { /* scaled unsigned 12-bit offset */ \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \
+ encode_unsigned_imm(addr.disp(), 12, align_bits, 10) | \
+ addr.base()->encoding_with_sp() << 5 | rt->encoding()); \
+ } else { /* signed 9-bit byte offset at bit 12; addr.mode() is emitted in bits 11:10 */ \
+ assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \
+ addr.mode() << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding()); \
+ } \
+ } else { /* [base + index], index optionally shifted by align_bits */ \
+ assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode"); \
+ assert ((addr.shift_imm() == 0) || (addr.shift_imm() == align_bits), "invalid shift amount"); \
+ emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \
+ addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \
+ 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding()); \
+ } \
+ }
+ // Columns: size, V, opc. The _q variants reuse size 0b00 and are told apart by bit 1 of opc.
+ F(str_b, 0b00, 1, 0b00)
+ F(ldr_b, 0b00, 1, 0b01)
+ F(str_h, 0b01, 1, 0b00)
+ F(ldr_h, 0b01, 1, 0b01)
+ F(str_s, 0b10, 1, 0b00)
+ F(ldr_s, 0b10, 1, 0b01)
+ F(str_d, 0b11, 1, 0b00)
+ F(ldr_d, 0b11, 1, 0b01)
+ F(str_q, 0b00, 1, 0b10)
+ F(ldr_q, 0b00, 1, 0b11)
+#undef F
+
+#define F(mnemonic, opc, V, L) /* integer load/store pair with immediate offset and optional base update */ \
+ void mnemonic(Register rt, Register rt2, Address addr) { \
+ assert((addr.mode() == basic_offset) || ((rt != addr.base()) && (rt2 != addr.base())), "should be different"); \
+ assert(!L || (rt != rt2), "should be different"); \
+ assert(addr.index() == noreg, "[reg + reg] address mode is not available for load/store pair"); \
+ int align_bits = 2 + (opc >> 1); /* 2 for opc 0b00 and 0b01, 3 for opc 0b10 */ \
+ int mode_encoding = (addr.mode() == basic_offset) ? 0b10 : addr.mode(); /* basic_offset is remapped; other modes are emitted as-is */ \
+ assert(is_imm_in_range(addr.disp(), 7, align_bits), "offset is out of range"); \
+ emit_int32(opc << 30 | 0b101 << 27 | V << 26 | mode_encoding << 23 | L << 22 | \
+ encode_imm(addr.disp(), 7, align_bits, 15) | rt2->encoding_with_zr() << 10 | \
+ addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \
+ }
+ // Displacement is a signed 7-bit field at bit 15, scaled by 2^align_bits.
+ F(stp_w, 0b00, 0, 0)
+ F(ldp_w, 0b00, 0, 1)
+ F(ldpsw, 0b01, 0, 1)
+ F(stp, 0b10, 0, 0)
+ F(ldp, 0b10, 0, 1)
+#undef F
+
+#define F(mnemonic, opc, V, L) /* FP/SIMD load/store pair with immediate offset and optional base update */ \
+ void mnemonic(FloatRegister rt, FloatRegister rt2, Address addr) { \
+ assert(!L || (rt != rt2), "should be different"); \
+ assert(addr.index() == noreg, "[reg + reg] address mode is not available for load/store pair"); \
+ int align_bits = 2 + opc; /* 2, 3 or 4 for the _s, _d and _q variants */ \
+ int mode_encoding = (addr.mode() == basic_offset) ? 0b10 : addr.mode(); /* basic_offset is remapped; other modes are emitted as-is */ \
+ assert(is_imm_in_range(addr.disp(), 7, align_bits), "offset is out of range"); \
+ emit_int32(opc << 30 | 0b101 << 27 | V << 26 | mode_encoding << 23 | L << 22 | \
+ encode_imm(addr.disp(), 7, align_bits, 15) | rt2->encoding() << 10 | \
+ addr.base()->encoding_with_sp() << 5 | rt->encoding()); \
+ }
+ // Displacement is a signed 7-bit field at bit 15, scaled by 2^align_bits.
+ F(stp_s, 0b00, 1, 0)
+ F(ldp_s, 0b00, 1, 1)
+ F(stp_d, 0b01, 1, 0)
+ F(ldp_d, 0b01, 1, 1)
+ F(stp_q, 0b10, 1, 0)
+ F(ldp_q, 0b10, 1, 1)
+#undef F
+
+ // Data processing instructions
+
+#define F(mnemonic, sf, opc) /* logical operation of rn with an immediate; three overloads per mnemonic */ \
+ void mnemonic(Register rd, Register rn, const LogicalImmediate& imm) { \
+ assert (imm.is_encoded(), "illegal immediate for logical instruction"); \
+ assert (imm.is32bit() == (sf == 0), "immediate size does not match instruction size"); \
+ emit_int32(sf << 31 | opc << 29 | 0b100100 << 23 | imm.immN() << 22 | imm.immr() << 16 | \
+ imm.imms() << 10 | rn->encoding_with_zr() << 5 | \
+ ((opc == 0b11) ? rd->encoding_with_zr() : rd->encoding_with_sp())); /* only opc 0b11 (ands) encodes rd with ZR */ \
+ } \
+ void mnemonic(Register rd, Register rn, uintx imm) { /* encodes imm for this instruction's width, then delegates */ \
+ LogicalImmediate limm(imm, (sf == 0)); \
+ mnemonic(rd, rn, limm); \
+ } \
+ void mnemonic(Register rd, Register rn, unsigned int imm) { /* widens to uintx so the overload above is selected */ \
+ mnemonic(rd, rn, (uintx)imm); \
+ }
+ // sf = 0: 32-bit forms (_w).
+ F(andr_w, 0, 0b00)
+ F(orr_w, 0, 0b01)
+ F(eor_w, 0, 0b10)
+ F(ands_w, 0, 0b11)
+ // sf = 1: 64-bit forms.
+ F(andr, 1, 0b00)
+ F(orr, 1, 0b01)
+ F(eor, 1, 0b10)
+ F(ands, 1, 0b11)
+#undef F
+
+ void tst(Register rn, unsigned int imm) { // 64-bit ands of rn and imm with ZR as destination
+ ands(ZR, rn, imm);
+ }
+
+ void tst_w(Register rn, unsigned int imm) { // 32-bit ands of rn and imm with ZR as destination
+ ands_w(ZR, rn, imm);
+ }
+
+#define F(mnemonic, sf, opc, N) /* logical operation of rn with a shifted register operand */ \
+ void mnemonic(Register rd, Register rn, AsmOperand operand) { \
+ assert (operand.shift_imm() >> (5 + sf) == 0, "shift amount is too large"); /* below 32 for sf = 0, below 64 for sf = 1 */ \
+ emit_int32(sf << 31 | opc << 29 | 0b01010 << 24 | operand.shift() << 22 | N << 21 | \
+ operand.reg()->encoding_with_zr() << 16 | operand.shift_imm() << 10 | \
+ rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // Each opc appears twice: with N = 0, and with N = 1 under a different mnemonic.
+ F(andr_w, 0, 0b00, 0)
+ F(bic_w, 0, 0b00, 1)
+ F(orr_w, 0, 0b01, 0)
+ F(orn_w, 0, 0b01, 1)
+ F(eor_w, 0, 0b10, 0)
+ F(eon_w, 0, 0b10, 1)
+ F(ands_w, 0, 0b11, 0)
+ F(bics_w, 0, 0b11, 1)
+ // sf = 1: 64-bit forms.
+ F(andr, 1, 0b00, 0)
+ F(bic, 1, 0b00, 1)
+ F(orr, 1, 0b01, 0)
+ F(orn, 1, 0b01, 1)
+ F(eor, 1, 0b10, 0)
+ F(eon, 1, 0b10, 1)
+ F(ands, 1, 0b11, 0)
+ F(bics, 1, 0b11, 1)
+#undef F
+
+ void tst(Register rn, AsmOperand operand) { // 64-bit ands of rn and the shifted operand with ZR as destination
+ ands(ZR, rn, operand);
+ }
+
+ void tst_w(Register rn, AsmOperand operand) { // 32-bit ands of rn and the shifted operand with ZR as destination
+ ands_w(ZR, rn, operand);
+ }
+
+ void mvn(Register rd, AsmOperand operand) { // emitted as 64-bit orn with ZR as the first source
+ orn(rd, ZR, operand);
+ }
+
+ void mvn_w(Register rd, AsmOperand operand) { // emitted as 32-bit orn with ZR as the first source
+ orn_w(rd, ZR, operand);
+ }
+
+#define F(mnemonic, sf, op, S) /* add/sub family: immediate, extended-register and shifted-register overloads */ \
+ void mnemonic(Register rd, Register rn, const ArithmeticImmediate& imm) { \
+ assert(imm.is_encoded(), "immediate is out of range"); \
+ emit_int32(sf << 31 | op << 30 | S << 29 | 0b10001 << 24 | imm.shift() << 22 | \
+ imm.imm() << 10 | rn->encoding_with_sp() << 5 | \
+ (S == 1 ? rd->encoding_with_zr() : rd->encoding_with_sp())); /* flag-setting forms encode rd with ZR, the others with SP */ \
+ } \
+ void mnemonic(Register rd, Register rn, int imm) { /* form (plain or shifted by 12) is chosen by ArithmeticImmediate */ \
+ mnemonic(rd, rn, ArithmeticImmediate(imm)); \
+ } \
+ void mnemonic(Register rd, Register rn, int imm, AsmShift12 shift) { /* imm is the 12-bit field value, shift the flag to emit */ \
+ mnemonic(rd, rn, ArithmeticImmediate(imm, shift)); \
+ } \
+ void mnemonic(Register rd, Register rn, Register rm, AsmExtendOp extend, int shift_imm = 0) { /* extended register form; rn is encoded with SP */ \
+ assert ((0 <= shift_imm) && (shift_imm <= 4), "shift amount is out of range"); \
+ emit_int32(sf << 31 | op << 30 | S << 29 | 0b01011001 << 21 | rm->encoding_with_zr() << 16 | \
+ extend << 13 | shift_imm << 10 | rn->encoding_with_sp() << 5 | \
+ (S == 1 ? rd->encoding_with_zr() : rd->encoding_with_sp())); \
+ } \
+ void mnemonic(Register rd, Register rn, AsmOperand operand) { /* shifted register form; rn and rd are encoded with ZR */ \
+ assert (operand.shift() != ror, "illegal shift type"); \
+ assert (operand.shift_imm() >> (5 + sf) == 0, "shift amount is too large"); \
+ emit_int32(sf << 31 | op << 30 | S << 29 | 0b01011 << 24 | operand.shift() << 22 | \
+ operand.reg()->encoding_with_zr() << 16 | operand.shift_imm() << 10 | \
+ rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // Columns: sf (bit 31), op (bit 30), S (bit 29).
+ F(add_w, 0, 0, 0)
+ F(adds_w, 0, 0, 1)
+ F(sub_w, 0, 1, 0)
+ F(subs_w, 0, 1, 1)
+ // sf = 1: 64-bit forms.
+ F(add, 1, 0, 0)
+ F(adds, 1, 0, 1)
+ F(sub, 1, 1, 0)
+ F(subs, 1, 1, 1)
+#undef F
+
+ void mov(Register rd, Register rm) { // 64-bit register move
+ if ((rd == SP) || (rm == SP)) {
+ add(rd, rm, 0); // add-immediate form encodes both registers with SP
+ } else {
+ orr(rd, ZR, rm); // rm converts implicitly to an unshifted AsmOperand
+ }
+ }
+
+ void mov_w(Register rd, Register rm) { // 32-bit register move
+ if ((rd == SP) || (rm == SP)) {
+ add_w(rd, rm, 0); // add-immediate form encodes both registers with SP
+ } else {
+ orr_w(rd, ZR, rm); // rm converts implicitly to an unshifted AsmOperand
+ }
+ }
+
+ void cmp(Register rn, int imm) { // 64-bit subs of imm from rn with ZR as destination; imm must be encodable (asserted in subs)
+ subs(ZR, rn, imm);
+ }
+
+ void cmp_w(Register rn, int imm) { // 32-bit subs of imm from rn with ZR as destination; imm must be encodable (asserted in subs_w)
+ subs_w(ZR, rn, imm);
+ }
+
+ void cmp(Register rn, Register rm) { // 64-bit register compare: subs with ZR as destination
+ assert (rm != SP, "SP should not be used as the 2nd operand of cmp");
+ if (rn == SP) {
+ subs(ZR, rn, rm, ex_uxtx); // the extended-register overload is the one that encodes rn with SP
+ } else {
+ subs(ZR, rn, rm); // shifted-register overload, via the implicit Register -> AsmOperand conversion
+ }
+ }
+
+ void cmp_w(Register rn, Register rm) { // 32-bit register compare; unlike cmp(), SP is rejected for both operands
+ assert ((rn != SP) && (rm != SP), "SP should not be used in 32-bit cmp");
+ subs_w(ZR, rn, rm);
+ }
+
+ void cmp(Register rn, AsmOperand operand) { // 64-bit compare of rn with a shifted register operand
+ assert (rn != SP, "SP is not allowed in cmp with shifted register (AsmOperand)");
+ subs(ZR, rn, operand);
+ }
+
+ void cmn(Register rn, int imm) { // 64-bit adds of rn and imm with ZR as destination; imm must be encodable (asserted in adds)
+ adds(ZR, rn, imm);
+ }
+
+ void cmn_w(Register rn, int imm) { // 32-bit adds of rn and imm with ZR as destination; imm must be encodable (asserted in adds_w)
+ adds_w(ZR, rn, imm);
+ }
+
+ void cmn(Register rn, Register rm) { // 64-bit compare-negative: adds of rn and rm with ZR as destination
+ assert (rm != SP, "SP should not be used as the 2nd operand of cmn");
+ if (rn == SP) {
+ adds(ZR, rn, rm, ex_uxtx); // the extended-register overload is the one that encodes rn with SP
+ } else {
+ adds(ZR, rn, rm); // shifted-register overload, via the implicit Register -> AsmOperand conversion
+ }
+ }
+
+ void cmn_w(Register rn, Register rm) { // 32-bit compare-negative; unlike cmn(), SP is rejected for both operands
+ assert ((rn != SP) && (rm != SP), "SP should not be used in 32-bit cmn");
+ adds_w(ZR, rn, rm);
+ }
+
+ void neg(Register rd, Register rm) { // emitted as 64-bit sub with ZR as the first source (shifted-register overload)
+ sub(rd, ZR, rm);
+ }
+
+ void neg_w(Register rd, Register rm) { // emitted as 32-bit sub with ZR as the first source (shifted-register overload)
+ sub_w(rd, ZR, rm);
+ }
+
+#define F(mnemonic, sf, op, S) /* add/subtract with carry: three plain registers, all encoded with ZR */ \
+ void mnemonic(Register rd, Register rn, Register rm) { \
+ emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010000 << 21 | rm->encoding_with_zr() << 16 | \
+ rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // Columns: sf (bit 31), op (bit 30), S (bit 29).
+ F(adc_w, 0, 0, 0)
+ F(adcs_w, 0, 0, 1)
+ F(sbc_w, 0, 1, 0)
+ F(sbcs_w, 0, 1, 1)
+ // sf = 1: 64-bit forms.
+ F(adc, 1, 0, 0)
+ F(adcs, 1, 0, 1)
+ F(sbc, 1, 1, 0)
+ F(sbcs, 1, 1, 1)
+#undef F
+
+#define F(mnemonic, sf, N) /* extract from the register pair rn:rm starting at bit lsb */ \
+ void mnemonic(Register rd, Register rn, Register rm, int lsb) { \
+ assert ((lsb >> (5 + sf)) == 0, "illegal least significant bit position"); /* 0..31 for sf = 0, 0..63 for sf = 1 */ \
+ emit_int32(sf << 31 | 0b100111 << 23 | N << 22 | rm->encoding_with_zr() << 16 | \
+ lsb << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // N (bit 22) is given the same value as sf in both instantiations.
+ F(extr_w, 0, 0)
+ F(extr, 1, 1)
+#undef F
+
+#define F(mnemonic, sf, opc) /* move a 16-bit immediate placed at bit position `shift` of rd */ \
+ void mnemonic(Register rd, int imm, int shift) { \
+ assert ((imm >> 16) == 0, "immediate is out of range"); \
+ assert (((shift & 0xf) == 0) && ((shift >> (5 + sf)) == 0), "invalid shift"); /* multiple of 16, below 32 (sf = 0) or 64 (sf = 1) */ \
+ emit_int32(sf << 31 | opc << 29 | 0b100101 << 23 | (shift >> 4) << 21 | \
+ imm << 5 | rd->encoding_with_zr()); \
+ }
+ // shift is given in bits; the instruction stores shift / 16 at bit 21.
+ F(movn_w, 0, 0b00)
+ F(movz_w, 0, 0b10)
+ F(movk_w, 0, 0b11)
+ F(movn, 1, 0b00)
+ F(movz, 1, 0b10)
+ F(movk, 1, 0b11)
+#undef F
+
+ void mov(Register rd, int imm) { // 64-bit move of an immediate in 0..0xffff, emitted as movz with shift 0
+ assert ((imm >> 16) == 0, "immediate is out of range"); // rejects negative values too (arithmetic shift)
+ movz(rd, imm, 0);
+ }
+
+ void mov_w(Register rd, int imm) { // 32-bit move of an immediate in 0..0xffff, emitted as movz_w with shift 0
+ assert ((imm >> 16) == 0, "immediate is out of range"); // rejects negative values too (arithmetic shift)
+ movz_w(rd, imm, 0);
+ }
+
+#define F(mnemonic, sf, op, S) /* conditional compare of rn with a 5-bit immediate */ \
+ void mnemonic(Register rn, int imm, int nzcv, AsmCondition cond) { \
+ assert ((imm >> 5) == 0, "immediate is out of range"); \
+ assert ((nzcv >> 4) == 0, "illegal nzcv"); \
+ emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010010 << 21 | imm << 16 | \
+ cond << 12 | 1 << 11 | rn->encoding_with_zr() << 5 | nzcv); /* bit 11 set marks the immediate form */ \
+ }
+ // nzcv is the 4-bit flags value placed in the low bits of the instruction.
+ F(ccmn_w, 0, 0, 1)
+ F(ccmp_w, 0, 1, 1)
+ F(ccmn, 1, 0, 1)
+ F(ccmp, 1, 1, 1)
+#undef F
+
+#define F(mnemonic, sf, op, S) /* conditional compare of rn with register rm; bit 11 stays clear in this form */ \
+ void mnemonic(Register rn, Register rm, int nzcv, AsmCondition cond) { \
+ assert ((nzcv >> 4) == 0, "illegal nzcv"); \
+ emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010010 << 21 | rm->encoding_with_zr() << 16 | \
+ cond << 12 | rn->encoding_with_zr() << 5 | nzcv); \
+ }
+ // Same mnemonics as the immediate form; the two are told apart by the type of the second argument.
+ F(ccmn_w, 0, 0, 1)
+ F(ccmp_w, 0, 1, 1)
+ F(ccmn, 1, 0, 1)
+ F(ccmp, 1, 1, 1)
+#undef F
+
+#define F(mnemonic, sf, op, S, op2) /* conditional select family: cond at bit 12, op2 at bits 11:10 */ \
+ void mnemonic(Register rd, Register rn, Register rm, AsmCondition cond) { \
+ emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010100 << 21 | rm->encoding_with_zr() << 16 | \
+ cond << 12 | op2 << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // Columns: sf, op, S, op2. The four mnemonics differ only in op and op2.
+ F(csel_w, 0, 0, 0, 0b00)
+ F(csinc_w, 0, 0, 0, 0b01)
+ F(csinv_w, 0, 1, 0, 0b00)
+ F(csneg_w, 0, 1, 0, 0b01)
+ // sf = 1: 64-bit forms.
+ F(csel, 1, 0, 0, 0b00)
+ F(csinc, 1, 0, 0, 0b01)
+ F(csinv, 1, 1, 0, 0b00)
+ F(csneg, 1, 1, 0, 0b01)
+#undef F
+
+ void cset(Register rd, AsmCondition cond) { // emitted as csinc rd, ZR, ZR on the inverse condition; cond must not be al or nv
+ csinc(rd, ZR, ZR, inverse(cond));
+ }
+
+ void cset_w(Register rd, AsmCondition cond) { // 32-bit variant: csinc_w rd, ZR, ZR on the inverse condition
+ csinc_w(rd, ZR, ZR, inverse(cond));
+ }
+
+ void csetm(Register rd, AsmCondition cond) { // emitted as csinv rd, ZR, ZR on the inverse condition; cond must not be al or nv
+ csinv(rd, ZR, ZR, inverse(cond));
+ }
+
+ void csetm_w(Register rd, AsmCondition cond) { // 32-bit variant: csinv_w rd, ZR, ZR on the inverse condition
+ csinv_w(rd, ZR, ZR, inverse(cond));
+ }
+
+ void cinc(Register rd, Register rn, AsmCondition cond) { // emitted as csinc rd, rn, rn on the inverse condition; cond must not be al or nv
+ csinc(rd, rn, rn, inverse(cond));
+ }
+
+ void cinc_w(Register rd, Register rn, AsmCondition cond) { // 32-bit variant: csinc_w rd, rn, rn on the inverse condition
+ csinc_w(rd, rn, rn, inverse(cond));
+ }
+
+ void cinv(Register rd, Register rn, AsmCondition cond) { // emitted as csinv rd, rn, rn on the inverse condition; cond must not be al or nv
+ csinv(rd, rn, rn, inverse(cond));
+ }
+
+ void cinv_w(Register rd, Register rn, AsmCondition cond) { // 32-bit variant: csinv_w rd, rn, rn on the inverse condition
+ csinv_w(rd, rn, rn, inverse(cond));
+ }
+
+#define F(mnemonic, sf, S, opcode) /* one-source data processing: rd = op(rn); bit 30 is always set */ \
+ void mnemonic(Register rd, Register rn) { \
+ emit_int32(sf << 31 | 1 << 30 | S << 29 | 0b11010110 << 21 | opcode << 10 | \
+ rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // opcode is a 6-bit field at bit 10. Note opcode 0b000010 is rev_w for sf = 0 but rev32 for sf = 1.
+ F(rbit_w, 0, 0, 0b000000)
+ F(rev16_w, 0, 0, 0b000001)
+ F(rev_w, 0, 0, 0b000010)
+ F(clz_w, 0, 0, 0b000100)
+ F(cls_w, 0, 0, 0b000101)
+ // sf = 1: 64-bit forms.
+ F(rbit, 1, 0, 0b000000)
+ F(rev16, 1, 0, 0b000001)
+ F(rev32, 1, 0, 0b000010)
+ F(rev, 1, 0, 0b000011)
+ F(clz, 1, 0, 0b000100)
+ F(cls, 1, 0, 0b000101)
+#undef F
+
+#define F(mnemonic, sf, S, opcode) /* two-source data processing: rd = op(rn, rm); bit 30 stays clear */ \
+ void mnemonic(Register rd, Register rn, Register rm) { \
+ emit_int32(sf << 31 | S << 29 | 0b11010110 << 21 | rm->encoding_with_zr() << 16 | \
+ opcode << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // opcode is a 6-bit field at bit 10; the same opcode values are used for both widths.
+ F(udiv_w, 0, 0, 0b000010)
+ F(sdiv_w, 0, 0, 0b000011)
+ F(lslv_w, 0, 0, 0b001000)
+ F(lsrv_w, 0, 0, 0b001001)
+ F(asrv_w, 0, 0, 0b001010)
+ F(rorv_w, 0, 0, 0b001011)
+ // sf = 1: 64-bit forms.
+ F(udiv, 1, 0, 0b000010)
+ F(sdiv, 1, 0, 0b000011)
+ F(lslv, 1, 0, 0b001000)
+ F(lsrv, 1, 0, 0b001001)
+ F(asrv, 1, 0, 0b001010)
+ F(rorv, 1, 0, 0b001011)
+#undef F
+
+#define F(mnemonic, sf, op31, o0) /* three-source multiply with accumulator ra (bits 14:10) */ \
+ void mnemonic(Register rd, Register rn, Register rm, Register ra) { \
+ emit_int32(sf << 31 | 0b11011 << 24 | op31 << 21 | rm->encoding_with_zr() << 16 | \
+ o0 << 15 | ra->encoding_with_zr() << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // Columns: sf, op31 (bits 23:21), o0 (bit 15).
+ F(madd_w, 0, 0b000, 0)
+ F(msub_w, 0, 0b000, 1)
+ F(madd, 1, 0b000, 0)
+ F(msub, 1, 0b000, 1)
+ // The "l" variants exist only with sf = 1.
+ F(smaddl, 1, 0b001, 0)
+ F(smsubl, 1, 0b001, 1)
+ F(umaddl, 1, 0b101, 0)
+ F(umsubl, 1, 0b101, 1)
+#undef F
+
+ void mul(Register rd, Register rn, Register rm) { // emitted as 64-bit madd with ZR as the accumulator
+ madd(rd, rn, rm, ZR);
+ }
+
+ void mul_w(Register rd, Register rn, Register rm) { // emitted as 32-bit madd_w with ZR as the accumulator
+ madd_w(rd, rn, rm, ZR);
+ }
+
+#define F(mnemonic, sf, op31, o0) /* same layout as madd/msub, with bits 14:10 fixed to all ones instead of ra */ \
+ void mnemonic(Register rd, Register rn, Register rm) { \
+ emit_int32(sf << 31 | 0b11011 << 24 | op31 << 21 | rm->encoding_with_zr() << 16 | \
+ o0 << 15 | 0b11111 << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // Only 64-bit forms are defined.
+ F(smulh, 1, 0b010, 0)
+ F(umulh, 1, 0b110, 0)
+#undef F
+
+#define F(mnemonic, op) /* PC-relative address of addr into rd */ \
+ void mnemonic(Register rd, address addr) { \
+ intx offset; \
+ if (op == 0) { /* byte distance from pc() */ \
+ offset = addr - pc(); \
+ } else { /* distance from pc() rounded down to 4096, counted in units of 4096 bytes */ \
+ offset = (((intx)addr) - (((intx)pc()) & ~0xfff)) >> 12; \
+ } \
+ assert (is_imm_in_range(offset, 21, 0), "offset is out of range"); \
+ emit_int32(op << 31 | (offset & 3) << 29 | 0b10000 << 24 | /* low 2 bits of the offset go to bits 30:29 */ \
+ encode_imm(offset >> 2, 19, 0, 5) | rd->encoding_with_zr()); /* upper 19 bits go to the field at bit 5 */ \
+ } \
+
+ F(adr, 0) // byte-granular offset
+ F(adrp, 1) // 4096-byte-granular offset; the low 12 bits of addr are not encoded
+#undef F
+
+ void adr(Register rd, Label& L) { // label form: resolves L through target() and uses the address overload
+ adr(rd, target(L));
+ }
+
+#define F(mnemonic, sf, opc, N) /* raw bitfield move: immr at bit 16, imms at bit 10 */ \
+ void mnemonic(Register rd, Register rn, int immr, int imms) { \
+ assert ((immr >> (5 + sf)) == 0, "immr is out of range"); /* 0..31 for sf = 0, 0..63 for sf = 1 */ \
+ assert ((imms >> (5 + sf)) == 0, "imms is out of range"); \
+ emit_int32(sf << 31 | opc << 29 | 0b100110 << 23 | N << 22 | immr << 16 | \
+ imms << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \
+ }
+ // N (bit 22) is given the same value as sf in every instantiation.
+ F(sbfm_w, 0, 0b00, 0)
+ F(bfm_w, 0, 0b01, 0)
+ F(ubfm_w, 0, 0b10, 0)
+ // sf = 1: 64-bit forms.
+ F(sbfm, 1, 0b00, 1)
+ F(bfm, 1, 0b01, 1)
+ F(ubfm, 1, 0b10, 1)
+#undef F
+
+#define F(alias, mnemonic, sf, immr, imms) /* (lsb, width) front end for a bitfield move; immr and imms are expressions over lsb and width */ \
+ void alias(Register rd, Register rn, int lsb, int width) { \
+ assert ((lsb >> (5 + sf)) == 0, "lsb is out of range"); \
+ assert ((1 <= width) && (width <= (32 << sf) - lsb), "width is out of range"); /* the field must end inside the register */ \
+ mnemonic(rd, rn, immr, imms); \
+ }
+ // Two patterns: (-lsb mod register size, width - 1) and (lsb, lsb + width - 1).
+ F(bfi_w, bfm_w, 0, (-lsb) & 0x1f, width - 1)
+ F(bfi, bfm, 1, (-lsb) & 0x3f, width - 1)
+ F(bfxil_w, bfm_w, 0, lsb, lsb + width - 1)
+ F(bfxil, bfm, 1, lsb, lsb + width - 1)
+ F(sbfiz_w, sbfm_w, 0, (-lsb) & 0x1f, width - 1)
+ F(sbfiz, sbfm, 1, (-lsb) & 0x3f, width - 1)
+ F(sbfx_w, sbfm_w, 0, lsb, lsb + width - 1)
+ F(sbfx, sbfm, 1, lsb, lsb + width - 1)
+ F(ubfiz_w, ubfm_w, 0, (-lsb) & 0x1f, width - 1)
+ F(ubfiz, ubfm, 1, (-lsb) & 0x3f, width - 1)
+ F(ubfx_w, ubfm_w, 0, lsb, lsb + width - 1)
+ F(ubfx, ubfm, 1, lsb, lsb + width - 1)
+#undef F
+
+#define F(alias, mnemonic, sf, immr, imms) /* shift by a constant, expressed as a bitfield move; immr and imms are expressions over shift */ \
+ void alias(Register rd, Register rn, int shift) { \
+ assert ((shift >> (5 + sf)) == 0, "shift is out of range"); /* 0..31 for sf = 0, 0..63 for sf = 1 */ \
+ mnemonic(rd, rn, immr, imms); \
+ }
+ // The names carry a leading underscore; any un-prefixed asr/lsl/lsr entry points are not defined in this block.
+ F(_asr_w, sbfm_w, 0, shift, 31)
+ F(_asr, sbfm, 1, shift, 63)
+ F(_lsl_w, ubfm_w, 0, (-shift) & 0x1f, 31 - shift)
+ F(_lsl, ubfm, 1, (-shift) & 0x3f, 63 - shift)
+ F(_lsr_w, ubfm_w, 0, shift, 31)
+ F(_lsr, ubfm, 1, shift, 63)
+#undef F
+
+#define F(alias, mnemonic, immr, imms) /* sign/zero extension as a bitfield move with constant immr and imms */ \
+ void alias(Register rd, Register rn) { \
+ mnemonic(rd, rn, immr, imms); \
+ }
+ // immr is always 0; imms is the index of the top source bit that is kept (7, 15 or 31).
+ F(sxtb_w, sbfm_w, 0, 7)
+ F(sxtb, sbfm, 0, 7)
+ F(sxth_w, sbfm_w, 0, 15)
+ F(sxth, sbfm, 0, 15)
+ F(sxtw, sbfm, 0, 31)
+ F(uxtb_w, ubfm_w, 0, 7)
+ F(uxtb, ubfm, 0, 7)
+ F(uxth_w, ubfm_w, 0, 15)
+ F(uxth, ubfm, 0, 15)
+#undef F
+
+ // Branch instructions
+
+#define F(mnemonic, op) /* branch to the address held in rn */ \
+ void mnemonic(Register rn) { \
+ emit_int32(0b1101011 << 25 | op << 21 | 0b11111 << 16 | rn->encoding_with_zr() << 5); \
+ }
+ // op is placed at bit 21; rn is encoded at bit 5 and the low five bits stay zero.
+ F(br, 0b00)
+ F(blr, 0b01)
+ F(ret, 0b10)
+#undef F
+
+ void ret() { // no-argument form: ret through LR
+ ret(LR);
+ }
+
+#define F(mnemonic, op) /* unconditional PC-relative branch to target */ \
+ void mnemonic(address target) { \
+ intx offset = target - pc(); \
+ assert (is_offset_in_range(offset, 26), "offset is out of range"); \
+ emit_int32(op << 31 | 0b00101 << 26 | encode_offset(offset, 26, 0)); \
+ }
+ // offset is a signed 26-bit field at bit 0, counted in 4-byte units; op is bit 31.
+ F(b, 0)
+ F(bl, 1)
+#undef F
+
+ void b(address target, AsmCondition cond) { // conditional branch; al falls back to the unconditional form
+ if (cond == al) {
+ b(target); // 26-bit offset range
+ } else {
+ intx offset = target - pc();
+ assert (is_offset_in_range(offset, 19), "offset is out of range"); // conditional form only has a 19-bit offset field
+ emit_int32(0b0101010 << 25 | encode_offset(offset, 19, 5) | cond); // cond occupies the low bits
+ }
+ }
+
+
+#define F(mnemonic, sf, op) /* PC-relative branch that tests rt; 19-bit offset field at bit 5 */ \
+ void mnemonic(Register rt, address target) { \
+ intx offset = target - pc(); \
+ assert (is_offset_in_range(offset, 19), "offset is out of range"); \
+ emit_int32(sf << 31 | 0b011010 << 25 | op << 24 | encode_offset(offset, 19, 5) | rt->encoding_with_zr()); \
+ } \
+
+ F(cbz_w, 0, 0) // op (bit 24) = 0 for the cbz variants
+ F(cbnz_w, 0, 1) // op (bit 24) = 1 for the cbnz variants
+ F(cbz, 1, 0)
+ F(cbnz, 1, 1)
+#undef F
+
+#define F(mnemonic, op) \
+ void mnemonic(Register rt, int bit, address target) { \
+ intx offset = target - pc(); \
+ assert (is_offset_in_range(offset, 14), "offset is out of range"); \
+ assert (0 <= bit && bit < 64, "bit number is out of range"); \
+ emit_int32((bit >> 5) << 31 | 0b011011 << 25 | op << 24 | (bit & 0x1f) << 19 | \
+ encode_offset(offset, 14, 5) | rt->encoding_with_zr()); \
+ } \
+
+ F(tbz, 0)
+ F(tbnz, 1)
+#undef F
+
+ // System instructions
+
+ enum DMB_Opt {
+ DMB_ld = 0b1101,
+ DMB_st = 0b1110,
+ DMB_all = 0b1111
+ };
+
+#define F(mnemonic, L, op0, op1, CRn, op2, Rt) \
+ void mnemonic(DMB_Opt option) { \
+ emit_int32(0b1101010100 << 22 | L << 21 | op0 << 19 | op1 << 16 | \
+ CRn << 12 | option << 8 | op2 << 5 | Rt); \
+ }
+
+ F(dsb, 0, 0b00, 0b011, 0b0011, 0b100, 0b11111)
+ F(dmb, 0, 0b00, 0b011, 0b0011, 0b101, 0b11111)
+#undef F
+
+#define F(mnemonic, L, op0, op1, CRn, Rt) \
+ void mnemonic(int imm) { \
+ assert ((imm >> 7) == 0, "immediate is out of range"); \
+ emit_int32(0b1101010100 << 22 | L << 21 | op0 << 19 | op1 << 16 | \
+ CRn << 12 | imm << 5 | Rt); \
+ }
+
+ F(hint, 0, 0b00, 0b011, 0b0010, 0b11111)
+#undef F
+
+ void nop() {
+ hint(0);
+ }
+
+ void yield() {
+ hint(1);
+ }
+
+#define F(mnemonic, opc, op2, LL) \
+ void mnemonic(int imm = 0) { \
+ assert ((imm >> 16) == 0, "immediate is out of range"); \
+ emit_int32(0b11010100 << 24 | opc << 21 | imm << 5 | op2 << 2 | LL); \
+ }
+
+ F(brk, 0b001, 0b000, 0b00)
+ F(hlt, 0b010, 0b000, 0b00)
+#undef F
+
+ enum SystemRegister { // o0<1> op1<3> CRn<4> CRm<4> op2<3>
+ SysReg_NZCV = 0b101101000010000,
+ SysReg_FPCR = 0b101101000100000,
+ };
+
+ void mrs(Register rt, SystemRegister systemReg) {
+ assert ((systemReg >> 15) == 0, "systemReg is out of range");
+ emit_int32(0b110101010011 << 20 | systemReg << 5 | rt->encoding_with_zr());
+ }
+
+ void msr(SystemRegister systemReg, Register rt) {
+ assert ((systemReg >> 15) == 0, "systemReg is out of range");
+ emit_int32(0b110101010001 << 20 | systemReg << 5 | rt->encoding_with_zr());
+ }
+
+ // Floating-point instructions
+
+#define F(mnemonic, M, S, type, opcode2) \
+ void mnemonic(FloatRegister rn, FloatRegister rm) { \
+ emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ rm->encoding() << 16 | 0b1000 << 10 | rn->encoding() << 5 | opcode2); \
+ }
+ // Register-register compares: opcode2<4> selects the signalling (fcmpe) form; opcode2<3> is the compare-with-zero bit and must stay clear here.
+ F(fcmp_s, 0, 0, 0b00, 0b00000)
+ F(fcmpe_s, 0, 0, 0b00, 0b10000) // was 0b01000, which is the compare-with-zero encoding used by fcmp0_s
+ F(fcmp_d, 0, 0, 0b01, 0b00000)
+ F(fcmpe_d, 0, 0, 0b01, 0b10000)
+#undef F
+
+#define F(mnemonic, M, S, type, opcode2) \
+ void mnemonic(FloatRegister rn) { \
+ emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ 0b1000 << 10 | rn->encoding() << 5 | opcode2); \
+ }
+
+ F(fcmp0_s, 0, 0, 0b00, 0b01000)
+ F(fcmpe0_s, 0, 0, 0b00, 0b11000)
+ F(fcmp0_d, 0, 0, 0b01, 0b01000)
+ F(fcmpe0_d, 0, 0, 0b01, 0b11000)
+#undef F
+
+#define F(mnemonic, M, S, type, op) \
+ void mnemonic(FloatRegister rn, FloatRegister rm, int nzcv, AsmCondition cond) { \
+ assert ((nzcv >> 4) == 0, "illegal nzcv"); \
+ emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ rm->encoding() << 16 | cond << 12 | 0b01 << 10 | rn->encoding() << 5 | op << 4 | nzcv); \
+ }
+
+ F(fccmp_s, 0, 0, 0b00, 0)
+ F(fccmpe_s, 0, 0, 0b00, 1)
+ F(fccmp_d, 0, 0, 0b01, 0)
+ F(fccmpe_d, 0, 0, 0b01, 1)
+#undef F
+
+#define F(mnemonic, M, S, type) \
+ void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm, AsmCondition cond) { \
+ emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ rm->encoding() << 16 | cond << 12 | 0b11 << 10 | rn->encoding() << 5 | rd->encoding()); \
+ }
+
+ F(fcsel_s, 0, 0, 0b00)
+ F(fcsel_d, 0, 0, 0b01)
+#undef F
+
+#define F(mnemonic, M, S, type, opcode) \
+ void mnemonic(FloatRegister rd, FloatRegister rn) { \
+ emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ opcode << 15 | 0b10000 << 10 | rn->encoding() << 5 | rd->encoding()); \
+ }
+
+ F(fmov_s, 0, 0, 0b00, 0b000000)
+ F(fabs_s, 0, 0, 0b00, 0b000001)
+ F(fneg_s, 0, 0, 0b00, 0b000010)
+ F(fsqrt_s, 0, 0, 0b00, 0b000011)
+ F(fcvt_ds, 0, 0, 0b00, 0b000101)
+ F(fcvt_hs, 0, 0, 0b00, 0b000111)
+ F(frintn_s, 0, 0, 0b00, 0b001000)
+ F(frintp_s, 0, 0, 0b00, 0b001001)
+ F(frintm_s, 0, 0, 0b00, 0b001010)
+ F(frintz_s, 0, 0, 0b00, 0b001011)
+ F(frinta_s, 0, 0, 0b00, 0b001100)
+ F(frintx_s, 0, 0, 0b00, 0b001110)
+ F(frinti_s, 0, 0, 0b00, 0b001111)
+
+ F(fmov_d, 0, 0, 0b01, 0b000000)
+ F(fabs_d, 0, 0, 0b01, 0b000001)
+ F(fneg_d, 0, 0, 0b01, 0b000010)
+ F(fsqrt_d, 0, 0, 0b01, 0b000011)
+ F(fcvt_sd, 0, 0, 0b01, 0b000100)
+ F(fcvt_hd, 0, 0, 0b01, 0b000111)
+ F(frintn_d, 0, 0, 0b01, 0b001000)
+ F(frintp_d, 0, 0, 0b01, 0b001001)
+ F(frintm_d, 0, 0, 0b01, 0b001010)
+ F(frintz_d, 0, 0, 0b01, 0b001011)
+ F(frinta_d, 0, 0, 0b01, 0b001100)
+ F(frintx_d, 0, 0, 0b01, 0b001110)
+ F(frinti_d, 0, 0, 0b01, 0b001111)
+
+ F(fcvt_sh, 0, 0, 0b11, 0b000100)
+ F(fcvt_dh, 0, 0, 0b11, 0b000101)
+#undef F
+
+#define F(mnemonic, M, S, type, opcode) \
+ void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm) { \
+ emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ rm->encoding() << 16 | opcode << 12 | 0b10 << 10 | rn->encoding() << 5 | rd->encoding()); \
+ }
+
+ F(fmul_s, 0, 0, 0b00, 0b0000)
+ F(fdiv_s, 0, 0, 0b00, 0b0001)
+ F(fadd_s, 0, 0, 0b00, 0b0010)
+ F(fsub_s, 0, 0, 0b00, 0b0011)
+ F(fmax_s, 0, 0, 0b00, 0b0100)
+ F(fmin_s, 0, 0, 0b00, 0b0101)
+ F(fmaxnm_s, 0, 0, 0b00, 0b0110)
+ F(fminnm_s, 0, 0, 0b00, 0b0111)
+ F(fnmul_s, 0, 0, 0b00, 0b1000)
+
+ F(fmul_d, 0, 0, 0b01, 0b0000)
+ F(fdiv_d, 0, 0, 0b01, 0b0001)
+ F(fadd_d, 0, 0, 0b01, 0b0010)
+ F(fsub_d, 0, 0, 0b01, 0b0011)
+ F(fmax_d, 0, 0, 0b01, 0b0100)
+ F(fmin_d, 0, 0, 0b01, 0b0101)
+ F(fmaxnm_d, 0, 0, 0b01, 0b0110)
+ F(fminnm_d, 0, 0, 0b01, 0b0111)
+ F(fnmul_d, 0, 0, 0b01, 0b1000)
+#undef F
+
+#define F(mnemonic, M, S, type, o1, o0) \
+ void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm, FloatRegister ra) { \
+ emit_int32(M << 31 | S << 29 | 0b11111 << 24 | type << 22 | o1 << 21 | rm->encoding() << 16 | \
+ o0 << 15 | ra->encoding() << 10 | rn->encoding() << 5 | rd->encoding()); \
+ }
+
+ F(fmadd_s, 0, 0, 0b00, 0, 0)
+ F(fmsub_s, 0, 0, 0b00, 0, 1)
+ F(fnmadd_s, 0, 0, 0b00, 1, 0)
+ F(fnmsub_s, 0, 0, 0b00, 1, 1)
+
+ F(fmadd_d, 0, 0, 0b01, 0, 0)
+ F(fmsub_d, 0, 0, 0b01, 0, 1)
+ F(fnmadd_d, 0, 0, 0b01, 1, 0)
+ F(fnmsub_d, 0, 0, 0b01, 1, 1)
+#undef F
+
+#define F(mnemonic, M, S, type) \
+ void mnemonic(FloatRegister rd, int imm8) { \
+ assert ((imm8 >> 8) == 0, "immediate is out of range"); \
+ emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ imm8 << 13 | 0b100 << 10 | rd->encoding()); \
+ }
+
+ F(fmov_s, 0, 0, 0b00)
+ F(fmov_d, 0, 0, 0b01)
+#undef F
+
+#define F(mnemonic, sf, S, type, rmode, opcode) \
+ void mnemonic(Register rd, FloatRegister rn) { \
+ emit_int32(sf << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ rmode << 19 | opcode << 16 | rn->encoding() << 5 | rd->encoding_with_zr()); \
+ }
+
+ F(fcvtns_ws, 0, 0, 0b00, 0b00, 0b000)
+ F(fcvtnu_ws, 0, 0, 0b00, 0b00, 0b001)
+ F(fcvtas_ws, 0, 0, 0b00, 0b00, 0b100)
+ F(fcvtau_ws, 0, 0, 0b00, 0b00, 0b101)
+ F(fmov_ws, 0, 0, 0b00, 0b00, 0b110)
+ F(fcvtps_ws, 0, 0, 0b00, 0b01, 0b000)
+ F(fcvtpu_ws, 0, 0, 0b00, 0b01, 0b001)
+ F(fcvtms_ws, 0, 0, 0b00, 0b10, 0b000)
+ F(fcvtmu_ws, 0, 0, 0b00, 0b10, 0b001)
+ F(fcvtzs_ws, 0, 0, 0b00, 0b11, 0b000)
+ F(fcvtzu_ws, 0, 0, 0b00, 0b11, 0b001)
+
+ F(fcvtns_wd, 0, 0, 0b01, 0b00, 0b000)
+ F(fcvtnu_wd, 0, 0, 0b01, 0b00, 0b001)
+ F(fcvtas_wd, 0, 0, 0b01, 0b00, 0b100)
+ F(fcvtau_wd, 0, 0, 0b01, 0b00, 0b101)
+ F(fcvtps_wd, 0, 0, 0b01, 0b01, 0b000)
+ F(fcvtpu_wd, 0, 0, 0b01, 0b01, 0b001)
+ F(fcvtms_wd, 0, 0, 0b01, 0b10, 0b000)
+ F(fcvtmu_wd, 0, 0, 0b01, 0b10, 0b001)
+ F(fcvtzs_wd, 0, 0, 0b01, 0b11, 0b000)
+ F(fcvtzu_wd, 0, 0, 0b01, 0b11, 0b001)
+
+ F(fcvtns_xs, 1, 0, 0b00, 0b00, 0b000)
+ F(fcvtnu_xs, 1, 0, 0b00, 0b00, 0b001)
+ F(fcvtas_xs, 1, 0, 0b00, 0b00, 0b100)
+ F(fcvtau_xs, 1, 0, 0b00, 0b00, 0b101)
+ F(fcvtps_xs, 1, 0, 0b00, 0b01, 0b000)
+ F(fcvtpu_xs, 1, 0, 0b00, 0b01, 0b001)
+ F(fcvtms_xs, 1, 0, 0b00, 0b10, 0b000)
+ F(fcvtmu_xs, 1, 0, 0b00, 0b10, 0b001)
+ F(fcvtzs_xs, 1, 0, 0b00, 0b11, 0b000)
+ F(fcvtzu_xs, 1, 0, 0b00, 0b11, 0b001)
+
+ F(fcvtns_xd, 1, 0, 0b01, 0b00, 0b000)
+ F(fcvtnu_xd, 1, 0, 0b01, 0b00, 0b001)
+ F(fcvtas_xd, 1, 0, 0b01, 0b00, 0b100)
+ F(fcvtau_xd, 1, 0, 0b01, 0b00, 0b101)
+ F(fmov_xd, 1, 0, 0b01, 0b00, 0b110)
+ F(fcvtps_xd, 1, 0, 0b01, 0b01, 0b000)
+ F(fcvtpu_xd, 1, 0, 0b01, 0b01, 0b001)
+ F(fcvtms_xd, 1, 0, 0b01, 0b10, 0b000)
+ F(fcvtmu_xd, 1, 0, 0b01, 0b10, 0b001)
+ F(fcvtzs_xd, 1, 0, 0b01, 0b11, 0b000)
+ F(fcvtzu_xd, 1, 0, 0b01, 0b11, 0b001)
+
+ F(fmov_xq, 1, 0, 0b10, 0b01, 0b110)
+#undef F
+
+#define F(mnemonic, sf, S, type, rmode, opcode) \
+ void mnemonic(FloatRegister rd, Register rn) { \
+ emit_int32(sf << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \
+ rmode << 19 | opcode << 16 | rn->encoding_with_zr() << 5 | rd->encoding()); \
+ }
+
+ F(scvtf_sw, 0, 0, 0b00, 0b00, 0b010)
+ F(ucvtf_sw, 0, 0, 0b00, 0b00, 0b011)
+ F(fmov_sw, 0, 0, 0b00, 0b00, 0b111)
+ F(scvtf_dw, 0, 0, 0b01, 0b00, 0b010)
+ F(ucvtf_dw, 0, 0, 0b01, 0b00, 0b011)
+
+ F(scvtf_sx, 1, 0, 0b00, 0b00, 0b010)
+ F(ucvtf_sx, 1, 0, 0b00, 0b00, 0b011)
+ F(scvtf_dx, 1, 0, 0b01, 0b00, 0b010)
+ F(ucvtf_dx, 1, 0, 0b01, 0b00, 0b011)
+ F(fmov_dx, 1, 0, 0b01, 0b00, 0b111)
+
+ F(fmov_qx, 1, 0, 0b10, 0b01, 0b111)
+#undef F
+
+#define F(mnemonic, opcode) \
+ void mnemonic(FloatRegister Vd, FloatRegister Vn) { \
+ emit_int32( opcode << 10 | Vn->encoding() << 5 | Vd->encoding()); \
+ }
+
+ F(aese, 0b0100111000101000010010);
+ F(aesd, 0b0100111000101000010110);
+ F(aesmc, 0b0100111000101000011010);
+ F(aesimc, 0b0100111000101000011110);
+#undef F
+
+#ifdef COMPILER2
+ typedef VFP::double_num double_num;
+ typedef VFP::float_num float_num;
+#endif
+
+ void vcnt(FloatRegister Dd, FloatRegister Dn, int quad = 0, int size = 0) {
+ // emitted at VM startup to detect whether the instruction is available
+ assert(!VM_Version::is_initialized() || VM_Version::has_simd(), "simd instruction");
+ assert(size == 0, "illegal size value");
+ emit_int32(0x0e205800 | quad << 30 | size << 22 | Dn->encoding() << 5 | Dd->encoding());
+ }
+
+#ifdef COMPILER2
+ void addv(FloatRegister Dd, FloatRegister Dm, int quad, int size) {
+ // NOTE(review): has_simd() is asserted unconditionally here (vcnt also allows !is_initialized()), so the former "emitted at VM startup" remark looks copy-pasted - confirm
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert((quad & ~1) == 0, "illegal value"); // quad must be 0 or 1
+ assert(size >= 0 && size < 3, "illegal value"); // size 3 is rejected
+ assert(((size << 1) | quad) != 4, "illegal values (size 2, quad 0)");
+ emit_int32(0x0e31b800 | quad << 30 | size << 22 | Dm->encoding() << 5 | Dd->encoding());
+ }
+
+ enum VElem_Size {
+ VELEM_SIZE_8 = 0x00,
+ VELEM_SIZE_16 = 0x01,
+ VELEM_SIZE_32 = 0x02,
+ VELEM_SIZE_64 = 0x03
+ };
+
+ enum VLD_Type {
+ VLD1_TYPE_1_REG = 0b0111,
+ VLD1_TYPE_2_REGS = 0b1010,
+ VLD1_TYPE_3_REGS = 0b0110,
+ VLD1_TYPE_4_REGS = 0b0010
+ };
+
+ enum VFloat_Arith_Size {
+ VFA_SIZE_F32 = 0b0,
+ VFA_SIZE_F64 = 0b1
+ };
+
+#define F(mnemonic, U, S, P) \
+ void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \
+ int size, int quad) { \
+ assert(VM_Version::has_simd(), "simd instruction"); \
+ assert(!(size == VFA_SIZE_F64 && !quad), "reserved"); \
+ assert((size & 1) == size, "overflow"); \
+ emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | \
+ S << 23 | size << 22 | 1 << 21 | P << 11 | 1 << 10 | \
+ fm->encoding() << 16 | \
+ fn->encoding() << 5 | \
+ fd->encoding()); \
+ }
+ // Three-register vector float arithmetic; size is a VFloat_Arith_Size value and VFA_SIZE_F64 is only accepted when quad is set.
+ F(vaddF, 0, 0, 0b11010) // Vd = Vn + Vm (float)
+ F(vsubF, 0, 1, 0b11010) // Vd = Vn - Vm (float)
+ F(vmulF, 1, 0, 0b11011) // Vd = Vn * Vm (float)
+ F(vdivF, 1, 0, 0b11111) // Vd = Vn / Vm (float)
+#undef F
+
+#define F(mnemonic, U) \
+ void mnemonic(FloatRegister fd, FloatRegister fm, FloatRegister fn, \
+ int size, int quad) { \
+ assert(VM_Version::has_simd(), "simd instruction"); \
+ assert(!(size == VELEM_SIZE_64 && !quad), "reserved"); \
+ assert((size & 0b11) == size, "overflow"); \
+ int R = 0; /* rounding */ \
+ int S = 0; /* saturating */ \
+ emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | size << 22 | \
+ 1 << 21 | R << 12 | S << 11 | 0b10001 << 10 | \
+ fm->encoding() << 16 | \
+ fn->encoding() << 5 | \
+ fd->encoding()); \
+ }
+
+ F(vshlSI, 0) // Vd = ashift(Vn,Vm) (int)
+ F(vshlUI, 1) // Vd = lshift(Vn,Vm) (int)
+#undef F
+
+#define F(mnemonic, U, P, M) \
+ void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \
+ int size, int quad) { \
+ assert(VM_Version::has_simd(), "simd instruction"); \
+ assert(!(size == VELEM_SIZE_64 && !quad), "reserved"); \
+ assert(!(size == VELEM_SIZE_64 && M), "reserved"); \
+ assert((size & 0b11) == size, "overflow"); \
+ emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | size << 22 | \
+ 1 << 21 | P << 11 | 1 << 10 | \
+ fm->encoding() << 16 | \
+ fn->encoding() << 5 | \
+ fd->encoding()); \
+ }
+
+ F(vmulI, 0, 0b10011, true) // Vd = Vn * Vm (int)
+ F(vaddI, 0, 0b10000, false) // Vd = Vn + Vm (int)
+ F(vsubI, 1, 0b10000, false) // Vd = Vn - Vm (int)
+#undef F
+
+#define F(mnemonic, U, O) \
+ void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \
+ int quad) { \
+ assert(VM_Version::has_simd(), "simd instruction"); \
+ emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | O << 22 | \
+ 1 << 21 | 0b00011 << 11 | 1 << 10 | \
+ fm->encoding() << 16 | \
+ fn->encoding() << 5 | \
+ fd->encoding()); \
+ }
+
+ F(vandI, 0, 0b00) // Vd = Vn & Vm (int)
+ F(vorI, 0, 0b10) // Vd = Vn | Vm (int)
+ F(vxorI, 1, 0b00) // Vd = Vn ^ Vm (int)
+#undef F
+
+ void vnegI(FloatRegister fd, FloatRegister fn, int size, int quad) {
+ int U = 1;
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(quad || size != VELEM_SIZE_64, "reserved");
+ emit_int32(quad << 30 | U << 29 | 0b01110 << 24 |
+ size << 22 | 0b100000101110 << 10 |
+ fn->encoding() << 5 |
+ fd->encoding() << 0);
+ }
+
+ void vshli(FloatRegister fd, FloatRegister fn, int esize, int imm, int quad) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ // Vector shift left by immediate; esize is presumably the element width in bits (TODO confirm against callers).
+ if (imm >= esize) {
+ // maximum shift gives all zeroes, direction doesn't matter,
+ // but only available for shift right
+ vshri(fd, fn, esize, esize, true /* unsigned */, quad);
+ return;
+ }
+ assert(imm >= 0 && imm < esize, "out of range");
+ // The 7-bit immh:immb field holds esize + imm; immh is its upper four bits.
+ int imm7 = esize + imm;
+ int immh = imm7 >> 3;
+ assert(immh != 0, "encoding constraint");
+ assert((uint)immh < 16, "sanity");
+ assert((((immh >> 3) << 1) | quad) != 0b10, "reserved"); // immh<3> set requires quad; the old immh >> 2 test missed immh = 0b11xx
+ emit_int32(quad << 30 | 0b011110 << 23 | imm7 << 16 |
+ 0b010101 << 10 | fn->encoding() << 5 | fd->encoding() << 0);
+ }
+
+ void vshri(FloatRegister fd, FloatRegister fn, int esize, int imm,
+ bool U /* unsigned */, int quad) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(imm > 0, "out of range");
+ if (imm >= esize) {
+ // maximum shift (all zeroes)
+ imm = esize;
+ }
+ int imm7 = 2 * esize - imm ; // 7-bit immh:immb field
+ int immh = imm7 >> 3; // upper four bits of that field
+ assert(immh != 0, "encoding constraint");
+ assert((uint)immh < 16, "sanity");
+ assert((((immh >> 3) << 1) | quad) != 0b10, "reserved"); // immh<3> set requires quad; the old immh >> 2 test missed immh = 0b11xx
+ emit_int32(quad << 30 | U << 29 | 0b011110 << 23 | imm7 << 16 |
+ 0b000001 << 10 | fn->encoding() << 5 | fd->encoding() << 0);
+ }
+ void vshrUI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) {
+ vshri(fd, fm, size, imm, true /* unsigned */, quad);
+ }
+ void vshrSI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) {
+ vshri(fd, fm, size, imm, false /* signed */, quad);
+ }
+
+ void vld1(FloatRegister Vt, Address addr, VElem_Size size, int bits) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(bits == 128, "unsupported");
+ assert(addr.disp() == 0 || addr.disp() == 16, "must be");
+ // Loads one 128-bit register; a non-zero displacement must equal the 16 bytes transferred.
+ int quad = 1; // only the 128-bit form is supported (see the bits assert)
+ int L = 1; // load
+ int opcode = VLD1_TYPE_1_REG;
+ emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
+ Vt->encoding() << 0 | addr.encoding_simd());
+ }
+
+ void vst1(FloatRegister Vt, Address addr, VElem_Size size, int bits) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(bits == 128, "unsupported");
+ assert(addr.disp() == 0 || addr.disp() == 16, "must be");
+ // Stores one 128-bit register; a non-zero displacement must equal the 16 bytes transferred.
+ int quad = 1; // only the 128-bit form is supported (see the bits assert)
+ int L = 0; // store
+ int opcode = VLD1_TYPE_1_REG;
+ emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
+ Vt->encoding() << 0 | addr.encoding_simd());
+ }
+
+ void vld1(FloatRegister Vt, FloatRegister Vt2, Address addr, VElem_Size size, int bits) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(bits == 128, "unsupported");
+ assert(Vt->successor() == Vt2, "Registers must be ordered");
+ assert(addr.disp() == 0 || addr.disp() == 32, "must be");
+ // Loads two consecutive 128-bit registers; only Vt is encoded, Vt2 is checked to be its successor.
+ int quad = 1; // only the 128-bit form is supported (see the bits assert)
+ int L = 1; // load
+ int opcode = VLD1_TYPE_2_REGS;
+ emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
+ Vt->encoding() << 0 | addr.encoding_simd());
+ }
+
+ void vst1(FloatRegister Vt, FloatRegister Vt2, Address addr, VElem_Size size, int bits) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(Vt->successor() == Vt2, "Registers must be ordered");
+ assert(bits == 128, "unsupported");
+ assert(addr.disp() == 0 || addr.disp() == 32, "must be");
+ // Stores two consecutive 128-bit registers; only Vt is encoded, Vt2 is checked to be its successor.
+ int quad = 1; // only the 128-bit form is supported (see the bits assert)
+ int L = 0; // store
+ int opcode = VLD1_TYPE_2_REGS;
+ emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
+ Vt->encoding() << 0 | addr.encoding_simd());
+ }
+
+ void vld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,
+ Address addr, VElem_Size size, int bits) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(bits == 128, "unsupported");
+ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,
+ "Registers must be ordered");
+ assert(addr.disp() == 0 || addr.disp() == 48, "must be");
+ // Loads three consecutive 128-bit registers; only Vt is encoded, the others are checked to follow it.
+ int quad = 1; // only the 128-bit form is supported (see the bits assert)
+ int L = 1; // load
+ int opcode = VLD1_TYPE_3_REGS;
+ emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
+ Vt->encoding() << 0 | addr.encoding_simd());
+ }
+
+ void vst1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,
+ Address addr, VElem_Size size, int bits) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(bits == 128, "unsupported");
+ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,
+ "Registers must be ordered");
+ assert(addr.disp() == 0 || addr.disp() == 48, "must be");
+ // Stores three consecutive 128-bit registers; only Vt is encoded, the others are checked to follow it.
+ int quad = 1; // only the 128-bit form is supported (see the bits assert)
+ int L = 0; // store
+ int opcode = VLD1_TYPE_3_REGS;
+ emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
+ Vt->encoding() << 0 | addr.encoding_simd());
+ }
+
+ void vld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,
+ FloatRegister Vt4, Address addr, VElem_Size size, int bits) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(bits == 128, "unsupported");
+ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&
+ Vt3->successor() == Vt4, "Registers must be ordered");
+ assert(addr.disp() == 0 || addr.disp() == 64, "must be");
+ // Loads four consecutive 128-bit registers; only Vt is encoded, the others are checked to follow it.
+ int quad = 1; // only the 128-bit form is supported (see the bits assert)
+ int L = 1; // load
+ int opcode = VLD1_TYPE_4_REGS;
+ emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
+ Vt->encoding() << 0 | addr.encoding_simd());
+ }
+
+ void vst1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,
+ FloatRegister Vt4, Address addr, VElem_Size size, int bits) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(bits == 128, "unsupported");
+ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&
+ Vt3->successor() == Vt4, "Registers must be ordered");
+ assert(addr.disp() == 0 || addr.disp() == 64, "must be");
+ // Stores four consecutive 128-bit registers; only Vt is encoded, the others are checked to follow it.
+ int quad = 1; // only the 128-bit form is supported (see the bits assert)
+ int L = 0; // store
+ int opcode = VLD1_TYPE_4_REGS;
+ emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
+ Vt->encoding() << 0 | addr.encoding_simd());
+ }
+
+ void rev32(FloatRegister Vd, FloatRegister Vn, VElem_Size size, int quad) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(size == VELEM_SIZE_8 || size == VELEM_SIZE_16, "must be");
+ emit_int32(quad << 30 | 0b101110 << 24 | size << 22 |
+ 0b100000000010 << 10 | Vn->encoding() << 5 | Vd->encoding());
+ }
+
+ void eor(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, VElem_Size size, int quad) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(size == VELEM_SIZE_8, "must be");
+ emit_int32(quad << 30 | 0b101110001 << 21 | Vm->encoding() << 16 |
+ 0b000111 << 10 | Vn->encoding() << 5 | Vd->encoding());
+ }
+
+ void orr(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, VElem_Size size, int quad) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(size == VELEM_SIZE_8, "must be");
+ emit_int32(quad << 30 | 0b001110101 << 21 | Vm->encoding() << 16 |
+ 0b000111 << 10 | Vn->encoding() << 5 | Vd->encoding());
+ }
+
+ void vmovI(FloatRegister Dd, int imm8, VElem_Size size, int quad) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(imm8 >= 0 && imm8 < 256, "out of range");
+ int op = 0; // every supported element size uses op == 0; initialised so the default path never reads garbage
+ int cmode;
+ switch (size) {
+ case VELEM_SIZE_8:
+ // 8-bit elements
+ cmode = 0b1110;
+ break;
+ case VELEM_SIZE_16:
+ // 16-bit elements
+ cmode = 0b1000;
+ break;
+ case VELEM_SIZE_32:
+ // 32-bit elements
+ cmode = 0b0000;
+ break;
+ default:
+ cmode = 0;
+ ShouldNotReachHere();
+ }
+ int abc = imm8 >> 5; // top three bits of the immediate
+ int defgh = imm8 & 0b11111; // low five bits of the immediate
+ emit_int32(quad << 30 | op << 29 | 0b1111 << 24 |
+ abc << 16 | cmode << 12 | 0b01 << 10 |
+ defgh << 5 | Dd->encoding() << 0);
+ }
+
+ void vdupI(FloatRegister Dd, Register Rn, VElem_Size size, int quad) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ assert(size <= 3, "unallocated encoding");
+ assert(size != 3 || quad == 1, "reserved");
+ int imm5 = 1 << size;
+#ifdef ASSERT
+ switch (size) {
+ case VELEM_SIZE_8:
+ assert(imm5 == 0b00001, "sanity");
+ break;
+ case VELEM_SIZE_16:
+ assert(imm5 == 0b00010, "sanity");
+ break;
+ case VELEM_SIZE_32:
+ assert(imm5 == 0b00100, "sanity");
+ break;
+ case VELEM_SIZE_64:
+ assert(imm5 == 0b01000, "sanity");
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+#endif
+ emit_int32(quad << 30 | 0b111 << 25 | 0b11 << 10 |
+ imm5 << 16 | Rn->encoding() << 5 |
+ Dd->encoding() << 0);
+ }
+
+ void vdup(FloatRegister Vd, FloatRegister Vn, VElem_Size size, int quad) {
+ assert(VM_Version::has_simd(), "simd instruction");
+ int index = 0;
+ int bytes = 1 << size;
+ int range = 16 / bytes;
+ assert(index < range, "overflow");
+
+ assert(size != VELEM_SIZE_64 || quad, "reserved");
+ assert(8 << VELEM_SIZE_8 == 8, "sanity");
+ assert(8 << VELEM_SIZE_16 == 16, "sanity");
+ assert(8 << VELEM_SIZE_32 == 32, "sanity");
+ assert(8 << VELEM_SIZE_64 == 64, "sanity");
+
+ int imm5 = (index << (size + 1)) | bytes;
+
+ emit_int32(quad << 30 | 0b001110000 << 21 | imm5 << 16 | 0b000001 << 10 |
+ Vn->encoding() << 5 | Vd->encoding() << 0);
+ }
+
+ void vdupF(FloatRegister Vd, FloatRegister Vn, int quad) {
+ vdup(Vd, Vn, VELEM_SIZE_32, quad);
+ }
+
+ void vdupD(FloatRegister Vd, FloatRegister Vn, int quad) {
+ vdup(Vd, Vn, VELEM_SIZE_64, quad);
+ }
+#endif
+};
+
+
+#endif // CPU_ARM_VM_ASSEMBLER_ARM_64_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/bytes_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_BYTES_ARM_HPP
+#define CPU_ARM_VM_BYTES_ARM_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/macros.hpp"
+
+#ifndef VM_LITTLE_ENDIAN
+#define VM_LITTLE_ENDIAN 1
+#endif
+
+class Bytes: AllStatic {
+ // Byte-order helpers: Java (big-endian) accessors work byte by byte; native accessors pick the widest access the address alignment allows.
+ public:
+ // Returns true if the byte ordering used by Java is different from the native byte ordering
+ // of the underlying machine.
+ static inline bool is_Java_byte_ordering_different() {
+ return VM_LITTLE_ENDIAN != 0;
+ }
+
+ static inline u2 get_Java_u2(address p) {
+ return (u2(p[0]) << 8) | u2(p[1]);
+ }
+
+ static inline u4 get_Java_u4(address p) {
+ return u4(p[0]) << 24 |
+ u4(p[1]) << 16 |
+ u4(p[2]) << 8 |
+ u4(p[3]);
+ }
+
+ static inline u8 get_Java_u8(address p) {
+ return u8(p[0]) << 56 |
+ u8(p[1]) << 48 |
+ u8(p[2]) << 40 |
+ u8(p[3]) << 32 |
+ u8(p[4]) << 24 |
+ u8(p[5]) << 16 |
+ u8(p[6]) << 8 |
+ u8(p[7]);
+ }
+
+ static inline void put_Java_u2(address p, u2 x) {
+ p[0] = x >> 8;
+ p[1] = x;
+ }
+
+ static inline void put_Java_u4(address p, u4 x) {
+ ((u1*)p)[0] = x >> 24;
+ ((u1*)p)[1] = x >> 16;
+ ((u1*)p)[2] = x >> 8;
+ ((u1*)p)[3] = x;
+ }
+
+ static inline void put_Java_u8(address p, u8 x) {
+ ((u1*)p)[0] = x >> 56;
+ ((u1*)p)[1] = x >> 48;
+ ((u1*)p)[2] = x >> 40;
+ ((u1*)p)[3] = x >> 32;
+ ((u1*)p)[4] = x >> 24;
+ ((u1*)p)[5] = x >> 16;
+ ((u1*)p)[6] = x >> 8;
+ ((u1*)p)[7] = x;
+ }
+
+#if VM_LITTLE_ENDIAN // tested by value, like is_Java_byte_ordering_different(); #ifdef was always true because the macro is defined above
+
+ static inline u2 get_native_u2(address p) {
+ return (intptr_t(p) & 1) == 0 ? *(u2*)p : u2(p[0]) | (u2(p[1]) << 8);
+ }
+
+ static inline u4 get_native_u4(address p) {
+ switch (intptr_t(p) & 3) {
+ case 0: return *(u4*)p;
+ case 2: return u4(((u2*)p)[0]) |
+ u4(((u2*)p)[1]) << 16;
+ default: return u4(p[0]) |
+ u4(p[1]) << 8 |
+ u4(p[2]) << 16 |
+ u4(p[3]) << 24;
+ }
+ }
+
+ static inline u8 get_native_u8(address p) {
+ switch (intptr_t(p) & 7) {
+ case 0: return *(u8*)p;
+ case 4: return u8(((u4*)p)[0]) |
+ u8(((u4*)p)[1]) << 32;
+ case 2: return u8(((u2*)p)[0]) |
+ u8(((u2*)p)[1]) << 16 |
+ u8(((u2*)p)[2]) << 32 |
+ u8(((u2*)p)[3]) << 48;
+ default: return u8(p[0]) |
+ u8(p[1]) << 8 |
+ u8(p[2]) << 16 |
+ u8(p[3]) << 24 |
+ u8(p[4]) << 32 |
+ u8(p[5]) << 40 |
+ u8(p[6]) << 48 |
+ u8(p[7]) << 56;
+ }
+ }
+
+ static inline void put_native_u2(address p, u2 x) {
+ if ((intptr_t(p) & 1) == 0) {
+ *(u2*)p = x;
+ } else {
+ p[0] = x;
+ p[1] = x >> 8;
+ }
+ }
+
+ static inline void put_native_u4(address p, u4 x) {
+ switch (intptr_t(p) & 3) {
+ case 0: *(u4*)p = x;
+ break;
+ case 2: ((u2*)p)[0] = x;
+ ((u2*)p)[1] = x >> 16;
+ break;
+ default: ((u1*)p)[0] = x;
+ ((u1*)p)[1] = x >> 8;
+ ((u1*)p)[2] = x >> 16;
+ ((u1*)p)[3] = x >> 24;
+ break;
+ }
+ }
+
+ static inline void put_native_u8(address p, u8 x) {
+ switch (intptr_t(p) & 7) {
+ case 0: *(u8*)p = x;
+ break;
+ case 4: ((u4*)p)[0] = x;
+ ((u4*)p)[1] = x >> 32;
+ break;
+ case 2: ((u2*)p)[0] = x;
+ ((u2*)p)[1] = x >> 16;
+ ((u2*)p)[2] = x >> 32;
+ ((u2*)p)[3] = x >> 48;
+ break;
+ default: ((u1*)p)[0] = x;
+ ((u1*)p)[1] = x >> 8;
+ ((u1*)p)[2] = x >> 16;
+ ((u1*)p)[3] = x >> 24;
+ ((u1*)p)[4] = x >> 32;
+ ((u1*)p)[5] = x >> 40;
+ ((u1*)p)[6] = x >> 48;
+ ((u1*)p)[7] = x >> 56;
+ }
+ }
+
+#else
+
+ static inline u2 get_native_u2(address p) { return get_Java_u2(p); }
+ static inline u4 get_native_u4(address p) { return get_Java_u4(p); }
+ static inline u8 get_native_u8(address p) { return get_Java_u8(p); }
+ static inline void put_native_u2(address p, u2 x) { put_Java_u2(p, x); }
+ static inline void put_native_u4(address p, u4 x) { put_Java_u4(p, x); }
+ static inline void put_native_u8(address p, u8 x) { put_Java_u8(p, x); }
+
+#endif // VM_LITTLE_ENDIAN
+
+ // Efficient swapping of byte ordering
+ static inline u2 swap_u2(u2 x);
+ static inline u4 swap_u4(u4 x);
+ static inline u8 swap_u8(u8 x);
+};
+
+
+// The following header contains the implementations of swap_u2, swap_u4, and swap_u8
+#include OS_CPU_HEADER_INLINE(bytes)
+
+#endif // CPU_ARM_VM_BYTES_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_CodeStubs_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,510 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_arm.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_arm.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif // INCLUDE_ALL_GCS
+
+#define __ ce->masm()->
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {  // emits the stub code that calls Runtime1's counter_overflow entry
+ __ bind(_entry);
+ ce->store_parameter(_bci, 0);  // parameter slot 0: _bci
+ ce->store_parameter(_method->as_constant_ptr()->as_metadata(), 1);  // parameter slot 1: metadata of _method
+ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);  // attach _info at the offset just after the call
+ ce->verify_oop_map(_info);
+
+ __ b(_continuation);  // after the call, branch to the stub's continuation label
+}
+
+
+// TODO: ARM - is it possible to inline these stubs into the main code stream?
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+                               bool throw_index_out_of_bounds_exception)
+  : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception)
+  , _index(index)
+{
+  _info = (info != NULL) ? new CodeEmitInfo(info) : NULL;  // a NULL info is stored as NULL, otherwise a new CodeEmitInfo is built from it
+}
+
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {  // emits the out-of-line code for a failed range check
+  assert(_info != NULL, "RangeCheckStub needs CodeEmitInfo: the constructor accepts NULL but _info is dereferenced here");
+  __ bind(_entry);
+  if (_info->deoptimize_on_exception()) {  // call the predicate-failed trap instead of a throwing entry
+#ifdef AARCH64
+    __ NOT_TESTED();
+#endif
+    __ call(Runtime1::entry_for(Runtime1::predicate_failed_trap_id), relocInfo::runtime_call_type);
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ should_not_reach_here());
+    return;
+  }
+  // Pass the array index on stack because all registers must be preserved
+  ce->verify_reserved_argument_area_size(1);
+  if (_index->is_cpu_register()) {
+    __ str_32(_index->as_register(), Address(SP));
+  } else {  // index is not in a CPU register: it is read as a jint constant and materialized in Rtemp
+    __ mov_slow(Rtemp, _index->as_jint()); // Rtemp should be OK in C1
+    __ str_32(Rtemp, Address(SP));
+  }
+
+  if (_throw_index_out_of_bounds_exception) {  // the constructor flag selects which throwing entry is called
+#ifdef AARCH64
+    __ NOT_TESTED();
+#endif
+    __ call(Runtime1::entry_for(Runtime1::throw_index_exception_id), relocInfo::runtime_call_type);
+  } else {
+    __ call(Runtime1::entry_for(Runtime1::throw_range_check_failed_id), relocInfo::runtime_call_type);
+  }
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  DEBUG_ONLY(STOP("RangeCheck");)
+}
+
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+ _info = new CodeEmitInfo(info);  // the stub keeps a new CodeEmitInfo constructed from info
+}
+
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {  // emits a call to Runtime1's predicate_failed_trap entry
+ __ bind(_entry);
+ __ call(Runtime1::entry_for(Runtime1::predicate_failed_trap_id), relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ debug_only(__ should_not_reach_here());  // debug builds only: trap if control continues past the call
+}
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {  // emits a call to Runtime1's throw_div0_exception entry
+ if (_offset != -1) {  // the table entry is skipped when _offset is -1
+ ce->compilation()->implicit_exception_table()->append(_offset, __ offset());  // pairs _offset with the current code offset
+ }
+ __ bind(_entry);
+ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id),
+ relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);
+ DEBUG_ONLY(STOP("DivByZero");)
+}
+
+
+// Implementation of NewInstanceStub
+
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  // Record the operands and stub id, then build this stub's CodeEmitInfo from info.
+  _klass_reg = klass_reg;
+  _klass     = klass;
+  _result    = result;
+  _stub_id   = stub_id;
+  _info      = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::fast_new_instance_init_check_id ||
+         stub_id == Runtime1::fast_new_instance_id ||
+         stub_id == Runtime1::new_instance_id, "need new_instance id");
+}
+
+
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {  // emits the call to the Runtime1 entry selected by _stub_id
+ assert(_result->as_register() == R0, "runtime call setup");  // result operand must be R0
+ assert(_klass_reg->as_register() == R1, "runtime call setup");  // klass operand must be R1
+ __ bind(_entry);
+ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ __ b(_continuation);  // after the call, branch to the continuation label
+}
+
+
+// Implementation of NewTypeArrayStub
+
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _result    = result;
+  _length    = length;
+  _klass_reg = klass_reg;
+  _info      = new CodeEmitInfo(info);  // new CodeEmitInfo constructed from the caller's info
+}
+
+
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {  // emits the call to Runtime1's new_type_array entry
+ assert(_result->as_register() == R0, "runtime call setup");  // result operand must be R0
+ assert(_klass_reg->as_register() == R1, "runtime call setup");  // klass operand must be R1
+ assert(_length->as_register() == R2, "runtime call setup");  // length operand must be R2
+ __ bind(_entry);
+ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ __ b(_continuation);  // after the call, branch to the continuation label
+}
+
+
+// Implementation of NewObjectArrayStub
+
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _length    = length;
+  _result    = result;
+  _klass_reg = klass_reg;
+  _info      = new CodeEmitInfo(info);  // new CodeEmitInfo constructed from the caller's info
+}
+
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {  // emits the call to Runtime1's new_object_array entry
+ assert(_result->as_register() == R0, "runtime call setup");  // result operand must be R0
+ assert(_klass_reg->as_register() == R1, "runtime call setup");  // klass operand must be R1
+ assert(_length->as_register() == R2, "runtime call setup");  // length operand must be R2
+ __ bind(_entry);
+ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ __ b(_continuation);  // after the call, branch to the continuation label
+}
+
+
+// Implementation of MonitorAccessStubs
+
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+: MonitorAccessStub(obj_reg, lock_reg)
+{
+ _info = new CodeEmitInfo(info);  // obj_reg/lock_reg go to the MonitorAccessStub base; only _info is set here
+}
+
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {  // stores object and lock at SP, then calls a Runtime1 monitorenter entry
+ __ bind(_entry);
+ const Register obj_reg = _obj_reg->as_pointer_register();
+ const Register lock_reg = _lock_reg->as_pointer_register();
+
+ ce->verify_reserved_argument_area_size(2);  // two stack words are written below
+#ifdef AARCH64
+ __ stp(obj_reg, lock_reg, Address(SP));
+#else
+ if (obj_reg < lock_reg) {  // NOTE(review): presumably stmia stores in ascending register order, so it is only usable in this case - confirm
+ __ stmia(SP, RegisterSet(obj_reg) | RegisterSet(lock_reg));
+ } else {
+ __ str(obj_reg, Address(SP));  // object at SP
+ __ str(lock_reg, Address(SP, BytesPerWord));  // lock one word above it
+ }
+#endif // AARCH64
+
+ Runtime1::StubID enter_id = ce->compilation()->has_fpu_code() ?  // the _nofpu entry is chosen when the compilation has no FPU code
+ Runtime1::monitorenter_id :
+ Runtime1::monitorenter_nofpu_id;
+ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);
+ ce->verify_oop_map(_info);
+ __ b(_continuation);
+}
+
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {  // stores the lock at SP, then calls a Runtime1 monitorexit entry
+ __ bind(_entry);
+ if (_compute_lock) {  // when set, the monitor address for _monitor_ix is first computed into _lock_reg
+ ce->monitor_address(_monitor_ix, _lock_reg);
+ }
+ const Register lock_reg = _lock_reg->as_pointer_register();
+
+ ce->verify_reserved_argument_area_size(1);  // one stack word is written below
+ __ str(lock_reg, Address(SP));
+
+ // Non-blocking leaf routine - no call info needed
+ Runtime1::StubID exit_id = ce->compilation()->has_fpu_code() ?  // the _nofpu entry is chosen when the compilation has no FPU code
+ Runtime1::monitorexit_id :
+ Runtime1::monitorexit_nofpu_id;
+ __ call(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type);
+ __ b(_continuation);
+}
+
+
+// Call return is directly after patch word, hence the offset of zero (PatchingStub::emit_code asserts this)
+int PatchingStub::_patch_info_offset = 0;
+
+void PatchingStub::align_patch_site(MacroAssembler* masm) {  // currently emits nothing: the only body is disabled by #if 0
+#if 0
+ // TODO: investigate whether this needs to be implemented
+ ShouldNotReachHere();
+#endif
+}
+
+void PatchingStub::emit_code(LIR_Assembler* ce) {  // emits the patch template, patch record and runtime call for one patch site
+ const int patchable_instruction_offset = AARCH64_ONLY(NativeInstruction::instruction_size) NOT_AARCH64(0);  // one instruction on AArch64, zero otherwise
+
+ assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF,
+ "not enough room for call");
+ assert((_bytes_to_copy & 3) == 0, "must copy a multiple of four bytes");
+ Label call_patch;
+ bool is_load = (_id == load_klass_id) || (_id == load_mirror_id) || (_id == load_appendix_id);  // every id except access_field_id
+
+#ifdef AARCH64
+ assert(nativeInstruction_at(_pc_start)->is_nop(), "required for MT safe patching");
+
+ // Same alignment of reg2mem code and PatchingStub code. Required to make copied bind_literal() code properly aligned.
+ __ align(wordSize);
+#endif // AARCH64
+
+ if (is_load NOT_AARCH64(&& !VM_Version::supports_movw())) {  // on 32-bit ARM this block is skipped when movw is supported
+ address start = __ pc();
+
+ // The following sequence duplicates code provided in MacroAssembler::patchable_mov_oop()
+ // without creating relocation info entry.
+#ifdef AARCH64
+ // Extra nop for MT safe patching
+ __ nop();
+#endif // AARCH64
+
+ assert((__ pc() - start) == patchable_instruction_offset, "should be");
+#ifdef AARCH64
+ __ ldr(_obj, __ pc());
+#else
+ __ ldr(_obj, Address(PC));
+ // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
+ __ nop();
+#endif // AARCH64
+
+#ifdef ASSERT
+ for (int i = 0; i < _bytes_to_copy; i++) {  // debug check: emitted bytes equal the bytes at _pc_start
+ assert(((address)_pc_start)[i] == start[i], "should be the same code");
+ }
+#endif // ASSERT
+ }
+
+ address being_initialized_entry = __ pc();  // start of the patch template emitted next
+ if (CommentedAssembly) {
+ __ block_comment(" patch template");
+ }
+ if (is_load) {
+ address start = __ pc();
+ if (_id == load_mirror_id || _id == load_appendix_id) {  // oop loads use patchable_mov_oop, klass loads use patchable_mov_metadata
+ __ patchable_mov_oop(_obj, (jobject)Universe::non_oop_word(), _index);
+ } else {
+ __ patchable_mov_metadata(_obj, (Metadata*)Universe::non_oop_word(), _index);
+ }
+#ifdef ASSERT
+ for (int i = 0; i < _bytes_to_copy; i++) {  // debug check: emitted bytes equal the bytes at _pc_start
+ assert(((address)_pc_start)[i] == start[i], "should be the same code");
+ }
+#endif // ASSERT
+ } else {
+ int* start = (int*)_pc_start;  // not a load: copy _bytes_to_copy bytes from _pc_start, one 32-bit word at a time
+ int* end = start + (_bytes_to_copy / BytesPerInt);
+ while (start < end) {
+ __ emit_int32(*start++);
+ }
+ }
+ address end_of_patch = __ pc();
+
+ int bytes_to_skip = 0;  // accumulates the size of everything emitted between end_of_patch and the end of the patch record
+ if (_id == load_mirror_id) {
+ int offset = __ offset();
+ if (CommentedAssembly) {
+ __ block_comment(" being_initialized check");
+ }
+
+ assert(_obj != noreg, "must be a valid register");
+ // Rtemp should be OK in C1
+ __ ldr(Rtemp, Address(_obj, java_lang_Class::klass_offset_in_bytes()));
+ __ ldr(Rtemp, Address(Rtemp, InstanceKlass::init_thread_offset()));
+ __ cmp(Rtemp, Rthread);  // compare the loaded init_thread value with the current thread register
+ __ b(call_patch, ne);  // different: go to the patch entry point
+ __ b(_patch_site_continuation);  // same: continue at the patch site continuation
+
+ bytes_to_skip += __ offset() - offset;
+ }
+
+ if (CommentedAssembly) {
+ __ block_comment("patch data - 3 high bytes of the word");
+ }
+ const int sizeof_patch_record = 4;
+ bytes_to_skip += sizeof_patch_record;
+ int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
+ __ emit_int32(0xff | being_initialized_entry_offset << 8 | bytes_to_skip << 16 | _bytes_to_copy << 24);  // bits 0-7: 0xff, 8-15: entry offset, 16-23: bytes_to_skip, 24-31: _bytes_to_copy
+
+ address patch_info_pc = __ pc();
+ assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
+
+ // runtime call will return here
+ Label call_return;
+ __ bind(call_return);
+ ce->add_call_info_here(_info);
+ assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
+ __ b(_patch_site_entry);
+
+ address entry = __ pc();
+ NativeGeneralJump::insert_unconditional((address)_pc_start, entry);  // places an unconditional jump to entry at _pc_start
+ address target = NULL;
+ relocInfo::relocType reloc_type = relocInfo::none;
+ switch (_id) {  // choose the Runtime1 patching entry and, for loads, the relocation type handled below
+ case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
+ case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
+ case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
+ case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
+ default: ShouldNotReachHere();
+ }
+ __ bind(call_patch);
+
+ if (CommentedAssembly) {
+ __ block_comment("patch entry point");
+ }
+
+ // arrange for call to return just after patch word
+ __ adr(LR, call_return);
+ __ jump(target, relocInfo::runtime_call_type, Rtemp);
+
+ if (is_load) {
+ CodeSection* cs = __ code_section();
+ address pc = (address)_pc_start;
+ RelocIterator iter(cs, pc, pc + 1);
+ relocInfo::change_reloc_info_for_address(&iter, pc, reloc_type, relocInfo::none);  // NOTE(review): presumably rewrites the reloc at _pc_start from reloc_type to none - confirm argument order
+ }
+}
+
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) {  // passes _trap_request on the stack and calls Runtime1's deoptimize entry
+ __ bind(_entry);
+ __ mov_slow(Rtemp, _trap_request);  // materialize the trap request in Rtemp
+ ce->verify_reserved_argument_area_size(1);  // one stack word is written below
+ __ str(Rtemp, Address(SP));
+ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);
+ DEBUG_ONLY(__ should_not_reach_here());  // debug builds only: trap if control continues past the call
+}
+
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+  // Choose the runtime entry before emitting anything. When the debug info
+  // asks for deoptimization, deoptimize instead of throwing, because throwing
+  // here is probably wrong in that case.
+  const Runtime1::StubID target_id = _info->deoptimize_on_exception()
+      ? Runtime1::predicate_failed_trap_id
+      : Runtime1::throw_null_pointer_exception_id;
+  address target = Runtime1::entry_for(target_id);
+  // Pair _offset with the current code offset in the implicit exception table.
+  ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  __ bind(_entry);
+  __ call(target, relocInfo::runtime_call_type);
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  DEBUG_ONLY(STOP("ImplicitNullCheck");)
+}
+
+
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {  // calls the Runtime1 entry named by _stub, optionally passing _obj on the stack
+ __ bind(_entry);
+ // Pass the object on stack because all registers must be preserved
+ if (_obj->is_cpu_register()) {
+ ce->verify_reserved_argument_area_size(1);  // one stack word is written below
+ __ str(_obj->as_pointer_register(), Address(SP));
+ } else {
+ assert(_obj->is_illegal(), "should be");  // the only other accepted form is an illegal operand; nothing is stored
+ }
+ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type);
+ ce->add_call_info_here(_info);
+ DEBUG_ONLY(STOP("SimpleException");)
+}
+
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  // Ask the shared runtime where the Java calling convention places the five arguments.
+  BasicType sig_bt[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT };
+  VMRegPair locations[5];
+  SharedRuntime::java_calling_convention(sig_bt, locations, 5, true);
+  // Registers that currently hold the operands, in signature order.
+  Register operands[5] = {
+    src()->as_pointer_register(),
+    src_pos()->as_register(),
+    dst()->as_pointer_register(),
+    dst_pos()->as_register(),
+    length()->as_register()
+  };
+  for (int idx = 0; idx < 5; idx++) {
+    VMReg where = locations[idx].first();
+    if (!where->is_stack()) {
+      assert(operands[idx] == where->as_Register(), "Calling conventions must match");
+    } else {
+      __ str(operands[idx], Address(SP, where->reg2stack() * VMRegImpl::stack_slot_size));
+    }
+  }
+  // The static call stub is emitted first; the compilation may bail out while doing so.
+  ce->emit_static_call_stub();
+  if (ce->compilation()->bailed_out()) {
+    return; // CodeCache is full
+  }
+  int ret_addr_offset = __ patchable_call(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type);
+  assert(ret_addr_offset == __ offset(), "embedded return address not allowed");
+  ce->add_call_info_here(info());
+  ce->verify_oop_map(info());
+  __ b(_continuation);
+}
+
+/////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {  // emits the call to Runtime1's g1_pre_barrier_slow entry
+ // At this point we know that marking is in progress.
+ // If do_load() is true then we have to emit the
+ // load of the previous value; otherwise it has already
+ // been loaded into _pre_val.
+
+ __ bind(_entry);
+ assert(pre_val()->is_register(), "Precondition.");
+
+ Register pre_val_reg = pre_val()->as_register();
+
+ if (do_load()) {
+ ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+ }
+
+ __ cbz(pre_val_reg, _continuation);  // a zero previous value skips the runtime call
+ ce->verify_reserved_argument_area_size(1);  // one stack word is written below
+ __ str(pre_val_reg, Address(SP));  // previous value is passed on the stack
+ __ call(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id), relocInfo::runtime_call_type);
+
+ __ b(_continuation);
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {  // emits the call to Runtime1's g1_post_barrier_slow entry
+ __ bind(_entry);
+ assert(addr()->is_register(), "Precondition.");
+ assert(new_val()->is_register(), "Precondition.");
+ Register new_val_reg = new_val()->as_register();
+ __ cbz(new_val_reg, _continuation);  // a zero new value skips the runtime call
+ ce->verify_reserved_argument_area_size(1);  // one stack word is written below
+ __ str(addr()->as_pointer_register(), Address(SP));  // the address operand (not the new value) is passed on the stack
+ __ call(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id), relocInfo::runtime_call_type);
+ __ b(_continuation);
+}
+
+#endif // INCLUDE_ALL_GCS
+/////////////////////////////////////////////////////////////////////////////
+
+#undef __
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_Defs_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_C1_DEFS_ARM_HPP
+#define CPU_ARM_VM_C1_DEFS_ARM_HPP
+
+// native word offsets from memory address (little endian): low word first, high word one BytesPerWord above it
+enum {
+ pd_lo_word_offset_in_bytes = 0,
+ pd_hi_word_offset_in_bytes = BytesPerWord
+};
+
+// whether explicit rounding operations are required to implement the strictFP mode (false: not required here)
+enum {
+ pd_strict_fp_requires_explicit_rounding = false
+};
+
+#ifdef __SOFTFP__ // SOFT(n) expands to n and VFP(n) to nothing; the #else branch does the opposite
+#define SOFT(n) n
+#define VFP(n)
+#else // __SOFTFP__
+#define SOFT(n)
+#define VFP(n) n
+#endif // __SOFTFP__
+
+
+// registers: counts and index ranges; AARCH64_ONLY/NOT_AARCH64 pick the 64-bit or 32-bit value, VFP/SOFT pick by float ABI
+enum {
+ pd_nof_cpu_regs_frame_map = AARCH64_ONLY(33) NOT_AARCH64(16), // number of registers used during code emission
+ pd_nof_caller_save_cpu_regs_frame_map = AARCH64_ONLY(27) NOT_AARCH64(10), // number of registers killed by calls
+ pd_nof_cpu_regs_reg_alloc = AARCH64_ONLY(27) NOT_AARCH64(10), // number of registers that are visible to register allocator (including Rheap_base which is visible only if compressed pointers are not enabled)
+ pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map, // number of registers visible to linear scan
+ pd_nof_cpu_regs_processed_in_linearscan = pd_nof_cpu_regs_reg_alloc + 1, // number of registers processed in linear scan; includes LR as it is used as temporary register in c1_LIRGenerator_arm
+ pd_first_cpu_reg = 0, // CPU register numbers start at zero
+ pd_last_cpu_reg = pd_nof_cpu_regs_frame_map - 1, // last CPU register number (inclusive)
+
+ pd_nof_fpu_regs_frame_map = VFP(32) SOFT(0), // number of float registers used during code emission
+ pd_nof_caller_save_fpu_regs_frame_map = VFP(32) SOFT(0), // number of float registers killed by calls
+ pd_nof_fpu_regs_reg_alloc = AARCH64_ONLY(32) NOT_AARCH64(VFP(30) SOFT(0)), // number of float registers that are visible to register allocator
+ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan
+ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, // float register numbers follow directly after the CPU registers
+ pd_last_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - 1, // last float register number (inclusive)
+
+ pd_nof_xmm_regs_linearscan = 0, // XMM counts are zero and the XMM range is -1..-1 in this file
+ pd_nof_caller_save_xmm_regs = 0,
+ pd_first_xmm_reg = -1,
+ pd_last_xmm_reg = -1
+};
+
+
+// encoding of float value in debug info: floats are not saved as doubles here
+enum {
+ pd_float_saved_as_double = false
+};
+
+#ifdef AARCH64 // PATCHED_ADDR differs per architecture: 0xff8 on AArch64, 204 otherwise
+#define PATCHED_ADDR 0xff8
+#else
+#define PATCHED_ADDR (204)
+#endif
+#define CARDTABLEMODREF_POST_BARRIER_HELPER // NOTE(review): value-less marker, presumably tested with #ifdef by shared C1 code - confirm
+#define GENERATE_ADDRESS_IS_PREFERRED // NOTE(review): value-less marker, presumably tested with #ifdef by shared C1 code - confirm
+
+#endif // CPU_ARM_VM_C1_DEFS_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_FpuStackSim_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FpuStackSim.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "utilities/array.hpp"
+#include "utilities/ostream.hpp"
+
+// Nothing needed here
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_FpuStackSim_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_C1_FPUSTACKSIM_ARM_HPP
+#define CPU_ARM_VM_C1_FPUSTACKSIM_ARM_HPP
+
+// Nothing needed here
+
+#endif // CPU_ARM_VM_C1_FPUSTACKSIM_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_FrameMap_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_arm.inline.hpp"
+
+LIR_Opr FrameMap::R0_opr;  // out-of-class definitions of FrameMap's static operands; values are assigned in FrameMap::initialize()
+LIR_Opr FrameMap::R1_opr;
+LIR_Opr FrameMap::R2_opr;
+LIR_Opr FrameMap::R3_opr;
+LIR_Opr FrameMap::R4_opr;
+LIR_Opr FrameMap::R5_opr;
+
+LIR_Opr FrameMap::R0_oop_opr;  // oop-typed operands for R0..R5
+LIR_Opr FrameMap::R1_oop_opr;
+LIR_Opr FrameMap::R2_oop_opr;
+LIR_Opr FrameMap::R3_oop_opr;
+LIR_Opr FrameMap::R4_oop_opr;
+LIR_Opr FrameMap::R5_oop_opr;
+
+LIR_Opr FrameMap::R0_metadata_opr;  // metadata-typed operands for R0..R5
+LIR_Opr FrameMap::R1_metadata_opr;
+LIR_Opr FrameMap::R2_metadata_opr;
+LIR_Opr FrameMap::R3_metadata_opr;
+LIR_Opr FrameMap::R4_metadata_opr;
+LIR_Opr FrameMap::R5_metadata_opr;
+
+#ifdef AARCH64
+LIR_Opr FrameMap::ZR_opr;
+#endif // AARCH64
+
+LIR_Opr FrameMap::LR_opr;  // special-purpose registers
+LIR_Opr FrameMap::LR_oop_opr;
+LIR_Opr FrameMap::LR_ptr_opr;
+LIR_Opr FrameMap::FP_opr;
+LIR_Opr FrameMap::SP_opr;
+LIR_Opr FrameMap::Rthread_opr;
+
+LIR_Opr FrameMap::Int_result_opr;  // result operands, one per result kind
+LIR_Opr FrameMap::Long_result_opr;
+LIR_Opr FrameMap::Object_result_opr;
+LIR_Opr FrameMap::Float_result_opr;
+LIR_Opr FrameMap::Double_result_opr;
+
+LIR_Opr FrameMap::Exception_oop_opr;  // exception object and exception pc operands
+LIR_Opr FrameMap::Exception_pc_opr;
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 };
+LIR_Opr FrameMap::_caller_save_fpu_regs[]; // no initializer: static storage is zero-initialized, same effect as the { 0 } above
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) {
+  // Turns one calling-convention location (stack slot, CPU register or float register) into a LIR operand.
+  VMReg lo = reg->first();
+  VMReg hi = reg->second();
+  if (lo->is_stack()) {
+    const int byte_offset = (lo->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+    return LIR_OprFact::address(new LIR_Address(SP_opr, byte_offset, type));
+  }
+  if (lo->is_Register()) {
+    Register cpu_reg = lo->as_Register();
+    if (hi->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
+#ifdef AARCH64
+      assert(lo->next() == hi, "should be the same");
+      return as_long_opr(cpu_reg);
+#else
+      return as_long_opr(cpu_reg, hi->as_Register());
+#endif
+    }
+    if (type == T_OBJECT || type == T_ARRAY) return as_oop_opr(cpu_reg);
+    if (type == T_METADATA) return as_metadata_opr(cpu_reg);
+    // PreferInterpreterNativeStubs should ensure we never need to
+    // handle a long opr passed as R3+stack_slot
+    assert(! hi->is_stack(), "missing support for ALIGN_WIDE_ARGUMENTS==0");
+    return as_opr(cpu_reg);
+  }
+  if (lo->is_FloatRegister()) {
+    FloatRegister fpu_reg = lo->as_FloatRegister();
+    if (type == T_FLOAT) return as_float_opr(fpu_reg);
+    return as_double_opr(fpu_reg);
+  }
+  // Not a stack slot, CPU register or float register.
+  ShouldNotReachHere();
+  return LIR_OprFact::illegalOpr;
+}
+
+
+void FrameMap::initialize() {  // one-time setup of the register numbering and the predefined static operands
+ if (_init_done) return;  // later calls do nothing
+
+ int i;
+ int rnum = 0;  // next register number to hand out
+
+ // Registers used for allocation
+#ifdef AARCH64
+ assert(Rthread == R28 && Rheap_base == R27 && Rtemp == R16, "change the code here");
+ for (i = 0; i < 16; i++) {  // R0..R15
+ map_register(rnum++, as_Register(i));
+ }
+ for (i = 17; i < 28; i++) {  // R17..R27; index 16 is skipped (the assert above ties it to Rtemp)
+ map_register(rnum++, as_Register(i));
+ }
+#else
+ assert(Rthread == R10 && Rtemp == R12, "change the code here");
+ for (i = 0; i < 10; i++) {  // R0..R9
+ map_register(rnum++, as_Register(i));
+ }
+#endif // AARCH64
+ assert(rnum == pd_nof_cpu_regs_reg_alloc, "should be");
+
+ // Registers not used for allocation
+ map_register(rnum++, LR); // LR register should be listed first, see c1_LinearScan_arm.hpp::is_processed_reg_num.
+ assert(rnum == pd_nof_cpu_regs_processed_in_linearscan, "should be");
+
+ map_register(rnum++, Rtemp);
+ map_register(rnum++, Rthread);
+ map_register(rnum++, FP); // ARM32: R7 or R11
+ map_register(rnum++, SP);
+#ifdef AARCH64
+ map_register(rnum++, ZR);
+#else
+ map_register(rnum++, PC);
+#endif
+ assert(rnum == pd_nof_cpu_regs_frame_map, "should be");
+
+ _init_done = true;  // set before the operands below are created
+
+ R0_opr = as_opr(R0); R0_oop_opr = as_oop_opr(R0); R0_metadata_opr = as_metadata_opr(R0);  // plain, oop and metadata operand per register
+ R1_opr = as_opr(R1); R1_oop_opr = as_oop_opr(R1); R1_metadata_opr = as_metadata_opr(R1);
+ R2_opr = as_opr(R2); R2_oop_opr = as_oop_opr(R2); R2_metadata_opr = as_metadata_opr(R2);
+ R3_opr = as_opr(R3); R3_oop_opr = as_oop_opr(R3); R3_metadata_opr = as_metadata_opr(R3);
+ R4_opr = as_opr(R4); R4_oop_opr = as_oop_opr(R4); R4_metadata_opr = as_metadata_opr(R4);
+ R5_opr = as_opr(R5); R5_oop_opr = as_oop_opr(R5); R5_metadata_opr = as_metadata_opr(R5);
+
+#ifdef AARCH64
+ ZR_opr = as_opr(ZR);
+#endif // AARCH64
+
+ LR_opr = as_opr(LR);
+ LR_oop_opr = as_oop_opr(LR);
+ LR_ptr_opr = as_pointer_opr(LR);
+ FP_opr = as_pointer_opr(FP);
+ SP_opr = as_pointer_opr(SP);
+ Rthread_opr = as_pointer_opr(Rthread);
+
+ // LIR operands for result
+ Int_result_opr = R0_opr;
+ Object_result_opr = R0_oop_opr;
+#ifdef AARCH64
+ Long_result_opr = as_long_opr(R0);
+ Float_result_opr = as_float_opr(S0);
+ Double_result_opr = as_double_opr(D0);
+#else
+ Long_result_opr = as_long_opr(R0, R1);  // 32-bit: a long result uses the R0/R1 pair
+#ifdef __ABI_HARD__
+ Float_result_opr = as_float_opr(S0);
+ Double_result_opr = as_double_opr(D0);
+#else
+ Float_result_opr = LIR_OprFact::single_softfp(0);  // without __ABI_HARD__ the float results are softfp operands numbered 0 and (0, 1)
+ Double_result_opr = LIR_OprFact::double_softfp(0, 1);
+#endif // __ABI_HARD__
+#endif // AARCH64
+
+ Exception_oop_opr = as_oop_opr(Rexception_obj);
+ Exception_pc_opr = as_opr(Rexception_pc);
+
+ for (i = 0; i < nof_caller_save_cpu_regs(); i++) {  // fill the caller-save tables with single_cpu / single_fpu operands by index
+ _caller_save_cpu_regs[i] = LIR_OprFact::single_cpu(i);
+ }
+ for (i = 0; i < nof_caller_save_fpu_regs; i++) {
+ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
+ }
+}
+
+
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+ return Address(SP, sp_offset);  // SP-based address at the given byte offset
+}
+
+LIR_Opr FrameMap::stack_pointer() {
+ return FrameMap::SP_opr;  // the static SP operand defined in this file
+}
+
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
+ assert(Rmh_SP_save == FP, "Fix register used for saving SP for MethodHandle calls");  // guards the choice of FP_opr below
+ return FP_opr;  // FP is the SP-save register for MethodHandle calls, per the assert above
+}
+
+bool FrameMap::validate_frame() {
+  // Start from the frame size and raise it to the largest location of any stack-passed incoming argument.
+  int largest_offset = in_bytes(framesize_in_bytes());
+  for (int arg = 0, slot = 0; arg < _incoming_arguments->length(); arg++) {
+    LIR_Opr incoming = _incoming_arguments->at(arg);
+    if (incoming->is_stack()) {
+      const int location = _argument_locations->at(slot);
+      if (location > largest_offset) largest_offset = location;
+    }
+    // Advance by the slot count of this argument's type.
+    slot += type2size[incoming->type()];
+  }
+  const int offset_limit = AARCH64_ONLY(16384) NOT_AARCH64(4096); // TODO-AARCH64 check that LIRAssembler does not generate load/store of byte and half-word with SP as address base
+  return largest_offset < offset_limit;
+}
+
+VMReg FrameMap::fpu_regname(int n) {
+ return as_FloatRegister(n)->as_VMReg();  // float register number n, converted to its VMReg
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_FrameMap_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_C1_FRAMEMAP_ARM_HPP
+#define CPU_ARM_VM_C1_FRAMEMAP_ARM_HPP
+
+ public:
+
+ enum { // Frame layout constants for this port.
+ first_available_sp_in_frame = 0,
+ frame_pad_in_bytes = 2*wordSize // Two words: accounts for FP/LR saved at build_frame().
+ };
+
+ static LIR_Opr R0_opr; // Predefined LIR operands, one static per register R0..R5.
+ static LIR_Opr R1_opr;
+ static LIR_Opr R2_opr;
+ static LIR_Opr R3_opr;
+ static LIR_Opr R4_opr;
+ static LIR_Opr R5_opr;
+ // add more predefined register oprs as needed
+
+ static LIR_Opr R0_oop_opr; // Same registers again in an "_oop" flavour (presumably oop-typed; definitions are not in this header).
+ static LIR_Opr R1_oop_opr;
+ static LIR_Opr R2_oop_opr;
+ static LIR_Opr R3_oop_opr;
+ static LIR_Opr R4_oop_opr;
+ static LIR_Opr R5_oop_opr;
+
+ static LIR_Opr R0_metadata_opr; // And in a "_metadata" flavour (presumably metadata-typed).
+ static LIR_Opr R1_metadata_opr;
+ static LIR_Opr R2_metadata_opr;
+ static LIR_Opr R3_metadata_opr;
+ static LIR_Opr R4_metadata_opr;
+ static LIR_Opr R5_metadata_opr;
+
+#ifdef AARCH64
+ static LIR_Opr ZR_opr; // declared for AArch64 builds only
+#endif // AARCH64
+
+ static LIR_Opr LR_opr;
+ static LIR_Opr LR_oop_opr;
+ static LIR_Opr LR_ptr_opr;
+
+ static LIR_Opr FP_opr; // returned by method_handle_invoke_SP_save_opr()
+ static LIR_Opr SP_opr; // returned by stack_pointer()
+ static LIR_Opr Rthread_opr;
+
+ static LIR_Opr Int_result_opr; // Result operands, one per value kind.
+ static LIR_Opr Long_result_opr;
+ static LIR_Opr Object_result_opr;
+ static LIR_Opr Float_result_opr;
+ static LIR_Opr Double_result_opr;
+
+ static LIR_Opr Exception_oop_opr; // Operands for the exception object and exception pc.
+ static LIR_Opr Exception_pc_opr;
+
+#ifdef AARCH64
+ static LIR_Opr as_long_opr(Register r) { // AArch64: a long is a double_cpu operand naming the same register twice.
+ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+ }
+
+ static LIR_Opr as_pointer_opr(Register r) { // AArch64: pointers use the same double_cpu form as longs.
+ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+ }
+
+ static LIR_Opr as_double_opr(FloatRegister r) { // AArch64: a double is described by one FP register encoding.
+ return LIR_OprFact::double_fpu(r->encoding());
+ }
+#else
+ static LIR_Opr as_long_opr(Register r, Register r2) { // 32-bit: a long is built from two registers, r and r2.
+ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r2));
+ }
+
+ static LIR_Opr as_pointer_opr(Register r) { // 32-bit: a pointer is a single_cpu operand.
+ return LIR_OprFact::single_cpu(cpu_reg2rnr(r));
+ }
+
+ static LIR_Opr as_double_opr(FloatRegister r) { // 32-bit: a double is built from r and its successor register.
+ return LIR_OprFact::double_fpu(r->encoding(), r->successor()->encoding());
+ }
+#endif
+
+ static LIR_Opr as_float_opr(FloatRegister r) { // Same form on both 32-bit and AArch64 builds.
+ return LIR_OprFact::single_fpu(r->encoding()); // single_fpu operand keyed by the register's encoding
+ }
+
+ static VMReg fpu_regname(int n); // declaration only; FrameMap::fpu_regname is defined out of line
+
+ static bool is_caller_save_register(LIR_Opr opr) { // The argument is not inspected.
+ return true; // every operand is reported as caller-saved
+ }
+
+ static int adjust_reg_range(int range) {
+ // With compressed oops or compressed class pointers one register (Rheap_base) is taken out of the range.
+ const bool reserve_heap_base = UseCompressedOops || UseCompressedClassPointers;
+ return reserve_heap_base ? range - 1 : range;
+ }
+
+ static int nof_caller_save_cpu_regs() { // Caller-save CPU register count after adjust_reg_range().
+ return adjust_reg_range(pd_nof_caller_save_cpu_regs_frame_map);
+ }
+
+ static int last_cpu_reg() { // Accessor for the platform constant pd_last_cpu_reg.
+ return pd_last_cpu_reg;
+ }
+
+#endif // CPU_ARM_VM_C1_FRAMEMAP_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_LIRAssembler_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,3608 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "ci/ciInstance.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "nativeInst_arm.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_arm.inline.hpp"
+
+#define __ _masm->
+
+// Note: Rtemp usage in this file should not impact C2 and should be
+// correct as long as it is not implicitly used in lower layers (the
+// arm [macro]assembler) and used with care in the other C1 specific
+// files.
+
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
+ ShouldNotCallThis(); // Not used on ARM
+ return false; // placeholder result; the argument is never examined
+}
+
+
+LIR_Opr LIR_Assembler::receiverOpr() {
+ // The receiver lives in the first register of the Java calling convention.
+ return FrameMap::R0_oop_opr;
+}
+
+LIR_Opr LIR_Assembler::osrBufferPointer() { // Operand that holds the OSR buffer pointer.
+ return FrameMap::as_pointer_opr(R0); // R0 as a pointer-sized operand
+}
+
+#ifndef PRODUCT
+void LIR_Assembler::verify_reserved_argument_area_size(int args_count) { // Compiled only when PRODUCT is not defined.
+ assert(args_count * wordSize <= frame_map()->reserved_argument_area_size(), "not enough space for arguments"); // args_count words must fit
+}
+#endif // !PRODUCT
+
+void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { // Stores constant c at SP + offset (offset given in words).
+ assert(offset_from_sp_in_words >= 0, "invalid offset from sp");
+ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord;
+ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "not enough space");
+ __ mov_slow(Rtemp, c); // materialize the constant in Rtemp first
+ __ str(Rtemp, Address(SP, offset_from_sp_in_bytes));
+}
+
+void LIR_Assembler::store_parameter(Metadata* m, int offset_from_sp_in_words) { // Stores metadata pointer m at SP + offset (offset given in words).
+ assert(offset_from_sp_in_words >= 0, "invalid offset from sp");
+ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord;
+ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "not enough space");
+ __ mov_metadata(Rtemp, m); // load the metadata value into Rtemp first
+ __ str(Rtemp, Address(SP, offset_from_sp_in_bytes));
+}
+
+//--------------fpu register translations-----------------------
+
+
+void LIR_Assembler::set_24bit_FPU() { // Emits nothing in this port.
+ ShouldNotReachHere(); // a call here is treated as an error
+}
+
+void LIR_Assembler::reset_FPU() { // Emits nothing in this port.
+ ShouldNotReachHere(); // a call here is treated as an error
+}
+
+void LIR_Assembler::fpop() { // No implementation in this port.
+ Unimplemented();
+}
+
+void LIR_Assembler::fxch(int i) { // No implementation in this port; i is unused.
+ Unimplemented();
+}
+
+void LIR_Assembler::fld(int i) { // No implementation in this port; i is unused.
+ Unimplemented();
+}
+
+void LIR_Assembler::ffree(int i) { // No implementation in this port; i is unused.
+ Unimplemented();
+}
+
+void LIR_Assembler::breakpoint() { // Emits a breakpoint at the current code position.
+ __ breakpoint(); // forwarded to the macro assembler
+}
+
+void LIR_Assembler::push(LIR_Opr opr) { // No implementation in this port; opr is unused.
+ Unimplemented();
+}
+
+void LIR_Assembler::pop(LIR_Opr opr) { // No implementation in this port; opr is unused.
+ Unimplemented();
+}
+
+//-------------------------------------------
+Address LIR_Assembler::as_Address(LIR_Address* addr) { // Converts a LIR address into an assembler Address, bailing out on unencodable offsets.
+ Register base = addr->base()->as_pointer_register();
+
+#ifdef AARCH64
+ int align = exact_log2(type2aelembytes(addr->type(), true)); // log2 of the element size for the address's type
+#endif
+
+ if (addr->index()->is_illegal() || addr->index()->is_constant()) { // base + immediate form
+ int offset = addr->disp();
+ if (addr->index()->is_constant()) {
+ offset += addr->index()->as_constant_ptr()->as_jint() << addr->scale(); // fold the scaled constant index into the displacement
+ }
+
+#ifdef AARCH64
+ if (!Assembler::is_unsigned_imm_in_range(offset, 12, align) && !Assembler::is_imm_in_range(offset, 9, 0)) { // neither immediate check accepts the offset
+ BAILOUT_("offset not in range", Address(base));
+ }
+ assert(UseUnalignedAccesses || (offset & right_n_bits(align)) == 0, "offset should be aligned");
+#else
+ if ((offset <= -4096) || (offset >= 4096)) { // 32-bit: only offsets strictly between -4096 and 4096 are accepted
+ BAILOUT_("offset not in range", Address(base));
+ }
+#endif // AARCH64
+
+ return Address(base, offset);
+
+ } else { // base + index register form
+ assert(addr->disp() == 0, "can't have both");
+ int scale = addr->scale();
+
+#ifdef AARCH64
+ assert((scale == 0) || (scale == align), "scale should be zero or equal to embedded shift");
+
+ bool is_index_extended = (addr->index()->type() == T_INT); // a T_INT index is applied with ex_sxtw
+ if (is_index_extended) {
+ assert(addr->index()->is_single_cpu(), "should be");
+ return Address(base, addr->index()->as_register(), ex_sxtw, scale);
+ } else {
+ assert(addr->index()->is_double_cpu(), "should be");
+ return Address(base, addr->index()->as_register_lo(), ex_lsl, scale);
+ }
+#else
+ assert(addr->index()->is_single_cpu(), "should be");
+ return scale >= 0 ? Address(base, addr->index()->as_register(), lsl, scale) : // non-negative scale: shift the index left
+ Address(base, addr->index()->as_register(), lsr, -scale); // negative scale: shift right by its magnitude
+#endif // AARCH64
+ }
+}
+
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { // 32-bit only: the address one word (BytesPerWord) past as_Address(addr).
+#ifdef AARCH64
+ ShouldNotCallThis(); // Not used on AArch64
+ return Address();
+#else
+ Address base = as_Address(addr);
+ assert(base.index() == noreg, "must be"); // only the base + displacement form is supported here
+ if (base.disp() + BytesPerWord >= 4096) { BAILOUT_("offset not in range", Address(base.base(),0)); } // the bumped displacement must stay below 4096
+ return Address(base.base(), base.disp() + BytesPerWord);
+#endif // AARCH64
+}
+
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { // 32-bit only: identical to as_Address(addr).
+#ifdef AARCH64
+ ShouldNotCallThis(); // Not used on AArch64
+ return Address();
+#else
+ return as_Address(addr); // the first word sits at the unmodified address
+#endif // AARCH64
+}
+
+
+void LIR_Assembler::osr_entry() { // Emits the OSR entry: builds the frame and copies monitors out of the OSR buffer.
+ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); // record the code offset of the OSR entry
+ BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+ ValueStack* entry_state = osr_entry->end()->state();
+ int number_of_locks = entry_state->locks_size();
+
+ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+ Register OSR_buf = osrBufferPointer()->as_pointer_register();
+
+ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+ int monitor_offset = (method()->max_locals() + 2 * (number_of_locks - 1)) * BytesPerWord; // buffer offset used for lock 0; two words per lock
+ for (int i = 0; i < number_of_locks; i++) {
+ int slot_offset = monitor_offset - (i * 2 * BytesPerWord); // each further lock is read two words lower in the buffer
+ __ ldr(R1, Address(OSR_buf, slot_offset + 0*BytesPerWord)); // first word: stored as the monitor lock
+ __ ldr(R2, Address(OSR_buf, slot_offset + 1*BytesPerWord)); // second word: stored as the monitor object
+ __ str(R1, frame_map()->address_for_monitor_lock(i));
+ __ str(R2, frame_map()->address_for_monitor_object(i));
+ }
+}
+
+
+int LIR_Assembler::check_icache() {
+ const int ic_check_start = __ offset(); // code offset before the check is emitted
+ Register recv = receiverOpr()->as_register();
+ __ inline_cache_check(recv, Ricklass);
+ return ic_check_start;
+}
+
+
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo* info) { // Emits a patchable oop load into reg.
+ jobject o = (jobject)Universe::non_oop_word(); // placeholder value until the site is patched
+ int index = __ oop_recorder()->allocate_oop_index(o);
+
+ PatchingStub* patch = new PatchingStub(_masm, patching_id(info), index);
+
+ __ patchable_mov_oop(reg, o, index);
+ patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+
+void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { // Emits a patchable metadata load into reg.
+ Metadata* o = (Metadata*)Universe::non_oop_word(); // placeholder value until the site is patched
+ int index = __ oop_recorder()->allocate_metadata_index(o);
+ PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index); // always a load_klass_id stub
+
+ __ patchable_mov_metadata(reg, o, index);
+ patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+
+int LIR_Assembler::initial_frame_size_in_bytes() const {
+ const int full_frame_bytes = frame_map()->framesize() * VMRegImpl::stack_slot_size; // frame size converted from slots to bytes
+ return full_frame_bytes - 2 * wordSize; // two words are left out: the return address and the link
+}
+
+
+int LIR_Assembler::emit_exception_handler() { // Emits the exception handler stub; returns its code offset, or -1 after a bailout.
+ // TODO: ARM
+ __ nop(); // See comments in other ports
+
+ address handler_base = __ start_a_stub(exception_handler_size());
+ if (handler_base == NULL) { // no stub space could be started
+ bailout("exception handler overflow");
+ return -1;
+ }
+
+ int offset = code_offset(); // start of the handler; this is the value returned
+
+ // check that there is really an exception
+ __ verify_not_null_oop(Rexception_obj);
+
+ __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type);
+ __ should_not_reach_here(); // the runtime call above is not expected to return here
+
+ assert(code_offset() - offset <= exception_handler_size(), "overflow");
+ __ end_a_stub();
+
+ return offset;
+}
+
+// Emit the code to remove the frame from the stack in the exception
+// unwind path.
+int LIR_Assembler::emit_unwind_handler() { // Returns the code offset at which the unwind handler starts.
+#ifndef PRODUCT
+ if (CommentedAssembly) {
+ _masm->block_comment("Unwind handler");
+ }
+#endif
+
+ int offset = code_offset();
+
+ // Fetch the exception from TLS and clear out exception related thread state
+ Register zero = __ zero_register(Rtemp);
+ __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
+ __ str(zero, Address(Rthread, JavaThread::exception_oop_offset()));
+ __ str(zero, Address(Rthread, JavaThread::exception_pc_offset()));
+
+ __ bind(_unwind_handler_entry); // bound after the TLS fetch above, so jumps to this label skip it
+ __ verify_not_null_oop(Rexception_obj);
+
+ // Perform needed unlocking
+ MonitorExitStub* stub = NULL;
+ if (method()->is_synchronized()) {
+ monitor_address(0, FrameMap::R0_opr); // address of monitor 0 goes into R0
+ stub = new MonitorExitStub(FrameMap::R0_opr, true, 0);
+ __ unlock_object(R2, R1, R0, Rtemp, *stub->entry());
+ __ bind(*stub->continuation());
+ }
+
+ // remove the activation and dispatch to the unwind handler
+ __ remove_frame(initial_frame_size_in_bytes()); // restores FP and LR
+ __ jump(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type, Rtemp);
+
+ // Emit the slow path assembly
+ if (stub != NULL) {
+ stub->emit_code(this);
+ }
+
+ return offset;
+}
+
+
+int LIR_Assembler::emit_deopt_handler() { // Emits the deoptimization handler stub; returns its code offset, or -1 after a bailout.
+ address handler_base = __ start_a_stub(deopt_handler_size());
+ if (handler_base == NULL) { // no stub space could be started
+ bailout("deopt handler overflow");
+ return -1;
+ }
+
+ int offset = code_offset();
+
+ __ mov_relative_address(LR, __ pc()); // LR receives the current pc within the handler
+#ifdef AARCH64
+ __ raw_push(LR, LR);
+ __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, Rtemp);
+#else
+ __ push(LR); // stub expects LR to be saved
+ __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg);
+#endif // AARCH64
+
+ assert(code_offset() - offset <= deopt_handler_size(), "overflow");
+ __ end_a_stub();
+
+ return offset;
+}
+
+
+void LIR_Assembler::return_op(LIR_Opr result) { // Emits the method return sequence; result is not referenced here.
+ // Pop the frame before safepoint polling
+ __ remove_frame(initial_frame_size_in_bytes());
+
+ // Loading the polling page address usually takes one or two instructions
+ // TODO-AARCH64 3 instructions on AArch64, so try to load polling page by ldr_literal
+ __ mov_address(Rtemp, os::get_polling_page(), symbolic_Relocation::polling_page_reference);
+ __ relocate(relocInfo::poll_return_type); // the relocation marks the poll load that follows
+ __ ldr(Rtemp, Address(Rtemp)); // the poll itself: a load from the polling page
+ __ ret();
+}
+
+
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { // Emits a safepoint poll and returns its code offset; tmp is not used (Rtemp is).
+ __ mov_address(Rtemp, os::get_polling_page(), symbolic_Relocation::polling_page_reference);
+ if (info != NULL) {
+ add_debug_info_for_branch(info);
+ }
+ int offset = __ offset(); // taken after the address load, right before the relocated poll load
+ __ relocate(relocInfo::poll_type);
+ __ ldr(Rtemp, Address(Rtemp)); // the poll itself: a load from the polling page
+ return offset;
+}
+
+
+void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
+ // Moving a register onto itself needs no instruction.
+ if (from_reg == to_reg) return;
+ __ mov(to_reg, from_reg);
+}
+
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { // Loads constant src into register dest, by constant type.
+ assert(src->is_constant() && dest->is_register(), "must be");
+ LIR_Const* c = src->as_constant_ptr();
+
+ switch (c->type()) {
+ case T_ADDRESS:
+ case T_INT:
+ assert(patch_code == lir_patch_none, "no patching handled here");
+ __ mov_slow(dest->as_register(), c->as_jint());
+ break;
+
+ case T_LONG:
+ assert(patch_code == lir_patch_none, "no patching handled here");
+#ifdef AARCH64
+ __ mov_slow(dest->as_pointer_register(), (intptr_t)c->as_jlong());
+#else
+ __ mov_slow(dest->as_register_lo(), c->as_jint_lo()); // 32-bit: the two halves go to separate registers
+ __ mov_slow(dest->as_register_hi(), c->as_jint_hi());
+#endif // AARCH64
+ break;
+
+ case T_OBJECT:
+ if (patch_code == lir_patch_none) {
+ __ mov_oop(dest->as_register(), c->as_jobject());
+ } else { // value not final yet: emit a patchable load
+ jobject2reg_with_patching(dest->as_register(), info);
+ }
+ break;
+
+ case T_METADATA:
+ if (patch_code == lir_patch_none) {
+ __ mov_metadata(dest->as_register(), c->as_metadata());
+ } else { // value not final yet: emit a patchable load
+ klass2reg_with_patching(dest->as_register(), info);
+ }
+ break;
+
+ case T_FLOAT:
+ if (dest->is_single_fpu()) {
+ __ mov_float(dest->as_float_reg(), c->as_jfloat());
+ } else {
+#ifdef AARCH64
+ ShouldNotReachHere();
+#else
+ // Simple getters can return float constant directly into r0
+ __ mov_slow(dest->as_register(), c->as_jint_bits());
+#endif // AARCH64
+ }
+ break;
+
+ case T_DOUBLE:
+ if (dest->is_double_fpu()) {
+ __ mov_double(dest->as_double_reg(), c->as_jdouble());
+ } else {
+#ifdef AARCH64
+ ShouldNotReachHere();
+#else
+ // Simple getters can return double constant directly into r1r0
+ __ mov_slow(dest->as_register_lo(), c->as_jint_lo_bits());
+ __ mov_slow(dest->as_register_hi(), c->as_jint_hi_bits());
+#endif // AARCH64
+ }
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { // Stores constant src into stack slot dest, going through Rtemp.
+ assert(src->is_constant(), "must be");
+ assert(dest->is_stack(), "must be");
+ LIR_Const* c = src->as_constant_ptr();
+
+ switch (c->type()) {
+ case T_INT: // fall through
+ case T_FLOAT:
+ __ mov_slow(Rtemp, c->as_jint_bits()); // int and float are both stored as their raw 32-bit pattern
+ __ str_32(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix()));
+ break;
+
+ case T_ADDRESS:
+ __ mov_slow(Rtemp, c->as_jint());
+ __ str(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix()));
+ break;
+
+ case T_OBJECT:
+ __ mov_oop(Rtemp, c->as_jobject());
+ __ str(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix()));
+ break;
+
+ case T_LONG: // fall through
+ case T_DOUBLE:
+#ifdef AARCH64
+ __ mov_slow(Rtemp, c->as_jlong_bits());
+ __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix()));
+#else
+ __ mov_slow(Rtemp, c->as_jint_lo_bits());
+ __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes));
+ if (c->as_jint_hi_bits() != c->as_jint_lo_bits()) { // reload Rtemp only when the high half differs from the low half
+ __ mov_slow(Rtemp, c->as_jint_hi_bits());
+ }
+ __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes));
+#endif // AARCH64
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, // Stores a zero/null constant to memory address dest.
+ CodeEmitInfo* info, bool wide) {
+#ifdef AARCH64
+ assert((src->as_constant_ptr()->type() == T_OBJECT && src->as_constant_ptr()->as_jobject() == NULL) ||
+ (src->as_constant_ptr()->type() == T_INT && src->as_constant_ptr()->as_jint() == 0) ||
+ (src->as_constant_ptr()->type() == T_LONG && src->as_constant_ptr()->as_jlong() == 0) ||
+ (src->as_constant_ptr()->type() == T_FLOAT && src->as_constant_ptr()->as_jint_bits() == 0) ||
+ (src->as_constant_ptr()->type() == T_DOUBLE && src->as_constant_ptr()->as_jlong_bits() == 0),
+ "cannot handle otherwise");
+ assert(dest->as_address_ptr()->type() == type, "should be");
+
+ Address addr = as_Address(dest->as_address_ptr());
+ int null_check_offset = code_offset(); // offset of the store instruction emitted by the switch below
+ switch (type) {
+ case T_OBJECT: // fall through
+ case T_ARRAY:
+ if (UseCompressedOops && !wide) { // narrow oop: 32-bit store
+ __ str_w(ZR, addr);
+ } else {
+ __ str(ZR, addr);
+ }
+ break;
+ case T_ADDRESS: // fall through
+ case T_DOUBLE: // fall through
+ case T_LONG: __ str(ZR, addr); break;
+ case T_FLOAT: // fall through
+ case T_INT: __ str_w(ZR, addr); break;
+ case T_BOOLEAN: // fall through
+ case T_BYTE: __ strb(ZR, addr); break;
+ case T_CHAR: // fall through
+ case T_SHORT: __ strh(ZR, addr); break;
+ default: ShouldNotReachHere();
+ }
+#else
+ assert((src->as_constant_ptr()->type() == T_OBJECT && src->as_constant_ptr()->as_jobject() == NULL),"cannot handle otherwise");
+ __ mov(Rtemp, 0); // 32-bit: only a null object constant is supported; the zero comes from Rtemp
+
+ int null_check_offset = code_offset(); // offset of the store that follows
+ __ str(Rtemp, as_Address(dest->as_address_ptr()));
+#endif // AARCH64
+
+ if (info != NULL) {
+#ifndef AARCH64
+ assert(false, "arm32 didn't support this before, investigate if bug");
+#endif
+ add_debug_info_for_null_check(null_check_offset, info);
+ }
+}
+
+void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { // Register-to-register move, dispatched on the kinds of src and dest.
+ assert(src->is_register() && dest->is_register(), "must be");
+
+ if (src->is_single_cpu()) {
+ if (dest->is_single_cpu()) {
+ move_regs(src->as_register(), dest->as_register());
+#ifdef AARCH64
+ } else if (dest->is_double_cpu()) { // AArch64: single -> double cpu is allowed for object, array and address types only
+ assert ((src->type() == T_OBJECT) || (src->type() == T_ARRAY) || (src->type() == T_ADDRESS), "invalid src type");
+ move_regs(src->as_register(), dest->as_register_lo());
+#else
+ } else if (dest->is_single_fpu()) { // 32-bit: core register -> single-precision FP register
+ __ fmsr(dest->as_float_reg(), src->as_register());
+#endif // AARCH64
+ } else {
+ ShouldNotReachHere();
+ }
+ } else if (src->is_double_cpu()) {
+#ifdef AARCH64
+ move_regs(src->as_register_lo(), dest->as_register_lo());
+#else
+ if (dest->is_double_cpu()) {
+ __ long_move(dest->as_register_lo(), dest->as_register_hi(), src->as_register_lo(), src->as_register_hi());
+ } else { // register pair -> double-precision FP register
+ __ fmdrr(dest->as_double_reg(), src->as_register_lo(), src->as_register_hi());
+ }
+#endif // AARCH64
+ } else if (src->is_single_fpu()) {
+ if (dest->is_single_fpu()) {
+ __ mov_float(dest->as_float_reg(), src->as_float_reg());
+ } else if (dest->is_single_cpu()) {
+ __ mov_fpr2gpr_float(dest->as_register(), src->as_float_reg());
+ } else {
+ ShouldNotReachHere();
+ }
+ } else if (src->is_double_fpu()) {
+ if (dest->is_double_fpu()) {
+ __ mov_double(dest->as_double_reg(), src->as_double_reg());
+ } else if (dest->is_double_cpu()) {
+#ifdef AARCH64
+ __ fmov_xd(dest->as_register_lo(), src->as_double_reg());
+#else
+ __ fmrrd(dest->as_register_lo(), dest->as_register_hi(), src->as_double_reg());
+#endif // AARCH64
+ } else {
+ ShouldNotReachHere();
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { // Spills register src to stack slot dest; pop_fpu_stack is not referenced.
+ assert(src->is_register(), "should not call otherwise");
+ assert(dest->is_stack(), "should not call otherwise");
+
+ Address addr = dest->is_single_word() ?
+ frame_map()->address_for_slot(dest->single_stack_ix()) :
+ frame_map()->address_for_slot(dest->double_stack_ix());
+
+#ifndef AARCH64
+ assert(lo_word_offset_in_bytes == 0 && hi_word_offset_in_bytes == 4, "little ending");
+ if (src->is_single_fpu() || src->is_double_fpu()) {
+ if (addr.disp() >= 1024) { BAILOUT("Too exotic case to handle here"); } // FP stores are emitted only for displacements below 1024
+ }
+#endif // !AARCH64
+
+ if (src->is_single_cpu()) {
+ switch (type) {
+ case T_OBJECT:
+ case T_ARRAY: __ verify_oop(src->as_register()); // fall through
+ case T_ADDRESS:
+ case T_METADATA: __ str(src->as_register(), addr); break;
+ case T_FLOAT: // used in intBitsToFloat intrinsic implementation, fall through
+ case T_INT: __ str_32(src->as_register(), addr); break;
+ default:
+ ShouldNotReachHere();
+ }
+ } else if (src->is_double_cpu()) {
+ __ str(src->as_register_lo(), addr);
+#ifndef AARCH64
+ __ str(src->as_register_hi(), frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes)); // 32-bit: second word stored separately
+#endif // !AARCH64
+ } else if (src->is_single_fpu()) {
+ __ str_float(src->as_float_reg(), addr);
+ } else if (src->is_double_fpu()) {
+ __ str_double(src->as_double_reg(), addr);
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+
+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, // Stores register src to memory address dest, with optional field patching.
+ LIR_PatchCode patch_code, CodeEmitInfo* info,
+ bool pop_fpu_stack, bool wide,
+ bool unaligned) {
+ LIR_Address* to_addr = dest->as_address_ptr();
+ Register base_reg = to_addr->base()->as_pointer_register();
+ const bool needs_patching = (patch_code != lir_patch_none);
+
+ PatchingStub* patch = NULL;
+ if (needs_patching) {
+#ifdef AARCH64
+ // Same alignment of reg2mem code and PatchingStub code. Required to make copied bind_literal() code properly aligned.
+ __ align(wordSize);
+#endif
+ patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+#ifdef AARCH64
+ // Extra nop for MT safe patching
+ __ nop();
+#endif // AARCH64
+ }
+
+ int null_check_offset = code_offset(); // default: the first instruction emitted by the switch below
+
+ switch (type) {
+ case T_ARRAY:
+ case T_OBJECT:
+ if (UseCompressedOops && !wide) {
+#ifdef AARCH64
+ const Register temp_src = Rtemp;
+ assert_different_registers(temp_src, src->as_register());
+ __ encode_heap_oop(temp_src, src->as_register());
+ null_check_offset = code_offset(); // re-taken after the encode so it points at the store itself
+ __ str_32(temp_src, as_Address(to_addr));
+#else
+ ShouldNotReachHere();
+#endif // AARCH64
+ } else {
+ __ str(src->as_register(), as_Address(to_addr));
+ }
+ break;
+
+ case T_ADDRESS:
+#ifdef AARCH64
+ case T_LONG:
+#endif // AARCH64
+ __ str(src->as_pointer_register(), as_Address(to_addr));
+ break;
+
+ case T_BYTE:
+ case T_BOOLEAN:
+ __ strb(src->as_register(), as_Address(to_addr));
+ break;
+
+ case T_CHAR:
+ case T_SHORT:
+ __ strh(src->as_register(), as_Address(to_addr));
+ break;
+
+ case T_INT:
+#ifdef __SOFTFP__
+ case T_FLOAT:
+#endif // __SOFTFP__
+ __ str_32(src->as_register(), as_Address(to_addr));
+ break;
+
+#ifdef AARCH64
+
+ case T_FLOAT:
+ __ str_s(src->as_float_reg(), as_Address(to_addr));
+ break;
+
+ case T_DOUBLE:
+ __ str_d(src->as_double_reg(), as_Address(to_addr));
+ break;
+
+#else // AARCH64
+
+#ifdef __SOFTFP__
+ case T_DOUBLE:
+#endif // __SOFTFP__
+ case T_LONG: { // 32-bit: a two-word value is written with two separate word stores
+ Register from_lo = src->as_register_lo();
+ Register from_hi = src->as_register_hi();
+ if (to_addr->index()->is_register()) {
+ assert(to_addr->scale() == LIR_Address::times_1,"Unexpected scaled register");
+ assert(to_addr->disp() == 0, "Not yet supporting both");
+ __ add(Rtemp, base_reg, to_addr->index()->as_register()); // fold base + index into Rtemp
+ base_reg = Rtemp;
+ __ str(from_lo, Address(Rtemp));
+ if (patch != NULL) { // close the stub for the low word and open one for the high word
+ patching_epilog(patch, lir_patch_low, base_reg, info);
+ patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+ patch_code = lir_patch_high;
+ }
+ __ str(from_hi, Address(Rtemp, BytesPerWord));
+ } else if (base_reg == from_lo) { // base register doubles as the low source: the high word is stored first
+ __ str(from_hi, as_Address_hi(to_addr));
+ if (patch != NULL) {
+ patching_epilog(patch, lir_patch_high, base_reg, info);
+ patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+ patch_code = lir_patch_low;
+ }
+ __ str(from_lo, as_Address_lo(to_addr));
+ } else { // general case: low word first, then high word
+ __ str(from_lo, as_Address_lo(to_addr));
+ if (patch != NULL) {
+ patching_epilog(patch, lir_patch_low, base_reg, info);
+ patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+ patch_code = lir_patch_high;
+ }
+ __ str(from_hi, as_Address_hi(to_addr));
+ }
+ break;
+ }
+
+#ifndef __SOFTFP__
+ case T_FLOAT:
+ if (to_addr->index()->is_register()) {
+ assert(to_addr->scale() == LIR_Address::times_1,"Unexpected scaled register");
+ __ add(Rtemp, base_reg, to_addr->index()->as_register()); // fold base + index into Rtemp for the FP store
+ if ((to_addr->disp() <= -4096) || (to_addr->disp() >= 4096)) { BAILOUT("offset not in range"); }
+ __ fsts(src->as_float_reg(), Address(Rtemp, to_addr->disp()));
+ } else {
+ __ fsts(src->as_float_reg(), as_Address(to_addr));
+ }
+ break;
+
+ case T_DOUBLE:
+ if (to_addr->index()->is_register()) {
+ assert(to_addr->scale() == LIR_Address::times_1,"Unexpected scaled register");
+ __ add(Rtemp, base_reg, to_addr->index()->as_register()); // fold base + index into Rtemp for the FP store
+ if ((to_addr->disp() <= -4096) || (to_addr->disp() >= 4096)) { BAILOUT("offset not in range"); }
+ __ fstd(src->as_double_reg(), Address(Rtemp, to_addr->disp()));
+ } else {
+ __ fstd(src->as_double_reg(), as_Address(to_addr));
+ }
+ break;
+#endif // __SOFTFP__
+
+#endif // AARCH64
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ if (info != NULL) {
+ add_debug_info_for_null_check(null_check_offset, info);
+ }
+
+ if (patch != NULL) {
+ // The offset embedded in the LDR/STR instruction may not be large enough
+ // to address a field. So, provide a space for one more instruction
+ // that will deal with larger offsets.
+ __ nop();
+ patching_epilog(patch, patch_code, base_reg, info);
+ }
+}
+
+
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { // Reloads stack slot src into register dest.
+ assert(src->is_stack(), "should not call otherwise");
+ assert(dest->is_register(), "should not call otherwise");
+
+ Address addr = src->is_single_word() ?
+ frame_map()->address_for_slot(src->single_stack_ix()) :
+ frame_map()->address_for_slot(src->double_stack_ix());
+
+#ifndef AARCH64
+ assert(lo_word_offset_in_bytes == 0 && hi_word_offset_in_bytes == 4, "little ending");
+ if (dest->is_single_fpu() || dest->is_double_fpu()) {
+ if (addr.disp() >= 1024) { BAILOUT("Too exotic case to handle here"); } // FP loads are emitted only for displacements below 1024
+ }
+#endif // !AARCH64
+
+ if (dest->is_single_cpu()) {
+ switch (type) {
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_ADDRESS:
+ case T_METADATA: __ ldr(dest->as_register(), addr); break;
+ case T_FLOAT: // used in floatToRawIntBits intrinsic implementation
+ case T_INT: __ ldr_u32(dest->as_register(), addr); break;
+ default:
+ ShouldNotReachHere();
+ }
+ if ((type == T_OBJECT) || (type == T_ARRAY)) { // verify object and array values once loaded
+ __ verify_oop(dest->as_register());
+ }
+ } else if (dest->is_double_cpu()) {
+ __ ldr(dest->as_register_lo(), addr);
+#ifndef AARCH64
+ __ ldr(dest->as_register_hi(), frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes)); // 32-bit: second word loaded separately
+#endif // !AARCH64
+ } else if (dest->is_single_fpu()) {
+ __ ldr_float(dest->as_float_reg(), addr);
+ } else if (dest->is_double_fpu()) {
+ __ ldr_double(dest->as_double_reg(), addr);
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { // Copies one stack slot to another through Rtemp.
+ if (src->is_single_stack()) {
+ switch (src->type()) { // dispatches on the source operand's own type, not the type parameter
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_ADDRESS:
+ case T_METADATA:
+ __ ldr(Rtemp, frame_map()->address_for_slot(src->single_stack_ix()));
+ __ str(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix()));
+ break;
+
+ case T_INT:
+ case T_FLOAT:
+ __ ldr_u32(Rtemp, frame_map()->address_for_slot(src->single_stack_ix()));
+ __ str_32(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix()));
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+ } else {
+ assert(src->is_double_stack(), "must be");
+ __ ldr(Rtemp, frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes));
+ __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes));
+#ifdef AARCH64
+ assert(lo_word_offset_in_bytes == 0, "adjust this code"); // a single ldr/str pair covers the whole slot on AArch64
+#else
+ __ ldr(Rtemp, frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes)); // 32-bit: second word copied separately
+ __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes));
+#endif // AARCH64
+ }
+}
+
+// Emits a load of the memory operand src into the register operand dest; type selects the load instruction that is used.
+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type,
+ LIR_PatchCode patch_code, CodeEmitInfo* info,
+ bool wide, bool unaligned) {  // note: unaligned is not consulted in this body
+ assert(src->is_address(), "should not call otherwise");
+ assert(dest->is_register(), "should not call otherwise");
+ LIR_Address* addr = src->as_address_ptr();
+
+ Register base_reg = addr->base()->as_pointer_register();
+
+ PatchingStub* patch = NULL;
+ if (patch_code != lir_patch_none) {  // access needs patching: open a PatchingStub before the load
+ patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+#ifdef AARCH64
+ // Extra nop for MT safe patching
+ __ nop();
+#endif // AARCH64
+ }
+ if (info != NULL) {
+ add_debug_info_for_null_check_here(info);
+ }
+
+ switch (type) {
+ case T_OBJECT: // fall through
+ case T_ARRAY:
+ if (UseCompressedOops && !wide) {  // compressed oop: 32-bit load (decoded after the switch on AArch64)
+ __ ldr_u32(dest->as_register(), as_Address(addr));
+ } else {
+ __ ldr(dest->as_register(), as_Address(addr));
+ }
+ break;
+
+ case T_ADDRESS:
+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {  // compressed klass field
+ __ ldr_u32(dest->as_pointer_register(), as_Address(addr));
+ } else {
+ __ ldr(dest->as_pointer_register(), as_Address(addr));
+ }
+ break;
+
+#ifdef AARCH64
+ case T_LONG:
+#else
+ case T_INT:
+#ifdef __SOFTFP__
+ case T_FLOAT:
+#endif // __SOFTFP__
+#endif // AARCH64
+ __ ldr(dest->as_pointer_register(), as_Address(addr));
+ break;
+
+ case T_BOOLEAN:
+ __ ldrb(dest->as_register(), as_Address(addr));
+ break;
+
+ case T_BYTE:
+ __ ldrsb(dest->as_register(), as_Address(addr));
+ break;
+
+ case T_CHAR:
+ __ ldrh(dest->as_register(), as_Address(addr));
+ break;
+
+ case T_SHORT:
+ __ ldrsh(dest->as_register(), as_Address(addr));
+ break;
+
+#ifdef AARCH64
+
+ case T_INT:
+ __ ldr_w(dest->as_register(), as_Address(addr));
+ break;
+
+ case T_FLOAT:
+ __ ldr_s(dest->as_float_reg(), as_Address(addr));
+ break;
+
+ case T_DOUBLE:
+ __ ldr_d(dest->as_double_reg(), as_Address(addr));
+ break;
+
+#else // AARCH64
+
+#ifdef __SOFTFP__
+ case T_DOUBLE:
+#endif // __SOFTFP__
+ case T_LONG: {  // 64-bit value is loaded as two words; each word gets its own patching stub
+ Register to_lo = dest->as_register_lo();
+ Register to_hi = dest->as_register_hi();
+ if (addr->index()->is_register()) {
+ assert(addr->scale() == LIR_Address::times_1,"Unexpected scaled register");
+ assert(addr->disp() == 0, "Not yet supporting both");
+ __ add(Rtemp, base_reg, addr->index()->as_register());
+ base_reg = Rtemp;  // both words are addressed off Rtemp from here on
+ __ ldr(to_lo, Address(Rtemp));
+ if (patch != NULL) {
+ patching_epilog(patch, lir_patch_low, base_reg, info);
+ patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+ patch_code = lir_patch_high;
+ }
+ __ ldr(to_hi, Address(Rtemp, BytesPerWord));
+ } else if (base_reg == to_lo) {  // loading lo first would overwrite the base, so hi goes first
+ __ ldr(to_hi, as_Address_hi(addr));
+ if (patch != NULL) {
+ patching_epilog(patch, lir_patch_high, base_reg, info);
+ patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+ patch_code = lir_patch_low;
+ }
+ __ ldr(to_lo, as_Address_lo(addr));
+ } else {
+ __ ldr(to_lo, as_Address_lo(addr));
+ if (patch != NULL) {
+ patching_epilog(patch, lir_patch_low, base_reg, info);
+ patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+ patch_code = lir_patch_high;
+ }
+ __ ldr(to_hi, as_Address_hi(addr));
+ }
+ break;
+ }
+
+#ifndef __SOFTFP__
+ case T_FLOAT:
+ if (addr->index()->is_register()) {
+ assert(addr->scale() == LIR_Address::times_1,"Unexpected scaled register");
+ __ add(Rtemp, base_reg, addr->index()->as_register());  // fold the index into Rtemp, keep disp as the offset
+ if ((addr->disp() <= -4096) || (addr->disp() >= 4096)) { BAILOUT("offset not in range"); }
+ __ flds(dest->as_float_reg(), Address(Rtemp, addr->disp()));
+ } else {
+ __ flds(dest->as_float_reg(), as_Address(addr));
+ }
+ break;
+
+ case T_DOUBLE:
+ if (addr->index()->is_register()) {
+ assert(addr->scale() == LIR_Address::times_1,"Unexpected scaled register");
+ __ add(Rtemp, base_reg, addr->index()->as_register());  // fold the index into Rtemp, keep disp as the offset
+ if ((addr->disp() <= -4096) || (addr->disp() >= 4096)) { BAILOUT("offset not in range"); }
+ __ fldd(dest->as_double_reg(), Address(Rtemp, addr->disp()));
+ } else {
+ __ fldd(dest->as_double_reg(), as_Address(addr));
+ }
+ break;
+#endif // __SOFTFP__
+
+#endif // AARCH64
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ if (patch != NULL) {
+ // The offset embedded into the LDR/STR instruction may not be large enough
+ // to address a field. So, provide a space for one more instruction
+ // that will deal with larger offsets.
+ __ nop();
+ patching_epilog(patch, patch_code, base_reg, info);
+ }
+
+#ifdef AARCH64
+ switch (type) {  // post-load fixups: decode compressed oops and compressed klass pointers
+ case T_ARRAY:
+ case T_OBJECT:
+ if (UseCompressedOops && !wide) {
+ __ decode_heap_oop(dest->as_register());
+ }
+ __ verify_oop(dest->as_register());
+ break;
+
+ case T_ADDRESS:
+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
+ __ decode_klass_not_null(dest->as_register());
+ }
+ break;
+ }
+#endif // AARCH64
+}
+
+// Emits lir_idiv / lir_irem; a 32-bit lir_idiv by a constant (asserted to be a power of two) is expanded inline with shifts.
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+ bool is_32 = op->result_opr()->is_single_cpu();
+
+ if (op->code() == lir_idiv && op->in_opr2()->is_constant() && is_32) {
+ int c = op->in_opr2()->as_constant_ptr()->as_jint();
+ assert(is_power_of_2(c), "non power-of-2 constant should be put in a register");
+
+ Register left = op->in_opr1()->as_register();
+ Register dest = op->result_opr()->as_register();
+ if (c == 1) {
+ __ mov(dest, left);
+ } else if (c == 2) {
+ __ add_32(dest, left, AsmOperand(left, lsr, 31));  // dest = left + (left < 0 ? 1 : 0)
+ __ asr_32(dest, dest, 1);
+ } else if (c != (int) 0x80000000) {
+ int power = log2_intptr(c);
+ __ asr_32(Rtemp, left, 31);
+ __ add_32(dest, left, AsmOperand(Rtemp, lsr, 32-power)); // dest = left + (left < 0 ? 2^power - 1 : 0);
+ __ asr_32(dest, dest, power); // dest = dest >> power; (arithmetic shift)
+ } else {
+ // x/0x80000000 is a special case, since the divisor is a power of two, but is negative.
+ // The only possible result values are 0 and 1, with 1 only for dividend == divisor == 0x80000000.
+ __ cmp_32(left, c);
+#ifdef AARCH64
+ __ cset(dest, eq);
+#else
+ __ mov(dest, 0, ne);
+ __ mov(dest, 1, eq);
+#endif // AARCH64
+ }
+ } else {
+#ifdef AARCH64
+ Register left = op->in_opr1()->as_pointer_register();
+ Register right = op->in_opr2()->as_pointer_register();
+ Register dest = op->result_opr()->as_pointer_register();
+
+ switch (op->code()) {
+ case lir_idiv:
+ if (is_32) {
+ __ sdiv_w(dest, left, right);
+ } else {
+ __ sdiv(dest, left, right);
+ }
+ break;
+ case lir_irem: {
+ Register tmp = op->in_opr3()->as_pointer_register();  // holds the quotient
+ assert_different_registers(left, tmp);
+ assert_different_registers(right, tmp);
+ if (is_32) {
+ __ sdiv_w(tmp, left, right);
+ __ msub_w(dest, right, tmp, left);  // presumably dest = left - right * tmp -- TODO confirm operand order
+ } else {
+ __ sdiv(tmp, left, right);
+ __ msub(dest, right, tmp, left);  // presumably dest = left - right * tmp -- TODO confirm operand order
+ }
+ break;
+ }
+ default:
+ ShouldNotReachHere();
+ }
+#else
+ assert(op->code() == lir_idiv || op->code() == lir_irem, "unexpected op3");
+ __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::runtime_call_type);  // one stub serves both idiv and irem
+ add_debug_info_for_div0_here(op->info());
+#endif // AARCH64
+ }
+}
+
+// Emits the branch for op: the LIR condition is mapped to an ARM condition code and a branch to op->label() is emitted under it.
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+#ifdef ASSERT
+ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
+ if (op->block() != NULL) _branch_target_blocks.append(op->block());
+ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
+ assert(op->info() == NULL, "CodeEmitInfo?");
+#endif // ASSERT
+
+#ifdef __SOFTFP__
+ assert (op->code() != lir_cond_float_branch, "this should be impossible");
+#else
+ if (op->code() == lir_cond_float_branch) {
+#ifndef AARCH64
+ __ fmstat();
+#endif // !AARCH64
+ __ b(*(op->ublock()->label()), vs);  // float branch: go to the unordered block on the vs condition first
+ }
+#endif // __SOFTFP__
+
+ AsmCondition acond = al;  // stays "always" for lir_cond_always
+ switch (op->cond()) {
+ case lir_cond_equal: acond = eq; break;
+ case lir_cond_notEqual: acond = ne; break;
+ case lir_cond_less: acond = lt; break;
+ case lir_cond_lessEqual: acond = le; break;
+ case lir_cond_greaterEqual: acond = ge; break;
+ case lir_cond_greater: acond = gt; break;
+ case lir_cond_aboveEqual: acond = hs; break;
+ case lir_cond_belowEqual: acond = ls; break;
+ default: assert(op->cond() == lir_cond_always, "must be");
+ }
+ __ b(*(op->label()), acond);
+}
+
+// Emits the primitive conversion selected by op->bytecode(), reading op->in_opr() and writing op->result_opr().
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+ LIR_Opr src = op->in_opr();
+ LIR_Opr dest = op->result_opr();
+
+ switch (op->bytecode()) {
+ case Bytecodes::_i2l:
+#ifdef AARCH64
+ __ sign_extend(dest->as_register_lo(), src->as_register(), 32);
+#else
+ move_regs(src->as_register(), dest->as_register_lo());
+ __ mov(dest->as_register_hi(), AsmOperand(src->as_register(), asr, 31));  // hi word = src shifted right arithmetically by 31
+#endif // AARCH64
+ break;
+ case Bytecodes::_l2i:
+ move_regs(src->as_register_lo(), dest->as_register());  // only the low part of the long is used
+ break;
+ case Bytecodes::_i2b:
+ __ sign_extend(dest->as_register(), src->as_register(), 8);
+ break;
+ case Bytecodes::_i2s:
+ __ sign_extend(dest->as_register(), src->as_register(), 16);
+ break;
+ case Bytecodes::_i2c:
+ __ zero_extend(dest->as_register(), src->as_register(), 16);
+ break;
+ case Bytecodes::_f2d:
+ __ convert_f2d(dest->as_double_reg(), src->as_float_reg());
+ break;
+ case Bytecodes::_d2f:
+ __ convert_d2f(dest->as_float_reg(), src->as_double_reg());
+ break;
+ case Bytecodes::_i2f:
+#ifdef AARCH64
+ __ scvtf_sw(dest->as_float_reg(), src->as_register());
+#else
+ __ fmsr(Stemp, src->as_register());  // 32-bit ARM: the conversion goes through Stemp
+ __ fsitos(dest->as_float_reg(), Stemp);
+#endif // AARCH64
+ break;
+ case Bytecodes::_i2d:
+#ifdef AARCH64
+ __ scvtf_dw(dest->as_double_reg(), src->as_register());
+#else
+ __ fmsr(Stemp, src->as_register());  // 32-bit ARM: the conversion goes through Stemp
+ __ fsitod(dest->as_double_reg(), Stemp);
+#endif // AARCH64
+ break;
+ case Bytecodes::_f2i:
+#ifdef AARCH64
+ __ fcvtzs_ws(dest->as_register(), src->as_float_reg());
+#else
+ __ ftosizs(Stemp, src->as_float_reg());
+ __ fmrs(dest->as_register(), Stemp);
+#endif // AARCH64
+ break;
+ case Bytecodes::_d2i:
+#ifdef AARCH64
+ __ fcvtzs_wd(dest->as_register(), src->as_double_reg());
+#else
+ __ ftosizd(Stemp, src->as_double_reg());
+ __ fmrs(dest->as_register(), Stemp);
+#endif // AARCH64
+ break;
+#ifdef AARCH64
+ case Bytecodes::_l2f:
+ __ scvtf_sx(dest->as_float_reg(), src->as_register_lo());
+ break;
+ case Bytecodes::_l2d:
+ __ scvtf_dx(dest->as_double_reg(), src->as_register_lo());
+ break;
+ case Bytecodes::_f2l:
+ __ fcvtzs_xs(dest->as_register_lo(), src->as_float_reg());
+ break;
+ case Bytecodes::_d2l:
+ __ fcvtzs_xd(dest->as_register_lo(), src->as_double_reg());
+ break;
+#endif // AARCH64
+ default:  // long<->float/double conversions are only handled inline on AArch64
+ ShouldNotReachHere();
+ }
+}
+
+// Emits object allocation; with op->init_check() set, the stub entry is taken unless the klass init state is fully_initialized.
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+ if (op->init_check()) {
+ Register tmp = op->tmp1()->as_register();
+ __ ldrb(tmp, Address(op->klass()->as_register(), InstanceKlass::init_state_offset()));
+ add_debug_info_for_null_check_here(op->stub()->info());
+ __ cmp(tmp, InstanceKlass::fully_initialized);
+ __ b(*op->stub()->entry(), ne);  // not fully initialized: go to the stub
+ }
+ __ allocate_object(op->obj()->as_register(),
+ op->tmp1()->as_register(),
+ op->tmp2()->as_register(),
+ op->tmp3()->as_register(),
+ op->header_size(),
+ op->object_size(),
+ op->klass()->as_register(),
+ *op->stub()->entry());
+ __ bind(*op->stub()->continuation());  // continuation label of the stub is bound right after the allocation code
+}
+
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {  // array allocation: inline allocate_array or an unconditional jump to the stub
+ if (UseSlowPath ||
+ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
+ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
+ __ b(*op->stub()->entry());  // inline allocation disabled for this element type: always use the stub
+ } else {
+ __ allocate_array(op->obj()->as_register(),
+ op->len()->as_register(),
+ op->tmp1()->as_register(),
+ op->tmp2()->as_register(),
+ op->tmp3()->as_register(),
+ arrayOopDesc::header_size(op->type()),
+ type2aelembytes(op->type()),
+ op->klass()->as_register(),
+ *op->stub()->entry());
+ }
+ __ bind(*op->stub()->continuation());  // continuation label of the stub is bound after either path
+}
+
+void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias,
+ ciMethodData *md, ciProfileData *data,
+ Register recv, Register tmp1, Label* update_done) {  // updates the receiver rows of data; jumps to update_done once a row was updated
+ assert_different_registers(mdo, recv, tmp1);
+ uint i;
+ for (i = 0; i < VirtualCallData::row_limit(); i++) {  // code is emitted once per row (unrolled at compile time)
+ Label next_test;
+ // See if the receiver is receiver[n].
+ Address receiver_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) -
+ mdo_offset_bias);
+ __ ldr(tmp1, receiver_addr);
+ __ verify_klass_ptr(tmp1);
+ __ cmp(recv, tmp1);
+ __ b(next_test, ne);
+ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) -
+ mdo_offset_bias);
+ __ ldr(tmp1, data_addr);
+ __ add(tmp1, tmp1, DataLayout::counter_increment);  // matching row: bump its count
+ __ str(tmp1, data_addr);
+ __ b(*update_done);
+ __ bind(next_test);
+ }
+
+ // Didn't find receiver; find next empty slot and fill it in
+ for (i = 0; i < VirtualCallData::row_limit(); i++) {
+ Label next_test;
+ Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) -
+ mdo_offset_bias);
+ __ ldr(tmp1, recv_addr);
+ __ cbnz(tmp1, next_test);  // row already holds a receiver: try the next one
+ __ str(recv, recv_addr);
+ __ mov(tmp1, DataLayout::counter_increment);  // new row starts with a count of one increment
+ __ str(tmp1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) -
+ mdo_offset_bias));
+ __ b(*update_done);
+ __ bind(next_test);
+ }
+}
+
+void LIR_Assembler::setup_md_access(ciMethod* method, int bci,
+ ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) {  // outputs: md, data and (only when needed) mdo_offset_bias
+ md = method->method_data_or_null();
+ assert(md != NULL, "Sanity");
+ data = md->bci_to_data(bci);
+ assert(data != NULL, "need data for checkcast");
+ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+ if (md->byte_offset_of_slot(data, DataLayout::header_offset()) + data->size_in_bytes() >= 4096) {
+ // The offset is large so bias the mdo by the base of the slot so
+ // that the ldr can use an immediate offset to reference the slots of the data
+ mdo_offset_bias = md->byte_offset_of_slot(data, DataLayout::header_offset());  // otherwise the caller's value is left untouched
+ }
+}
+
+// Null path of type-check profiling: for a null obj, ORs null_seen into the profile flags and jumps to obj_is_null. On 32-bit ARM, code before this helper should test obj for null (ZF should be set if obj is null).
+void LIR_Assembler::typecheck_profile_helper1(ciMethod* method, int bci,
+ ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias,
+ Register obj, Register mdo, Register data_val, Label* obj_is_null) {
+ assert(method != NULL, "Should have method");
+ assert_different_registers(obj, mdo, data_val);
+ setup_md_access(method, bci, md, data, mdo_offset_bias);
+ Label not_null;
+#ifdef AARCH64
+ __ cbnz(obj, not_null);
+#else
+ __ b(not_null, ne);  // uses the flags set up by the caller
+#endif // AARCH64
+ __ mov_metadata(mdo, md->constant_encoding());
+ if (mdo_offset_bias > 0) {
+ __ mov_slow(data_val, mdo_offset_bias);  // data_val doubles as a scratch register for the bias
+ __ add(mdo, mdo, data_val);
+ }
+ Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias);
+ __ ldrb(data_val, flags_addr);
+ __ orr(data_val, data_val, (uint)BitData::null_seen_byte_constant());
+ __ strb(data_val, flags_addr);
+ __ b(*obj_is_null);
+ __ bind(not_null);  // non-null objects continue with the code emitted after this helper
+}
+
+void LIR_Assembler::typecheck_profile_helper2(ciMethodData* md, ciProfileData* data, int mdo_offset_bias,
+ Register mdo, Register recv, Register value, Register tmp1,
+ Label* profile_cast_success, Label* profile_cast_failure,
+ Label* success, Label* failure) {  // binds both profile_cast_* labels and emits the profile update behind each
+ assert_different_registers(mdo, value, tmp1);
+ __ bind(*profile_cast_success);
+ __ mov_metadata(mdo, md->constant_encoding());
+ if (mdo_offset_bias > 0) {
+ __ mov_slow(tmp1, mdo_offset_bias);
+ __ add(mdo, mdo, tmp1);
+ }
+ __ load_klass(recv, value);
+ type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, success);
+ __ b(*success);  // reached when type_profile_helper updated no row
+ // Cast failure case
+ __ bind(*profile_cast_failure);
+ __ mov_metadata(mdo, md->constant_encoding());
+ if (mdo_offset_bias > 0) {
+ __ mov_slow(tmp1, mdo_offset_bias);
+ __ add(mdo, mdo, tmp1);
+ }
+ Address data_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
+ __ ldr(tmp1, data_addr);
+ __ sub(tmp1, tmp1, DataLayout::counter_increment);  // a failed cast decrements the counter
+ __ str(tmp1, data_addr);
+ __ b(*failure);
+}
+
+// Sets `res` to true, if `cond` holds. On AArch64 also sets `res` to false if `cond` does not hold; on 32-bit ARM `res` must already hold the false value.
+static void set_instanceof_result(MacroAssembler* _masm, Register res, AsmCondition cond) {
+#ifdef AARCH64
+ __ cset(res, cond);
+#else
+ __ mov(res, 1, cond);  // conditional move: nothing is written when cond does not hold
+#endif // AARCH64
+}
+
+// Emits the type check for lir_store_check, lir_checkcast or lir_instanceof, plus the profile updates when op->should_profile().
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+ // TODO: ARM - can be more effective with one more register
+ switch (op->code()) {
+ case lir_store_check: {
+ CodeStub* stub = op->stub();
+ Register value = op->object()->as_register();
+ Register array = op->array()->as_register();
+ Register klass_RInfo = op->tmp1()->as_register();
+ Register k_RInfo = op->tmp2()->as_register();
+ assert_different_registers(klass_RInfo, k_RInfo, Rtemp);
+ if (op->should_profile()) {
+ assert_different_registers(value, klass_RInfo, k_RInfo, Rtemp);
+ }
+
+ // check if it needs to be profiled
+ ciMethodData* md;
+ ciProfileData* data;
+ int mdo_offset_bias = 0;
+ Label profile_cast_success, profile_cast_failure, done;
+ Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+ Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
+ if (op->should_profile()) {
+#ifndef AARCH64
+ __ cmp(value, 0);
+#endif // !AARCH64
+ typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, value, k_RInfo, Rtemp, &done);
+ } else {
+ __ cbz(value, done);  // a null value skips the check entirely
+ }
+ assert_different_registers(k_RInfo, value);
+ add_debug_info_for_null_check_here(op->info_for_exception());
+ __ load_klass(k_RInfo, array);
+ __ load_klass(klass_RInfo, value);
+ __ ldr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
+ __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
+ // check for immediate positive hit
+ __ ldr(Rtemp, Address(klass_RInfo, Rtemp));
+ __ cmp(klass_RInfo, k_RInfo);
+ __ cond_cmp(Rtemp, k_RInfo, ne);
+ __ b(*success_target, eq);
+ // check for immediate negative hit
+ __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
+ __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset()));
+ __ b(*failure_target, ne);
+ // slow case
+ assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
+ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ cbz(R0, *failure_target);
+ if (op->should_profile()) {
+ Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp;
+ if (mdo == value) {  // helper2 requires mdo != value, so swap the two temps
+ mdo = k_RInfo;
+ recv = klass_RInfo;
+ }
+ typecheck_profile_helper2(md, data, mdo_offset_bias, mdo, recv, value, tmp1,
+ &profile_cast_success, &profile_cast_failure,
+ &done, stub->entry());
+ }
+ __ bind(done);
+ break;
+ }
+
+ case lir_checkcast: {
+ CodeStub* stub = op->stub();
+ Register obj = op->object()->as_register();
+ Register res = op->result_opr()->as_register();
+ Register klass_RInfo = op->tmp1()->as_register();
+ Register k_RInfo = op->tmp2()->as_register();
+ ciKlass* k = op->klass();
+ assert_different_registers(res, k_RInfo, klass_RInfo, Rtemp);
+
+ // TODO: ARM - Late binding is used to prevent confusion of register allocator
+ assert(stub->is_exception_throw_stub(), "must be");
+ ((SimpleExceptionStub*)stub)->set_obj(op->result_opr());
+
+ ciMethodData* md;
+ ciProfileData* data;
+ int mdo_offset_bias = 0;
+
+ Label done;
+
+ Label profile_cast_failure, profile_cast_success;
+ Label *failure_target = op->should_profile() ? &profile_cast_failure : op->stub()->entry();
+ Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+
+#ifdef AARCH64
+ move_regs(obj, res);
+ if (op->should_profile()) {
+ typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done);
+ } else {
+ __ cbz(obj, done);
+ }
+ if (k->is_loaded()) {
+ __ mov_metadata(k_RInfo, k->constant_encoding());
+ } else {
+ if (res != obj) {
+ op->info_for_patch()->add_register_oop(FrameMap::as_oop_opr(res));
+ }
+ klass2reg_with_patching(k_RInfo, op->info_for_patch());
+ }
+ __ load_klass(klass_RInfo, res);
+
+ if (op->fast_check()) {
+ __ cmp(klass_RInfo, k_RInfo);
+ __ b(*failure_target, ne);
+ } else if (k->is_loaded()) {
+ __ ldr(Rtemp, Address(klass_RInfo, k->super_check_offset()));
+ if (in_bytes(Klass::secondary_super_cache_offset()) != (int) k->super_check_offset()) {
+ __ cmp(Rtemp, k_RInfo);
+ __ b(*failure_target, ne);
+ } else {
+ __ cmp(klass_RInfo, k_RInfo);
+ __ cond_cmp(Rtemp, k_RInfo, ne);
+ __ b(*success_target, eq);
+ assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
+ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ cbz(R0, *failure_target);
+ }
+ } else {
+ __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
+ // check for immediate positive hit
+ __ ldr(Rtemp, Address(klass_RInfo, Rtemp));
+ __ cmp(klass_RInfo, k_RInfo);
+ __ cond_cmp(Rtemp, k_RInfo, ne);
+ __ b(*success_target, eq);
+ // check for immediate negative hit
+ __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
+ __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset()));
+ __ b(*failure_target, ne);
+ // slow case
+ assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
+ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ cbz(R0, *failure_target);
+ }
+
+#else // AARCH64
+
+ __ movs(res, obj);  // flag-setting move: the eq tests below mean "obj is null"
+ if (op->should_profile()) {
+ typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done);
+ } else {
+ __ b(done, eq);
+ }
+ if (k->is_loaded()) {
+ __ mov_metadata(k_RInfo, k->constant_encoding());
+ } else if (k_RInfo != obj) {
+ klass2reg_with_patching(k_RInfo, op->info_for_patch());
+ __ movs(res, obj);  // redo the move (and the flags) after the patching code
+ } else {
+ // Patching doesn't update "res" register after GC, so do patching first
+ klass2reg_with_patching(Rtemp, op->info_for_patch());
+ __ movs(res, obj);
+ __ mov(k_RInfo, Rtemp);
+ }
+ __ load_klass(klass_RInfo, res, ne);  // conditional on ne, i.e. only for a non-null object
+
+ if (op->fast_check()) {
+ __ cmp(klass_RInfo, k_RInfo, ne);
+ __ b(*failure_target, ne);
+ } else if (k->is_loaded()) {
+ __ b(*success_target, eq);
+ __ ldr(Rtemp, Address(klass_RInfo, k->super_check_offset()));
+ if (in_bytes(Klass::secondary_super_cache_offset()) != (int) k->super_check_offset()) {
+ __ cmp(Rtemp, k_RInfo);
+ __ b(*failure_target, ne);
+ } else {
+ __ cmp(klass_RInfo, k_RInfo);
+ __ cmp(Rtemp, k_RInfo, ne);
+ __ b(*success_target, eq);
+ assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
+ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ cbz(R0, *failure_target);
+ }
+ } else {
+ __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
+ __ b(*success_target, eq);
+ // check for immediate positive hit
+ __ ldr(Rtemp, Address(klass_RInfo, Rtemp));
+ __ cmp(klass_RInfo, k_RInfo);
+ __ cmp(Rtemp, k_RInfo, ne);
+ __ b(*success_target, eq);
+ // check for immediate negative hit
+ __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
+ __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset()));
+ __ b(*failure_target, ne);
+ // slow case
+ assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
+ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ __ cbz(R0, *failure_target);
+ }
+#endif // AARCH64
+
+ if (op->should_profile()) {
+ Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp;
+ typecheck_profile_helper2(md, data, mdo_offset_bias, mdo, recv, res, tmp1,
+ &profile_cast_success, &profile_cast_failure,
+ &done, stub->entry());
+ }
+ __ bind(done);
+ break;
+ }
+
+ case lir_instanceof: {
+ Register obj = op->object()->as_register();
+ Register res = op->result_opr()->as_register();
+ Register klass_RInfo = op->tmp1()->as_register();
+ Register k_RInfo = op->tmp2()->as_register();
+ ciKlass* k = op->klass();
+ assert_different_registers(res, klass_RInfo, k_RInfo, Rtemp);
+
+ ciMethodData* md;
+ ciProfileData* data;
+ int mdo_offset_bias = 0;
+
+ Label done;
+
+ Label profile_cast_failure, profile_cast_success;
+ Label *failure_target = op->should_profile() ? &profile_cast_failure : &done;
+ Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+
+#ifdef AARCH64
+ move_regs(obj, res);
+#else
+ __ movs(res, obj);
+#endif // AARCH64
+
+ if (op->should_profile()) {
+ typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done);
+ } else {
+#ifdef AARCH64
+ __ cbz(obj, done); // If obj == NULL, res is false
+#else
+ __ b(done, eq);
+#endif // AARCH64
+ }
+
+ if (k->is_loaded()) {
+ __ mov_metadata(k_RInfo, k->constant_encoding());
+ } else {
+ op->info_for_patch()->add_register_oop(FrameMap::as_oop_opr(res));
+ klass2reg_with_patching(k_RInfo, op->info_for_patch());
+ }
+ __ load_klass(klass_RInfo, res);
+
+#ifndef AARCH64
+ if (!op->should_profile()) {
+ __ mov(res, 0);  // preset false; set_instanceof_result only writes the true case on 32-bit ARM
+ }
+#endif // !AARCH64
+
+ if (op->fast_check()) {
+ __ cmp(klass_RInfo, k_RInfo);
+ if (!op->should_profile()) {
+ set_instanceof_result(_masm, res, eq);
+ } else {
+ __ b(profile_cast_failure, ne);
+ }
+ } else if (k->is_loaded()) {
+ __ ldr(Rtemp, Address(klass_RInfo, k->super_check_offset()));
+ if (in_bytes(Klass::secondary_super_cache_offset()) != (int) k->super_check_offset()) {
+ __ cmp(Rtemp, k_RInfo);
+ if (!op->should_profile()) {
+ set_instanceof_result(_masm, res, eq);
+ } else {
+ __ b(profile_cast_failure, ne);
+ }
+ } else {
+ __ cmp(klass_RInfo, k_RInfo);
+ __ cond_cmp(Rtemp, k_RInfo, ne);
+ if (!op->should_profile()) {
+ set_instanceof_result(_masm, res, eq);
+ }
+ __ b(*success_target, eq);
+ assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
+ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ if (!op->should_profile()) {
+ move_regs(R0, res);  // the value left in R0 by the call becomes the result
+ } else {
+ __ cbz(R0, *failure_target);
+ }
+ }
+ } else {
+ __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
+ // check for immediate positive hit
+ __ cmp(klass_RInfo, k_RInfo);
+ if (!op->should_profile()) {
+#ifdef AARCH64
+ // TODO-AARCH64 check if separate conditional branch is more efficient than ldr+cond_cmp
+ __ ldr(res, Address(klass_RInfo, Rtemp));
+#else
+ __ ldr(res, Address(klass_RInfo, Rtemp), ne);
+#endif // AARCH64
+ __ cond_cmp(res, k_RInfo, ne);
+ set_instanceof_result(_masm, res, eq);
+ } else {
+#ifdef AARCH64
+ // TODO-AARCH64 check if separate conditional branch is more efficient than ldr+cond_cmp
+ __ ldr(Rtemp, Address(klass_RInfo, Rtemp));
+#else
+ __ ldr(Rtemp, Address(klass_RInfo, Rtemp), ne);
+#endif // AARCH64
+ __ cond_cmp(Rtemp, k_RInfo, ne);
+ }
+ __ b(*success_target, eq);
+ // check for immediate negative hit
+ if (op->should_profile()) {
+ __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));  // Rtemp was overwritten above when profiling
+ }
+ __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset()));
+ if (!op->should_profile()) {
+#ifdef AARCH64
+ __ mov(res, 0);
+#else
+ __ mov(res, 0, ne);
+#endif // AARCH64
+ }
+ __ b(*failure_target, ne);
+ // slow case
+ assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
+ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+ if (!op->should_profile()) {
+ move_regs(R0, res);
+ }
+ if (op->should_profile()) {
+ __ cbz(R0, *failure_target);
+ }
+ }
+
+ if (op->should_profile()) {
+ Label done_ok, done_failure;
+ Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp;
+ typecheck_profile_helper2(md, data, mdo_offset_bias, mdo, recv, res, tmp1,
+ &profile_cast_success, &profile_cast_failure,
+ &done_ok, &done_failure);
+ __ bind(done_failure);  // when profiling, the boolean result is materialized here
+ __ mov(res, 0);
+ __ b(done);
+ __ bind(done_ok);
+ __ mov(res, 1);
+ }
+ __ bind(done);
+ break;
+ }
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+// Emits an atomic compare-and-swap for lir_cas_int, lir_cas_obj or lir_cas_long, followed by a memory barrier.
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+ // if (*addr == cmpval) {
+ // *addr = newval;
+ // dest = 1;
+ // } else {
+ // dest = 0;
+ // }
+#ifdef AARCH64
+ Label retry, done;
+ Register addr = op->addr()->as_pointer_register();
+ Register cmpval = op->cmp_value()->as_pointer_register();
+ Register newval = op->new_value()->as_pointer_register();
+ Register dest = op->result_opr()->as_pointer_register();
+ assert_different_registers(dest, addr, cmpval, newval, Rtemp);
+
+ if (UseCompressedOops && op->code() == lir_cas_obj) {  // compare and store the encoded forms of both oops
+ Register tmp1 = op->tmp1()->as_pointer_register();
+ Register tmp2 = op->tmp2()->as_pointer_register();
+ assert_different_registers(dest, addr, cmpval, newval, tmp1, tmp2, Rtemp);
+ __ encode_heap_oop(tmp1, cmpval); cmpval = tmp1;
+ __ encode_heap_oop(tmp2, newval); newval = tmp2;
+ }
+
+ __ mov(dest, ZR);  // result stays 0 if the comparison below fails
+ __ bind(retry);
+ if (((op->code() == lir_cas_obj) && !UseCompressedOops) || op->code() == lir_cas_long) {
+ __ ldaxr(Rtemp, addr);
+ __ cmp(Rtemp, cmpval);
+ __ b(done, ne);
+ __ stlxr(Rtemp, newval, addr);
+ } else if (((op->code() == lir_cas_obj) && UseCompressedOops) || op->code() == lir_cas_int) {
+ __ ldaxr_w(Rtemp, addr);
+ __ cmp_w(Rtemp, cmpval);
+ __ b(done, ne);
+ __ stlxr_w(Rtemp, newval, addr);
+ } else {
+ ShouldNotReachHere();
+ }
+ __ cbnz_w(Rtemp, retry);  // non-zero status in Rtemp after the exclusive store: loop back
+ __ mov(dest, 1);
+ __ bind(done);
+#else
+ // FIXME: membar_release
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp);
+ if (op->code() == lir_cas_int || op->code() == lir_cas_obj) {
+ Register addr = op->addr()->as_register();
+ Register cmpval = op->cmp_value()->as_register();
+ Register newval = op->new_value()->as_register();
+ Register dest = op->result_opr()->as_register();
+ assert_different_registers(dest, addr, cmpval, newval, Rtemp);
+
+ __ atomic_cas_bool(cmpval, newval, addr, 0, Rtemp); // Rtemp free by default at C1 LIR layer
+ __ mov(dest, 1, eq);  // the eq/ne outcome of atomic_cas_bool is turned into 1/0
+ __ mov(dest, 0, ne);
+ } else if (op->code() == lir_cas_long) {
+ assert(VM_Version::supports_cx8(), "wrong machine");
+ Register addr = op->addr()->as_pointer_register();
+ Register cmp_value_lo = op->cmp_value()->as_register_lo();
+ Register cmp_value_hi = op->cmp_value()->as_register_hi();
+ Register new_value_lo = op->new_value()->as_register_lo();
+ Register new_value_hi = op->new_value()->as_register_hi();
+ Register dest = op->result_opr()->as_register();
+ Register tmp_lo = op->tmp1()->as_register_lo();
+ Register tmp_hi = op->tmp1()->as_register_hi();
+
+ assert_different_registers(tmp_lo, tmp_hi, cmp_value_lo, cmp_value_hi, dest, new_value_lo, new_value_hi, addr);
+ assert(tmp_hi->encoding() == tmp_lo->encoding() + 1, "non aligned register pair");  // tmp and new value: consecutive pair...
+ assert(new_value_hi->encoding() == new_value_lo->encoding() + 1, "non aligned register pair");
+ assert((tmp_lo->encoding() & 0x1) == 0, "misaligned register pair");  // ...starting at an even register
+ assert((new_value_lo->encoding() & 0x1) == 0, "misaligned register pair");
+ __ atomic_cas64(tmp_lo, tmp_hi, dest, cmp_value_lo, cmp_value_hi,
+ new_value_lo, new_value_hi, addr, 0);
+ } else {
+ Unimplemented();
+ }
+#endif // AARCH64
+ // FIXME: is full membar really needed instead of just membar_acquire?
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
+}
+
+// Emits a conditional move: result receives opr1 when condition holds and opr2 otherwise, using the condition flags already in effect.
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+ AsmCondition acond = al;
+ AsmCondition ncond = nv;
+ if (opr1 != opr2) {  // identical operands keep acond == al: one unconditional move
+ switch (condition) {
+ case lir_cond_equal: acond = eq; ncond = ne; break;
+ case lir_cond_notEqual: acond = ne; ncond = eq; break;
+ case lir_cond_less: acond = lt; ncond = ge; break;
+ case lir_cond_lessEqual: acond = le; ncond = gt; break;
+ case lir_cond_greaterEqual: acond = ge; ncond = lt; break;
+ case lir_cond_greater: acond = gt; ncond = le; break;
+ case lir_cond_aboveEqual: acond = hs; ncond = lo; break;
+ case lir_cond_belowEqual: acond = ls; ncond = hi; break;
+ default: ShouldNotReachHere();
+ }
+ }
+
+#ifdef AARCH64
+
+ // TODO-AARCH64 implement it more efficiently
+
+ if (opr1->is_register()) {
+ reg2reg(opr1, result);
+ } else if (opr1->is_stack()) {
+ stack2reg(opr1, result, result->type());
+ } else if (opr1->is_constant()) {
+ const2reg(opr1, result, lir_patch_none, NULL);
+ } else {
+ ShouldNotReachHere();
+ }
+
+ Label skip;
+ __ b(skip, acond);  // condition holds: keep opr1 and skip the move of opr2
+
+ if (opr2->is_register()) {
+ reg2reg(opr2, result);
+ } else if (opr2->is_stack()) {
+ stack2reg(opr2, result, result->type());
+ } else if (opr2->is_constant()) {
+ const2reg(opr2, result, lir_patch_none, NULL);
+ } else {
+ ShouldNotReachHere();
+ }
+
+ __ bind(skip);
+
+#else
+ for (;;) { // two iterations only
+ if (opr1 == result) {
+ // do nothing
+ } else if (opr1->is_single_cpu()) {
+ __ mov(result->as_register(), opr1->as_register(), acond);
+ } else if (opr1->is_double_cpu()) {
+ __ long_move(result->as_register_lo(), result->as_register_hi(),
+ opr1->as_register_lo(), opr1->as_register_hi(), acond);
+ } else if (opr1->is_single_stack()) {
+ __ ldr(result->as_register(), frame_map()->address_for_slot(opr1->single_stack_ix()), acond);
+ } else if (opr1->is_double_stack()) {
+ __ ldr(result->as_register_lo(),
+ frame_map()->address_for_slot(opr1->double_stack_ix(), lo_word_offset_in_bytes), acond);
+ __ ldr(result->as_register_hi(),
+ frame_map()->address_for_slot(opr1->double_stack_ix(), hi_word_offset_in_bytes), acond);
+ } else if (opr1->is_illegal()) {
+ // do nothing: this part of the cmove has been optimized away in the peephole optimizer
+ } else {
+ assert(opr1->is_constant(), "must be");
+ LIR_Const* c = opr1->as_constant_ptr();
+
+ switch (c->type()) {
+ case T_INT:
+ __ mov_slow(result->as_register(), c->as_jint(), acond);
+ break;
+ case T_LONG:
+ __ mov_slow(result->as_register_lo(), c->as_jint_lo(), acond);
+ __ mov_slow(result->as_register_hi(), c->as_jint_hi(), acond);
+ break;
+ case T_OBJECT:
+ __ mov_oop(result->as_register(), c->as_jobject(), 0, acond);
+ break;
+ case T_FLOAT:
+#ifdef __SOFTFP__
+ // not generated now.
+ __ mov_slow(result->as_register(), c->as_jint(), acond);
+#else
+ __ mov_float(result->as_float_reg(), c->as_jfloat(), acond);
+#endif // __SOFTFP__
+ break;
+ case T_DOUBLE:
+#ifdef __SOFTFP__
+ // not generated now.
+ __ mov_slow(result->as_register_lo(), c->as_jint_lo(), acond);
+ __ mov_slow(result->as_register_hi(), c->as_jint_hi(), acond);
+#else
+ __ mov_double(result->as_double_reg(), c->as_jdouble(), acond);
+#endif // __SOFTFP__
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ // Negate the condition and repeat the algorithm with the second operand
+ if (opr1 == opr2) { break; }  // true on the second pass, or at once for identical operands
+ opr1 = opr2;
+ acond = ncond;
+ }
+#endif // AARCH64
+}
+
+#if defined(AARCH64) || defined(ASSERT)
+// Returns the size in bytes of a register operand, derived from its basic type.
+static int reg_size(LIR_Opr op) {
+  const BasicType kind = op->type();
+  if (kind == T_INT || kind == T_FLOAT) {
+    return BytesPerInt;
+  } else if (kind == T_LONG || kind == T_DOUBLE) {
+    return BytesPerLong;
+  } else if (kind == T_OBJECT || kind == T_ARRAY || kind == T_METADATA) {
+    return BytesPerWord;
+  }
+  // T_ADDRESS, T_ILLEGAL and every other type are not expected here.
+  ShouldNotReachHere();
+  return -1;
+}
+#endif
+// Emits code for the arithmetic LIR operation 'code' computing dest from left and right; 'info' must be NULL and pop_fpu_stack is not used here.
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) {
+ assert(info == NULL, "unused on this code path");
+ assert(dest->is_register(), "wrong items state");
+
+ if (right->is_address()) {
+ // special case for adding shifted/extended register
+ const Register res = dest->as_pointer_register();
+ const Register lreg = left->as_pointer_register();
+ const LIR_Address* addr = right->as_address_ptr();
+
+ assert(addr->base()->as_pointer_register() == lreg && addr->index()->is_register() && addr->disp() == 0, "must be");
+
+ int scale = addr->scale();
+ AsmShift shift = lsl;
+
+#ifdef AARCH64
+ bool is_index_extended = reg_size(addr->base()) > reg_size(addr->index());  // index narrower than base: the add below sign-extends it (ex_sxtw)
+ if (scale < 0) {  // a negative scale denotes a right shift by -scale
+ scale = -scale;
+ shift = lsr;
+ }
+ assert(shift == lsl || !is_index_extended, "could not have extend and right shift in one operand");
+ assert(0 <= scale && scale <= 63, "scale is too large");
+
+ if (is_index_extended) {
+ assert(scale <= 4, "scale is too large for add with extended register");
+ assert(addr->index()->is_single_cpu(), "should be");
+ assert(addr->index()->type() == T_INT, "should be");
+ assert(dest->is_double_cpu(), "should be");
+ assert(code == lir_add, "special case of add with extended register");
+
+ __ add(res, lreg, addr->index()->as_register(), ex_sxtw, scale);
+ return;
+ } else if (reg_size(dest) == BytesPerInt) {
+ assert(reg_size(addr->base()) == reg_size(addr->index()), "should be");
+ assert(reg_size(addr->base()) == reg_size(dest), "should be");
+
+ AsmOperand operand(addr->index()->as_pointer_register(), shift, scale);
+ switch (code) {
+ case lir_add: __ add_32(res, lreg, operand); break;
+ case lir_sub: __ sub_32(res, lreg, operand); break;
+ default: ShouldNotReachHere();
+ }
+ return;
+ }
+#endif // AARCH64
+ // Word-sized case: base, index and dest must all have the machine word size.
+ assert(reg_size(addr->base()) == reg_size(addr->index()), "should be");
+ assert(reg_size(addr->base()) == reg_size(dest), "should be");
+ assert(reg_size(dest) == wordSize, "should be");
+
+ AsmOperand operand(addr->index()->as_pointer_register(), shift, scale);
+ switch (code) {
+ case lir_add: __ add(res, lreg, operand); break;
+ case lir_sub: __ sub(res, lreg, operand); break;
+ default: ShouldNotReachHere();
+ }
+
+#ifndef AARCH64
+ } else if (left->is_address()) {
+ assert(code == lir_sub && right->is_single_cpu(), "special case used by strength_reduce_multiply()");
+ const LIR_Address* addr = left->as_address_ptr();
+ const Register res = dest->as_register();
+ const Register rreg = right->as_register();
+ assert(addr->base()->as_register() == rreg && addr->index()->is_register() && addr->disp() == 0, "must be");
+ __ rsb(res, rreg, AsmOperand(addr->index()->as_register(), lsl, addr->scale()));  // reverse subtract: res = (index << scale) - rreg
+#endif // !AARCH64
+
+ } else if (dest->is_single_cpu()) {
+ assert(left->is_single_cpu(), "unexpected left operand");
+#ifdef AARCH64
+ assert(dest->type() == T_INT, "unexpected dest type");
+ assert(left->type() == T_INT, "unexpected left type");
+ assert(right->type() == T_INT, "unexpected right type");
+#endif // AARCH64
+
+ const Register res = dest->as_register();
+ const Register lreg = left->as_register();
+
+ if (right->is_single_cpu()) {
+ const Register rreg = right->as_register();
+ switch (code) {
+ case lir_add: __ add_32(res, lreg, rreg); break;
+ case lir_sub: __ sub_32(res, lreg, rreg); break;
+ case lir_mul: __ mul_32(res, lreg, rreg); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ assert(right->is_constant(), "must be");
+ const jint c = right->as_constant_ptr()->as_jint();
+ if (!Assembler::is_arith_imm_in_range(c)) {  // constant is outside the range accepted for arithmetic immediates
+ BAILOUT("illegal arithmetic operand");
+ }
+ switch (code) {
+ case lir_add: __ add_32(res, lreg, c); break;
+ case lir_sub: __ sub_32(res, lreg, c); break;
+ default: ShouldNotReachHere();
+ }
+ }
+
+ } else if (dest->is_double_cpu()) {
+#ifdef AARCH64
+ assert(left->is_double_cpu() ||
+ (left->is_single_cpu() && ((left->type() == T_OBJECT) || (left->type() == T_ARRAY) || (left->type() == T_ADDRESS))),
+ "unexpected left operand");
+
+ const Register res = dest->as_register_lo();
+ const Register lreg = left->as_pointer_register();
+
+ if (right->is_constant()) {
+ assert(right->type() == T_LONG, "unexpected right type");
+ assert((right->as_constant_ptr()->as_jlong() >> 24) == 0, "out of range");
+ jint imm = (jint)right->as_constant_ptr()->as_jlong();
+ switch (code) {
+ case lir_add: __ add(res, lreg, imm); break;
+ case lir_sub: __ sub(res, lreg, imm); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ assert(right->is_double_cpu() ||
+ (right->is_single_cpu() && ((right->type() == T_OBJECT) || (right->type() == T_ARRAY) || (right->type() == T_ADDRESS))),
+ "unexpected right operand");
+ const Register rreg = right->as_pointer_register();
+ switch (code) {
+ case lir_add: __ add(res, lreg, rreg); break;
+ case lir_sub: __ sub(res, lreg, rreg); break;
+ case lir_mul: __ mul(res, lreg, rreg); break;
+ default: ShouldNotReachHere();
+ }
+ }
+#else // AARCH64
+ Register res_lo = dest->as_register_lo();
+ Register res_hi = dest->as_register_hi();
+ Register lreg_lo = left->as_register_lo();
+ Register lreg_hi = left->as_register_hi();
+ if (right->is_double_cpu()) {
+ Register rreg_lo = right->as_register_lo();
+ Register rreg_hi = right->as_register_hi();
+ if (res_lo == lreg_hi || res_lo == rreg_hi) {  // writing the low result would clobber a high input still needed by adc/sbc
+ res_lo = Rtemp;
+ }
+ switch (code) {
+ case lir_add:
+ __ adds(res_lo, lreg_lo, rreg_lo);
+ __ adc(res_hi, lreg_hi, rreg_hi);
+ break;
+ case lir_sub:
+ __ subs(res_lo, lreg_lo, rreg_lo);
+ __ sbc(res_hi, lreg_hi, rreg_hi);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ } else {
+ assert(right->is_constant(), "must be");
+ assert((right->as_constant_ptr()->as_jlong() >> 32) == 0, "out of range");
+ const jint c = (jint) right->as_constant_ptr()->as_jlong();
+ if (res_lo == lreg_hi) {  // same overlap hazard as in the register case above
+ res_lo = Rtemp;
+ }
+ switch (code) {
+ case lir_add:
+ __ adds(res_lo, lreg_lo, c);
+ __ adc(res_hi, lreg_hi, 0);
+ break;
+ case lir_sub:
+ __ subs(res_lo, lreg_lo, c);
+ __ sbc(res_hi, lreg_hi, 0);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+ move_regs(res_lo, dest->as_register_lo());  // put the low word into dest (matters when Rtemp was substituted above)
+#endif // AARCH64
+
+ } else if (dest->is_single_fpu()) {
+ assert(left->is_single_fpu(), "must be");
+ assert(right->is_single_fpu(), "must be");
+ const FloatRegister res = dest->as_float_reg();
+ const FloatRegister lreg = left->as_float_reg();
+ const FloatRegister rreg = right->as_float_reg();
+ switch (code) {
+ case lir_add: __ add_float(res, lreg, rreg); break;
+ case lir_sub: __ sub_float(res, lreg, rreg); break;
+ case lir_mul_strictfp: // fall through
+ case lir_mul: __ mul_float(res, lreg, rreg); break;
+ case lir_div_strictfp: // fall through
+ case lir_div: __ div_float(res, lreg, rreg); break;
+ default: ShouldNotReachHere();
+ }
+ } else if (dest->is_double_fpu()) {
+ assert(left->is_double_fpu(), "must be");
+ assert(right->is_double_fpu(), "must be");
+ const FloatRegister res = dest->as_double_reg();
+ const FloatRegister lreg = left->as_double_reg();
+ const FloatRegister rreg = right->as_double_reg();
+ switch (code) {
+ case lir_add: __ add_double(res, lreg, rreg); break;
+ case lir_sub: __ sub_double(res, lreg, rreg); break;
+ case lir_mul_strictfp: // fall through
+ case lir_mul: __ mul_double(res, lreg, rreg); break;
+ case lir_div_strictfp: // fall through
+ case lir_div: __ div_double(res, lreg, rreg); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
+  // Only the double-precision abs and sqrt intrinsics are emitted here.
+  if (code == lir_abs) {
+    __ abs_double(dest->as_double_reg(), value->as_double_reg());
+    return;
+  }
+  if (code == lir_sqrt) {
+    __ sqrt_double(dest->as_double_reg(), value->as_double_reg());
+    return;
+  }
+  ShouldNotReachHere();  // every other LIR code is rejected
+}
+
+// Emits code for the bitwise LIR operation 'code' (and/or/xor): dest = left op right, for single- or double-word CPU registers; right may be a constant.
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
+ assert(dest->is_register(), "wrong items state");
+ assert(left->is_register(), "wrong items state");
+
+ if (dest->is_single_cpu()) {
+#ifdef AARCH64
+ assert (dest->type() == T_INT, "unexpected result type");
+ assert (left->type() == T_INT, "unexpected left type");
+ assert (right->type() == T_INT, "unexpected right type");
+#endif // AARCH64
+
+ const Register res = dest->as_register();
+ const Register lreg = left->as_register();
+
+ if (right->is_single_cpu()) {
+ const Register rreg = right->as_register();
+ switch (code) {
+ case lir_logic_and: __ and_32(res, lreg, rreg); break;
+ case lir_logic_or: __ orr_32(res, lreg, rreg); break;
+ case lir_logic_xor: __ eor_32(res, lreg, rreg); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ assert(right->is_constant(), "must be");
+ const uint c = (uint)right->as_constant_ptr()->as_jint();
+ switch (code) {
+ case lir_logic_and: __ and_32(res, lreg, c); break;
+ case lir_logic_or: __ orr_32(res, lreg, c); break;
+ case lir_logic_xor: __ eor_32(res, lreg, c); break;
+ default: ShouldNotReachHere();
+ }
+ }
+ } else {
+ assert(dest->is_double_cpu(), "should be");
+ Register res_lo = dest->as_register_lo();
+
+#ifdef AARCH64
+ assert ((left->is_single_cpu() && left->is_oop_register()) || left->is_double_cpu(), "should be");
+ const Register lreg_lo = left->as_pointer_register();
+#else
+ assert (dest->type() == T_LONG, "unexpected result type");
+ assert (left->type() == T_LONG, "unexpected left type");
+ assert (right->type() == T_LONG, "unexpected right type");
+
+ const Register res_hi = dest->as_register_hi();
+ const Register lreg_lo = left->as_register_lo();
+ const Register lreg_hi = left->as_register_hi();
+#endif // AARCH64
+
+ if (right->is_register()) {
+#ifdef AARCH64
+ assert ((right->is_single_cpu() && right->is_oop_register()) || right->is_double_cpu(), "should be");
+ const Register rreg_lo = right->as_pointer_register();
+ switch (code) {
+ case lir_logic_and: __ andr(res_lo, lreg_lo, rreg_lo); break;
+ case lir_logic_or: __ orr (res_lo, lreg_lo, rreg_lo); break;
+ case lir_logic_xor: __ eor (res_lo, lreg_lo, rreg_lo); break;
+ default: ShouldNotReachHere();
+ }
+#else
+ const Register rreg_lo = right->as_register_lo();
+ const Register rreg_hi = right->as_register_hi();
+ if (res_lo == lreg_hi || res_lo == rreg_hi) {
+ res_lo = Rtemp; // Temp register helps to avoid overlap between result and input
+ }
+ switch (code) {
+ case lir_logic_and:
+ __ andr(res_lo, lreg_lo, rreg_lo);
+ __ andr(res_hi, lreg_hi, rreg_hi);
+ break;
+ case lir_logic_or:
+ __ orr(res_lo, lreg_lo, rreg_lo);
+ __ orr(res_hi, lreg_hi, rreg_hi);
+ break;
+ case lir_logic_xor:
+ __ eor(res_lo, lreg_lo, rreg_lo);
+ __ eor(res_hi, lreg_hi, rreg_hi);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ move_regs(res_lo, dest->as_register_lo());  // put the low word into dest (matters when Rtemp was substituted above)
+#endif // AARCH64
+ } else {
+ assert(right->is_constant(), "must be");
+#ifdef AARCH64
+ const julong c = (julong)right->as_constant_ptr()->as_jlong();
+ Assembler::LogicalImmediate imm(c, false);
+ if (imm.is_encoded()) {  // only constants encodable as a logical immediate are supported
+ switch (code) {
+ case lir_logic_and: __ andr(res_lo, lreg_lo, imm); break;
+ case lir_logic_or: __ orr (res_lo, lreg_lo, imm); break;
+ case lir_logic_xor: __ eor (res_lo, lreg_lo, imm); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ BAILOUT("64 bit constant cannot be inlined");
+ }
+#else
+ const jint c_lo = (jint) right->as_constant_ptr()->as_jlong();
+ const jint c_hi = (jint) (right->as_constant_ptr()->as_jlong() >> 32);
+ // Case for logic_or from do_ClassIDIntrinsic()
+ if (c_hi == 0 && AsmOperand::is_rotated_imm(c_lo)) {
+ switch (code) {
+ case lir_logic_and:
+ __ andr(res_lo, lreg_lo, c_lo);
+ __ mov(res_hi, 0);  // high word ANDed with c_hi == 0
+ break;
+ case lir_logic_or:
+ __ orr(res_lo, lreg_lo, c_lo);  // NOTE(review): res_hi is not written here; looks correct only if dest and left share the high register - confirm
+ break;
+ case lir_logic_xor:
+ __ eor(res_lo, lreg_lo, c_lo);  // NOTE(review): same as the or case, res_hi is left untouched - confirm
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ } else if (code == lir_logic_and &&
+ c_hi == -1 &&
+ (AsmOperand::is_rotated_imm(c_lo) ||
+ AsmOperand::is_rotated_imm(~c_lo))) {
+ // Another case which handles logic_and from do_ClassIDIntrinsic()
+ if (AsmOperand::is_rotated_imm(c_lo)) {
+ __ andr(res_lo, lreg_lo, c_lo);
+ } else {
+ __ bic(res_lo, lreg_lo, ~c_lo);  // same AND, expressed as clearing the bits of ~c_lo
+ }
+ if (res_hi != lreg_hi) {  // ANDing with c_hi == -1 keeps the high word, so it is only copied
+ __ mov(res_hi, lreg_hi);
+ }
+ } else {
+ BAILOUT("64 bit constant cannot be inlined");
+ }
+#endif // AARCH64
+ }
+ }
+}
+
+
+#ifdef AARCH64
+// Emits a compare of the double-word register opr1 with opr2 (a double-word register or a long constant).
+void LIR_Assembler::long_compare_helper(LIR_Opr opr1, LIR_Opr opr2) {
+ assert(opr1->is_double_cpu(), "should be");
+ Register x = opr1->as_register_lo();
+
+ if (opr2->is_double_cpu()) {
+ Register y = opr2->as_register_lo();
+ __ cmp(x, y);
+
+ } else {
+ assert(opr2->is_constant(), "should be");
+ assert(opr2->as_constant_ptr()->type() == T_LONG, "long constant expected");
+ jlong c = opr2->as_jlong();
+ assert(((c >> 31) == 0) || ((c >> 31) == -1), "immediate is out of range");  // c must fit in a signed 32-bit value
+ if (c >= 0) {
+ __ cmp(x, (jint)c);
+ } else {
+ __ cmn(x, (jint)(-c));  // negative constant: compare-negative with the positive magnitude -c
+ }
+ }
+}
+
+#endif // AARCH64
+// Emits a compare of opr1 with opr2; the instruction sequence depends on the operand kinds and, for 32-bit long operands, on 'condition'. 'op' is not used.
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
+ if (opr1->is_single_cpu()) {
+ if (opr2->is_constant()) {
+ switch (opr2->as_constant_ptr()->type()) {
+ case T_INT: {
+ const jint c = opr2->as_constant_ptr()->as_jint();
+ if (Assembler::is_arith_imm_in_range(c)) {
+ __ cmp_32(opr1->as_register(), c);
+ } else if (Assembler::is_arith_imm_in_range(-c)) {  // c itself is not encodable but -c is: use compare-negative
+ __ cmn_32(opr1->as_register(), -c);
+ } else {
+ // This can happen when compiling lookupswitch
+ __ mov_slow(Rtemp, c);
+ __ cmp_32(opr1->as_register(), Rtemp);
+ }
+ break;
+ }
+ case T_OBJECT:
+ assert(opr2->as_constant_ptr()->as_jobject() == NULL, "cannot handle otherwise");
+ __ cmp(opr1->as_register(), 0);  // only comparison against NULL is supported for object constants
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ } else if (opr2->is_single_cpu()) {
+ if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY || opr1->type() == T_METADATA || opr1->type() == T_ADDRESS) {
+ assert(opr2->type() == T_OBJECT || opr2->type() == T_ARRAY || opr2->type() == T_METADATA || opr2->type() == T_ADDRESS, "incompatibe type");
+ __ cmp(opr1->as_register(), opr2->as_register());  // pointer-like types use the full-width compare
+ } else {
+ assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY && opr2->type() != T_METADATA && opr2->type() != T_ADDRESS, "incompatibe type");
+ __ cmp_32(opr1->as_register(), opr2->as_register());
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+ } else if (opr1->is_double_cpu()) {
+#ifdef AARCH64
+ long_compare_helper(opr1, opr2);
+#else
+ Register xlo = opr1->as_register_lo();
+ Register xhi = opr1->as_register_hi();
+ if (opr2->is_constant() && opr2->as_jlong() == 0) {
+ assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "cannot handle otherwise");
+ __ orrs(Rtemp, xlo, xhi);  // the OR of both halves is zero exactly when the long is zero
+ } else if (opr2->is_register()) {
+ Register ylo = opr2->as_register_lo();
+ Register yhi = opr2->as_register_hi();
+ if (condition == lir_cond_equal || condition == lir_cond_notEqual) {
+ __ teq(xhi, yhi);
+ __ teq(xlo, ylo, eq);  // low words are tested only if the high words were equal
+ } else {
+ __ subs(xlo, xlo, ylo);  // NOTE: the 64-bit subtraction writes its result back into opr1's registers
+ __ sbcs(xhi, xhi, yhi);
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+#endif // AARCH64
+ } else if (opr1->is_single_fpu()) {
+ if (opr2->is_constant()) {
+ assert(opr2->as_jfloat() == 0.0f, "cannot handle otherwise");
+ __ cmp_zero_float(opr1->as_float_reg());
+ } else {
+ __ cmp_float(opr1->as_float_reg(), opr2->as_float_reg());
+ }
+ } else if (opr1->is_double_fpu()) {
+ if (opr2->is_constant()) {
+ assert(opr2->as_jdouble() == 0.0, "cannot handle otherwise");
+ __ cmp_zero_double(opr1->as_double_reg());
+ } else {
+ __ cmp_double(opr1->as_double_reg(), opr2->as_double_reg());
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+}
+// Emits a three-way compare of left and right into dst (1, 0 or -1) for lir_cmp_fd2i / lir_ucmp_fd2i (via comp_op) and lir_cmp_l2i (long operands).
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
+ const Register res = dst->as_register();
+ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
+ comp_op(lir_cond_unknown, left, right, op);
+#ifdef AARCH64
+ if (code == lir_ucmp_fd2i) { // unordered is less
+ __ cset(res, gt); // 1 if '>', else 0
+ __ csinv(res, res, ZR, ge); // previous value if '>=', else -1
+ } else {
+ __ cset(res, hi); // 1 if '>' or unordered, else 0
+ __ csinv(res, res, ZR, pl); // previous value if '>=' or unordered, else -1
+ }
+#else
+ __ fmstat();  // presumably transfers the FP compare flags to the integer condition flags - confirm
+ if (code == lir_ucmp_fd2i) { // unordered is less
+ __ mvn(res, 0, lt);
+ __ mov(res, 1, ge);
+ } else { // unordered is greater
+ __ mov(res, 1, cs);
+ __ mvn(res, 0, cc);
+ }
+ __ mov(res, 0, eq);  // equality overrides the 1 written above
+#endif // AARCH64
+
+ } else {
+ assert(code == lir_cmp_l2i, "must be");
+
+#ifdef AARCH64
+ long_compare_helper(left, right);
+
+ __ cset(res, gt); // 1 if '>', else 0
+ __ csinv(res, res, ZR, ge); // previous value if '>=', else -1
+#else
+ Label done;
+ const Register xlo = left->as_register_lo();
+ const Register xhi = left->as_register_hi();
+ const Register ylo = right->as_register_lo();
+ const Register yhi = right->as_register_hi();
+ __ cmp(xhi, yhi);  // signed compare of the high words decides unless they are equal
+ __ mov(res, 1, gt);
+ __ mvn(res, 0, lt);
+ __ b(done, ne);
+ __ subs(res, xlo, ylo);  // high words equal: res becomes 0 when the low words match too
+ __ mov(res, 1, hi);  // otherwise the low words are ordered as unsigned values
+ __ mvn(res, 0, lo);
+ __ bind(done);
+#endif // AARCH64
+ }
+}
+
+// Call-site alignment hook; this port emits nothing here.
+void LIR_Assembler::align_call(LIR_Code code) {
+ // Not needed
+}
+
+// Emits a patchable call to op->addr() with relocation type rtype, then registers op->info() for the current code position.
+void LIR_Assembler::call(LIR_OpJavaCall *op, relocInfo::relocType rtype) {
+ int ret_addr_offset = __ patchable_call(op->addr(), rtype);
+ assert(ret_addr_offset == __ offset(), "embedded return address not allowed");  // the call must return to the position right after the emitted sequence
+ add_call_info_here(op->info());
+}
+
+// Emits an inline-cache call: Ricklass is loaded with Universe::non_oop_word() and control is transferred to op->addr().
+void LIR_Assembler::ic_call(LIR_OpJavaCall *op) {
+ bool near_range = __ cache_fully_reachable();
+ address oop_address = pc();  // position of the Ricklass load, passed to the virtual_call relocation below
+
+ bool use_movw = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw());
+
+ // Ricklass may contain something that is not a metadata pointer so
+ // mov_metadata can't be used
+ InlinedAddress value((address)Universe::non_oop_word());
+ InlinedAddress addr(op->addr());
+ if (use_movw) {
+#ifdef AARCH64
+ ShouldNotReachHere();
+#else
+ __ movw(Ricklass, ((unsigned int)Universe::non_oop_word()) & 0xffff);
+ __ movt(Ricklass, ((unsigned int)Universe::non_oop_word()) >> 16);
+#endif // AARCH64
+ } else {
+ // No movw/movt: the value must be loaded pc-relative, but there is no
+ // relocation and hence no metadata table to load it from.
+ // Use a b instruction rather than a bl, inline the constant after the
+ // branch, load it with a PC-relative ldr, and arrange for
+ // the call to return after the constant(s).
+ __ ldr_literal(Ricklass, value);
+ }
+ __ relocate(virtual_call_Relocation::spec(oop_address));
+ if (near_range && use_movw) {
+ __ bl(op->addr());
+ } else {
+ Label call_return;
+ __ adr(LR, call_return);  // literals are emitted after the jump, so LR is set explicitly to the position past them
+ if (near_range) {
+ __ b(op->addr());
+ } else {
+ __ indirect_jump(addr, Rtemp);
+ __ bind_literal(addr);
+ }
+ if (!use_movw) {
+ __ bind_literal(value);
+ }
+ __ bind(call_return);
+ }
+ add_call_info(code_offset(), op->info());
+}
+
+
+/* Currently, vtable-dispatch is only enabled for sparc platforms */
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
+ ShouldNotReachHere();  // reaching this function on this port is treated as an error
+}
+// Emits the stub for a static call: a load of Rmethod from an inlined metadata literal (initially NULL) followed by a jump with a placeholder destination.
+void LIR_Assembler::emit_static_call_stub() {
+ address call_pc = __ pc();  // captured before the stub is started; passed to the static_stub relocation below
+ address stub = __ start_a_stub(call_stub_size());
+ if (stub == NULL) {
+ BAILOUT("static call stub overflow");
+ }
+
+ DEBUG_ONLY(int offset = code_offset();)
+
+ InlinedMetadata metadata_literal(NULL);
+ __ relocate(static_stub_Relocation::spec(call_pc));
+ // If not a single instruction, NativeMovConstReg::next_instruction_address()
+ // must jump over the whole following ldr_literal.
+ // (See CompiledStaticCall::set_to_interpreted())
+#ifdef ASSERT
+ address ldr_site = __ pc();
+#endif
+ __ ldr_literal(Rmethod, metadata_literal);
+ assert(nativeMovConstReg_at(ldr_site)->next_instruction_address() == __ pc(), "Fix ldr_literal or its parsing");
+ bool near_range = __ cache_fully_reachable();
+ InlinedAddress dest((address)-1);  // placeholder destination, used only by the indirect jump
+ if (near_range) {
+ address branch_site = __ pc();
+ __ b(branch_site); // b to self maps to special NativeJump -1 destination
+ } else {
+ __ indirect_jump(dest, Rtemp);
+ }
+ __ bind_literal(metadata_literal); // includes spec_for_immediate reloc
+ if (!near_range) {
+ __ bind_literal(dest); // special NativeJump -1 destination
+ }
+
+ assert(code_offset() - offset <= call_stub_size(), "overflow");
+ __ end_a_stub();
+}
+// Emits a call to the Runtime1 exception handling stub, with Rexception_pc set to the address right after the call.
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+ assert(exceptionOop->as_register() == Rexception_obj, "must match");
+ assert(exceptionPC->as_register() == Rexception_pc, "must match");
+ info->add_register_oop(exceptionOop);  // presumably marks the exception register as holding an oop in the call's debug info - confirm
+
+ Runtime1::StubID handle_id = compilation()->has_fpu_code() ?
+ Runtime1::handle_exception_id :
+ Runtime1::handle_exception_nofpu_id;  // the nofpu handler is chosen when the compilation has no FPU code
+ Label return_address;
+ __ adr(Rexception_pc, return_address);
+ __ call(Runtime1::entry_for(handle_id), relocInfo::runtime_call_type);
+ __ bind(return_address);
+ add_call_info_here(info); // for exception handler
+}
+// Emits a branch to the unwind handler entry; the exception oop is required to be in Rexception_obj.
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+ assert(exceptionOop->as_register() == Rexception_obj, "must match");
+ __ b(_unwind_handler_entry);
+}
+// Emits a shift (lir_shl / lir_shr / lir_ushr) of left by the amount held in register count, storing the result in dest; 'tmp' is not used.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+#ifdef AARCH64
+ if (dest->is_single_cpu()) {
+ Register res = dest->as_register();
+ Register x = left->as_register();
+ Register y = count->as_register();
+ assert (dest->type() == T_INT, "unexpected result type");
+ assert (left->type() == T_INT, "unexpected left type");
+
+ switch (code) {
+ case lir_shl: __ lslv_w(res, x, y); break;
+ case lir_shr: __ asrv_w(res, x, y); break;
+ case lir_ushr: __ lsrv_w(res, x, y); break;
+ default: ShouldNotReachHere();
+ }
+ } else if (dest->is_double_cpu()) {
+ Register res = dest->as_register_lo();
+ Register x = left->as_register_lo();
+ Register y = count->as_register();
+
+ switch (code) {
+ case lir_shl: __ lslv(res, x, y); break;
+ case lir_shr: __ asrv(res, x, y); break;
+ case lir_ushr: __ lsrv(res, x, y); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+#else
+ AsmShift shift = lsl;
+ switch (code) {
+ case lir_shl: shift = lsl; break;
+ case lir_shr: shift = asr; break;
+ case lir_ushr: shift = lsr; break;
+ default: ShouldNotReachHere();
+ }
+
+ if (dest->is_single_cpu()) {
+ __ andr(Rtemp, count->as_register(), 31);  // only the low 5 bits of the count are used for single-word shifts
+ __ mov(dest->as_register(), AsmOperand(left->as_register(), shift, Rtemp));
+ } else if (dest->is_double_cpu()) {
+ Register dest_lo = dest->as_register_lo();
+ Register dest_hi = dest->as_register_hi();
+ Register src_lo = left->as_register_lo();
+ Register src_hi = left->as_register_hi();
+ Register Rcount = count->as_register();
+ // Resolve possible register conflicts
+ if (shift == lsl && dest_hi == src_lo) {
+ dest_hi = Rtemp;
+ } else if (shift != lsl && dest_lo == src_hi) {
+ dest_lo = Rtemp;
+ } else if (dest_lo == src_lo && dest_hi == src_hi) {
+ dest_lo = Rtemp;
+ } else if (dest_lo == Rcount || dest_hi == Rcount) {
+ Rcount = Rtemp;
+ }
+ __ andr(Rcount, count->as_register(), 63);  // low 6 bits only; NOTE: unless Rcount was redirected to Rtemp this masks the count register in place
+ __ long_shift(dest_lo, dest_hi, src_lo, src_hi, shift, Rcount);
+ move_regs(dest_lo, dest->as_register_lo());  // copy back whichever half was computed in Rtemp
+ move_regs(dest_hi, dest->as_register_hi());
+ } else {
+ ShouldNotReachHere();
+ }
+#endif // AARCH64
+}
+
+// Emits a shift of left by the constant count (masked with 31 for single-word, 63 for double-word operands); a zero count becomes a plain move.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+#ifdef AARCH64
+ if (dest->is_single_cpu()) {
+ assert (dest->type() == T_INT, "unexpected result type");
+ assert (left->type() == T_INT, "unexpected left type");
+ count &= 31;
+ if (count != 0) {
+ switch (code) {
+ case lir_shl: __ _lsl_w(dest->as_register(), left->as_register(), count); break;
+ case lir_shr: __ _asr_w(dest->as_register(), left->as_register(), count); break;
+ case lir_ushr: __ _lsr_w(dest->as_register(), left->as_register(), count); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ move_regs(left->as_register(), dest->as_register());
+ }
+ } else if (dest->is_double_cpu()) {
+ count &= 63;
+ if (count != 0) {
+ switch (code) {
+ case lir_shl: __ _lsl(dest->as_register_lo(), left->as_register_lo(), count); break;
+ case lir_shr: __ _asr(dest->as_register_lo(), left->as_register_lo(), count); break;
+ case lir_ushr: __ _lsr(dest->as_register_lo(), left->as_register_lo(), count); break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ move_regs(left->as_register_lo(), dest->as_register_lo());
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+
+#else
+ AsmShift shift = lsl;
+ switch (code) {
+ case lir_shl: shift = lsl; break;
+ case lir_shr: shift = asr; break;
+ case lir_ushr: shift = lsr; break;
+ default: ShouldNotReachHere();
+ }
+
+ if (dest->is_single_cpu()) {
+ count &= 31;
+ if (count != 0) {
+ __ mov(dest->as_register(), AsmOperand(left->as_register(), shift, count));
+ } else {
+ move_regs(left->as_register(), dest->as_register());
+ }
+ } else if (dest->is_double_cpu()) {
+ count &= 63;
+ if (count != 0) {
+ Register dest_lo = dest->as_register_lo();
+ Register dest_hi = dest->as_register_hi();
+ Register src_lo = left->as_register_lo();
+ Register src_hi = left->as_register_hi();
+ // Resolve possible register conflicts
+ if (shift == lsl && dest_hi == src_lo) {
+ dest_hi = Rtemp;
+ } else if (shift != lsl && dest_lo == src_hi) {
+ dest_lo = Rtemp;
+ }
+ __ long_shift(dest_lo, dest_hi, src_lo, src_hi, shift, count);
+ move_regs(dest_lo, dest->as_register_lo());  // copy back whichever half was computed in Rtemp
+ move_regs(dest_hi, dest->as_register_hi());
+ } else {
+ __ long_move(dest->as_register_lo(), dest->as_register_hi(),
+ left->as_register_lo(), left->as_register_hi());
+ }
+ } else {
+ ShouldNotReachHere();
+ }
+#endif // AARCH64
+}
+
+
+// Saves the 4 given registers in the reserved argument area, i.e. the four words starting at SP.
+void LIR_Assembler::save_in_reserved_area(Register r1, Register r2, Register r3, Register r4) {
+ verify_reserved_argument_area_size(4);  // presumably checks that the area can hold 4 words - confirm
+#ifdef AARCH64
+ __ stp(r1, r2, Address(SP, 0));
+ __ stp(r3, r4, Address(SP, 2*wordSize));
+#else
+ __ stmia(SP, RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3) | RegisterSet(r4));
+#endif // AARCH64
+}
+
+// Restores the 4 given registers from the reserved argument area (the four words starting at SP), mirroring save_in_reserved_area().
+void LIR_Assembler::restore_from_reserved_area(Register r1, Register r2, Register r3, Register r4) {
+#ifdef AARCH64
+ __ ldp(r1, r2, Address(SP, 0));
+ __ ldp(r3, r4, Address(SP, 2*wordSize));
+#else
+ __ ldmia(SP, RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3) | RegisterSet(r4), no_writeback);
+#endif // AARCH64
+}
+
+
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+ ciArrayKlass* default_type = op->expected_type();
+ Register src = op->src()->as_register();
+ Register src_pos = op->src_pos()->as_register();
+ Register dst = op->dst()->as_register();
+ Register dst_pos = op->dst_pos()->as_register();
+ Register length = op->length()->as_register();
+ Register tmp = op->tmp()->as_register();
+ Register tmp2 = Rtemp;
+
+ assert(src == R0 && src_pos == R1 && dst == R2 && dst_pos == R3, "code assumption");
+#ifdef AARCH64
+ assert(length == R4, "code assumption");
+#endif // AARCH64
+
+ CodeStub* stub = op->stub();
+
+ int flags = op->flags();
+ BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+ if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+
+ // If we don't know anything or it's an object array, just go through the generic arraycopy
+ if (default_type == NULL) {
+
+ // save arguments, because they will be killed by a runtime call
+ save_in_reserved_area(R0, R1, R2, R3);
+
+#ifdef AARCH64
+ // save length argument, will be killed by a runtime call
+ __ raw_push(length, ZR);
+#else
+ // pass length argument on SP[0]
+ __ str(length, Address(SP, -2*wordSize, pre_indexed)); // 2 words for a proper stack alignment
+#endif // AARCH64
+
+ address copyfunc_addr = StubRoutines::generic_arraycopy();
+ if (copyfunc_addr == NULL) { // Use C version if stub was not generated
+ __ call(CAST_FROM_FN_PTR(address, Runtime1::arraycopy));
+ } else {
+#ifndef PRODUCT
+ if (PrintC1Statistics) {
+ __ inc_counter((address)&Runtime1::_generic_arraycopystub_cnt, tmp, tmp2);
+ }
+#endif // !PRODUCT
+ // the stub is in the code cache so close enough
+ __ call(copyfunc_addr, relocInfo::runtime_call_type);
+ }
+
+#ifdef AARCH64
+ __ raw_pop(length, ZR);
+#else
+ __ add(SP, SP, 2*wordSize);
+#endif // AARCH64
+
+ __ cbz_32(R0, *stub->continuation());
+
+ if (copyfunc_addr != NULL) {
+ __ mvn_32(tmp, R0);
+ restore_from_reserved_area(R0, R1, R2, R3); // load saved arguments in slow case only
+ __ sub_32(length, length, tmp);
+ __ add_32(src_pos, src_pos, tmp);
+ __ add_32(dst_pos, dst_pos, tmp);
+ } else {
+ restore_from_reserved_area(R0, R1, R2, R3); // load saved arguments in slow case only
+ }
+
+ __ b(*stub->entry());
+
+ __ bind(*stub->continuation());
+ return;
+ }
+
+ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(),
+ "must be true at this point");
+ int elem_size = type2aelembytes(basic_type);
+ int shift = exact_log2(elem_size);
+
+ // Check for NULL
+ if (flags & LIR_OpArrayCopy::src_null_check) {
+ if (flags & LIR_OpArrayCopy::dst_null_check) {
+ __ cmp(src, 0);
+ __ cond_cmp(dst, 0, ne); // make one instruction shorter if both checks are needed
+ __ b(*stub->entry(), eq);
+ } else {
+ __ cbz(src, *stub->entry());
+ }
+ } else if (flags & LIR_OpArrayCopy::dst_null_check) {
+ __ cbz(dst, *stub->entry());
+ }
+
+ // If the compiler was not able to prove that exact type of the source or the destination
+ // of the arraycopy is an array type, check at runtime if the source or the destination is
+ // an instance type.
+ if (flags & LIR_OpArrayCopy::type_check) {
+ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) {
+ __ load_klass(tmp, dst);
+ __ ldr_u32(tmp2, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+ __ mov_slow(tmp, Klass::_lh_neutral_value);
+ __ cmp_32(tmp2, tmp);
+ __ b(*stub->entry(), ge);
+ }
+
+ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) {
+ __ load_klass(tmp, src);
+ __ ldr_u32(tmp2, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+ __ mov_slow(tmp, Klass::_lh_neutral_value);
+ __ cmp_32(tmp2, tmp);
+ __ b(*stub->entry(), ge);
+ }
+ }
+
+ // Check if negative
+ const int all_positive_checks = LIR_OpArrayCopy::src_pos_positive_check |
+ LIR_OpArrayCopy::dst_pos_positive_check |
+ LIR_OpArrayCopy::length_positive_check;
+ switch (flags & all_positive_checks) {
+ case LIR_OpArrayCopy::src_pos_positive_check:
+ __ branch_if_negative_32(src_pos, *stub->entry());
+ break;
+ case LIR_OpArrayCopy::dst_pos_positive_check:
+ __ branch_if_negative_32(dst_pos, *stub->entry());
+ break;
+ case LIR_OpArrayCopy::length_positive_check:
+ __ branch_if_negative_32(length, *stub->entry());
+ break;
+ case LIR_OpArrayCopy::src_pos_positive_check | LIR_OpArrayCopy::dst_pos_positive_check:
+ __ branch_if_any_negative_32(src_pos, dst_pos, tmp, *stub->entry());
+ break;
+ case LIR_OpArrayCopy::src_pos_positive_check | LIR_OpArrayCopy::length_positive_check:
+ __ branch_if_any_negative_32(src_pos, length, tmp, *stub->entry());
+ break;
+ case LIR_OpArrayCopy::dst_pos_positive_check | LIR_OpArrayCopy::length_positive_check:
+ __ branch_if_any_negative_32(dst_pos, length, tmp, *stub->entry());
+ break;
+ case all_positive_checks:
+ __ branch_if_any_negative_32(src_pos, dst_pos, length, tmp, *stub->entry());
+ break;
+ default:
+ assert((flags & all_positive_checks) == 0, "the last option");
+ }
+
+ // Range checks
+ if (flags & LIR_OpArrayCopy::src_range_check) {
+ __ ldr_s32(tmp2, Address(src, arrayOopDesc::length_offset_in_bytes()));
+ __ add_32(tmp, src_pos, length);
+ __ cmp_32(tmp, tmp2);
+ __ b(*stub->entry(), hi);
+ }
+ if (flags & LIR_OpArrayCopy::dst_range_check) {
+ __ ldr_s32(tmp2, Address(dst, arrayOopDesc::length_offset_in_bytes()));
+ __ add_32(tmp, dst_pos, length);
+ __ cmp_32(tmp, tmp2);
+ __ b(*stub->entry(), hi);
+ }
+
+ // Check if src and dst are of the same type
+ if (flags & LIR_OpArrayCopy::type_check) {
+ // We don't know the array types are compatible
+ if (basic_type != T_OBJECT) {
+ // Simple test for basic type arrays
+ if (UseCompressedClassPointers) {
+ // We don't need decode because we just need to compare
+ __ ldr_u32(tmp, Address(src, oopDesc::klass_offset_in_bytes()));
+ __ ldr_u32(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
+ __ cmp_32(tmp, tmp2);
+ } else {
+ __ load_klass(tmp, src);
+ __ load_klass(tmp2, dst);
+ __ cmp(tmp, tmp2);
+ }
+ __ b(*stub->entry(), ne);
+ } else {
+ // For object arrays, if src is a sub class of dst then we can
+ // safely do the copy.
+ Label cont, slow;
+
+ address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+
+ __ load_klass(tmp, src);
+ __ load_klass(tmp2, dst);
+
+ // We are at a call so all live registers are saved before we
+ // get here
+ assert_different_registers(tmp, tmp2, R6, altFP_7_11);
+
+ __ check_klass_subtype_fast_path(tmp, tmp2, R6, altFP_7_11, &cont, copyfunc_addr == NULL ? stub->entry() : &slow, NULL);
+
+ __ mov(R6, R0);
+ __ mov(altFP_7_11, R1);
+ __ mov(R0, tmp);
+ __ mov(R1, tmp2);
+ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); // does not blow any registers except R0, LR and Rtemp
+ __ cmp_32(R0, 0);
+ __ mov(R0, R6);
+ __ mov(R1, altFP_7_11);
+
+ if (copyfunc_addr != NULL) { // use stub if available
+ // src is not a sub class of dst so we have to do a
+ // per-element check.
+
+ __ b(cont, ne);
+
+ __ bind(slow);
+
+ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
+ if ((flags & mask) != mask) {
+ // Check that both of them are object arrays.
+ assert(flags & mask, "one of the two should be known to be an object array");
+
+ if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+ __ load_klass(tmp, src);
+ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+ __ load_klass(tmp, dst);
+ }
+ int lh_offset = in_bytes(Klass::layout_helper_offset());
+
+ __ ldr_u32(tmp2, Address(tmp, lh_offset));
+
+ jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+ __ mov_slow(tmp, objArray_lh);
+ __ cmp_32(tmp, tmp2);
+ __ b(*stub->entry(), ne);
+ }
+
+ save_in_reserved_area(R0, R1, R2, R3);
+
+ Register src_ptr = R0;
+ Register dst_ptr = R1;
+ Register len = R2;
+ Register chk_off = R3;
+ Register super_k = AARCH64_ONLY(R4) NOT_AARCH64(tmp);
+
+ __ add(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type));
+ __ add_ptr_scaled_int32(src_ptr, src_ptr, src_pos, shift);
+
+ __ add(dst_ptr, dst, arrayOopDesc::base_offset_in_bytes(basic_type));
+ __ add_ptr_scaled_int32(dst_ptr, dst_ptr, dst_pos, shift);
+ __ load_klass(tmp, dst);
+
+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
+
+#ifdef AARCH64
+ __ raw_push(length, ZR); // Preserve length around *copyfunc_addr call
+
+ __ mov(len, length);
+ __ ldr(super_k, Address(tmp, ek_offset)); // super_k == R4 == length, so this load cannot be performed earlier
+ // TODO-AARCH64: check whether it is faster to load super klass early by using tmp and additional mov.
+ __ ldr_u32(chk_off, Address(super_k, sco_offset));
+#else // AARCH64
+ __ ldr(super_k, Address(tmp, ek_offset));
+
+ __ mov(len, length);
+ __ ldr_u32(chk_off, Address(super_k, sco_offset));
+ __ push(super_k);
+#endif // AARCH64
+
+ __ call(copyfunc_addr, relocInfo::runtime_call_type);
+
+#ifndef PRODUCT
+ if (PrintC1Statistics) {
+ Label failed;
+ __ cbnz_32(R0, failed);
+ __ inc_counter((address)&Runtime1::_arraycopy_checkcast_cnt, tmp, tmp2);
+ __ bind(failed);
+ }
+#endif // PRODUCT
+
+#ifdef AARCH64
+ __ raw_pop(length, ZR);
+#else
+ __ add(SP, SP, wordSize); // Drop super_k argument
+#endif // AARCH64
+
+ __ cbz_32(R0, *stub->continuation());
+ __ mvn_32(tmp, R0);
+
+ // load saved arguments in slow case only
+ restore_from_reserved_area(R0, R1, R2, R3);
+
+ __ sub_32(length, length, tmp);
+ __ add_32(src_pos, src_pos, tmp);
+ __ add_32(dst_pos, dst_pos, tmp);
+
+#ifndef PRODUCT
+ if (PrintC1Statistics) {
+ __ inc_counter((address)&Runtime1::_arraycopy_checkcast_attempt_cnt, tmp, tmp2);
+ }
+#endif
+
+ __ b(*stub->entry());
+
+ __ bind(cont);
+ } else {
+ __ b(*stub->entry(), eq);
+ __ bind(cont);
+ }
+ }
+ }
+
+#ifndef PRODUCT
+ if (PrintC1Statistics) {
+ address counter = Runtime1::arraycopy_count_address(basic_type);
+ __ inc_counter(counter, tmp, tmp2);
+ }
+#endif // !PRODUCT
+
+ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
+ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
+ const char *name;
+ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
+
+ Register src_ptr = R0;
+ Register dst_ptr = R1;
+ Register len = R2;
+
+ __ add(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type));
+ __ add_ptr_scaled_int32(src_ptr, src_ptr, src_pos, shift);
+
+ __ add(dst_ptr, dst, arrayOopDesc::base_offset_in_bytes(basic_type));
+ __ add_ptr_scaled_int32(dst_ptr, dst_ptr, dst_pos, shift);
+
+ __ mov(len, length);
+
+ __ call(entry, relocInfo::runtime_call_type);
+
+ __ bind(*stub->continuation());
+}
+
+#ifdef ASSERT
+// Emit a run-time assertion: compare the operands, branch over the failure path when the condition holds, else stop or trap.
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+  assert(op->code() == lir_assert, "must be");
+
+#ifdef AARCH64
+  __ NOT_IMPLEMENTED();
+#else
+  const LIR_Condition cond = op->condition();
+  if (!op->in_opr1()->is_valid()) {
+    assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
+    assert(cond == lir_cond_always, "no other conditions allowed");
+  } else {
+    assert(op->in_opr2()->is_valid(), "both operands must be valid");
+    comp_op(cond, op->in_opr1(), op->in_opr2(), op);
+  }
+  Label assertion_holds;
+  if (cond != lir_cond_always) {
+    AsmCondition branch_cond;
+    switch (cond) {
+      case lir_cond_equal:        branch_cond = eq; break;
+      case lir_cond_notEqual:     branch_cond = ne; break;
+      case lir_cond_less:         branch_cond = lt; break;
+      case lir_cond_lessEqual:    branch_cond = le; break;
+      case lir_cond_greaterEqual: branch_cond = ge; break;
+      case lir_cond_greater:      branch_cond = gt; break;
+      case lir_cond_aboveEqual:   branch_cond = hs; break;
+      case lir_cond_belowEqual:   branch_cond = ls; break;
+      default:                    ShouldNotReachHere();
+    }
+    __ b(assertion_holds, branch_cond);
+  }
+  if (!op->halt()) {
+    breakpoint();
+  } else {
+    const char* halt_msg = __ code_string(op->msg());
+    __ stop(halt_msg);
+  }
+  __ bind(assertion_holds);
+#endif // AARCH64
+}
+#endif // ASSERT
+
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+ fatal("CRC32 intrinsic is not implemented on this platform"); // unconditional: no code is emitted for this op
+}
+
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+  // Emits the inline lock/unlock sequence for 'op'; the stub entry is the slow path
+  // and the stub continuation is bound right after the inline code.
+  const Register obj_reg  = op->obj_opr()->as_pointer_register();
+  const Register hdr_reg  = op->hdr_opr()->as_pointer_register();
+  const Register lock_reg = op->lock_opr()->as_pointer_register();
+  const Register scratch  = op->scratch_opr()->is_illegal() ? noreg : op->scratch_opr()->as_pointer_register();
+  Label& slow_case = *op->stub()->entry();
+  if (!UseFastLocking) {
+    __ b(slow_case);  // fast locking disabled: unconditionally branch to the stub
+  } else if (op->code() == lir_unlock) {
+    __ unlock_object(hdr_reg, obj_reg, lock_reg, scratch, slow_case);
+  } else if (op->code() == lir_lock) {
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    const int null_check_offset = __ lock_object(hdr_reg, obj_reg, lock_reg, scratch, slow_case);
+    if (op->info() != NULL) add_debug_info_for_null_check(null_check_offset, op->info());
+  } else {
+    ShouldNotReachHere();
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+// Emits MethodData updates for a profiled call: the CounterData count, or for virtual/interface calls the per-receiver rows.
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+ ciMethod* method = op->profiled_method();
+ int bci = op->profiled_bci();
+ ciMethod* callee = op->profiled_callee();
+
+ // Update counter for all call types
+ ciMethodData* md = method->method_data_or_null();
+ assert(md != NULL, "Sanity");
+ ciProfileData* data = md->bci_to_data(bci);
+ assert(data->is_CounterData(), "need CounterData for calls");
+ assert(op->mdo()->is_single_cpu(), "mdo must be allocated");
+ Register mdo = op->mdo()->as_register();
+ assert(op->tmp1()->is_register(), "tmp1 must be allocated");
+ Register tmp1 = op->tmp1()->as_pointer_register();
+ assert_different_registers(mdo, tmp1);
+ __ mov_metadata(mdo, md->constant_encoding());
+ int mdo_offset_bias = 0; // amount added to the mdo register below; subtracted from every slot offset used afterwards
+ int max_offset = AARCH64_ONLY(4096 << LogBytesPerWord) NOT_AARCH64(4096); // threshold above which the mdo base is biased
+ if (md->byte_offset_of_slot(data, CounterData::count_offset()) + data->size_in_bytes() >= max_offset) {
+ // The offset is large so bias the mdo by the base of the slot so
+ // that the ldr can use an immediate offset to reference the slots of the data
+ mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset());
+ __ mov_slow(tmp1, mdo_offset_bias);
+ __ add(mdo, mdo, tmp1);
+ }
+
+ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
+ Bytecodes::Code bc = method->java_code_at_bci(bci);
+ const bool callee_is_static = callee->is_loaded() && callee->is_static();
+ // Perform additional virtual call profiling for invokevirtual and
+ // invokeinterface bytecodes
+ if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+ !callee_is_static && // required for optimized MH invokes
+ C1ProfileVirtualCalls) {
+
+ assert(op->recv()->is_single_cpu(), "recv must be allocated");
+ Register recv = op->recv()->as_register();
+ assert_different_registers(mdo, tmp1, recv);
+ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+ ciKlass* known_klass = op->known_holder();
+ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+ // We know the type that will be seen at this call site; we can
+ // statically update the MethodData* rather than needing to do
+ // dynamic tests on the receiver type
+
+ // NOTE: we should probably put a lock around this search to
+ // avoid collisions by concurrent compilations
+ ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+ uint i;
+ for (i = 0; i < VirtualCallData::row_limit(); i++) {
+ ciKlass* receiver = vc_data->receiver(i);
+ if (known_klass->equals(receiver)) {
+ // Row i already records known_klass: emit only the increment of that row's counter.
+ Address data_addr(mdo, md->byte_offset_of_slot(data,
+ VirtualCallData::receiver_count_offset(i)) -
+ mdo_offset_bias);
+ __ ldr(tmp1, data_addr);
+ __ add(tmp1, tmp1, DataLayout::counter_increment);
+ __ str(tmp1, data_addr);
+ return;
+ }
+ }
+ // Receiver type not found in profile data; select an empty slot
+
+ // Note that this is less efficient than it should be because it
+ // always does a write to the receiver part of the
+ // VirtualCallData rather than just the first time
+ for (i = 0; i < VirtualCallData::row_limit(); i++) {
+ ciKlass* receiver = vc_data->receiver(i);
+ if (receiver == NULL) {
+ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) -
+ mdo_offset_bias);
+ __ mov_metadata(tmp1, known_klass->constant_encoding());
+ __ str(tmp1, recv_addr);
+ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) -
+ mdo_offset_bias);
+ __ ldr(tmp1, data_addr);
+ __ add(tmp1, tmp1, DataLayout::counter_increment);
+ __ str(tmp1, data_addr);
+ return;
+ }
+ } // if no row matched and none was empty, no counter update is emitted on this path
+ } else {
+ __ load_klass(recv, recv); // recv now holds the receiver's klass instead of the receiver itself
+ Label update_done;
+ type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
+ // Receiver did not match any saved receiver and there is no empty row for it.
+ // Increment total counter to indicate polymorphic case.
+ __ ldr(tmp1, counter_addr);
+ __ add(tmp1, tmp1, DataLayout::counter_increment);
+ __ str(tmp1, counter_addr);
+
+ __ bind(update_done);
+ }
+ } else {
+ // Static call
+ __ ldr(tmp1, counter_addr);
+ __ add(tmp1, tmp1, DataLayout::counter_increment);
+ __ str(tmp1, counter_addr);
+ }
+}
+
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
+ fatal("Type profiling not implemented on this platform"); // unconditional: no code is emitted for this op
+}
+
+void LIR_Assembler::emit_delay(LIR_OpDelay*) {
+ Unimplemented(); // the operand is ignored; this op is not handled here
+}
+
+// Computes base + displacement of the frame map's monitor lock address for 'monitor_no' into dst.
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
+ Address mon_addr = frame_map()->address_for_monitor_lock(monitor_no);
+ __ add_slow(dst->as_pointer_register(), mon_addr.base(), mon_addr.disp());
+}
+
+// Pads the code so that the next instruction, a backward branch target, is 8-byte aligned.
+void LIR_Assembler::align_backward_branch_target() {
+ // TODO-AARCH64 review it
+ // Some ARM processors do better with 8-byte branch target alignment
+ __ align(8);
+}
+
+
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+  if (left->is_single_cpu()) {
+    assert (dest->type() == T_INT, "unexpected result type");
+    assert (left->type() == T_INT, "unexpected left type");
+    __ neg_32(dest->as_register(), left->as_register());
+    return;
+  }
+  if (left->is_double_cpu()) {
+#ifdef AARCH64
+    __ neg(dest->as_register_lo(), left->as_register_lo());
+#else
+    const Register out_lo = dest->as_register_lo();
+    const Register out_hi = dest->as_register_hi();
+    const Register in_lo  = left->as_register_lo();
+    const Register in_hi  = left->as_register_hi();
+    // The low result is written first, so it must not land in in_hi before rsc has read it.
+    const Register lo_result = (out_lo == in_hi) ? Rtemp : out_lo;
+    __ rsbs(lo_result, in_lo, 0);
+    __ rsc(out_hi, in_hi, 0);
+    move_regs(lo_result, out_lo);
+#endif // AARCH64
+  } else if (left->is_single_fpu()) {
+    __ neg_float(dest->as_float_reg(), left->as_float_reg());
+  } else if (left->is_double_fpu()) {
+    __ neg_double(dest->as_double_reg(), left->as_double_reg());
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
+  LIR_Address* address = addr_opr->as_address_ptr();
+  if (!address->index()->is_illegal()) {
+    assert(address->disp() == 0, "cannot handle otherwise");
+#ifdef AARCH64
+    assert(address->index()->is_double_cpu(), "should be");
+#endif // AARCH64
+    __ add(dest->as_pointer_register(), address->base()->as_pointer_register(),
+           AsmOperand(address->index()->as_pointer_register(), lsl, address->scale()));
+    return;
+  }
+  const jint disp = address->disp();
+  if (!Assembler::is_arith_imm_in_range(disp)) {
+    BAILOUT("illegal arithmetic operand");
+  }
+  __ add(dest->as_pointer_register(), address->base()->as_pointer_register(), disp);
+}
+
+
+void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+  // Plain call to 'dest'; 'result' and 'args' are not used here and no temporary is expected.
+  assert(!tmp->is_valid(), "don't need temporary");
+  __ call(dest);
+  if (info == NULL) return;
+  add_call_info_here(info);
+}
+
+// Moves a register pair to/from a simple address with a single stmia/ldmia (32-bit ARM only; unimplemented on AArch64).
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+#ifdef AARCH64
+ Unimplemented(); // TODO-AARCH64: Use stlr/ldar instructions for volatile load/store
+#else
+ assert(src->is_double_cpu() && dest->is_address() ||
+ src->is_address() && dest->is_double_cpu(),
+ "Simple move_op is called for all other cases");
+
+ int null_check_offset; // code offset of the stmia/ldmia, registered below as the null-check site when info is given
+ if (dest->is_address()) {
+ // Store
+ const LIR_Address* addr = dest->as_address_ptr();
+ const Register src_lo = src->as_register_lo();
+ const Register src_hi = src->as_register_hi();
+ assert(addr->index()->is_illegal() && addr->disp() == 0, "The address is simple already");
+
+ if (src_lo < src_hi) { // NOTE(review): presumably needed because stm/ldm transfer registers in ascending register order — confirm
+ null_check_offset = __ offset();
+ __ stmia(addr->base()->as_register(), RegisterSet(src_lo) | RegisterSet(src_hi));
+ } else {
+ assert(src_lo < Rtemp, "Rtemp is higher than any allocatable register");
+ __ mov(Rtemp, src_hi); // copy the high word into Rtemp so the pair (src_lo, Rtemp) is in ascending order
+ null_check_offset = __ offset(); // taken after the mov, right before the stmia
+ __ stmia(addr->base()->as_register(), RegisterSet(src_lo) | RegisterSet(Rtemp));
+ }
+ } else {
+ // Load
+ const LIR_Address* addr = src->as_address_ptr();
+ const Register dest_lo = dest->as_register_lo();
+ const Register dest_hi = dest->as_register_hi();
+ assert(addr->index()->is_illegal() && addr->disp() == 0, "The address is simple already");
+
+ null_check_offset = __ offset(); // the ldmia is the first instruction emitted in both branches below
+ if (dest_lo < dest_hi) {
+ __ ldmia(addr->base()->as_register(), RegisterSet(dest_lo) | RegisterSet(dest_hi));
+ } else {
+ assert(dest_lo < Rtemp, "Rtemp is higher than any allocatable register");
+ __ ldmia(addr->base()->as_register(), RegisterSet(dest_lo) | RegisterSet(Rtemp));
+ __ mov(dest_hi, Rtemp); // high word was loaded into Rtemp; move it to its real destination
+ }
+ }
+
+ if (info != NULL) {
+ add_debug_info_for_null_check(null_check_offset, info);
+ }
+#endif // AARCH64
+}
+
+// Memory barrier emitters: each forwards a fixed barrier mask to MacroAssembler::membar, passing Rtemp as the second argument.
+void LIR_Assembler::membar() {
+ __ membar(MacroAssembler::StoreLoad, Rtemp); // same mask as membar_storeload()
+}
+
+void LIR_Assembler::membar_acquire() {
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
+}
+
+void LIR_Assembler::membar_release() {
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp);
+}
+
+void LIR_Assembler::membar_loadload() {
+ __ membar(MacroAssembler::LoadLoad, Rtemp);
+}
+
+void LIR_Assembler::membar_storestore() {
+ __ membar(MacroAssembler::StoreStore, Rtemp);
+}
+
+void LIR_Assembler::membar_loadstore() {
+ __ membar(MacroAssembler::LoadStore, Rtemp);
+}
+
+void LIR_Assembler::membar_storeload() {
+ __ membar(MacroAssembler::StoreLoad, Rtemp);
+}
+
+void LIR_Assembler::on_spin_wait() {
+ Unimplemented(); // no spin-wait hint is emitted here
+}
+
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+ // Not used on ARM, so result_reg is never written here
+ Unimplemented();
+}
+
+void LIR_Assembler::peephole(LIR_List* lir) {
+#ifdef AARCH64
+  return; // TODO-AARCH64 implement peephole optimizations
+#endif
+  // Only one pattern is rewritten: a lir_cmp immediately followed by a lir_cmove whose
+  // result is one of the compared operands.
+  //
+  // Replace:
+  //   cmp rX, y
+  //   cmove [EQ] y, z, rX
+  // with
+  //   cmp rX, y
+  //   cmove [EQ] illegalOpr, z, rX
+  //
+  // or
+  //   cmp rX, y
+  //   cmove [NE] z, y, rX
+  // with
+  //   cmp rX, y
+  //   cmove [NE] z, illegalOpr, rX
+  //
+  // moves from illegalOpr should be removed when converting LIR to native assembly
+  LIR_OpList* const ops = lir->instructions_list();
+  const int op_count = ops->length();
+  for (int idx = 0; idx < op_count; idx++) {
+    LIR_Op* const current = ops->at(idx);
+    if (current->code() != lir_cmp) {
+      continue;
+    }
+
+    LIR_Op2* const cmp = current->as_Op2();
+    assert(cmp != NULL, "cmp LIR instruction is not an op2");
+
+    const int next_idx = idx + 1;
+    if (next_idx >= op_count) {
+      continue;  // the cmp is the last instruction in the list
+    }
+    LIR_Op2* const cmove = ops->at(next_idx)->as_Op2();
+    if (cmove == NULL || cmove->code() != lir_cmove) {
+      continue;
+    }
+
+    // Work out which cmp operand the cmove overwrites and remember the other one.
+    const LIR_Opr result = cmove->result_opr();
+    LIR_Opr other_arg;
+    if (result == cmp->in_opr1()) {
+      other_arg = cmp->in_opr2();
+    } else if (result == cmp->in_opr2()) {
+      other_arg = cmp->in_opr1();
+    } else {
+      continue;
+    }
+    if (result == LIR_OprFact::illegalOpr) {
+      continue;
+    }
+
+    // In the "operands are equal" outcome the result already holds other_arg, so that input is blanked.
+    const LIR_Condition cond = cmove->condition();
+    if (cond == lir_cond_equal) {
+      if (cmove->in_opr1() == other_arg) cmove->set_in_opr1(LIR_OprFact::illegalOpr);
+    } else if (cond == lir_cond_notEqual) {
+      if (cmove->in_opr2() == other_arg) cmove->set_in_opr2(LIR_OprFact::illegalOpr);
+    }
+  }
+}
+
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
+  // Atomic xchg / xadd on the location in 'src', emitted as a load-exclusive/store-exclusive retry loop; the old value is left in 'dest'.
+  Register ptr = src->as_pointer_register();
+  if (code == lir_xchg) {
+#ifdef AARCH64
+    if (UseCompressedOops && data->is_oop()) {
+      __ encode_heap_oop(tmp->as_pointer_register(), data->as_register());
+    }
+#endif // AARCH64
+  } else {
+    assert (!data->is_oop(), "xadd for oops");
+  }
+
+#ifndef AARCH64
+  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp);
+#endif // !AARCH64
+
+  Label retry;
+  __ bind(retry);
+  // Everything emitted from here up to the cbnz_32 below is re-executed when the store-exclusive reports failure in Rtemp.
+  if ((data->type() == T_INT) || (data->is_oop() AARCH64_ONLY(&& UseCompressedOops))) {
+    Register dst = dest->as_register();
+    Register new_val = noreg;
+#ifdef AARCH64
+    __ ldaxr_w(dst, ptr);
+#else
+    __ ldrex(dst, Address(ptr));
+#endif
+    if (code == lir_xadd) {
+      Register tmp_reg = tmp->as_register();
+      if (data->is_constant()) {
+        assert_different_registers(dst, ptr, tmp_reg);
+        __ add_32(tmp_reg, dst, data->as_constant_ptr()->as_jint());
+      } else {
+        assert_different_registers(dst, ptr, tmp_reg, data->as_register());
+        __ add_32(tmp_reg, dst, data->as_register());
+      }
+      new_val = tmp_reg;
+    } else {
+      if (UseCompressedOops && data->is_oop()) {
+        new_val = tmp->as_pointer_register();
+      } else {
+        new_val = data->as_register();
+      }
+      assert_different_registers(dst, ptr, new_val);
+    }
+#ifdef AARCH64
+    __ stlxr_w(Rtemp, new_val, ptr);
+#else
+    __ strex(Rtemp, new_val, Address(ptr));
+#endif // AARCH64
+
+#ifdef AARCH64
+  } else if ((data->type() == T_LONG) || (data->is_oop() && !UseCompressedOops)) {
+    Register dst = dest->as_pointer_register();
+    Register new_val = noreg;
+    __ ldaxr(dst, ptr);
+    if (code == lir_xadd) {
+      Register tmp_reg = tmp->as_pointer_register();
+      if (data->is_constant()) {
+        assert_different_registers(dst, ptr, tmp_reg);
+        jlong c = data->as_constant_ptr()->as_jlong();
+        assert((jlong)((jint)c) == c, "overflow");
+        __ add(tmp_reg, dst, (jint)c);
+      } else {
+        assert_different_registers(dst, ptr, tmp_reg, data->as_pointer_register());
+        __ add(tmp_reg, dst, data->as_pointer_register());
+      }
+      new_val = tmp_reg;
+    } else {
+      new_val = data->as_pointer_register();
+      assert_different_registers(dst, ptr, new_val);
+    }
+    __ stlxr(Rtemp, new_val, ptr);
+#else
+  } else if (data->type() == T_LONG) {
+    Register dst_lo = dest->as_register_lo();
+    Register new_val_lo = noreg;
+    Register dst_hi = dest->as_register_hi();
+
+    assert(dst_hi->encoding() == dst_lo->encoding() + 1, "non aligned register pair");
+    assert((dst_lo->encoding() & 0x1) == 0, "misaligned register pair");
+
+    // 'retry' is already bound above and no code has been emitted since, so it is not bound a second time here.
+    __ ldrexd(dst_lo, Address(ptr));
+    if (code == lir_xadd) {
+      Register tmp_lo = tmp->as_register_lo();
+      Register tmp_hi = tmp->as_register_hi();
+
+      assert(tmp_hi->encoding() == tmp_lo->encoding() + 1, "non aligned register pair");
+      assert((tmp_lo->encoding() & 0x1) == 0, "misaligned register pair");
+
+      if (data->is_constant()) {
+        jlong c = data->as_constant_ptr()->as_jlong();
+        assert((jlong)((jint)c) == c, "overflow");
+        assert_different_registers(dst_lo, dst_hi, ptr, tmp_lo, tmp_hi);
+        __ adds(tmp_lo, dst_lo, (jint)c);
+        __ adc(tmp_hi, dst_hi, 0);  // NOTE(review): the high word only receives the carry, which is right for non-negative c — confirm negative constants never get here
+      } else {
+        Register addend_lo = data->as_register_lo();  // distinct names: do not shadow the outer new_val_lo
+        Register addend_hi = data->as_register_hi();
+        __ adds(tmp_lo, dst_lo, addend_lo);
+        __ adc(tmp_hi, dst_hi, addend_hi);
+        assert_different_registers(dst_lo, dst_hi, ptr, tmp_lo, tmp_hi, addend_lo, addend_hi);
+      }
+      new_val_lo = tmp_lo;
+    } else {
+      new_val_lo = data->as_register_lo();
+      Register new_val_hi = data->as_register_hi();
+
+      assert_different_registers(dst_lo, dst_hi, ptr, new_val_lo, new_val_hi);
+      assert(new_val_hi->encoding() == new_val_lo->encoding() + 1, "non aligned register pair");
+      assert((new_val_lo->encoding() & 0x1) == 0, "misaligned register pair");
+    }
+    __ strexd(Rtemp, new_val_lo, Address(ptr));
+#endif // AARCH64
+  } else {
+    ShouldNotReachHere();
+  }
+
+  __ cbnz_32(Rtemp, retry);
+  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
+
+#ifdef AARCH64
+  if (UseCompressedOops && data->is_oop()) {
+    __ decode_heap_oop(dest->as_register());
+  }
+#endif // AARCH64
+}
+
+#undef __
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_LIRAssembler_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_C1_LIRASSEMBLER_ARM_HPP
+#define CPU_ARM_VM_C1_LIRASSEMBLER_ARM_HPP
+
+ private:
+
+ // Record the type of the receiver in ReceiverTypeData
+ void type_profile_helper(Register mdo, int mdo_offset_bias,
+ ciMethodData *md, ciProfileData *data,
+ Register recv, Register tmp1, Label* update_done);
+ // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
+ void setup_md_access(ciMethod* method, int bci,
+ ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias);
+ // NOTE(review): the two typecheck_profile_helper* functions are defined in the .cpp; presumably shared by the profiled type-check ops — confirm
+ void typecheck_profile_helper1(ciMethod* method, int bci,
+ ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias,
+ Register obj, Register mdo, Register data_val, Label* obj_is_null);
+
+ void typecheck_profile_helper2(ciMethodData* md, ciProfileData* data, int mdo_offset_bias,
+ Register mdo, Register recv, Register value, Register tmp1,
+ Label* profile_cast_success, Label* profile_cast_failure,
+ Label* success, Label* failure);
+
+#ifdef AARCH64
+ void long_compare_helper(LIR_Opr opr1, LIR_Opr opr2);
+#endif // AARCH64
+
+ // Saves 4 given registers in reserved argument area.
+ void save_in_reserved_area(Register r1, Register r2, Register r3, Register r4);
+
+ // Restores 4 given registers from reserved argument area.
+ void restore_from_reserved_area(Register r1, Register r2, Register r3, Register r4);
+
+ enum { // stub/handler sizes; NOTE(review): presumably in bytes — confirm
+ _call_stub_size = AARCH64_ONLY(32) NOT_AARCH64(16), // 32 on AArch64, 16 on 32-bit ARM
+ _call_aot_stub_size = 0,
+ _exception_handler_size = PRODUCT_ONLY(AARCH64_ONLY(256) NOT_AARCH64(68)) NOT_PRODUCT(AARCH64_ONLY(256+216) NOT_AARCH64(68+60)), // larger in non-product builds
+ _deopt_handler_size = AARCH64_ONLY(32) NOT_AARCH64(16) // 32 on AArch64, 16 on 32-bit ARM
+ };
+
+ public:
+
+ void verify_reserved_argument_area_size(int args_count) PRODUCT_RETURN; // NOTE(review): PRODUCT_RETURN presumably makes this a no-op in product builds — confirm
+
+ void store_parameter(jint c, int offset_from_sp_in_words); // offset is given in words relative to SP, per the parameter name
+ void store_parameter(Metadata* m, int offset_from_sp_in_words);
+
+#endif // CPU_ARM_VM_C1_LIRASSEMBLER_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_LIRGenerator_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,1767 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArray.hpp"
+#include "ci/ciObjArrayKlass.hpp"
+#include "ci/ciTypeArrayKlass.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_arm.inline.hpp"
+
+// Shorthand used throughout this file to append LIR operations to the
+// generator's current LIR list. In ASSERT builds the emitting source file and
+// line are passed along to lir().
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->
+#else
+#define __ gen()->lir()->
+#endif
+
+// Loads a byte-sized item. On ARM this simply delegates to load_item().
+void LIRItem::load_byte_item() {
+ load_item();
+}
+
+// Makes the item usable as an operand without forcing it into a register
+// when the generator says the value can be inlined as a constant.
+//  - inlinable: the result is the value's existing operand if that is already
+//    a constant, otherwise a constant operand built from the value's type;
+//  - not inlinable: the item is loaded via load_item().
+void LIRItem::load_nonconstant() {
+  LIR_Opr opr = value()->operand();
+  if (!_gen->can_inline_as_constant(value())) {
+    load_item();
+    return;
+  }
+  _result = opr->is_constant() ? opr : LIR_OprFact::value_type(value()->type());
+}
+
+//--------------------------------------------------------------
+// LIRGenerator
+//--------------------------------------------------------------
+
+
+// Returns the fixed operand FrameMap::Exception_oop_opr.
+LIR_Opr LIRGenerator::exceptionOopOpr() {
+ return FrameMap::Exception_oop_opr;
+}
+
+// Returns the fixed operand FrameMap::Exception_pc_opr.
+LIR_Opr LIRGenerator::exceptionPcOpr() {
+ return FrameMap::Exception_pc_opr;
+}
+
+// Returns a newly allocated T_INT register operand (no fixed register is used).
+LIR_Opr LIRGenerator::syncLockOpr() {
+ return new_register(T_INT);
+}
+
+// Returns a newly allocated T_OBJECT register operand (no fixed register is used).
+LIR_Opr LIRGenerator::syncTempOpr() {
+ return new_register(T_OBJECT);
+}
+
+// No thread temp operand is provided on ARM: always returns illegalOpr.
+LIR_Opr LIRGenerator::getThreadTemp() {
+ return LIR_OprFact::illegalOpr;
+}
+
+// No atomic-lock operand is provided on ARM: always returns illegalOpr.
+// Used in this file as the scratch argument of monitor_enter (when biased
+// locking is off) and of monitor_exit.
+LIR_Opr LIRGenerator::atomicLockOpr() {
+ return LIR_OprFact::illegalOpr;
+}
+
+// Maps a value type to the fixed FrameMap result operand for that type
+// (int, object, long, float or double). Any other tag, including addressTag,
+// is a fatal error. The 'callee' flag is not consulted on ARM.
+LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
+  const ValueTag tag = type->tag();
+  LIR_Opr result_opr;
+  if (tag == intTag) {
+    result_opr = FrameMap::Int_result_opr;
+  } else if (tag == objectTag) {
+    result_opr = FrameMap::Object_result_opr;
+  } else if (tag == longTag) {
+    result_opr = FrameMap::Long_result_opr;
+  } else if (tag == floatTag) {
+    result_opr = FrameMap::Float_result_opr;
+  } else if (tag == doubleTag) {
+    result_opr = FrameMap::Double_result_opr;
+  } else {
+    // addressTag and everything else
+    ShouldNotReachHere();
+    return LIR_OprFact::illegalOpr;
+  }
+  assert(result_opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return result_opr;
+}
+
+
+// Returns a newly allocated T_INT register for a byte-sized result; the
+// 'type' argument is not consulted.
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+ return new_register(T_INT);
+}
+
+
+//--------- loading items into registers --------------------------------
+
+
+// Tells whether constant 'v' can be stored to memory without first being
+// loaded into a register.
+//  - AARCH64: only zero-valued constants qualify (int 0, long 0, the null
+//    object, and float/double constants whose bit pattern is all zeroes).
+//  - 32-bit ARM: never.
+// The 'type' argument is not consulted.
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+#ifdef AARCH64
+  ValueType* vt = v->type();
+  if (vt->as_IntConstant() != NULL) {
+    return vt->as_IntConstant()->value() == 0;
+  }
+  if (vt->as_LongConstant() != NULL) {
+    return vt->as_LongConstant()->value() == 0;
+  }
+  if (vt->as_ObjectConstant() != NULL) {
+    return vt->as_ObjectConstant()->value()->is_null_object();
+  }
+  if (vt->as_FloatConstant() != NULL) {
+    // compare raw bits, so -0.0f does not qualify
+    return jint_cast(vt->as_FloatConstant()->value()) == 0;
+  }
+  if (vt->as_DoubleConstant() != NULL) {
+    // compare raw bits, so -0.0 does not qualify
+    return jlong_cast(vt->as_DoubleConstant()->value()) == 0;
+  }
+#endif // AARCH64
+  return false;
+}
+
+
+// Tells whether constant 'v' can be used directly as an instruction operand.
+//  - int constants: when Assembler::is_arith_imm_in_range accepts the value;
+//  - object constants: only the null object;
+//  - AARCH64 only: long constants accepted by is_arith_imm_in_range;
+//  - 32-bit ARM only: float/double constants that compare equal to zero.
+bool LIRGenerator::can_inline_as_constant(Value v) const {
+  ValueType* vt = v->type();
+  if (vt->as_IntConstant() != NULL) {
+    return Assembler::is_arith_imm_in_range(vt->as_IntConstant()->value());
+  }
+  if (vt->as_ObjectConstant() != NULL) {
+    return vt->as_ObjectConstant()->value()->is_null_object();
+  }
+#ifdef AARCH64
+  if (vt->as_LongConstant() != NULL) {
+    return Assembler::is_arith_imm_in_range(vt->as_LongConstant()->value());
+  }
+#else
+  if (vt->as_FloatConstant() != NULL) {
+    return vt->as_FloatConstant()->value() == 0.0f;
+  }
+  if (vt->as_DoubleConstant() != NULL) {
+    return vt->as_DoubleConstant()->value() == 0.0;
+  }
+#endif // AARCH64
+  return false;
+}
+
+
+// LIR_Const overload: must never be called on ARM (ShouldNotCallThis). The
+// return statement only satisfies the signature.
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
+ ShouldNotCallThis(); // Not used on ARM
+ return false;
+}
+
+
+#ifdef AARCH64
+
+// AARCH64 only: tells whether constant 'v' can be the immediate operand of a
+// compare.
+//  - int/long: when either the value or its negation is accepted by
+//    Assembler::is_arith_imm_in_range;
+//  - object: only the null object;
+//  - float/double: only values comparing equal to zero;
+//  - anything else: no.
+static bool can_inline_as_constant_in_cmp(Value v) {
+  ValueType* vt = v->type();
+  jlong imm;
+  if (vt->as_IntConstant() != NULL) {
+    imm = vt->as_IntConstant()->value();
+  } else if (vt->as_LongConstant() != NULL) {
+    imm = vt->as_LongConstant()->value();
+  } else {
+    // Non-integral constants are decided right here.
+    if (vt->as_ObjectConstant() != NULL) {
+      return vt->as_ObjectConstant()->value()->is_null_object();
+    }
+    if (vt->as_FloatConstant() != NULL) {
+      return vt->as_FloatConstant()->value() == 0.0f;
+    }
+    if (vt->as_DoubleConstant() != NULL) {
+      return vt->as_DoubleConstant()->value() == 0.0;
+    }
+    return false;
+  }
+
+  return Assembler::is_arith_imm_in_range(imm) || Assembler::is_arith_imm_in_range(-imm);
+}
+
+
+// AARCH64 only: tells whether int/long constant 'v' is encodable as a logical
+// immediate (Assembler::LogicalImmediate, with the second argument set to
+// true for int and false for long). Other constant kinds are never inlined.
+static bool can_inline_as_constant_in_logic(Value v) {
+  ValueType* vt = v->type();
+  if (vt->as_IntConstant() != NULL) {
+    return Assembler::LogicalImmediate(vt->as_IntConstant()->value(), true).is_encoded();
+  }
+  if (vt->as_LongConstant() != NULL) {
+    return Assembler::LogicalImmediate(vt->as_LongConstant()->value(), false).is_encoded();
+  }
+  return false;
+}
+
+
+#endif // AARCH64
+
+
+// No dedicated safepoint-poll register is provided on ARM: always returns illegalOpr.
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+ return LIR_OprFact::illegalOpr;
+}
+
+
+// Wraps the integral value 'c' in a constant operand matching 'type':
+// intptr constant for T_ADDRESS/T_OBJECT, long constant for T_LONG, int
+// constant for T_INT. Any other type is a fatal error (the trailing return
+// only satisfies the signature).
+static LIR_Opr make_constant(BasicType type, jlong c) {
+  if (type == T_ADDRESS || type == T_OBJECT) {
+    return LIR_OprFact::intptrConst(c);
+  }
+  if (type == T_LONG) {
+    return LIR_OprFact::longConst(c);
+  }
+  if (type == T_INT) {
+    return LIR_OprFact::intConst(c);
+  }
+  ShouldNotReachHere();
+  return LIR_OprFact::intConst(-1);
+}
+
+#ifdef AARCH64
+
+// AARCH64 only: emits LIR computing dest = src + c.
+//  - c == 0: a plain move.
+//  - |c| < 2^24: up to two add (or sub, when c is negative) operations, one
+//    for the low 12 bits and one for bits 12..23 of |c|.
+//  - otherwise: |c| is first moved into dest and then added to / subtracted
+//    from src.
+// The constant operands are created with the type of 'src'.
+void LIRGenerator::add_constant(LIR_Opr src, jlong c, LIR_Opr dest) {
+ if (c == 0) {
+ __ move(src, dest);
+ return;
+ }
+
+ BasicType type = src->type();
+ // Work on the magnitude and remember the sign, so a negative constant
+ // becomes a subtraction of a positive one.
+ bool is_neg = (c < 0);
+ c = ABS(c);
+
+ if ((c >> 24) == 0) {
+ // Two iterations: shift == 0 and shift == 12.
+ for (int shift = 0; shift <= 12; shift += 12) {
+ int part = ((int)c) & (right_n_bits(12) << shift);
+ if (part != 0) {
+ if (is_neg) {
+ __ sub(src, make_constant(type, part), dest);
+ } else {
+ __ add(src, make_constant(type, part), dest);
+ }
+ // A second part, if any, must accumulate onto the partial result.
+ src = dest;
+ }
+ }
+ } else {
+ // dest is written with the constant before src is read below.
+ // NOTE(review): this presumably requires src and dest to be different
+ // operands on this path - confirm with callers.
+ __ move(make_constant(type, c), dest);
+ if (is_neg) {
+ __ sub(src, dest, dest);
+ } else {
+ __ add(src, dest, dest);
+ }
+ }
+}
+
+#endif // AARCH64
+
+
+// Emits LIR computing dest = src + c for a non-zero int constant c.
+//  - AARCH64: delegates to add_constant().
+//  - 32-bit ARM: c is split into up to four 8-bit chunks, starting at the
+//    lowest even bit position that has a set bit; each non-zero chunk is
+//    added with its own add operation (the first one is always emitted).
+void LIRGenerator::add_large_constant(LIR_Opr src, int c, LIR_Opr dest) {
+ assert(c != 0, "must be");
+#ifdef AARCH64
+ add_constant(src, c, dest);
+#else
+ // Find first non-zero bit
+ // (scanned two bits at a time, so 'shift' is always even; terminates
+ // because c != 0)
+ int shift = 0;
+ while ((c & (3 << shift)) == 0) {
+ shift += 2;
+ }
+ // Add the least significant part of the constant
+ int mask = 0xff << shift;
+ __ add(src, LIR_OprFact::intConst(c & mask), dest);
+ // Add up to 3 other parts of the constant;
+ // each of them can be represented as rotated_imm
+ if (c & (mask << 8)) {
+ __ add(dest, LIR_OprFact::intConst(c & (mask << 8)), dest);
+ }
+ if (c & (mask << 16)) {
+ __ add(dest, LIR_OprFact::intConst(c & (mask << 16)), dest);
+ }
+ if (c & (mask << 24)) {
+ __ add(dest, LIR_OprFact::intConst(c & (mask << 24)), dest);
+ }
+#endif // AARCH64
+}
+
+// Convenience constructor for a base + scaled-index address with a zero
+// displacement.
+static LIR_Address* make_address(LIR_Opr base, LIR_Opr index, LIR_Address::Scale scale, BasicType type) {
+ return new LIR_Address(base, index, scale, 0, type);
+}
+
+// Builds a LIR_Address for base + (index << shift) + disp that the ARM
+// load/store instructions for 'type' can encode, emitting extra add
+// operations when the raw combination is not directly encodable.
+//
+//   base   must be a register operand
+//   index  register, constant, or illegalOpr; a constant index is folded
+//          into the displacement
+//   shift  log2 scale applied to the index
+//   disp   byte displacement
+//   type   type of the memory access; selects the displacement range
+//
+// On 32-bit ARM, T_LONG base/index operands are first narrowed to int.
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+
+  if (index->is_constant()) {
+    disp += index->as_constant_ptr()->as_jint() << shift;
+    index = LIR_OprFact::illegalOpr;
+  }
+
+#ifndef AARCH64
+  if (base->type() == T_LONG) {
+    LIR_Opr tmp = new_register(T_INT);
+    __ convert(Bytecodes::_l2i, base, tmp);
+    base = tmp;
+  }
+  if (index != LIR_OprFact::illegalOpr && index->type() == T_LONG) {
+    LIR_Opr tmp = new_register(T_INT);
+    __ convert(Bytecodes::_l2i, index, tmp);
+    index = tmp;
+  }
+  // At this point base and index should be all ints and not constants
+  assert(base->is_single_cpu() && !base->is_constant(), "base should be an non-constant int");
+  assert(index->is_illegal() || (index->type() == T_INT && !index->is_constant()), "index should be an non-constant int");
+#endif
+
+  int max_disp;           // exclusive bound of the displacement the access can encode
+  bool disp_is_in_range;  // true when 'disp' can be used as is
+  bool embedded_shift;    // true when the access accepts a shifted register index
+
+#ifdef AARCH64
+  int align = exact_log2(type2aelembytes(type, true));
+  assert((disp & right_n_bits(align)) == 0, "displacement is not aligned");
+  assert(shift == 0 || shift == align, "shift should be zero or equal to embedded align");
+  max_disp = (1 << 12) << align;
+
+  // Non-negative displacements use the scaled unsigned 12-bit form, negative
+  // ones the unscaled signed 9-bit form.
+  if (disp >= 0) {
+    disp_is_in_range = Assembler::is_unsigned_imm_in_range(disp, 12, align);
+  } else {
+    disp_is_in_range = Assembler::is_imm_in_range(disp, 9, 0);
+  }
+
+  embedded_shift = true;
+#else
+  switch (type) {
+    case T_BYTE:
+    case T_SHORT:
+    case T_CHAR:
+      max_disp = 256; // ldrh, ldrsb encoding has 8-bit offset
+      embedded_shift = false;
+      break;
+    case T_FLOAT:
+    case T_DOUBLE:
+      max_disp = 1024; // flds, fldd have 8-bit offset multiplied by 4
+      embedded_shift = false;
+      break;
+    case T_LONG:
+      max_disp = 4096;
+      embedded_shift = false;
+      break;
+    default:
+      max_disp = 4096; // ldr, ldrb allow 12-bit offset
+      embedded_shift = true;
+  }
+
+  disp_is_in_range = (-max_disp < disp && disp < max_disp);
+#endif // !AARCH64
+
+  if (index->is_register()) {
+    if (!disp_is_in_range) {
+      // Fold the oversized displacement into a new base register. The
+      // register is allocated only on this path, so no unused virtual
+      // register is created when the displacement is already encodable.
+      // (disp is non-zero here: zero is always in range.)
+      LIR_Opr biased_base = new_pointer_register();
+      add_large_constant(base, disp, biased_base);
+      base = biased_base;
+      disp = 0;
+    }
+    LIR_Address* addr = make_address(base, index, (LIR_Address::Scale)shift, type);
+    if (disp == 0 && embedded_shift) {
+      // can use ldr/str instruction with register index
+      return addr;
+    }
+    // Otherwise compute base + (index << shift) explicitly and keep the
+    // (in-range) displacement as the immediate offset.
+    LIR_Opr sum = new_pointer_register();
+    __ add(base, LIR_OprFact::address(addr), sum); // add with shifted/extended register
+    return new LIR_Address(sum, disp, type);
+  }
+
+  // If the displacement is too large to be inlined into LDR instruction,
+  // generate large constant with additional sequence of ADD instructions
+  int excess_disp = disp & ~(max_disp - 1);
+  if (excess_disp != 0) {
+    LIR_Opr tmp = new_pointer_register();
+    add_large_constant(base, excess_disp, tmp);
+    base = tmp;
+  }
+  return new LIR_Address(base, disp & (max_disp - 1), type);
+}
+
+
+// Builds the address of element 'index_opr' of array 'array_opr'.
+//
+// Without a card mark the work is delegated to generate_address(). With
+// needs_card_mark set, the full element address is computed into a single
+// new pointer register and returned as a plain [register] address.
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type, bool needs_card_mark) {
+  const int header_bytes = arrayOopDesc::base_offset_in_bytes(type);
+  const int elem_bytes = type2aelembytes(type);
+
+  if (index_opr->is_constant()) {
+    // The whole offset is known statically.
+    const int byte_offset = header_bytes + index_opr->as_constant_ptr()->as_jint() * elem_bytes;
+    if (!needs_card_mark) {
+      return generate_address(array_opr, byte_offset, type);
+    }
+    LIR_Opr elem_ptr = new_pointer_register();
+    add_large_constant(array_opr, byte_offset, elem_ptr);
+    return new LIR_Address(elem_ptr, (intx)0, type);
+  }
+
+  assert(index_opr->is_register(), "must be");
+  const int log2_elem = exact_log2(elem_bytes);
+  if (!needs_card_mark) {
+    return generate_address(array_opr, index_opr, log2_elem, header_bytes, type);
+  }
+
+  // elem_ptr = array + header, then elem_ptr += index << log2_elem
+  LIR_Opr elem_ptr = new_pointer_register();
+  LIR_Address* scaled_index = make_address(elem_ptr, index_opr, (LIR_Address::Scale)log2_elem, type);
+  __ add(array_opr, LIR_OprFact::intptrConst(header_bytes), elem_ptr);
+  __ add(elem_ptr, LIR_OprFact::address(scaled_index), elem_ptr); // add with shifted/extended register
+  return new LIR_Address(elem_ptr, type);
+}
+
+
+// Returns an operand holding the int/long constant 'x': the constant operand
+// itself when it is encodable as an immediate (logical immediate on AARCH64,
+// rotated immediate on 32-bit ARM), otherwise a new register into which the
+// constant has been moved.
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  assert(type == T_LONG || type == T_INT, "should be");
+  LIR_Opr imm = make_constant(type, x);
+#ifdef AARCH64
+  const bool encodable = Assembler::LogicalImmediate(x, type == T_INT).is_encoded();
+#else
+  const bool encodable = AsmOperand::is_rotated_imm(x);
+#endif // AARCH64
+  if (encodable) {
+    return imm;
+  }
+  LIR_Opr reg = new_register(type);
+  __ move(imm, reg);
+  return reg;
+}
+
+
+// Adds 'step' to the counter of the given type located at the absolute
+// address 'counter': the address is moved into a pointer register and the
+// LIR_Address overload does the rest.
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr counter_ptr = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), counter_ptr);
+  increment_counter(new LIR_Address(counter_ptr, type), step);
+}
+
+
+// Adds 'step' to the memory cell at 'addr' with a load / add / store
+// sequence through a new register of the cell's type.
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  const BasicType cell_type = addr->type();
+  LIR_Opr value = new_register(cell_type);
+  __ move(addr, value);                                    // load
+  __ add(value, make_constant(cell_type, step), value);    // bump
+  __ move(value, addr);                                    // store back
+}
+
+
+// Compares the int at [base + disp] with the constant 'c'. The memory value
+// is loaded into FrameMap::LR_opr, which is therefore overwritten.
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr loaded = FrameMap::LR_opr;
+  LIR_Address* mem = new LIR_Address(base, disp, T_INT);
+  __ load(mem, loaded, info);
+  __ cmp(condition, loaded, c);
+}
+
+
+// Compares 'reg' with the value of the given type at [base + disp]. The
+// memory value is loaded into FrameMap::LR_opr, which is therefore
+// overwritten.
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr loaded = FrameMap::LR_opr;
+  LIR_Address* mem = new LIR_Address(base, disp, type);
+  __ load(mem, loaded, info);
+  __ cmp(condition, reg, loaded);
+}
+
+
+// Tries to replace result = left * c by a shift combined with one add/sub:
+//   c == 2^k - 1  ->  result = (left << k) - left
+//   c == 2^k + 1  ->  result = left + (left << k)
+// Returns true when such a sequence was emitted, false when 'c' has neither
+// form (nothing is emitted then). 'tmp' is not used.
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
+  assert(left != result, "should be different registers");
+
+  if (is_power_of_2(c + 1)) {
+#ifdef AARCH64
+    __ shift_left(left, log2_intptr(c + 1), result);
+    __ sub(result, left, result);
+#else
+    LIR_Address::Scale k = (LIR_Address::Scale) log2_intptr(c + 1);
+    LIR_Address* shifted_left = new LIR_Address(left, left, k, 0, T_INT);
+    __ sub(LIR_OprFact::address(shifted_left), left, result); // rsb with shifted register
+#endif // AARCH64
+    return true;
+  }
+
+  if (is_power_of_2(c - 1)) {
+    LIR_Address::Scale k = (LIR_Address::Scale) log2_intptr(c - 1);
+    LIR_Address* shifted_left = new LIR_Address(left, left, k, 0, T_INT);
+    __ add(left, LIR_OprFact::address(shifted_left), result); // add with shifted register
+    return true;
+  }
+
+  return false;
+}
+
+
+// Stores the int operand 'item' into the stack slot at SP + offset_from_sp.
+// Only T_INT operands are expected.
+void LIRGenerator::store_stack_parameter(LIR_Opr item, ByteSize offset_from_sp) {
+  assert(item->type() == T_INT, "other types are not expected");
+  LIR_Address* slot = new LIR_Address(FrameMap::SP_opr, in_bytes(offset_from_sp), item->type());
+  __ store(item, slot);
+}
+
+// Emits the store that marks the card at 'card_addr' dirty. Relies on the
+// dirty card value being 0 (asserted below).
+//  - AARCH64: stores the zero register.
+//  - 32-bit ARM: stores 'value' itself when the low 8 bits of the card table
+//    base are zero (the caller in this file passes the register holding the
+//    card table base), otherwise stores a freshly loaded dirty-card constant.
+void LIRGenerator::set_card(LIR_Opr value, LIR_Address* card_addr) {
+ assert(CardTableModRefBS::dirty_card_val() == 0,
+ "Cannot use ZR register (aarch64) or the register containing the card table base address directly (aarch32) otherwise");
+#ifdef AARCH64
+ // AARCH64 has a register that is constant zero. We can use that one to set the
+ // value in the card table to dirty.
+ __ move(FrameMap::ZR_opr, card_addr);
+#else // AARCH64
+ CardTableModRefBS* ct = (CardTableModRefBS*)_bs;
+ if(((intx)ct->byte_map_base & 0xff) == 0) {
+ // If the card table base address is aligned to 256 bytes, we can use the register
+ // that contains the card_table_base_address.
+ // (card_addr is a byte-typed address, so only the low byte - zero, i.e.
+ // dirty - is presumably what gets stored; NOTE(review): confirm.)
+ __ move(value, card_addr);
+ } else {
+ // Otherwise we need to create a register containing that value.
+ LIR_Opr tmp_zero = new_register(T_INT);
+ __ move(LIR_OprFact::intConst(CardTableModRefBS::dirty_card_val()), tmp_zero);
+ __ move(tmp_zero, card_addr);
+ }
+#endif // AARCH64
+}
+
+// Emits the card-table post barrier for a store to 'addr' (must already be a
+// register): computes the card address card_table_base + (addr >> card_shift)
+// and marks that card dirty via set_card().
+//
+// The card table base is either materialized from the constant
+// 'card_table_base' (32-bit ARM with movw support) or loaded from the current
+// thread. FrameMap::LR_ptr_opr is used as the scratch register throughout.
+void LIRGenerator::CardTableModRef_post_barrier_helper(LIR_OprDesc* addr, LIR_Const* card_table_base) {
+ assert(addr->is_register(), "must be a register at this point");
+
+ LIR_Opr tmp = FrameMap::LR_ptr_opr;
+
+ // TODO-AARCH64: check performance
+ bool load_card_table_base_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw());
+ if (load_card_table_base_const) {
+ __ move((LIR_Opr)card_table_base, tmp);
+ } else {
+ __ move(new LIR_Address(FrameMap::Rthread_opr, in_bytes(JavaThread::card_table_base_offset()), T_ADDRESS), tmp);
+ }
+
+#ifdef AARCH64
+ // The negative scale requests a right shift of 'addr' (see the comment on
+ // the add below); the sum is written back into the same scratch register.
+ LIR_Address* shifted_reg_operand = new LIR_Address(tmp, addr, (LIR_Address::Scale) -CardTableModRefBS::card_shift, 0, T_BYTE);
+ LIR_Opr tmp2 = tmp;
+ __ add(tmp, LIR_OprFact::address(shifted_reg_operand), tmp2); // tmp2 = tmp + (addr >> CardTableModRefBS::card_shift)
+ LIR_Address* card_addr = new LIR_Address(tmp2, T_BYTE);
+#else
+ // Use unsigned type T_BOOLEAN here rather than (signed) T_BYTE since signed load
+ // byte instruction does not support the addressing mode we need.
+ LIR_Address* card_addr = new LIR_Address(tmp, addr, (LIR_Address::Scale) -CardTableModRefBS::card_shift, 0, T_BOOLEAN);
+#endif
+ if (UseCondCardMark) {
+ // Conditional card marking: read the card first and skip the store when
+ // it is already dirty.
+ if (UseConcMarkSweepGC) {
+ __ membar_storeload();
+ }
+ LIR_Opr cur_value = new_register(T_INT);
+ __ move(card_addr, cur_value);
+
+ LabelObj* L_already_dirty = new LabelObj();
+ __ cmp(lir_cond_equal, cur_value, LIR_OprFact::intConst(CardTableModRefBS::dirty_card_val()));
+ __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label());
+ set_card(tmp, card_addr);
+ __ branch_destination(L_already_dirty->label());
+ } else {
+ // Unconditional card marking; with CMS precleaning a store-store barrier
+ // is emitted before the card store.
+ if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
+ __ membar_storestore();
+ }
+ set_card(tmp, card_addr);
+ }
+}
+
+//----------------------------------------------------------------------
+// visitor functions
+//----------------------------------------------------------------------
+
+
+// Generates LIR for an array element store (StoreIndexed): operand loading,
+// optional range check, optional array store check, GC pre/post barriers for
+// object stores, and the store itself.
+void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
+ assert(x->is_pinned(),"");
+ bool needs_range_check = x->compute_needs_range_check();
+ bool use_length = x->length() != NULL;
+ bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT;
+ // A store check is needed for object stores unless the value is a constant
+ // null that is not being profiled.
+ bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL ||
+ !get_jobject_constant(x->value())->is_null_object() ||
+ x->should_profile());
+
+ LIRItem array(x->array(), this);
+ LIRItem index(x->index(), this);
+ LIRItem value(x->value(), this);
+ LIRItem length(this);
+
+ array.load_item();
+ index.load_nonconstant();
+
+ if (use_length && needs_range_check) {
+ length.set_instruction(x->length());
+ length.load_item();
+ }
+ // The value must be in a register when it is store-checked or masked as a
+ // boolean; otherwise it may stay in whatever form load_for_store allows.
+ if (needs_store_check || x->check_boolean()) {
+ value.load_item();
+ } else {
+ value.load_for_store(x->elt_type());
+ }
+
+ set_no_result(x);
+
+ // the CodeEmitInfo must be duplicated for each different
+ // LIR-instruction because spilling can occur anywhere between two
+ // instructions and so the debug information must be different
+ CodeEmitInfo* range_check_info = state_for(x);
+ CodeEmitInfo* null_check_info = NULL;
+ if (x->needs_null_check()) {
+ null_check_info = new CodeEmitInfo(range_check_info);
+ }
+
+ // emit array address setup early so it schedules better
+ // (obj_store is passed as needs_card_mark)
+ LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store);
+
+ if (GenerateRangeChecks && needs_range_check) {
+ if (use_length) {
+ // Unsigned compare: branches to the stub when length <= index.
+ __ cmp(lir_cond_belowEqual, length.result(), index.result());
+ __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result()));
+ } else {
+ array_range_check(array.result(), index.result(), null_check_info, range_check_info);
+ // range_check also does the null check
+ null_check_info = NULL;
+ }
+ }
+
+ if (GenerateArrayStoreCheck && needs_store_check) {
+ // The store check uses the fixed registers R0 and R1 as temporaries.
+ LIR_Opr tmp1 = FrameMap::R0_oop_opr;
+ LIR_Opr tmp2 = FrameMap::R1_oop_opr;
+ CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info);
+ __ store_check(value.result(), array.result(), tmp1, tmp2,
+ LIR_OprFact::illegalOpr, store_check_info,
+ x->profiled_method(), x->profiled_bci());
+ }
+
+#if INCLUDE_ALL_GCS
+ if (obj_store) {
+ // Needs GC write barriers.
+ pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+ true /* do_load */, false /* patch */, NULL);
+ }
+#endif // INCLUDE_ALL_GCS
+
+ LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info);
+ __ move(result, array_addr, null_check_info);
+ if (obj_store) {
+ post_barrier(LIR_OprFact::address(array_addr), value.result());
+ }
+}
+
+
+// Generates LIR for monitorenter. The object is loaded into a register; lock
+// and header temporaries are new pointer registers. A real scratch register
+// is allocated only with biased locking, otherwise atomicLockOpr() is passed.
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+  set_no_result(x);
+
+  LIR_Opr lock_reg = new_pointer_register();
+  LIR_Opr hdr_reg = new_pointer_register();
+
+  // Need a scratch register for biased locking on arm
+  LIR_Opr scratch_reg = UseBiasedLocking ? new_pointer_register() : atomicLockOpr();
+
+  // Debug info for the implicit null check, only when one is needed.
+  CodeEmitInfo* null_check_state = x->needs_null_check() ? state_for(x) : NULL;
+  CodeEmitInfo* enter_state = state_for(x, x->state(), true);
+
+  monitor_enter(obj.result(), lock_reg, hdr_reg, scratch_reg,
+                x->monitor_no(), null_check_state, enter_state);
+}
+
+
+// Generates LIR for monitorexit. The object item itself is not loaded;
+// monitor_exit() receives three new pointer registers (object temp, lock,
+// header), atomicLockOpr() as scratch, and the monitor number.
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item();
+  set_no_result(x);
+
+  LIR_Opr object_reg = new_pointer_register();
+  LIR_Opr lock_reg = new_pointer_register();
+  LIR_Opr hdr_reg = new_pointer_register();
+
+  monitor_exit(object_reg, lock_reg, hdr_reg, atomicLockOpr(), x->monitor_no());
+}
+
+
+// _ineg, _lneg, _fneg, _dneg
+//
+// Soft-float builds negate float/double values through the SharedRuntime
+// fneg/dneg entry points; everything else is emitted as an inline negate.
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+#ifdef __SOFTFP__
+  const ValueTag tag = x->type()->tag();
+  if (tag == floatTag || tag == doubleTag) {
+    address entry = (tag == floatTag)
+        ? CAST_FROM_FN_PTR(address, SharedRuntime::fneg)
+        : CAST_FROM_FN_PTR(address, SharedRuntime::dneg);
+    set_result(x, call_runtime(x->x(), entry, x->type(), NULL));
+    return;
+  }
+#endif // __SOFTFP__
+  LIRItem operand(x->x(), this);
+  operand.load_item();
+  LIR_Opr dst = rlock_result(x);
+  __ negate(operand.result(), dst);
+}
+
+
+// for _fadd, _fmul, _fsub, _fdiv, _frem
+// _dadd, _dmul, _dsub, _ddiv, _drem
+//
+// _frem/_drem always go through a SharedRuntime call. In soft-float builds
+// (__SOFTFP__) every other operation is a runtime call as well; otherwise it
+// is emitted inline through arithmetic_op_fpu() and this function returns
+// from inside the switch.
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+ // Set by every switch case that falls out of the switch below.
+ address runtime_func;
+ switch (x->op()) {
+ case Bytecodes::_frem:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
+ break;
+ case Bytecodes::_drem:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+ break;
+#ifdef __SOFTFP__
+ // Call function compiled with -msoft-float.
+
+ // __aeabi_XXXX_glibc: Imported code from glibc soft-fp bundle for calculation accuracy improvement. See CR 6757269.
+
+ case Bytecodes::_fadd:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_fadd_glibc);
+ break;
+ case Bytecodes::_fmul:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_fmul);
+ break;
+ case Bytecodes::_fsub:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_fsub_glibc);
+ break;
+ case Bytecodes::_fdiv:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_fdiv);
+ break;
+ case Bytecodes::_dadd:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_dadd_glibc);
+ break;
+ case Bytecodes::_dmul:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_dmul);
+ break;
+ case Bytecodes::_dsub:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_dsub_glibc);
+ break;
+ case Bytecodes::_ddiv:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_ddiv);
+ break;
+ default:
+ ShouldNotReachHere();
+#else // __SOFTFP__
+ // Hardware floating point: all remaining operations are emitted inline.
+ default: {
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+ left.load_item();
+ right.load_item();
+ rlock_result(x);
+ arithmetic_op_fpu(x->op(), x->operand(), left.result(), right.result(), x->is_strictfp());
+ return;
+ }
+#endif // __SOFTFP__
+ }
+
+ // Only reached for operations implemented as a runtime call.
+ LIR_Opr result = call_runtime(x->x(), x->y(), runtime_func, x->type(), NULL);
+ set_result(x, result);
+}
+
+
+// Emits an explicit divide-by-zero check: compares the divisor register with
+// a zero constant of the given type and branches to a DivByZeroStub (built
+// with 'info') when they are equal.
+void LIRGenerator::make_div_by_zero_check(LIR_Opr right_arg, BasicType type, CodeEmitInfo* info) {
+  assert(right_arg->is_register(), "must be");
+  LIR_Opr zero = make_constant(type, 0);
+  __ cmp(lir_cond_equal, right_arg, zero);
+  __ branch(lir_cond_equal, type, new DivByZeroStub(info));
+}
+
+
+// for _ladd, _lmul, _lsub, _ldiv, _lrem
+//
+// AARCH64: every operation is emitted inline (with an explicit zero check
+// for _ldiv/_lrem). 32-bit ARM: _lmul/_ldiv/_lrem are runtime calls, while
+// _ladd/_lsub are emitted inline.
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+ // Debug info is only needed for the operations that can throw.
+ CodeEmitInfo* info = NULL;
+ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
+ info = state_for(x);
+ }
+
+#ifdef AARCH64
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+ LIRItem* left_arg = &left;
+ LIRItem* right_arg = &right;
+
+ // Test if instr is commutative and if we should swap
+ if (x->is_commutative() && left.is_constant()) {
+ left_arg = &right;
+ right_arg = &left;
+ }
+
+ left_arg->load_item();
+ switch (x->op()) {
+ case Bytecodes::_ldiv:
+ right_arg->load_item();
+ make_div_by_zero_check(right_arg->result(), T_LONG, info);
+ __ idiv(left_arg->result(), right_arg->result(), rlock_result(x), LIR_OprFact::illegalOpr, NULL);
+ break;
+
+ case Bytecodes::_lrem: {
+ right_arg->load_item();
+ make_div_by_zero_check(right_arg->result(), T_LONG, info);
+ // a % b is implemented with 2 instructions:
+ // tmp = a/b (sdiv)
+ // res = a - b*tmp (msub)
+ LIR_Opr tmp = FrameMap::as_long_opr(Rtemp);
+ __ irem(left_arg->result(), right_arg->result(), rlock_result(x), tmp, NULL);
+ break;
+ }
+
+ case Bytecodes::_lmul:
+ // Multiplication by a constant power of two becomes a left shift.
+ if (right_arg->is_constant() && is_power_of_2_long(right_arg->get_jlong_constant())) {
+ right_arg->dont_load_item();
+ __ shift_left(left_arg->result(), exact_log2_long(right_arg->get_jlong_constant()), rlock_result(x));
+ } else {
+ right_arg->load_item();
+ __ mul(left_arg->result(), right_arg->result(), rlock_result(x));
+ }
+ break;
+
+ case Bytecodes::_ladd:
+ case Bytecodes::_lsub:
+ // A constant right operand goes through add_constant(), negated for _lsub.
+ if (right_arg->is_constant()) {
+ jlong c = right_arg->get_jlong_constant();
+ add_constant(left_arg->result(), (x->op() == Bytecodes::_ladd) ? c : -c, rlock_result(x));
+ } else {
+ right_arg->load_item();
+ arithmetic_op_long(x->op(), rlock_result(x), left_arg->result(), right_arg->result(), NULL);
+ }
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+#else
+ switch (x->op()) {
+ case Bytecodes::_ldiv:
+ case Bytecodes::_lrem: {
+ // The divisor is loaded here only for the explicit zero check; the
+ // division itself is done by the runtime call below.
+ LIRItem right(x->y(), this);
+ right.load_item();
+ make_div_by_zero_check(right.result(), T_LONG, info);
+ }
+ // Fall through
+ case Bytecodes::_lmul: {
+ address entry;
+ switch (x->op()) {
+ case Bytecodes::_lrem:
+ entry = CAST_FROM_FN_PTR(address, SharedRuntime::lrem);
+ break;
+ case Bytecodes::_ldiv:
+ entry = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv);
+ break;
+ case Bytecodes::_lmul:
+ entry = CAST_FROM_FN_PTR(address, SharedRuntime::lmul);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ // The operands are passed as (y, x), i.e. in reverse order.
+ // NOTE(review): presumably matches the parameter order of the
+ // SharedRuntime::l* entry points - confirm against their declarations.
+ LIR_Opr result = call_runtime(x->y(), x->x(), entry, x->type(), NULL);
+ set_result(x, result);
+ break;
+ }
+ case Bytecodes::_ladd:
+ case Bytecodes::_lsub: {
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+ left.load_item();
+ right.load_item();
+ rlock_result(x);
+ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+ break;
+ }
+ default:
+ ShouldNotReachHere();
+ }
+#endif // AARCH64
+}
+
+
+// for: _iadd, _imul, _isub, _idiv, _irem
+//
+// Three paths:
+//  1. _idiv/_irem: division by a constant power of two keeps the divisor as
+//     a constant; otherwise AARCH64 emits an explicit zero check plus inline
+//     idiv/irem, while 32-bit ARM forces the operands into fixed registers.
+//  2. AARCH64 only: _iadd/_isub with a constant right operand go through
+//     add_constant().
+//  3. Everything else is emitted through arithmetic_op_int().
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+ bool is_div_rem = x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem;
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+ LIRItem* left_arg = &left;
+ LIRItem* right_arg = &right;
+
+ // Test if instr is commutative and if we should swap
+ if (x->is_commutative() && left.is_constant()) {
+ left_arg = &right;
+ right_arg = &left;
+ }
+
+ if (is_div_rem) {
+ CodeEmitInfo* info = state_for(x);
+ if (x->op() == Bytecodes::_idiv && right_arg->is_constant() && is_power_of_2(right_arg->get_jint_constant())) {
+ // Division by a constant power of two: the divisor stays a constant
+ // operand and no temp register is passed.
+ left_arg->load_item();
+ right_arg->dont_load_item();
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
+ LIR_Opr result = rlock_result(x);
+ __ idiv(left_arg->result(), right_arg->result(), result, tmp, info);
+ } else {
+#ifdef AARCH64
+ left_arg->load_item();
+ right_arg->load_item();
+ make_div_by_zero_check(right_arg->result(), T_INT, info);
+ if (x->op() == Bytecodes::_idiv) {
+ __ idiv(left_arg->result(), right_arg->result(), rlock_result(x), LIR_OprFact::illegalOpr, NULL);
+ } else {
+ // a % b is implemented with 2 instructions:
+ // tmp = a/b (sdiv)
+ // res = a - b*tmp (msub)
+ LIR_Opr tmp = FrameMap::as_opr(Rtemp);
+ __ irem(left_arg->result(), right_arg->result(), rlock_result(x), tmp, NULL);
+ }
+#else
+ // Fixed register assignment: dividend in R0, divisor in R2, R1 as temp;
+ // the remainder is taken from R0 and the quotient from R1.
+ // NOTE(review): presumably the register convention of the division
+ // routine used by the LIR assembler - confirm.
+ left_arg->load_item_force(FrameMap::R0_opr);
+ right_arg->load_item_force(FrameMap::R2_opr);
+ LIR_Opr tmp = FrameMap::R1_opr;
+ LIR_Opr result = rlock_result(x);
+ LIR_Opr out_reg;
+ if (x->op() == Bytecodes::_irem) {
+ out_reg = FrameMap::R0_opr;
+ __ irem(left_arg->result(), right_arg->result(), out_reg, tmp, info);
+ } else if (x->op() == Bytecodes::_idiv) {
+ out_reg = FrameMap::R1_opr;
+ __ idiv(left_arg->result(), right_arg->result(), out_reg, tmp, info);
+ }
+ __ move(out_reg, result);
+#endif // AARCH64
+ }
+
+#ifdef AARCH64
+ } else if (((x->op() == Bytecodes::_iadd) || (x->op() == Bytecodes::_isub)) && right_arg->is_constant()) {
+ // Add/sub of a constant: subtraction is turned into addition of -c.
+ left_arg->load_item();
+ jint c = right_arg->get_jint_constant();
+ right_arg->dont_load_item();
+ add_constant(left_arg->result(), (x->op() == Bytecodes::_iadd) ? c : -c, rlock_result(x));
+#endif // AARCH64
+
+ } else {
+ left_arg->load_item();
+ if (x->op() == Bytecodes::_imul && right_arg->is_constant()) {
+ // Keep the multiplier as a constant only when it is positive and of
+ // the form 2^k, 2^k + 1 or 2^k - 1.
+ int c = right_arg->get_jint_constant();
+ if (c > 0 && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) {
+ right_arg->dont_load_item();
+ } else {
+ right_arg->load_item();
+ }
+ } else {
+ AARCH64_ONLY(assert(!right_arg->is_constant(), "constant right_arg is already handled by this moment");)
+ right_arg->load_nonconstant();
+ }
+ rlock_result(x);
+ assert(right_arg->is_constant() || right_arg->is_register(), "wrong state of right");
+ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), NULL);
+ }
+}
+
+
+// Dispatches an arithmetic instruction to the float/double, long or int
+// handler according to its result tag. Both inputs must carry the same tag as
+// the result; any other tag is a fatal error.
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  const ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  if (tag == floatTag || tag == doubleTag) {
+    do_ArithmeticOp_FPU(x);
+  } else if (tag == longTag) {
+    do_ArithmeticOp_Long(x);
+  } else if (tag == intTag) {
+    do_ArithmeticOp_Int(x);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+//
+// A constant shift count stays a constant operand; otherwise it is loaded
+// into a register. On 32-bit ARM the count item is marked as destroyed when
+// the shifted value is a long.
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+  LIRItem value(x->x(), this);
+  LIRItem count(x->y(), this);
+
+#ifndef AARCH64
+  if (value.type()->is_long()) {
+    count.set_destroys_register();
+  }
+#endif // !AARCH64
+
+  if (!count.is_constant()) {
+    count.load_item();
+  } else {
+    assert(count.type()->as_IntConstant() != NULL, "should be");
+    count.dont_load_item();
+  }
+  value.load_item();
+
+  LIR_Opr dst = rlock_result(x);
+  shift_op(x->op(), dst, value.result(), count.result(), LIR_OprFact::illegalOpr);
+}
+
+
+// _iand, _land, _ior, _lor, _ixor, _lxor
+//
+// The left operand is always loaded. On AARCH64 the right operand stays a
+// constant only when it is encodable as a logical immediate; on 32-bit ARM
+// that decision is left to load_nonconstant().
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+
+#ifdef AARCH64
+  const bool keep_constant = right.is_constant() && can_inline_as_constant_in_logic(right.value());
+  if (!keep_constant) {
+    right.load_item();
+  } else {
+    right.dont_load_item();
+  }
+#else
+  right.load_nonconstant();
+#endif // AARCH64
+
+  logic_op(x->op(), rlock_result(x), left.result(), right.result());
+}
+
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg
+//
+// Soft-float builds (__SOFTFP__) implement the floating-point compares as
+// SharedRuntime calls and only _lcmp inline; all other builds emit
+// fcmp2int/lcmp2int inline.
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+#ifdef __SOFTFP__
+ address runtime_func;
+ switch (x->op()) {
+ case Bytecodes::_fcmpl:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::fcmpl);
+ break;
+ case Bytecodes::_fcmpg:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::fcmpg);
+ break;
+ case Bytecodes::_dcmpl:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dcmpl);
+ break;
+ case Bytecodes::_dcmpg:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dcmpg);
+ break;
+ case Bytecodes::_lcmp: {
+ // Long compare needs no runtime call; handled inline, then return.
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+ left.load_item();
+ right.load_nonconstant();
+ LIR_Opr reg = rlock_result(x);
+ __ lcmp2int(left.result(), right.result(), reg);
+ return;
+ }
+ default:
+ ShouldNotReachHere();
+ }
+ // Only the floating-point compares reach this point.
+ LIR_Opr result = call_runtime(x->x(), x->y(), runtime_func, x->type(), NULL);
+ set_result(x, result);
+#else // __SOFTFP__
+ LIRItem left(x->x(), this);
+ LIRItem right(x->y(), this);
+ left.load_item();
+
+#ifdef AARCH64
+ // Keep the right operand as a constant only when the compare can encode it.
+ if (right.is_constant() && can_inline_as_constant_in_cmp(right.value())) {
+ right.dont_load_item();
+ } else {
+ right.load_item();
+ }
+#else
+ right.load_nonconstant();
+#endif // AARCH64
+
+ LIR_Opr reg = rlock_result(x);
+
+ if (x->x()->type()->is_float_kind()) {
+ // The last argument is true for the "l" variants (_fcmpl/_dcmpl).
+ Bytecodes::Code code = x->op();
+ __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+ } else if (x->x()->type()->tag() == longTag) {
+ __ lcmp2int(left.result(), right.result(), reg);
+ } else {
+ ShouldNotReachHere();
+ }
+#endif // __SOFTFP__
+}
+
+
+void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { // CAS on the field at obj+offset; 'type' selects the object/int/long variant
+ assert(x->number_of_arguments() == 4, "wrong type");
+ LIRItem obj (x->argument_at(0), this); // object
+ LIRItem offset(x->argument_at(1), this); // offset of field
+ LIRItem cmp (x->argument_at(2), this); // value to compare with field
+ LIRItem val (x->argument_at(3), this); // replace field with val if matches cmp
+
+ LIR_Opr addr = new_pointer_register();
+ LIR_Opr tmp1 = LIR_OprFact::illegalOpr; // temporaries stay illegalOpr unless a variant below allocates them
+ LIR_Opr tmp2 = LIR_OprFact::illegalOpr;
+
+ // get address of field
+ obj.load_item();
+ offset.load_item();
+ cmp.load_item();
+ val.load_item();
+
+ __ add(obj.result(), offset.result(), addr);
+ LIR_Opr result = rlock_result(x);
+
+ if (type == objectType) {
+#if INCLUDE_ALL_GCS
+ // Do the pre-write barrier, if any.
+ pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+ true /* do_load */, false /* patch */, NULL);
+#endif // INCLUDE_ALL_GCS
+#ifdef AARCH64
+ if (UseCompressedOops) { // only this configuration allocates both temporaries for cas_obj
+ tmp1 = new_pointer_register();
+ tmp2 = new_pointer_register();
+ }
+#endif // AARCH64
+ __ cas_obj(addr, cmp.result(), val.result(), tmp1, tmp2, result);
+ post_barrier(addr, val.result());
+ }
+ else if (type == intType) {
+ __ cas_int(addr, cmp.result(), val.result(), tmp1, tmp2, result); // tmp1/tmp2 are both still illegalOpr here; passed like the other variants
+ }
+ else if (type == longType) {
+#ifndef AARCH64
+ tmp1 = new_register(T_LONG); // non-AARCH64 only: cas_long gets a T_LONG temporary
+#endif // !AARCH64
+ __ cas_long(addr, cmp.result(), val.result(), tmp1, tmp2, result);
+ }
+ else {
+ ShouldNotReachHere();
+ }
+}
+
+
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { // abs/sqrt are emitted inline unless __SOFTFP__; everything else calls SharedRuntime
+ address runtime_func;
+ switch (x->id()) {
+ case vmIntrinsics::_dabs: {
+#ifdef __SOFTFP__
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dabs);
+ break;
+#else
+ assert(x->number_of_arguments() == 1, "wrong type");
+ LIRItem value(x->argument_at(0), this);
+ value.load_item();
+ __ abs(value.result(), rlock_result(x), LIR_OprFact::illegalOpr); // emitted as a LIR abs op; no runtime call
+ return;
+#endif // __SOFTFP__
+ }
+ case vmIntrinsics::_dsqrt: {
+#ifdef __SOFTFP__
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt);
+ break;
+#else
+ assert(x->number_of_arguments() == 1, "wrong type");
+ LIRItem value(x->argument_at(0), this);
+ value.load_item();
+ __ sqrt(value.result(), rlock_result(x), LIR_OprFact::illegalOpr); // emitted as a LIR sqrt op; no runtime call
+ return;
+#endif // __SOFTFP__
+ }
+ case vmIntrinsics::_dsin:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+ break;
+ case vmIntrinsics::_dcos:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+ break;
+ case vmIntrinsics::_dtan:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+ break;
+ case vmIntrinsics::_dlog:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+ break;
+ case vmIntrinsics::_dlog10:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+ break;
+ case vmIntrinsics::_dexp:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+ break;
+ case vmIntrinsics::_dpow:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+ break;
+ default:
+ ShouldNotReachHere();
+ return;
+ }
+
+ LIR_Opr result; // reached only by the cases that selected a runtime_func above
+ if (x->number_of_arguments() == 1) {
+ result = call_runtime(x->argument_at(0), runtime_func, x->type(), NULL);
+ } else {
+ assert(x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow, "unexpected intrinsic"); // dpow is the only two-argument case
+ result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_func, x->type(), NULL);
+ }
+ set_result(x, result);
+}
+
+void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { // no FMA support in this port: reaching this is reported via fatal()
+ fatal("FMA intrinsic is not implemented on this platform");
+}
+
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { // no vectorizedMismatch support in this port: reported via fatal()
+ fatal("vectorizedMismatch intrinsic is not implemented on this platform");
+}
+
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) { // emits a LIR arraycopy for (src, src_pos, dst, dst_pos, length)
+ CodeEmitInfo* info = state_for(x, x->state());
+ assert(x->number_of_arguments() == 5, "wrong type");
+ LIRItem src(x->argument_at(0), this);
+ LIRItem src_pos(x->argument_at(1), this);
+ LIRItem dst(x->argument_at(2), this);
+ LIRItem dst_pos(x->argument_at(3), this);
+ LIRItem length(x->argument_at(4), this);
+
+ // We put arguments into the same registers which are used for a Java call.
+ // Note: we use fixed registers for all arguments because all registers
+ // are caller-saved, so the register allocator treats them all as used.
+ src.load_item_force (FrameMap::R0_oop_opr);
+ src_pos.load_item_force(FrameMap::R1_opr);
+ dst.load_item_force (FrameMap::R2_oop_opr);
+ dst_pos.load_item_force(FrameMap::R3_opr);
+ length.load_item_force (FrameMap::R4_opr);
+ LIR_Opr tmp = (FrameMap::R5_opr); // R5 is handed to arraycopy as its temporary
+ set_no_result(x);
+
+ int flags;
+ ciArrayKlass* expected_type;
+ arraycopy_helper(x, &flags, &expected_type); // fills in flags and expected_type for the op below
+ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(),
+ tmp, expected_type, flags, info);
+}
+
+void LIRGenerator::do_update_CRC32(Intrinsic* x) { // no CRC32 intrinsic in this port: reported via fatal()
+ fatal("CRC32 intrinsic is not implemented on this platform");
+}
+
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) { // no CRC32C intrinsic in this port
+ Unimplemented(); // NOTE(review): the sibling stubs use fatal() with a message; this one uses Unimplemented()
+}
+
+void LIRGenerator::do_Convert(Convert* x) { // conversions listed in the switch call a runtime function; all others emit a LIR convert
+ address runtime_func;
+ switch (x->op()) {
+#ifndef AARCH64
+ case Bytecodes::_l2f: // non-AARCH64: long<->float/double conversions always go through SharedRuntime
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::l2f);
+ break;
+ case Bytecodes::_l2d:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::l2d);
+ break;
+ case Bytecodes::_f2l:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::f2l);
+ break;
+ case Bytecodes::_d2l:
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::d2l);
+ break;
+#ifdef __SOFTFP__
+ case Bytecodes::_f2d: // soft-float only: the remaining FP conversions call the __aeabi_* helpers
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_f2d);
+ break;
+ case Bytecodes::_d2f:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_d2f);
+ break;
+ case Bytecodes::_i2f:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_i2f);
+ break;
+ case Bytecodes::_i2d:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_i2d);
+ break;
+ case Bytecodes::_f2i:
+ runtime_func = CAST_FROM_FN_PTR(address, __aeabi_f2iz);
+ break;
+ case Bytecodes::_d2i:
+ // This is implemented in hard float in assembler on arm but a call
+ // on other platforms.
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::d2i);
+ break;
+#endif // __SOFTFP__
+#endif // !AARCH64
+ default: { // every conversion not listed above is emitted inline and returns here
+ LIRItem value(x->value(), this);
+ value.load_item();
+ LIR_Opr reg = rlock_result(x);
+ __ convert(x->op(), value.result(), reg, NULL);
+ return;
+ }
+ }
+
+ LIR_Opr result = call_runtime(x->value(), runtime_func, x->type(), NULL); // reached only by the runtime-call cases
+ set_result(x, result);
+}
+
+
+void LIRGenerator::do_NewInstance(NewInstance* x) { // allocates an instance of x->klass() and moves it into the result register
+ print_if_not_loaded(x);
+
+ CodeEmitInfo* info = state_for(x, x->state());
+ LIR_Opr reg = result_register_for(x->type()); // R0 is required by runtime call in NewInstanceStub::emit_code
+ LIR_Opr klass_reg = FrameMap::R1_metadata_opr; // R1 is required by runtime call in NewInstanceStub::emit_code
+ LIR_Opr tmp1 = new_register(objectType);
+ LIR_Opr tmp2 = new_register(objectType);
+ LIR_Opr tmp3 = FrameMap::LR_oop_opr; // LR is used as the third temporary
+
+ new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3,
+ LIR_OprFact::illegalOpr, klass_reg, info);
+
+ LIR_Opr result = rlock_result(x);
+ __ move(reg, result); // copy from the fixed register into the allocated result operand
+}
+
+
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
+ // Evaluate state_for() first, because it can emit code
+ // with the same fixed registers that are used here (R1, R2)
+ CodeEmitInfo* info = state_for(x, x->state());
+ LIRItem length(x->length(), this);
+
+ length.load_item_force(FrameMap::R2_opr); // R2 is required by runtime call in NewTypeArrayStub::emit_code
+ LIR_Opr len = length.result();
+
+ LIR_Opr reg = result_register_for(x->type()); // R0 is required by runtime call in NewTypeArrayStub::emit_code
+ LIR_Opr klass_reg = FrameMap::R1_metadata_opr; // R1 is required by runtime call in NewTypeArrayStub::emit_code
+
+ LIR_Opr tmp1 = new_register(objectType);
+ LIR_Opr tmp2 = new_register(objectType);
+ LIR_Opr tmp3 = FrameMap::LR_oop_opr; // LR is used as the third temporary
+ LIR_Opr tmp4 = LIR_OprFact::illegalOpr; // no fourth temporary is supplied
+
+ BasicType elem_type = x->elt_type();
+ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); // put the type-array klass for elem_type into klass_reg
+
+ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
+ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
+
+ LIR_Opr result = rlock_result(x);
+ __ move(reg, result); // copy from the fixed register into the allocated result operand
+}
+
+
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
+ // Evaluate state_for() first, because it can emit code
+ // with the same fixed registers that are used here (R1, R2)
+ CodeEmitInfo* info = state_for(x, x->state());
+ LIRItem length(x->length(), this);
+
+ length.load_item_force(FrameMap::R2_opr); // R2 is required by runtime call in NewObjectArrayStub::emit_code
+ LIR_Opr len = length.result();
+
+ CodeEmitInfo* patching_info = NULL; // set only when the klass is not loaded (or PatchALot is on)
+ if (!x->klass()->is_loaded() || PatchALot) {
+ patching_info = state_for(x, x->state_before());
+ }
+
+ LIR_Opr reg = result_register_for(x->type()); // R0 is required by runtime call in NewObjectArrayStub::emit_code
+ LIR_Opr klass_reg = FrameMap::R1_metadata_opr; // R1 is required by runtime call in NewObjectArrayStub::emit_code
+
+ LIR_Opr tmp1 = new_register(objectType);
+ LIR_Opr tmp2 = new_register(objectType);
+ LIR_Opr tmp3 = FrameMap::LR_oop_opr; // LR is used as the third temporary
+ LIR_Opr tmp4 = LIR_OprFact::illegalOpr; // no fourth temporary is supplied
+
+ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
+ ciMetadata* obj = ciObjArrayKlass::make(x->klass());
+ if (obj == ciEnv::unloaded_ciobjarrayklass()) { // give up on this compilation instead of emitting the allocation
+ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+ }
+ klass2reg_with_patching(klass_reg, obj, patching_info);
+ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+
+ LIR_Opr result = rlock_result(x);
+ __ move(reg, result); // copy from the fixed register into the allocated result operand
+}
+
+
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { // stores the dimension sizes on the stack and calls the new_multi_array runtime entry
+ Values* dims = x->dims();
+ int i = dims->length();
+ LIRItemList* items = new LIRItemList(i, i, NULL);
+ while (i-- > 0) { // create one LIRItem per dimension, walking from last to first
+ LIRItem* size = new LIRItem(dims->at(i), this);
+ items->at_put(i, size);
+ }
+
+ // Need to get the info before, as the items may become invalid through item_free
+ CodeEmitInfo* patching_info = NULL;
+ if (!x->klass()->is_loaded() || PatchALot) {
+ patching_info = state_for(x, x->state_before());
+
+ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
+ // clone all handlers (NOTE: Usually this is handled transparently
+ // by the CodeEmitInfo cloning logic in CodeStub constructors but
+ // is done explicitly here because a stub isn't being used).
+ x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+ }
+
+ i = dims->length();
+ while (i-- > 0) { // load each size and store it at stack offset i * BytesPerInt
+ LIRItem* size = items->at(i);
+ size->load_item();
+ LIR_Opr sz = size->result();
+ assert(sz->type() == T_INT, "should be");
+ store_stack_parameter(sz, in_ByteSize(i * BytesPerInt));
+ }
+
+ CodeEmitInfo* info = state_for(x, x->state());
+ LIR_Opr klass_reg = FrameMap::R0_metadata_opr;
+ klass2reg_with_patching(klass_reg, x->klass(), patching_info);
+
+ LIR_Opr rank = FrameMap::R2_opr;
+ __ move(LIR_OprFact::intConst(x->rank()), rank);
+ LIR_Opr varargs = FrameMap::SP_opr; // the stack pointer is passed as the location of the stored sizes
+ LIR_OprList* args = new LIR_OprList(3);
+ args->append(klass_reg);
+ args->append(rank);
+ args->append(varargs);
+ LIR_Opr reg = result_register_for(x->type());
+ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+ LIR_OprFact::illegalOpr, reg, args, info);
+
+ LIR_Opr result = rlock_result(x);
+ __ move(reg, result); // copy from the fixed register into the allocated result operand
+}
+
+
+void LIRGenerator::do_BlockBegin(BlockBegin* x) { // intentionally emits no LIR
+ // nothing to do for now
+}
+
+
+void LIRGenerator::do_CheckCast(CheckCast* x) { // emits a LIR checkcast of x->obj() against x->klass()
+ LIRItem obj(x->obj(), this);
+ CodeEmitInfo* patching_info = NULL; // set only when the klass is not loaded, or under PatchALot for ordinary casts
+ if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) {
+ patching_info = state_for(x, x->state_before());
+ }
+
+ obj.load_item();
+
+ CodeEmitInfo* info_for_exception = state_for(x);
+ CodeStub* stub; // failure stub: the error it throws depends on the kind of check
+ if (x->is_incompatible_class_change_check()) {
+ assert(patching_info == NULL, "can't patch this");
+ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id,
+ LIR_OprFact::illegalOpr, info_for_exception);
+ } else {
+ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id,
+ LIR_OprFact::illegalOpr, info_for_exception);
+ }
+
+ LIR_Opr out_reg = rlock_result(x);
+ LIR_Opr tmp1 = FrameMap::R0_oop_opr; // R0 and R1 are used as fixed temporaries
+ LIR_Opr tmp2 = FrameMap::R1_oop_opr;
+ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; // no third temporary is supplied
+
+ __ checkcast(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, x->direct_compare(),
+ info_for_exception, patching_info, stub, x->profiled_method(), x->profiled_bci());
+}
+
+
+void LIRGenerator::do_InstanceOf(InstanceOf* x) { // emits a LIR instanceof of x->obj() against x->klass()
+ LIRItem obj(x->obj(), this);
+ CodeEmitInfo* patching_info = NULL; // set only when the klass is not loaded (or PatchALot is on)
+ if (!x->klass()->is_loaded() || PatchALot) {
+ patching_info = state_for(x, x->state_before());
+ }
+
+ obj.load_item();
+ LIR_Opr out_reg = rlock_result(x);
+ LIR_Opr tmp1 = FrameMap::R0_oop_opr; // R0 and R1 are used as fixed temporaries
+ LIR_Opr tmp2 = FrameMap::R1_oop_opr;
+ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; // no third temporary is supplied
+
+ __ instanceof(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3,
+ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
+}
+
+
+#ifdef __SOFTFP__
+// Turn operator if (f <op> g) into runtime call:
+// call __aeabi_fcmp<op>(f, g) (or the SharedRuntime::unordered_* variant)
+// cmp(eq, 1) (cmp(eq, 0) for the notEqual condition)
+// branch(eq, true path).
+void LIRGenerator::do_soft_float_compare(If* x) {
+ assert(x->number_of_sux() == 2, "inconsistency");
+ ValueTag tag = x->x()->type()->tag();
+ If::Condition cond = x->cond();
+ address runtime_func = NULL; // initialized so the ShouldNotReachHere() paths never leave an indeterminate pointer
+ // The __aeabi_* helpers return false for an unordered comparison, so the
+ // SharedRuntime::unordered_* variants are selected when unordered must count as true.
+ bool unordered_is_true = x->unordered_is_true();
+ // notEqual reuses the 'eq' helper and tests its result against 0 instead of 1
+ bool compare_to_zero = false;
+ switch (lir_cond(cond)) {
+ case lir_cond_notEqual:
+ compare_to_zero = true; // fall through
+ case lir_cond_equal:
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, __aeabi_fcmpeq):
+ CAST_FROM_FN_PTR(address, __aeabi_dcmpeq);
+ break;
+ case lir_cond_less:
+ if (unordered_is_true) {
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_fcmplt):
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_dcmplt);
+ } else {
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, __aeabi_fcmplt):
+ CAST_FROM_FN_PTR(address, __aeabi_dcmplt);
+ }
+ break;
+ case lir_cond_lessEqual:
+ if (unordered_is_true) {
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_fcmple):
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_dcmple);
+ } else {
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, __aeabi_fcmple):
+ CAST_FROM_FN_PTR(address, __aeabi_dcmple);
+ }
+ break;
+ case lir_cond_greaterEqual:
+ if (unordered_is_true) {
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_fcmpge):
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_dcmpge);
+ } else {
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, __aeabi_fcmpge):
+ CAST_FROM_FN_PTR(address, __aeabi_dcmpge);
+ }
+ break;
+ case lir_cond_greater:
+ if (unordered_is_true) {
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_fcmpgt):
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_dcmpgt);
+ } else {
+ runtime_func = tag == floatTag ?
+ CAST_FROM_FN_PTR(address, __aeabi_fcmpgt):
+ CAST_FROM_FN_PTR(address, __aeabi_dcmpgt);
+ }
+ break;
+ case lir_cond_aboveEqual:
+ case lir_cond_belowEqual:
+ ShouldNotReachHere(); // We're not going to get these.
+ default:
+ assert(lir_cond(cond) == lir_cond_always, "must be");
+ ShouldNotReachHere();
+ }
+ set_no_result(x);
+
+ // add safepoint before generating condition code so it can be recomputed
+ if (x->is_safepoint()) {
+ increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci());
+ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));
+ }
+ // Call float compare function, returns (1,0) if true or false.
+ LIR_Opr result = call_runtime(x->x(), x->y(), runtime_func, intType, NULL);
+ __ cmp(lir_cond_equal, result,
+ compare_to_zero ?
+ LIR_OprFact::intConst(0) : LIR_OprFact::intConst(1));
+ profile_branch(x, cond);
+ move_to_phi(x->state());
+ __ branch(lir_cond_equal, T_INT, x->tsux()); // taken when the helper result equals the expected constant
+}
+#endif // __SOFTFP__
+
+void LIRGenerator::do_If(If* x) { // emits compare + conditional branch to tsux, then a jump to the default successor
+ assert(x->number_of_sux() == 2, "inconsistency");
+ ValueTag tag = x->x()->type()->tag();
+
+#ifdef __SOFTFP__
+ if (tag == floatTag || tag == doubleTag) { // soft-float: FP compares are handled by do_soft_float_compare()
+ do_soft_float_compare(x);
+ assert(x->default_sux() == x->fsux(), "wrong destination above");
+ __ jump(x->default_sux());
+ return;
+ }
+#endif // __SOFTFP__
+
+ LIRItem xitem(x->x(), this);
+ LIRItem yitem(x->y(), this);
+ LIRItem* xin = &xitem;
+ LIRItem* yin = &yitem;
+ If::Condition cond = x->cond();
+
+#ifndef AARCH64
+ if (tag == longTag) {
+ if (cond == If::gtr || cond == If::leq) { // long gtr/leq: mirror the condition and swap the operands
+ cond = Instruction::mirror(cond);
+ xin = &yitem;
+ yin = &xitem;
+ }
+ xin->set_destroys_register();
+ }
+#endif // !AARCH64
+
+ xin->load_item();
+ LIR_Opr left = xin->result();
+ LIR_Opr right;
+
+#ifdef AARCH64
+ if (yin->is_constant() && can_inline_as_constant_in_cmp(yin->value())) {
+ yin->dont_load_item(); // constant accepted by can_inline_as_constant_in_cmp(): leave it unloaded
+ } else {
+ yin->load_item();
+ }
+ right = yin->result();
+#else
+ if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 &&
+ (cond == If::eql || cond == If::neq)) {
+ // inline long zero
+ right = LIR_OprFact::value_type(yin->value()->type());
+ } else {
+ yin->load_nonconstant();
+ right = yin->result();
+ }
+#endif // AARCH64
+
+ set_no_result(x);
+
+ // add safepoint before generating condition code so it can be recomputed
+ if (x->is_safepoint()) {
+ increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci());
+ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));
+ }
+
+ __ cmp(lir_cond(cond), left, right);
+ profile_branch(x, cond);
+ move_to_phi(x->state());
+ if (x->x()->type()->is_float_kind()) {
+ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); // float kinds also pass the usux() successor
+ } else {
+ __ branch(lir_cond(cond), right->type(), x->tsux());
+ }
+ assert(x->default_sux() == x->fsux(), "wrong destination above");
+ __ jump(x->default_sux());
+}
+
+
+LIR_Opr LIRGenerator::getThreadPointer() { // returns the fixed Rthread operand from FrameMap
+ return FrameMap::Rthread_opr;
+}
+
+void LIRGenerator::trace_block_entry(BlockBegin* block) { // emits a leaf call to Runtime1::trace_block_entry with the block id
+ __ move(LIR_OprFact::intConst(block->block_id()), FrameMap::R0_opr); // the block id is passed in R0
+ LIR_OprList* args = new LIR_OprList(1);
+ args->append(FrameMap::R0_opr);
+ address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry);
+ __ call_runtime_leaf(func, getThreadTemp(), LIR_OprFact::illegalOpr, args); // illegalOpr is passed for the result operand
+}
+
+
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, // stores 'value' to a volatile field at 'address'
+ CodeEmitInfo* info) {
+#ifndef AARCH64
+ if (value->is_double_cpu()) { // non-AARCH64: values held in a CPU register pair take a dedicated volatile store
+ assert(address->index()->is_illegal(), "should have a constant displacement");
+ LIR_Opr tmp = new_pointer_register();
+ add_large_constant(address->base(), address->disp(), tmp); // tmp = base + disp, so the store below uses displacement 0
+ __ volatile_store_mem_reg(value, new LIR_Address(tmp, (intx)0, address->type()), info);
+ return;
+ }
+#endif // !AARCH64
+ // TODO-AARCH64 implement with stlr instruction
+ __ store(value, address, info, lir_patch_none); // all other cases use a plain store
+}
+
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, // loads a volatile field at 'address' into 'result'
+ CodeEmitInfo* info) {
+#ifndef AARCH64
+ if (result->is_double_cpu()) { // non-AARCH64: results held in a CPU register pair take a dedicated volatile load
+ assert(address->index()->is_illegal(), "should have a constant displacement");
+ LIR_Opr tmp = new_pointer_register();
+ add_large_constant(address->base(), address->disp(), tmp); // tmp = base + disp, so the load below uses displacement 0
+ __ volatile_load_mem_reg(new LIR_Address(tmp, (intx)0, address->type()), result, info);
+ return;
+ }
+#endif // !AARCH64
+ // TODO-AARCH64 implement with ldar instruction
+ __ load(address, result, info, lir_patch_none); // all other cases use a plain load
+}
+
+void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, // loads a value of 'type' from src + offset into dst
+ BasicType type, bool is_volatile) {
+#ifdef AARCH64
+ __ load(new LIR_Address(src, offset, type), dst); // AARCH64: always a plain indexed load; is_volatile is not consulted here
+#else
+ assert(offset->is_single_cpu(), "must be");
+ if (is_volatile && dst->is_double_cpu()) { // volatile value in a CPU register pair: dedicated volatile load
+ LIR_Opr tmp = new_pointer_register();
+ __ add(src, offset, tmp);
+ __ volatile_load_mem_reg(new LIR_Address(tmp, (intx)0, type), dst, NULL);
+ } else if (type == T_FLOAT || type == T_DOUBLE) {
+ // fld doesn't have indexed addressing mode
+ LIR_Opr tmp = new_register(T_INT); // so the address is computed into tmp first
+ __ add(src, offset, tmp);
+ __ load(new LIR_Address(tmp, (intx)0, type), dst);
+ } else {
+ __ load(new LIR_Address(src, offset, type), dst);
+ }
+#endif // AARCH64
+}
+
+void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, // stores 'data' of 'type' to src + offset
+ BasicType type, bool is_volatile) {
+#ifdef AARCH64
+ LIR_Address* addr = new LIR_Address(src, offset, type);
+ if (type == T_ARRAY || type == T_OBJECT) { // reference stores are wrapped in pre/post barriers
+ pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+ true /* do_load */, false /* patch */, NULL);
+ __ move(data, addr);
+ assert(src->is_register(), "must be register");
+ post_barrier(LIR_OprFact::address(addr), data);
+ } else {
+ __ move(data, addr);
+ }
+#else
+ assert(offset->is_single_cpu(), "must be");
+ if (is_volatile && data->is_double_cpu()) { // volatile value in a CPU register pair: dedicated volatile store
+ LIR_Opr tmp = new_register(T_INT);
+ __ add(src, offset, tmp);
+ __ volatile_store_mem_reg(data, new LIR_Address(tmp, (intx)0, type), NULL);
+ } else if (type == T_FLOAT || type == T_DOUBLE) {
+ // fst doesn't have indexed addressing mode
+ LIR_Opr tmp = new_register(T_INT); // so the address is computed into tmp first
+ __ add(src, offset, tmp);
+ __ move(data, new LIR_Address(tmp, (intx)0, type));
+ } else {
+ LIR_Address* addr = new LIR_Address(src, offset, type);
+ bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+#if INCLUDE_ALL_GCS
+ if (is_obj) {
+ // Do the pre-write barrier, if any.
+ pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+ true /* do_load */, false /* patch */, NULL);
+ }
+#endif // INCLUDE_ALL_GCS
+ __ move(data, addr);
+ if (is_obj) { // the post barrier follows the store for reference types
+ assert(src->is_register(), "must be register");
+ post_barrier(LIR_OprFact::address(addr), data);
+ }
+ }
+#endif // AARCH64
+}
+
+void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { // emits xadd (x->is_add()) or xchg on the location object + offset
+ BasicType type = x->basic_type();
+ LIRItem src(x->object(), this);
+ LIRItem off(x->offset(), this);
+ LIRItem value(x->value(), this);
+
+ src.load_item();
+ if (x->is_add()) {
+ value.load_nonconstant(); // add: the load decision for the value is delegated to load_nonconstant()
+ } else {
+ value.load_item();
+ }
+ off.load_nonconstant();
+
+ LIR_Opr dst = rlock_result(x, type);
+ LIR_Opr data = value.result();
+ bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+
+ assert (type == T_INT || type == T_LONG || (!x->is_add() && is_obj), "unexpected type");
+ LIR_Opr addr_ptr = new_pointer_register();
+
+ __ add(src.result(), off.result(), addr_ptr); // addr_ptr = object + offset
+
+ LIR_Address* addr = new LIR_Address(addr_ptr, (intx)0, type); // used only as the barrier address below
+
+ if (x->is_add()) {
+ LIR_Opr tmp = new_register(type);
+ __ xadd(addr_ptr, data, dst, tmp);
+ } else {
+ LIR_Opr tmp = (UseCompressedOops && is_obj) ? new_pointer_register() : LIR_OprFact::illegalOpr; // a temporary is allocated only for compressed oops
+ if (is_obj) {
+ // Do the pre-write barrier, if any.
+ pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+ true /* do_load */, false /* patch */, NULL);
+ }
+ __ xchg(addr_ptr, data, dst, tmp);
+ if (is_obj) {
+ // Seems to be a precise address
+ post_barrier(LIR_OprFact::address(addr), data);
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_LIRGenerator_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+ // Helper to set the card at the given address to the given value.
+ void set_card(LIR_Opr value, LIR_Address* card_addr);
+
+ void make_div_by_zero_check(LIR_Opr right_arg, BasicType type, CodeEmitInfo* info); // NOTE(review): by its name, emits a zero check on right_arg; confirm against the definition
+
+#ifdef AARCH64
+ // Arithmetic helper, declared for AARCH64 only. NOTE(review): presumably dest = src + c; confirm against the definition.
+ void add_constant(LIR_Opr src, jlong c, LIR_Opr dest);
+#endif // AARCH64
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_LIR_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_LIR.hpp"
+
+FloatRegister LIR_OprDesc::as_float_reg() const { // maps this operand's fpu_regnr() to a FloatRegister
+ return as_FloatRegister(fpu_regnr());
+}
+
+FloatRegister LIR_OprDesc::as_double_reg() const { // a double is identified by its low register number, fpu_regnrLo()
+ return as_FloatRegister(fpu_regnrLo());
+}
+
+#ifdef AARCH64
+// Reg2 unused: it must map to fnoreg, and reg1 is encoded in both register fields.
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
+ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform");
+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
+ (reg1 << LIR_OprDesc::reg2_shift) | // reg1 again, not reg2
+ LIR_OprDesc::double_type |
+ LIR_OprDesc::fpu_register |
+ LIR_OprDesc::double_size);
+}
+#else
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { // non-AARCH64: both register numbers are encoded in the operand
+ assert(as_FloatRegister(reg2) != fnoreg, "Arm32 holds double in two regs.");
+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
+ (reg2 << LIR_OprDesc::reg2_shift) |
+ LIR_OprDesc::double_type |
+ LIR_OprDesc::fpu_register |
+ LIR_OprDesc::double_size);
+}
+#endif
+
+#ifndef PRODUCT
+void LIR_Address::verify() const { // non-PRODUCT sanity checks on the base/index/disp combination; assert-only
+#ifdef _LP64
+ assert(base()->is_cpu_register(), "wrong base operand");
+#endif
+#ifdef AARCH64
+ if (base()->type() == T_INT) { // a T_INT base requires a single-cpu T_INT index
+ assert(index()->is_single_cpu() && (index()->type() == T_INT), "wrong index operand");
+ } else {
+ assert(index()->is_illegal() || index()->is_double_cpu() ||
+ (index()->is_single_cpu() && (index()->is_oop_register() || index()->type() == T_INT)), "wrong index operand");
+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses");
+ }
+#else
+ assert(disp() == 0 || index()->is_illegal(), "can't have both"); // an address may carry a displacement or an index, not both
+ // Note: offsets higher than 4096 must not be rejected here. They can
+ // be handled by the back-end or will be rejected if not.
+#ifdef _LP64
+ assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand");
+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA,
+ "wrong type for addresses");
+#else
+ assert(base()->is_single_cpu(), "wrong base operand");
+ assert(index()->is_illegal() || index()->is_single_cpu(), "wrong index operand");
+ assert(base()->type() == T_OBJECT || base()->type() == T_INT || base()->type() == T_METADATA,
+ "wrong type for addresses");
+#endif
+#endif // AARCH64
+}
+#endif // PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_LinearScan_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LinearScan.hpp"
+#include "utilities/bitMap.inline.hpp"
+
+void LinearScan::allocate_fpu_stack() {
+ // Intentionally empty: no FPU stack on ARM
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_LinearScan_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_C1_LINEARSCAN_ARM_HPP
+#define CPU_ARM_VM_C1_LINEARSCAN_ARM_HPP
+
+inline bool LinearScan::is_processed_reg_num(int reg_num) { // true unless reg_num lies in [pd_nof_cpu_regs_processed_in_linearscan, pd_nof_cpu_regs_frame_map)
+ return reg_num < pd_nof_cpu_regs_processed_in_linearscan || // below the first bound, or ...
+ reg_num >= pd_nof_cpu_regs_frame_map; // ... at or above the second bound
+}
+
+inline int LinearScan::num_physical_regs(BasicType type) { // returns 2 for T_LONG/T_DOUBLE in non-AARCH64 builds, 1 otherwise
+#ifndef AARCH64
+ if (type == T_LONG || type == T_DOUBLE) return 2; // only compiled when AARCH64 is not defined
+#endif // !AARCH64
+ return 1; // all remaining types, and every type when AARCH64 is defined
+}
+
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) { // whether a value of `type` is reported as needing adjacent registers
+#ifdef AARCH64
+ return false; // never when AARCH64 is defined
+#else
+ return type == T_DOUBLE || type == T_LONG; // only the two types tested here
+#endif // AARCH64
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) { // assigned_reg must be in [0, nof_regs), see the assert
+ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
+ // TODO-AARCH64 try to add callee-saved registers
+ return true; // every register is currently reported as caller-saved
+}
+
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+ // No extra temporaries are added on ARM; `op` is not inspected.
+}
+
+
+// Implementation of LinearScanWalker
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { // sets _first_reg/_last_reg from the type of `cur`; always returns true
+#ifndef __SOFTFP__
+ if (cur->type() == T_FLOAT || cur->type() == T_DOUBLE) { // floating-point intervals get the FPU register range
+ _first_reg = pd_first_fpu_reg;
+ _last_reg = pd_first_fpu_reg + pd_nof_fpu_regs_reg_alloc - 1; // inclusive upper bound
+ return true;
+ }
+#endif // !__SOFTFP__
+
+ // Use allocatable CPU registers otherwise (the only path when __SOFTFP__ is defined)
+ _first_reg = pd_first_cpu_reg;
+ _last_reg = pd_first_cpu_reg + FrameMap::adjust_reg_range(pd_nof_cpu_regs_reg_alloc) - 1; // inclusive upper bound
+ return true;
+}
+
+#endif // CPU_ARM_VM_C1_LINEARSCAN_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_MacroAssembler_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,408 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+
+// Note: Rtemp usage in this file should not impact C2 and should be
+// correct as long as it is not implicitly used in lower layers (the
+// arm [macro]assembler) and used with care in the other C1 specific
+// files.
+
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { // emits: compare receiver's klass with iCache, jump to the IC miss stub on mismatch
+ Label verified;
+ load_klass(Rtemp, receiver); // Rtemp is used as the scratch register for the klass
+ cmp(Rtemp, iCache);
+ b(verified, eq); // jump over alignment no-ops
+#ifdef AARCH64
+ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); // AARCH64 variant additionally passes Rtemp to jump()
+#else
+ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
+#endif
+ align(CodeEntryAlignment); // the `verified` label is bound at a CodeEntryAlignment boundary
+ bind(verified);
+}
+
+void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { // emits the frame setup: stack bang, push FP/LR, reserve the frame
+ assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+ assert((frame_size_in_bytes % StackAlignmentInBytes) == 0, "frame size should be aligned");
+
+#ifdef AARCH64
+ // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
+ nop();
+#endif // AARCH64
+
+ arm_stack_overflow_check(bang_size_in_bytes, Rtemp); // Rtemp is handed over as the temp register
+
+ // FP can no longer be used to memorize SP. It may be modified
+ // if this method contains a methodHandle call site
+ raw_push(FP, LR);
+ sub_slow(SP, SP, frame_size_in_bytes); // frame_size_in_bytes does not include the FP/LR pair pushed above
+}
+
+void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { // reverses the last two steps of build_frame
+ add_slow(SP, SP, frame_size_in_bytes); // release the space reserved by build_frame
+ raw_pop(FP, LR); // restore the pair pushed by build_frame
+}
+
+void C1_MacroAssembler::verified_entry() { // emits nothing unless C1Breakpoint is set
+ if (C1Breakpoint) {
+ breakpoint(); // debugging aid controlled by the C1Breakpoint flag
+ }
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. Allocation failure is expected to branch to `slow_case` (handled inside tlab_allocate/eden_allocate).
+void C1_MacroAssembler::try_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
+ RegisterOrConstant size_expression, Label& slow_case) {
+ if (UseTLAB) {
+ tlab_allocate(obj, obj_end, tmp1, size_expression, slow_case); // tmp2 is not used on this path
+ } else {
+ eden_allocate(obj, obj_end, tmp1, tmp2, size_expression, slow_case);
+ incr_allocated_bytes(size_expression, tmp1); // byte accounting is only done for the non-TLAB path
+ }
+}
+
+
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp) { // writes mark word, klass and (if len is valid) array length; clobbers tmp
+ assert_different_registers(obj, klass, len, tmp);
+
+ if(UseBiasedLocking && !len->is_valid()) { // no length register (allocate_object passes noreg) and biased locking on
+ ldr(tmp, Address(klass, Klass::prototype_header_offset())); // mark word comes from the klass's prototype header
+ } else {
+ mov(tmp, (intptr_t)markOopDesc::prototype()); // otherwise use the constant prototype mark
+ }
+
+#ifdef AARCH64
+ if (UseCompressedClassPointers) {
+ str(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); // mark must be stored before tmp is reused below
+ encode_klass_not_null(tmp, klass); // Take care not to kill klass
+ str_w(tmp, Address(obj, oopDesc::klass_offset_in_bytes())); // encoded klass is written with str_w
+ } else {
+ assert(oopDesc::mark_offset_in_bytes() + wordSize == oopDesc::klass_offset_in_bytes(), "adjust this code");
+ stp(tmp, klass, Address(obj, oopDesc::mark_offset_in_bytes())); // mark and klass are adjacent (asserted above), so one pair store writes both
+ }
+#else
+ str(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
+ str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
+#endif // AARCH64
+
+ if (len->is_valid()) {
+ str_32(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); // a valid len register means the array length field is written
+ }
+#ifdef AARCH64
+ else if (UseCompressedClassPointers) {
+ store_klass_gap(obj); // no length field written: store_klass_gap() handles the gap after the compressed klass
+ }
+#endif // AARCH64
+}
+
+
+// Clears object body [base..obj_end] by delegating to zero_memory(). Clobbers `base` and `tmp` registers.
+void C1_MacroAssembler::initialize_body(Register base, Register obj_end, Register tmp) {
+ zero_memory(base, obj_end, tmp);
+}
+
+
+void C1_MacroAssembler::initialize_object(Register obj, Register obj_end, Register klass, // writes the header, clears the body if needed, then emits a StoreStore barrier
+ Register len, Register tmp1, Register tmp2,
+ RegisterOrConstant header_size, int obj_size_in_bytes, // obj_size_in_bytes < 0 means the size is not a compile-time constant (allocate_array passes -1)
+ bool is_tlab_allocated)
+{
+ assert_different_registers(obj, obj_end, klass, len, tmp1, tmp2);
+ initialize_header(obj, klass, len, tmp1); // tmp1 is clobbered here and reused below
+
+ const Register ptr = tmp2;
+
+ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { // body clearing is skipped only when all three conditions hold
+#ifdef AARCH64
+ if (obj_size_in_bytes < 0) { // size unknown at code-generation time: clear [obj + header_size, obj_end)
+ add_rc(ptr, obj, header_size);
+ initialize_body(ptr, obj_end, tmp1);
+
+ } else { // constant size: emit stores of ZR directly
+ int base = instanceOopDesc::header_size() * HeapWordSize; // byte offset of the first body word
+ assert(obj_size_in_bytes >= base, "should be");
+
+ const int zero_bytes = obj_size_in_bytes - base;
+ assert((zero_bytes % wordSize) == 0, "should be");
+
+ if ((zero_bytes % (2*wordSize)) != 0) { // odd number of words: clear one word so that the rest is whole pairs
+ str(ZR, Address(obj, base));
+ base += wordSize;
+ }
+
+ const int stp_count = zero_bytes / (2*wordSize); // number of pair stores; the division drops the odd word handled above
+
+ if (zero_bytes > 8 * wordSize) { // larger bodies use a counted loop, smaller ones are unrolled
+ Label loop;
+ add(ptr, obj, base);
+ mov(tmp1, stp_count);
+ bind(loop);
+ subs(tmp1, tmp1, 1); // sets the flags consumed by b(loop, gt) below
+ stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); // clear two words and advance ptr
+ b(loop, gt);
+ } else {
+ for (int i = 0; i < stp_count; i++) {
+ stp(ZR, ZR, Address(obj, base + i * 2 * wordSize)); // unrolled: one pair store per iteration
+ }
+ }
+ }
+#else
+ if (obj_size_in_bytes >= 0 && obj_size_in_bytes <= 8 * BytesPerWord) { // small constant size: unrolled word stores
+ mov(tmp1, 0);
+ const int base = instanceOopDesc::header_size() * HeapWordSize;
+ for (int i = base; i < obj_size_in_bytes; i += wordSize) {
+ str(tmp1, Address(obj, i));
+ }
+ } else { // unknown or larger size: clear [obj + header_size, obj_end)
+ assert(header_size.is_constant() || header_size.as_register() == ptr, "code assumption");
+ add(ptr, obj, header_size);
+ initialize_body(ptr, obj_end, tmp1);
+ }
+#endif // AARCH64
+ }
+
+ // StoreStore barrier required after complete initialization
+ // (headers + content zeroing), before the object may escape.
+ membar(MacroAssembler::StoreStore, tmp1);
+}
+
+void C1_MacroAssembler::allocate_object(Register obj, Register tmp1, Register tmp2, Register tmp3, // allocates and initializes an instance; branches to slow_case if allocation fails
+ int header_size, int object_size, // both are multiplied by BytesPerWord or compared in word units; header_size is only used in the assert
+ Register klass, Label& slow_case) {
+ assert_different_registers(obj, tmp1, tmp2, tmp3, klass, Rtemp);
+ assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
+ const int object_size_in_bytes = object_size * BytesPerWord;
+
+ const Register obj_end = tmp1; // tmp1 receives the end address of the new object
+ const Register len = noreg; // no length register: initialize_header() treats this as "not an array"
+
+ if (Assembler::is_arith_imm_in_range(object_size_in_bytes)) { // size can be passed as an immediate
+ try_allocate(obj, obj_end, tmp2, tmp3, object_size_in_bytes, slow_case);
+ } else {
+ // Rtemp should be free at c1 LIR level
+ mov_slow(Rtemp, object_size_in_bytes); // size too large for an immediate: materialize it in Rtemp
+ try_allocate(obj, obj_end, tmp2, tmp3, Rtemp, slow_case);
+ }
+ initialize_object(obj, obj_end, klass, len, tmp2, tmp3, instanceOopDesc::header_size() * HeapWordSize, object_size_in_bytes, /* is_tlab_allocated */ UseTLAB);
+}
+
+void C1_MacroAssembler::allocate_array(Register obj, Register len, // allocates and initializes an array; branches to slow_case for large lengths or failed allocation
+ Register tmp1, Register tmp2, Register tmp3,
+ int header_size, int element_size, // header_size is scaled by BytesPerWord; element_size must be a power of two (exact_log2)
+ Register klass, Label& slow_case) {
+ assert_different_registers(obj, len, tmp1, tmp2, tmp3, klass, Rtemp);
+ const int header_size_in_bytes = header_size * BytesPerWord;
+ const int scale_shift = exact_log2(element_size);
+ const Register obj_size = Rtemp; // Rtemp should be free at c1 LIR level
+
+#ifdef AARCH64
+ mov_slow(Rtemp, max_array_allocation_length); // AARCH64 variant loads the limit into Rtemp before comparing
+ cmp_32(len, Rtemp);
+#else
+ cmp_32(len, max_array_allocation_length);
+#endif // AARCH64
+ b(slow_case, hs); // hs is the unsigned >= condition, so negative lengths also take the slow path
+
+ bool align_header = ((header_size_in_bytes | element_size) & MinObjAlignmentInBytesMask) != 0; // true if header size or element size is not a multiple of the object alignment
+ assert(align_header || ((header_size_in_bytes & MinObjAlignmentInBytesMask) == 0), "must be");
+ assert(align_header || ((element_size & MinObjAlignmentInBytesMask) == 0), "must be");
+
+ mov(obj_size, header_size_in_bytes + (align_header ? (MinObjAlignmentInBytes - 1) : 0)); // alignment-1 is pre-added, presumably so align_reg below rounds up - confirm align_reg semantics
+ add_ptr_scaled_int32(obj_size, obj_size, len, scale_shift); // presumably obj_size += len << scale_shift - confirm
+
+ if (align_header) {
+ align_reg(obj_size, obj_size, MinObjAlignmentInBytes);
+ }
+
+ try_allocate(obj, tmp1, tmp2, tmp3, obj_size, slow_case); // tmp1 receives the end address of the new array
+ initialize_object(obj, tmp1, klass, len, tmp2, tmp3, header_size_in_bytes, -1, /* is_tlab_allocated */ UseTLAB); // -1: object size is not a compile-time constant
+}
+
+int C1_MacroAssembler::lock_object(Register hdr, Register obj, // emits the fast-path monitor enter; returns the code offset recorded for the null check
+ Register disp_hdr, Register tmp1,
+ Label& slow_case) {
+ Label done, fast_lock, fast_lock_done;
+ int null_check_offset = 0;
+
+ const Register tmp2 = Rtemp; // Rtemp should be free at c1 LIR level
+ assert_different_registers(hdr, obj, disp_hdr, tmp1, tmp2);
+
+ assert(BasicObjectLock::lock_offset_in_bytes() == 0, "ajust this code");
+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+ const int mark_offset = BasicLock::displaced_header_offset_in_bytes();
+
+ if (UseBiasedLocking) {
+ // save the object into the lock record (the obj field of the BasicObjectLock at disp_hdr)
+ str(obj, Address(disp_hdr, obj_offset));
+ null_check_offset = biased_locking_enter(obj, hdr/*scratched*/, tmp1, false, tmp2, done, slow_case);
+ }
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
+
+#ifdef AARCH64
+
+ str(obj, Address(disp_hdr, obj_offset)); // record the object in the lock record
+
+ if (!UseBiasedLocking) {
+ null_check_offset = offset(); // offset of the ldr below, which touches obj
+ }
+ ldr(hdr, obj); // load the mark word (mark offset is 0, asserted above)
+
+ // Test if object is already locked
+ assert(markOopDesc::unlocked_value == 1, "adjust this code");
+ tbnz(hdr, exact_log2(markOopDesc::unlocked_value), fast_lock); // unlocked bit set: go try the CAS
+
+ // Check for recursive locking
+ // See comments in InterpreterMacroAssembler::lock_object for
+ // explanations on the fast recursive locking check.
+ intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
+ Assembler::LogicalImmediate imm(mask, false);
+ mov(tmp2, SP);
+ sub(tmp2, hdr, tmp2); // tmp2 = hdr - SP
+ ands(tmp2, tmp2, imm);
+ b(slow_case, ne); // any masked bit set: not a recursive lock by this frame's stack
+
+ // Recursive locking: store 0 into a lock record
+ str(ZR, Address(disp_hdr, mark_offset));
+ b(fast_lock_done);
+
+#else // AARCH64
+
+ if (!UseBiasedLocking) {
+ null_check_offset = offset(); // offset of the ldr below, which touches obj
+ }
+
+ // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
+ // That would be acceptable as either CAS or slow case path is taken in that case.
+
+ // Must be the first instruction here, because implicit null check relies on it
+ ldr(hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+ str(obj, Address(disp_hdr, obj_offset)); // record the object in the lock record
+ tst(hdr, markOopDesc::unlocked_value);
+ b(fast_lock, ne); // unlocked bit set: go try the CAS
+
+ // Check for recursive locking
+ // See comments in InterpreterMacroAssembler::lock_object for
+ // explanations on the fast recursive locking check.
+ // -1- test low 2 bits
+ movs(tmp2, AsmOperand(hdr, lsl, 30));
+ // -2- test (hdr - SP) if the low two bits are 0
+ sub(tmp2, hdr, SP, eq);
+ movs(tmp2, AsmOperand(tmp2, lsr, exact_log2(os::vm_page_size())), eq);
+ // If 'eq' then OK for recursive fast locking: store 0 into a lock record.
+ str(tmp2, Address(disp_hdr, mark_offset), eq); // tmp2 is 0 here when the condition holds
+ b(fast_lock_done, eq);
+ // else need slow case
+ b(slow_case);
+
+#endif // AARCH64
+
+ bind(fast_lock);
+ // Save previous object header in BasicLock structure and update the header
+ str(hdr, Address(disp_hdr, mark_offset));
+
+ cas_for_lock_acquire(hdr, disp_hdr, obj, tmp2, slow_case); // slow_case is passed as the CAS failure target
+
+ bind(fast_lock_done);
+
+#ifndef PRODUCT
+ if (PrintBiasedLockingStatistics) {
+ cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr()); // statistics only, non-PRODUCT builds
+ }
+#endif // !PRODUCT
+
+ bind(done); // also the target handed to biased_locking_enter above
+
+ return null_check_offset;
+}
+
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, // emits the fast-path monitor exit; CAS failure branches to slow_case
+ Register disp_hdr, Register tmp,
+ Label& slow_case) {
+ // Note: this method is not using its 'tmp' argument; Rtemp serves as the scratch register instead
+
+ assert_different_registers(hdr, obj, disp_hdr, Rtemp);
+ Register tmp2 = Rtemp;
+
+ assert(BasicObjectLock::lock_offset_in_bytes() == 0, "ajust this code");
+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+ const int mark_offset = BasicLock::displaced_header_offset_in_bytes();
+
+ Label done;
+ if (UseBiasedLocking) {
+ // load object from the lock record (needed by biased_locking_exit)
+ ldr(obj, Address(disp_hdr, obj_offset));
+ biased_locking_exit(obj, hdr, done);
+ }
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
+ Label retry; // NOTE(review): not referenced anywhere in this function
+
+ // Load displaced header from the lock record
+ ldr(hdr, Address(disp_hdr, mark_offset));
+ // If hdr is NULL, we've got recursive locking and there's nothing more to do
+ cbz(hdr, done);
+
+ if(!UseBiasedLocking) {
+ // load object (already loaded above when UseBiasedLocking is set)
+ ldr(obj, Address(disp_hdr, obj_offset));
+ }
+
+ // Restore the object header
+ cas_for_lock_release(disp_hdr, hdr, obj, tmp2, slow_case);
+
+ bind(done);
+}
+
+
+#ifndef PRODUCT
+
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) { // non-PRODUCT only: verifies the oop stored at SP + stack_offset
+ if (!VerifyOops) return; // emits nothing unless VerifyOops is enabled
+ verify_oop_addr(Address(SP, stack_offset));
+}
+
+void C1_MacroAssembler::verify_not_null_oop(Register r) { // non-PRODUCT only: stops if r is zero, then optionally verifies the oop
+ Label not_null;
+ cbnz(r, not_null); // the null test is emitted regardless of VerifyOops
+ stop("non-null oop required");
+ bind(not_null);
+ if (!VerifyOops) return; // full oop verification only when VerifyOops is enabled
+ verify_oop(r);
+}
+
+#endif // !PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_MacroAssembler_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_C1_MACROASSEMBLER_ARM_HPP
+#define CPU_ARM_VM_C1_MACROASSEMBLER_ARM_HPP
+
+ private:
+
+ void pd_init() { /* not used */ }
+
+ public:
+
+ // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+ // `size_expression` should be a register or constant which can be used as immediate in "add" instruction.
+ void try_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
+ RegisterOrConstant size_expression, Label& slow_case);
+
+ void initialize_header(Register obj, Register klass, Register len, Register tmp); // `len` may be noreg, in which case no length field is written
+
+ // Clears object body [base..obj_end]. Clobbers `base` and `tmp` registers.
+ void initialize_body(Register base, Register obj_end, Register tmp);
+
+ void initialize_object(Register obj, Register obj_end, Register klass, // header + body clearing + StoreStore barrier
+ Register len, Register tmp1, Register tmp2,
+ RegisterOrConstant header_size_expression, int obj_size_in_bytes, // a negative obj_size_in_bytes means "not a compile-time constant"
+ bool is_tlab_allocated);
+
+ void allocate_object(Register obj, Register tmp1, Register tmp2, Register tmp3,
+ int header_size, int object_size, // object_size is multiplied by BytesPerWord in the implementation
+ Register klass, Label& slow_case);
+
+ void allocate_array(Register obj, Register len,
+ Register tmp1, Register tmp2, Register tmp3,
+ int header_size, int element_size, // header_size is multiplied by BytesPerWord; element_size goes through exact_log2
+ Register klass, Label& slow_case);
+
+ enum {
+ max_array_allocation_length = 0x01000000 // allocate_array branches to slow_case when len is unsigned >= this value
+ };
+
+ int lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case); // returns the code offset recorded for the null check
+
+ void unlock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case); // `tmp` is not used by the implementation
+
+ // This platform only uses signal-based null checks. The Label is not needed.
+ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
+
+#endif // CPU_ARM_VM_C1_MACROASSEMBLER_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_Runtime1_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,1226 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "c1/c1_Defs.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_arm.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "register_arm.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_arm.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+// Note: Rtemp usage in this file should not impact C2 and should be
+// correct as long as it is not implicitly used in lower layers (the
+// arm [macro]assembler) and used with care in the other C1 specific
+// files.
+
+// Implementation of StubAssembler
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { // emits the runtime call; returns the code offset used for the oop map. args_size is not used in this body.
+ mov(R0, Rthread); // the current thread is passed in R0
+
+ int call_offset = set_last_Java_frame(SP, FP, false, Rtemp);
+
+ call(entry);
+ if (call_offset == -1) { // PC not saved
+ call_offset = offset(); // fall back to the offset right after the call
+ }
+ reset_last_Java_frame(Rtemp);
+
+ assert(frame_size() != no_frame_size, "frame must be fixed");
+ if (_stub_id != Runtime1::forward_exception_id) {
+ ldr(R3, Address(Rthread, Thread::pending_exception_offset())); // R3 holds the pending exception for the check further down
+ }
+
+ if (oop_result1->is_valid()) {
+ assert_different_registers(oop_result1, R3, Rtemp); // must not clobber R3, which is tested below
+ get_vm_result(oop_result1, Rtemp);
+ }
+ if (metadata_result->is_valid()) {
+ assert_different_registers(metadata_result, R3, Rtemp); // must not clobber R3, which is tested below
+ get_vm_result_2(metadata_result, Rtemp);
+ }
+
+ // Check for pending exception
+ // unpack_with_exception_in_tls path is taken through
+ // Runtime1::exception_handler_for_pc
+ if (_stub_id != Runtime1::forward_exception_id) {
+ assert(frame_size() != no_frame_size, "cannot directly call forward_exception_id");
+#ifdef AARCH64
+ Label skip;
+ cbz(R3, skip); // no pending exception: skip the jump
+ jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp);
+ bind(skip);
+#else
+ cmp(R3, 0);
+ jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp, ne); // conditional jump taken only when R3 != 0
+#endif // AARCH64
+ } else {
+#ifdef ASSERT
+ // Should not have pending exception in forward_exception stub
+ ldr(R3, Address(Rthread, Thread::pending_exception_offset()));
+ cmp(R3, 0);
+ breakpoint(ne);
+#endif // ASSERT
+ }
+ return call_offset;
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { // one-argument form: arg1 ends up in R1
+ if (arg1 != R1) {
+ mov(R1, arg1); // R0 is reserved for the thread by the base overload
+ }
+ return call_RT(oop_result1, metadata_result, entry, 1);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { // two-argument form: no moves are emitted
+ assert(arg1 == R1 && arg2 == R2, "cannot handle otherwise"); // callers must already have the arguments in R1 and R2
+ return call_RT(oop_result1, metadata_result, entry, 2);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { // three-argument form: no moves are emitted
+ assert(arg1 == R1 && arg2 == R2 && arg3 == R3, "cannot handle otherwise"); // callers must already have the arguments in R1, R2 and R3
+ return call_RT(oop_result1, metadata_result, entry, 3);
+}
+
+
+#define __ sasm->
+
+// TODO: ARM - does this duplicate RegisterSaver in SharedRuntime?
+#ifdef AARCH64
+
+ //
+ // On AArch64 registers save area has the following layout:
+ //
+ // |---------------------|
+ // | return address (LR) |
+ // | FP |
+ // |---------------------|
+ // | D31 |
+ // | ... |
+ // | D0 |
+ // |---------------------|
+ // | padding |
+ // |---------------------|
+ // | R28 |
+ // | ... |
+ // | R0 |
+ // |---------------------| <-- SP
+ //
+
+enum RegisterLayout { // offsets are in words from SP; users multiply by wordSize
+ number_of_saved_gprs = 29, // R0..R28, as in the layout above
+ number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
+
+ R0_offset = 0,
+ D0_offset = R0_offset + number_of_saved_gprs + 1, // the +1 is the padding slot shown in the layout above
+ FP_offset = D0_offset + number_of_saved_fprs,
+ LR_offset = FP_offset + 1,
+
+ reg_save_size = LR_offset + 1, // total size of the save area in words
+
+ arg1_offset = reg_save_size * wordSize, // byte offset of the first slot above the save area
+ arg2_offset = (reg_save_size + 1) * wordSize // byte offset of the slot after arg1
+};
+
+#else
+
+enum RegisterLayout { // offsets are in words from SP; users multiply by wordSize
+ fpu_save_size = pd_nof_fpu_regs_reg_alloc, // the FPU area sits at the bottom of the save area
+#ifndef __SOFTFP__
+ D0_offset = 0,
+#endif
+ R0_offset = fpu_save_size, // core registers follow the FPU area; the entries below auto-increment
+ R1_offset,
+ R2_offset,
+ R3_offset,
+ R4_offset,
+ R5_offset,
+ R6_offset,
+#if (FP_REG_NUM != 7)
+ R7_offset, // R7 has its own slot only when it is not the frame pointer
+#endif
+ R8_offset,
+ R9_offset,
+ R10_offset,
+#if (FP_REG_NUM != 11)
+ R11_offset, // R11 has its own slot only when it is not the frame pointer
+#endif
+ R12_offset,
+ FP_offset,
+ LR_offset,
+ reg_save_size, // total size of the save area in words
+ arg1_offset = reg_save_size * wordSize, // byte offset of the first slot above the save area
+ arg2_offset = (reg_save_size + 1) * wordSize // byte offset of the slot after arg1
+};
+
+#endif // AARCH64
+
+static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers = HaveVFP) { // builds the OopMap describing the RegisterLayout save area; emits no code
+ sasm->set_frame_size(reg_save_size /* in words */);
+
+ // Record saved value locations in an OopMap.
+ // Locations are offsets from sp after runtime call.
+ OopMap* map = new OopMap(VMRegImpl::slots_per_word * reg_save_size, 0);
+
+#ifdef AARCH64
+ for (int i = 0; i < number_of_saved_gprs; i++) { // R0..R28, word offsets converted to slots
+ map->set_callee_saved(VMRegImpl::stack2reg((R0_offset + i) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
+ }
+ map->set_callee_saved(VMRegImpl::stack2reg(FP_offset * VMRegImpl::slots_per_word), FP->as_VMReg());
+ map->set_callee_saved(VMRegImpl::stack2reg(LR_offset * VMRegImpl::slots_per_word), LR->as_VMReg());
+
+ if (save_fpu_registers) {
+ for (int i = 0; i < number_of_saved_fprs; i++) {
+ map->set_callee_saved(VMRegImpl::stack2reg((D0_offset + i) * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
+ }
+ }
+#else
+ int j=0; // register number matching slot i
+ for (int i = R0_offset; i < R10_offset; i++) { // slots R0_offset..R10_offset-1; the R10 and R12 slots are not recorded here
+ if (j == FP_REG_NUM) {
+ // skip the FP register, saved below
+ j++;
+ }
+ map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
+ j++;
+ }
+ assert(j == R10->encoding(), "must be");
+#if (FP_REG_NUM != 11)
+ // add R11, if not saved as FP
+ map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
+#endif
+ map->set_callee_saved(VMRegImpl::stack2reg(FP_offset), FP->as_VMReg());
+ map->set_callee_saved(VMRegImpl::stack2reg(LR_offset), LR->as_VMReg());
+
+ if (save_fpu_registers) {
+ for (int i = 0; i < fpu_save_size; i++) { // the FPU area starts at slot 0 (D0_offset)
+ map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
+ }
+ }
+#endif // AARCH64
+
+ return map;
+}
+
+static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = HaveVFP) { // emits the register save sequence and returns the matching OopMap
+ __ block_comment("save_live_registers");
+ sasm->set_frame_size(reg_save_size /* in words */);
+
+#ifdef AARCH64
+ assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
+
+ __ raw_push(FP, LR); // FP/LR occupy the top two words of the save area
+
+ __ sub(SP, SP, (reg_save_size - 2) * wordSize); // reserve the rest of the save area
+
+ for (int i = 0; i < round_down(number_of_saved_gprs, 2); i += 2) { // store core registers in pairs
+ __ stp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
+ }
+
+ if (is_odd(number_of_saved_gprs)) { // the last register has no partner and is stored alone
+ int i = number_of_saved_gprs - 1;
+ __ str(as_Register(i), Address(SP, (R0_offset + i) * wordSize));
+ }
+
+ if (save_fpu_registers) {
+ assert (is_even(number_of_saved_fprs), "adjust this code");
+ for (int i = 0; i < number_of_saved_fprs; i += 2) {
+ __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), Address(SP, (D0_offset + i) * wordSize));
+ }
+ }
+#else
+ __ push(RegisterSet(FP) | RegisterSet(LR));
+ __ push(RegisterSet(R0, R6) | RegisterSet(R8, R10) | R12 | altFP_7_11);
+ if (save_fpu_registers) {
+ __ fstmdbd(SP, FloatRegisterSet(D0, fpu_save_size / 2), writeback);
+ } else {
+ __ sub(SP, SP, fpu_save_size * wordSize); // FPU registers not saved: still reserve the area so RegisterLayout offsets stay valid
+ }
+#endif // AARCH64
+
+ return generate_oop_map(sasm, save_fpu_registers);
+}
+
+
+static void restore_live_registers(StubAssembler* sasm, // emits the inverse of save_live_registers, with optional pieces
+ bool restore_R0, // false leaves R0 untouched
+ bool restore_FP_LR, // false leaves the FP/LR pair on the stack
+ bool do_return, // true also returns to the restored LR; requires restore_FP_LR
+ bool restore_fpu_registers = HaveVFP) {
+ __ block_comment("restore_live_registers");
+
+#ifdef AARCH64
+ if (restore_R0) {
+ __ ldr(R0, Address(SP, R0_offset * wordSize));
+ }
+
+ assert(is_odd(number_of_saved_gprs), "adjust this code");
+ for (int i = 1; i < number_of_saved_gprs; i += 2) { // R0 is handled above, so pairs start at R1
+ __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
+ }
+
+ if (restore_fpu_registers) {
+ assert (is_even(number_of_saved_fprs), "adjust this code");
+ for (int i = 0; i < number_of_saved_fprs; i += 2) {
+ __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), Address(SP, (D0_offset + i) * wordSize));
+ }
+ }
+
+ __ add(SP, SP, (reg_save_size - 2) * wordSize); // drop everything except the FP/LR pair
+
+ if (restore_FP_LR) {
+ __ raw_pop(FP, LR);
+ if (do_return) {
+ __ ret();
+ }
+ } else {
+ assert (!do_return, "return without restoring FP/LR");
+ }
+#else
+ if (restore_fpu_registers) {
+ __ fldmiad(SP, FloatRegisterSet(D0, fpu_save_size / 2), writeback);
+ if (!restore_R0) {
+ __ add(SP, SP, (R1_offset - fpu_save_size) * wordSize); // skip the saved R0 slot (one word)
+ }
+ } else {
+ __ add(SP, SP, (restore_R0 ? fpu_save_size : R1_offset) * wordSize); // skip the FPU area, plus the R0 slot when R0 is not restored
+ }
+ __ pop(RegisterSet((restore_R0 ? R0 : R1), R6) | RegisterSet(R8, R10) | R12 | altFP_7_11);
+ if (restore_FP_LR) {
+ __ pop(RegisterSet(FP) | RegisterSet(do_return ? PC : LR)); // popping the saved LR slot into PC performs the return
+ } else {
+ assert (!do_return, "return without restoring FP/LR");
+ }
+#endif // AARCH64
+}
+
+
+static void restore_live_registers_except_R0(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { // restores everything but R0, then returns
+ restore_live_registers(sasm, false, true, true, restore_fpu_registers); // restore_R0=false, restore_FP_LR=true, do_return=true
+}
+
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { // restores all saved registers, then returns
+ restore_live_registers(sasm, true, true, true, restore_fpu_registers); // restore_R0=true, restore_FP_LR=true, do_return=true
+}
+
+#ifndef AARCH64
+static void restore_live_registers_except_FP_LR(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { // leaves the FP/LR pair on the stack and does not return
+ restore_live_registers(sasm, true, false, false, restore_fpu_registers); // restore_R0=true, restore_FP_LR=false, do_return=false
+}
+#endif // !AARCH64
+
+static void restore_live_registers_without_return(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { // restores all saved registers but emits no return
+ restore_live_registers(sasm, true, true, false, restore_fpu_registers); // restore_R0=true, restore_FP_LR=true, do_return=false
+}
+
+
+void Runtime1::initialize_pd() { // intentionally empty: this port does nothing in this hook
+}
+
+
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { // emits: save registers, optionally load one argument, call `target`
+ OopMap* oop_map = save_live_registers(sasm);
+
+ if (has_argument) {
+ __ ldr(R1, Address(SP, arg1_offset)); // the argument is read from the first slot above the register save area
+ }
+
+ int call_offset = __ call_RT(noreg, noreg, target); // no oop or metadata result is fetched
+ OopMapSet* oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+
+ DEBUG_ONLY(STOP("generate_exception_throw");) // Should not reach here
+ return oop_maps;
+}
+
+
+static void restore_sp_for_method_handle(StubAssembler* sasm) { // clobbers Rtemp
+ // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site.
+ __ ldr_s32(Rtemp, Address(Rthread, JavaThread::is_method_handle_return_offset())); // per-thread flag, non-zero means "restore SP"
+#ifdef AARCH64
+ Label skip;
+ __ cbz(Rtemp, skip); // flag is zero: leave SP alone
+ __ mov(SP, Rmh_SP_save);
+ __ bind(skip);
+#else
+ __ cmp(Rtemp, 0);
+ __ mov(SP, Rmh_SP_save, ne); // conditional move, executed only when the flag is non-zero
+#endif // AARCH64
+}
+
+
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) { // emits the exception-handling stub selected by `id` and returns its oop maps
+ __ block_comment("generate_handle_exception");
+
+ bool save_fpu_registers = false;
+
+ // Save registers, if required.
+ OopMapSet* oop_maps = new OopMapSet();
+ OopMap* oop_map = NULL;
+
+ switch (id) {
+ case forward_exception_id: { // no save sequence is emitted here, only the map is built
+ save_fpu_registers = HaveVFP;
+ oop_map = generate_oop_map(sasm);
+ __ ldr(Rexception_obj, Address(Rthread, Thread::pending_exception_offset()));
+ __ ldr(Rexception_pc, Address(SP, LR_offset * wordSize)); // exception pc is taken from the saved LR slot
+ Register zero = __ zero_register(Rtemp);
+ __ str(zero, Address(Rthread, Thread::pending_exception_offset())); // clear the pending exception
+ break;
+ }
+ case handle_exception_id:
+ save_fpu_registers = HaveVFP;
+ // fall-through
+ case handle_exception_nofpu_id:
+ // At this point all registers MAY be live.
+ oop_map = save_live_registers(sasm, save_fpu_registers);
+ break;
+ case handle_exception_from_callee_id:
+ // At this point all registers except exception oop (R4/R19) and
+ // exception pc (R5/R20) are dead.
+ oop_map = save_live_registers(sasm); // TODO it's not required to save all registers
+ break;
+ default: ShouldNotReachHere();
+ }
+
+ __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
+ __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
+
+ __ str(Rexception_pc, Address(SP, LR_offset * wordSize)); // patch throwing pc into return address
+
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+ oop_maps->add_gc_map(call_offset, oop_map);
+
+ // Exception handler found
+ __ str(R0, Address(SP, LR_offset * wordSize)); // patch the return address
+
+ // Restore the registers that were saved at the beginning, remove
+ // frame and jump to the exception handler.
+ switch (id) {
+ case forward_exception_id:
+ case handle_exception_nofpu_id:
+ case handle_exception_id:
+ restore_live_registers(sasm, save_fpu_registers);
+ // Note: the restore live registers includes the jump to LR (patched to R0)
+ break;
+ case handle_exception_from_callee_id:
+ restore_live_registers_without_return(sasm); // must not jump immediately to handler
+ restore_sp_for_method_handle(sasm);
+ __ ret();
+ break;
+ default: ShouldNotReachHere();
+ }
+
+ DEBUG_ONLY(STOP("generate_handle_exception");) // Should not reach here
+
+ return oop_maps;
+}
+
+// Emits the unwind stub: looks up the caller's exception handler from the return address in LR and jumps to it.
+void Runtime1::generate_unwind_exception(StubAssembler* sasm) {
+ // FP no longer used to find the frame start
+ // on entry, remove_frame() has already been called (restoring FP and LR)
+
+ // search the exception handler address of the caller (using the return address)
+ __ mov(c_rarg0, Rthread);
+ __ mov(Rexception_pc, LR); // the return address doubles as the exception pc
+ __ mov(c_rarg1, LR);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), c_rarg0, c_rarg1);
+
+ // Exception oop should be still in Rexception_obj and pc in Rexception_pc
+ // Jump to handler
+ __ verify_not_null_oop(Rexception_obj);
+
+ // JSR292 extension
+ restore_sp_for_method_handle(sasm);
+
+ __ jump(R0); // R0 is expected to hold the handler address returned by the leaf call above
+}
+
+// Emits a patching stub: calls the runtime routine 'target', then either returns to the caller or jumps to the deopt blob's unpack_with_reexecution entry when R0 is non-zero.
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+ OopMap* oop_map = save_live_registers(sasm);
+
+ // call the runtime patching routine, returns non-zero if nmethod got deopted.
+ int call_offset = __ call_RT(noreg, noreg, target);
+ OopMapSet* oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+ __ cmp_32(R0, 0); // flags are consumed only after the register restore below, so that restore must leave them intact
+
+#ifdef AARCH64
+ Label call_deopt;
+
+ restore_live_registers_without_return(sasm);
+ __ b(call_deopt, ne); // R0 != 0: deoptimization needed
+ __ ret();
+
+ __ bind(call_deopt);
+#else // !AARCH64
+ restore_live_registers_except_FP_LR(sasm);
+ __ pop(RegisterSet(FP) | RegisterSet(PC), eq); // R0 == 0: pop FP and load PC, i.e. return to the caller
+
+ // Deoptimization needed
+ // TODO: ARM - no need to restore FP & LR because unpack_with_reexecution() stores them back
+ __ pop(RegisterSet(FP) | RegisterSet(LR));
+#endif // AARCH64
+
+ __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, Rtemp);
+
+ DEBUG_ONLY(STOP("generate_patching");) // Should not reach here
+ return oop_maps;
+}
+
+// Emits the code of the C1 runtime stub selected by 'id' into 'sasm'; returns the oop maps recorded for its runtime calls, or NULL when the stub records none.
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+ const bool must_gc_arguments = true; // NOTE(review): unused below - every set_info() call passes dont_gc_arguments
+ const bool dont_gc_arguments = false;
+
+ OopMapSet* oop_maps = NULL;
+ bool save_fpu_registers = HaveVFP; // cleared by the *_nofpu monitor stubs before they fall through
+
+ switch (id) {
+ case forward_exception_id:
+ {
+ oop_maps = generate_handle_exception(id, sasm);
+ // does not return on ARM
+ }
+ break;
+
+#if INCLUDE_ALL_GCS
+ case g1_pre_barrier_slow_id:
+ {
+ // Input:
+ // - pre_val pushed on the stack
+
+ __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
+
+ // save at least the registers that need saving if the runtime is called
+#ifdef AARCH64
+ __ raw_push(R0, R1);
+ __ raw_push(R2, R3);
+ const int nb_saved_regs = 4;
+#else // AARCH64
+ const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
+ const int nb_saved_regs = 6;
+ assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
+ __ push(saved_regs);
+#endif // AARCH64
+
+ const Register r_pre_val_0 = R0; // must be R0, to be ready for the runtime call
+ const Register r_index_1 = R1;
+ const Register r_buffer_2 = R2;
+
+ Address queue_index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_index()));
+ Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_buf()));
+
+ Label done;
+ Label runtime;
+
+ __ ldr(r_index_1, queue_index);
+ __ ldr(r_pre_val_0, Address(SP, nb_saved_regs*wordSize)); // pre_val sits just above the registers pushed above
+ __ ldr(r_buffer_2, buffer);
+
+ __ subs(r_index_1, r_index_1, wordSize); // step the index down by one word; flags feed the branch below
+ __ b(runtime, lt); // index went negative: take the runtime path
+
+ __ str(r_index_1, queue_index);
+ __ str(r_pre_val_0, Address(r_buffer_2, r_index_1)); // store pre_val at buffer + index
+
+ __ bind(done);
+
+#ifdef AARCH64
+ __ raw_pop(R2, R3);
+ __ raw_pop(R0, R1);
+#else // AARCH64
+ __ pop(saved_regs);
+#endif // AARCH64
+
+ __ ret();
+
+ __ bind(runtime);
+
+ save_live_registers(sasm);
+
+ assert(r_pre_val_0 == c_rarg0, "pre_val should be in R0");
+ __ mov(c_rarg1, Rthread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, c_rarg1);
+
+ restore_live_registers_without_return(sasm);
+
+ __ b(done); // rejoin the common exit, which pops the registers pushed on entry
+ }
+ break;
+ case g1_post_barrier_slow_id:
+ {
+ // Input:
+ // - store_addr, pushed on the stack
+
+ __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
+
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
+ Label done;
+ Label recheck;
+ Label runtime;
+
+ Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
+ DirtyCardQueue::byte_offset_of_index()));
+ Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
+ DirtyCardQueue::byte_offset_of_buf()));
+
+ AddressLiteral cardtable((address)ct->byte_map_base);
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+ // save at least the registers that need saving if the runtime is called
+#ifdef AARCH64
+ __ raw_push(R0, R1);
+ __ raw_push(R2, R3);
+ const int nb_saved_regs = 4;
+#else // AARCH64
+ const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
+ const int nb_saved_regs = 6;
+ assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
+ __ push(saved_regs);
+#endif // AARCH64
+
+ const Register r_card_addr_0 = R0; // must be R0 for the slow case
+ const Register r_obj_0 = R0;
+ const Register r_card_base_1 = R1;
+ const Register r_tmp2 = R2;
+ const Register r_index_2 = R2;
+ const Register r_buffer_3 = R3;
+ const Register tmp1 = Rtemp;
+
+ __ ldr(r_obj_0, Address(SP, nb_saved_regs*wordSize));
+ // Note: there is a comment in x86 code about not using
+ // ExternalAddress / lea, due to relocation not working
+ // properly for that address. Should be OK for arm, where we
+ // explicitly specify that 'cardtable' has a relocInfo::none
+ // type.
+ __ lea(r_card_base_1, cardtable);
+ __ add(r_card_addr_0, r_card_base_1, AsmOperand(r_obj_0, lsr, CardTableModRefBS::card_shift)); // card address = base + (store address >> card_shift)
+
+ // first quick check without barrier
+ __ ldrb(r_tmp2, Address(r_card_addr_0));
+
+ __ cmp(r_tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+ __ b(recheck, ne); // young card falls through to 'done'; anything else is rechecked after a barrier
+
+ __ bind(done);
+
+#ifdef AARCH64
+ __ raw_pop(R2, R3);
+ __ raw_pop(R0, R1);
+#else // AARCH64
+ __ pop(saved_regs);
+#endif // AARCH64
+
+ __ ret();
+
+ __ bind(recheck);
+
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp1);
+
+ // reload card state after the barrier that ensures the stored oop was visible
+ __ ldrb(r_tmp2, Address(r_card_addr_0));
+
+ assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code");
+ __ cbz(r_tmp2, done); // card already dirty (dirty value is 0): nothing to do
+
+ // storing region crossing non-NULL, card is clean.
+ // dirty card and log.
+
+ assert(0 == (int)CardTableModRefBS::dirty_card_val(), "adjust this code"); // NOTE(review): repeats the assert a few lines above
+ if (((intptr_t)ct->byte_map_base & 0xff) == 0) {
+ // Card table is aligned so the lowest byte of the table address base is zero.
+ __ strb(r_card_base_1, Address(r_card_addr_0));
+ } else {
+ __ strb(__ zero_register(r_tmp2), Address(r_card_addr_0));
+ }
+
+ __ ldr(r_index_2, queue_index);
+ __ ldr(r_buffer_3, buffer);
+
+ __ subs(r_index_2, r_index_2, wordSize);
+ __ b(runtime, lt); // go to runtime if now negative
+
+ __ str(r_index_2, queue_index);
+
+ __ str(r_card_addr_0, Address(r_buffer_3, r_index_2)); // log the card address at buffer + index
+
+ __ b(done);
+
+ __ bind(runtime);
+
+ save_live_registers(sasm);
+
+ assert(r_card_addr_0 == c_rarg0, "card_addr should be in R0");
+ __ mov(c_rarg1, Rthread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), c_rarg0, c_rarg1);
+
+ restore_live_registers_without_return(sasm);
+
+ __ b(done);
+ }
+ break;
+#endif // INCLUDE_ALL_GCS
+ case new_instance_id:
+ case fast_new_instance_id:
+ case fast_new_instance_init_check_id:
+ {
+ const Register result = R0;
+ const Register klass = R1;
+
+ if (UseTLAB && FastTLABRefill && id != new_instance_id) {
+ // We come here when TLAB allocation failed.
+ // In this case we either refill TLAB or allocate directly from eden.
+ Label retry_tlab, try_eden, slow_case, slow_case_no_pop;
+
+ // Make sure the class is fully initialized
+ if (id == fast_new_instance_init_check_id) {
+ __ ldrb(result, Address(klass, InstanceKlass::init_state_offset()));
+ __ cmp(result, InstanceKlass::fully_initialized);
+ __ b(slow_case_no_pop, ne); // nothing pushed yet, so skip the pop on the slow path
+ }
+
+ // Free some temporary registers
+ const Register obj_size = R4;
+ const Register tmp1 = R5;
+ const Register tmp2 = LR;
+ const Register obj_end = Rtemp;
+
+ __ raw_push(R4, R5, LR);
+
+ __ tlab_refill(result, obj_size, tmp1, tmp2, obj_end, try_eden, slow_case);
+
+ __ bind(retry_tlab);
+ __ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset()));
+ __ tlab_allocate(result, obj_end, tmp1, obj_size, slow_case); // initializes result and obj_end
+ __ initialize_object(result, obj_end, klass, noreg /* len */, tmp1, tmp2,
+ instanceOopDesc::header_size() * HeapWordSize, -1,
+ /* is_tlab_allocated */ true);
+ __ raw_pop_and_ret(R4, R5);
+
+ __ bind(try_eden);
+ __ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset()));
+ __ eden_allocate(result, obj_end, tmp1, tmp2, obj_size, slow_case); // initializes result and obj_end
+ __ incr_allocated_bytes(obj_size, tmp2);
+ __ initialize_object(result, obj_end, klass, noreg /* len */, tmp1, tmp2,
+ instanceOopDesc::header_size() * HeapWordSize, -1,
+ /* is_tlab_allocated */ false);
+ __ raw_pop_and_ret(R4, R5);
+
+ __ bind(slow_case);
+ __ raw_pop(R4, R5, LR); // undo the push above before falling into the runtime call
+
+ __ bind(slow_case_no_pop);
+ }
+
+ OopMap* map = save_live_registers(sasm);
+ int call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+
+ // MacroAssembler::StoreStore useless (included in the runtime exit path)
+
+ restore_live_registers_except_R0(sasm);
+ }
+ break;
+
+ case counter_overflow_id:
+ {
+ OopMap* oop_map = save_live_registers(sasm);
+ __ ldr(R1, Address(SP, arg1_offset)); // both arguments are read from the stub's stack slots
+ __ ldr(R2, Address(SP, arg2_offset));
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), R1, R2);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+ restore_live_registers(sasm);
+ }
+ break;
+
+ case new_type_array_id:
+ case new_object_array_id:
+ {
+ if (id == new_type_array_id) {
+ __ set_info("new_type_array", dont_gc_arguments);
+ } else {
+ __ set_info("new_object_array", dont_gc_arguments);
+ }
+
+ const Register result = R0;
+ const Register klass = R1;
+ const Register length = R2;
+
+ if (UseTLAB && FastTLABRefill) {
+ // We come here when TLAB allocation failed.
+ // In this case we either refill TLAB or allocate directly from eden.
+ Label retry_tlab, try_eden, slow_case, slow_case_no_pop;
+
+#ifdef AARCH64
+ __ mov_slow(Rtemp, C1_MacroAssembler::max_array_allocation_length);
+ __ cmp_32(length, Rtemp);
+#else
+ __ cmp_32(length, C1_MacroAssembler::max_array_allocation_length);
+#endif // AARCH64
+ __ b(slow_case_no_pop, hs); // unsigned length >= limit: let the runtime handle it
+
+ // Free some temporary registers
+ const Register arr_size = R4;
+ const Register tmp1 = R5;
+ const Register tmp2 = LR;
+ const Register tmp3 = Rtemp;
+ const Register obj_end = tmp3;
+
+ __ raw_push(R4, R5, LR);
+
+ __ tlab_refill(result, arr_size, tmp1, tmp2, tmp3, try_eden, slow_case);
+
+ __ bind(retry_tlab);
+ // Get the allocation size: round_up((length << (layout_helper & 0xff)) + header_size)
+ __ ldr_u32(tmp1, Address(klass, Klass::layout_helper_offset()));
+ __ mov(arr_size, MinObjAlignmentInBytesMask);
+ __ and_32(tmp2, tmp1, (unsigned int)(Klass::_lh_header_size_mask << Klass::_lh_header_size_shift));
+
+#ifdef AARCH64
+ __ lslv_w(tmp3, length, tmp1);
+ __ add(arr_size, arr_size, tmp3);
+#else
+ __ add(arr_size, arr_size, AsmOperand(length, lsl, tmp1));
+#endif // AARCH64
+
+ __ add(arr_size, arr_size, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift));
+ __ align_reg(arr_size, arr_size, MinObjAlignmentInBytes);
+
+ // tlab_allocate initializes result and obj_end, and preserves tmp2 which contains header_size
+ __ tlab_allocate(result, obj_end, tmp1, arr_size, slow_case);
+
+ assert_different_registers(result, obj_end, klass, length, tmp1, tmp2);
+ __ initialize_header(result, klass, length, tmp1);
+
+ __ add(tmp2, result, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift)); // tmp2 = result + header size, i.e. start of the array body
+ if (!ZeroTLAB) {
+ __ initialize_body(tmp2, obj_end, tmp1);
+ }
+
+ __ membar(MacroAssembler::StoreStore, tmp1);
+
+ __ raw_pop_and_ret(R4, R5);
+
+ __ bind(try_eden);
+ // Get the allocation size: round_up((length << (layout_helper & 0xff)) + header_size)
+ __ ldr_u32(tmp1, Address(klass, Klass::layout_helper_offset()));
+ __ mov(arr_size, MinObjAlignmentInBytesMask);
+ __ and_32(tmp2, tmp1, (unsigned int)(Klass::_lh_header_size_mask << Klass::_lh_header_size_shift));
+
+#ifdef AARCH64
+ __ lslv_w(tmp3, length, tmp1);
+ __ add(arr_size, arr_size, tmp3);
+#else
+ __ add(arr_size, arr_size, AsmOperand(length, lsl, tmp1));
+#endif // AARCH64
+
+ __ add(arr_size, arr_size, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift));
+ __ align_reg(arr_size, arr_size, MinObjAlignmentInBytes);
+
+ // eden_allocate destroys tmp2, so reload header_size after allocation
+ // eden_allocate initializes result and obj_end
+ __ eden_allocate(result, obj_end, tmp1, tmp2, arr_size, slow_case);
+ __ incr_allocated_bytes(arr_size, tmp2);
+ __ ldrb(tmp2, Address(klass, in_bytes(Klass::layout_helper_offset()) +
+ Klass::_lh_header_size_shift / BitsPerByte));
+ __ initialize_object(result, obj_end, klass, length, tmp1, tmp2, tmp2, -1, /* is_tlab_allocated */ false);
+ __ raw_pop_and_ret(R4, R5);
+
+ __ bind(slow_case);
+ __ raw_pop(R4, R5, LR);
+ __ bind(slow_case_no_pop);
+ }
+
+ OopMap* map = save_live_registers(sasm);
+ int call_offset;
+ if (id == new_type_array_id) {
+ call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
+ } else {
+ call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
+ }
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+
+ // MacroAssembler::StoreStore useless (included in the runtime exit path)
+
+ restore_live_registers_except_R0(sasm);
+ }
+ break;
+
+ case new_multi_array_id:
+ {
+ __ set_info("new_multi_array", dont_gc_arguments);
+
+ // R0: klass
+ // R2: rank
+ // SP: address of 1st dimension
+ const Register result = R0;
+ OopMap* map = save_live_registers(sasm);
+
+ __ mov(R1, R0); // klass moves to R1 for the runtime call
+ __ add(R3, SP, arg1_offset); // R3 = address of the arg1 stack slot, passed as the third argument
+ int call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_multi_array), R1, R2, R3);
+
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+
+ // MacroAssembler::StoreStore useless (included in the runtime exit path)
+
+ restore_live_registers_except_R0(sasm);
+ }
+ break;
+
+ case register_finalizer_id:
+ {
+ __ set_info("register_finalizer", dont_gc_arguments);
+
+ // Do not call runtime if JVM_ACC_HAS_FINALIZER flag is not set
+ __ load_klass(Rtemp, R0);
+ __ ldr_u32(Rtemp, Address(Rtemp, Klass::access_flags_offset()));
+
+#ifdef AARCH64
+ Label L;
+ __ tbnz(Rtemp, exact_log2(JVM_ACC_HAS_FINALIZER), L);
+ __ ret();
+ __ bind(L);
+#else
+ __ tst(Rtemp, JVM_ACC_HAS_FINALIZER);
+ __ bx(LR, eq); // flag clear: return straight to the caller
+#endif // AARCH64
+
+ // Call VM
+ OopMap* map = save_live_registers(sasm);
+ oop_maps = new OopMapSet();
+ int call_offset = __ call_RT(noreg, noreg,
+ CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), R0);
+ oop_maps->add_gc_map(call_offset, map);
+ restore_live_registers(sasm);
+ }
+ break;
+
+ case throw_range_check_failed_id:
+ {
+ __ set_info("range_check_failed", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
+ }
+ break;
+
+ case throw_index_exception_id:
+ {
+ __ set_info("index_range_check_failed", dont_gc_arguments);
+#ifdef AARCH64
+ __ NOT_TESTED();
+#endif
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
+ }
+ break;
+
+ case throw_div0_exception_id:
+ {
+ __ set_info("throw_div0_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
+ }
+ break;
+
+ case throw_null_pointer_exception_id:
+ {
+ __ set_info("throw_null_pointer_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
+ }
+ break;
+
+ case handle_exception_nofpu_id:
+ case handle_exception_id:
+ {
+ __ set_info("handle_exception", dont_gc_arguments);
+ oop_maps = generate_handle_exception(id, sasm);
+ }
+ break;
+
+ case handle_exception_from_callee_id:
+ {
+ __ set_info("handle_exception_from_callee", dont_gc_arguments);
+ oop_maps = generate_handle_exception(id, sasm);
+ }
+ break;
+
+ case unwind_exception_id:
+ {
+ __ set_info("unwind_exception", dont_gc_arguments);
+ generate_unwind_exception(sasm); // records no oop maps, so oop_maps stays NULL for this stub
+ }
+ break;
+
+ case throw_array_store_exception_id:
+ {
+ __ set_info("throw_array_store_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
+ }
+ break;
+
+ case throw_class_cast_exception_id:
+ {
+ __ set_info("throw_class_cast_exception", dont_gc_arguments);
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
+ }
+ break;
+
+ case throw_incompatible_class_change_error_id:
+ {
+ __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments); // NOTE(review): label says "class_cast" although this is the incompatible_class_change_error stub
+#ifdef AARCH64
+ __ NOT_TESTED();
+#endif
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
+ }
+ break;
+
+ case slow_subtype_check_id:
+ {
+ // (in) R0 - sub, destroyed,
+ // (in) R1 - super, not changed
+ // (out) R0 - result: 1 if check passed, 0 otherwise
+ __ raw_push(R2, R3, LR);
+
+ // Load an array of secondary_supers
+ __ ldr(R2, Address(R0, Klass::secondary_supers_offset()));
+ // Length goes to R3
+ __ ldr_s32(R3, Address(R2, Array<Klass*>::length_offset_in_bytes()));
+ __ add(R2, R2, Array<Klass*>::base_offset_in_bytes());
+
+ Label loop, miss;
+ __ bind(loop);
+ __ cbz(R3, miss); // no elements left: not found
+ __ ldr(LR, Address(R2, wordSize, post_indexed)); // LR is used as scratch for the current element; R2 advances by one word
+ __ sub(R3, R3, 1);
+ __ cmp(LR, R1);
+ __ b(loop, ne);
+
+ // We get here if an equal cache entry is found
+ __ str(R1, Address(R0, Klass::secondary_super_cache_offset()));
+ __ mov(R0, 1);
+ __ raw_pop_and_ret(R2, R3);
+
+ // A cache entry not found - return false
+ __ bind(miss);
+ __ mov(R0, 0);
+ __ raw_pop_and_ret(R2, R3);
+ }
+ break;
+
+ case monitorenter_nofpu_id:
+ save_fpu_registers = false;
+ // fall through
+ case monitorenter_id:
+ {
+ __ set_info("monitorenter", dont_gc_arguments);
+ const Register obj = R1;
+ const Register lock = R2;
+ OopMap* map = save_live_registers(sasm, save_fpu_registers);
+ __ ldr(obj, Address(SP, arg1_offset));
+ __ ldr(lock, Address(SP, arg2_offset));
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), obj, lock);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+ restore_live_registers(sasm, save_fpu_registers);
+ }
+ break;
+
+ case monitorexit_nofpu_id:
+ save_fpu_registers = false;
+ // fall through
+ case monitorexit_id:
+ {
+ __ set_info("monitorexit", dont_gc_arguments);
+ const Register lock = R1;
+ OopMap* map = save_live_registers(sasm, save_fpu_registers);
+ __ ldr(lock, Address(SP, arg1_offset));
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), lock);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, map);
+ restore_live_registers(sasm, save_fpu_registers);
+ }
+ break;
+
+ case deoptimize_id:
+ {
+ __ set_info("deoptimize", dont_gc_arguments);
+ OopMap* oop_map = save_live_registers(sasm);
+ const Register trap_request = R1;
+ __ ldr(trap_request, Address(SP, arg1_offset));
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), trap_request);
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+ restore_live_registers_without_return(sasm);
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
+ __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg)); // NOTE(review): predicate_failed_trap below passes Rtemp on both ports - confirm the difference is intended
+ }
+ break;
+
+ case access_field_patching_id:
+ {
+ __ set_info("access_field_patching", dont_gc_arguments);
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
+ }
+ break;
+
+ case load_klass_patching_id:
+ {
+ __ set_info("load_klass_patching", dont_gc_arguments);
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
+ }
+ break;
+
+ case load_appendix_patching_id:
+ {
+ __ set_info("load_appendix_patching", dont_gc_arguments);
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
+ }
+ break;
+
+ case load_mirror_patching_id:
+ {
+ __ set_info("load_mirror_patching", dont_gc_arguments);
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
+ }
+ break;
+
+ case predicate_failed_trap_id:
+ {
+ __ set_info("predicate_failed_trap", dont_gc_arguments);
+
+ OopMap* oop_map = save_live_registers(sasm);
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));
+
+ oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(call_offset, oop_map);
+
+ restore_live_registers_without_return(sasm);
+
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
+ __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, Rtemp);
+ }
+ break;
+
+ default:
+ {
+ __ set_info("unimplemented entry", dont_gc_arguments);
+ STOP("unimplemented entry");
+ }
+ break;
+ }
+ return oop_maps;
+}
+
+#undef __
+
+#ifdef __SOFTFP__ // soft-float build: name the floating-point helper routines
+const char *Runtime1::pd_name_for_address(address entry) {
+ const intptr_t wanted = (intptr_t)entry; // address being looked up, compared against each helper below
+#define NAME_IF_ENTRY_IS(fn) \
+ if (CAST_FROM_FN_PTR(intptr_t, fn) == wanted) return #fn
+ // Single-precision arithmetic.
+ NAME_IF_ENTRY_IS(__aeabi_fadd_glibc);
+ NAME_IF_ENTRY_IS(__aeabi_fmul);
+ NAME_IF_ENTRY_IS(__aeabi_fsub_glibc);
+ NAME_IF_ENTRY_IS(__aeabi_fdiv);
+
+ // Double-precision arithmetic. The *_glibc variants are code imported from the glibc soft-fp bundle for better accuracy, see CR 6757269.
+ NAME_IF_ENTRY_IS(__aeabi_dadd_glibc);
+ NAME_IF_ENTRY_IS(__aeabi_dmul);
+ NAME_IF_ENTRY_IS(__aeabi_dsub_glibc);
+ NAME_IF_ENTRY_IS(__aeabi_ddiv);
+ // Conversions.
+ NAME_IF_ENTRY_IS(__aeabi_f2d);
+ NAME_IF_ENTRY_IS(__aeabi_d2f);
+ NAME_IF_ENTRY_IS(__aeabi_i2f);
+ NAME_IF_ENTRY_IS(__aeabi_i2d);
+ NAME_IF_ENTRY_IS(__aeabi_f2iz);
+ // SharedRuntime comparison helpers.
+ NAME_IF_ENTRY_IS(SharedRuntime::fcmpl);
+ NAME_IF_ENTRY_IS(SharedRuntime::fcmpg);
+ NAME_IF_ENTRY_IS(SharedRuntime::dcmpl);
+ NAME_IF_ENTRY_IS(SharedRuntime::dcmpg);
+
+ NAME_IF_ENTRY_IS(SharedRuntime::unordered_fcmplt);
+ NAME_IF_ENTRY_IS(SharedRuntime::unordered_dcmplt);
+ NAME_IF_ENTRY_IS(SharedRuntime::unordered_fcmple);
+ NAME_IF_ENTRY_IS(SharedRuntime::unordered_dcmple);
+ NAME_IF_ENTRY_IS(SharedRuntime::unordered_fcmpge);
+ NAME_IF_ENTRY_IS(SharedRuntime::unordered_dcmpge);
+ NAME_IF_ENTRY_IS(SharedRuntime::unordered_fcmpgt);
+ NAME_IF_ENTRY_IS(SharedRuntime::unordered_dcmpgt);
+ // SharedRuntime negation helpers.
+ NAME_IF_ENTRY_IS(SharedRuntime::fneg);
+ NAME_IF_ENTRY_IS(SharedRuntime::dneg);
+ // Single-precision comparisons.
+ NAME_IF_ENTRY_IS(__aeabi_fcmpeq);
+ NAME_IF_ENTRY_IS(__aeabi_fcmplt);
+ NAME_IF_ENTRY_IS(__aeabi_fcmple);
+ NAME_IF_ENTRY_IS(__aeabi_fcmpge);
+ NAME_IF_ENTRY_IS(__aeabi_fcmpgt);
+ // Double-precision comparisons.
+ NAME_IF_ENTRY_IS(__aeabi_dcmpeq);
+ NAME_IF_ENTRY_IS(__aeabi_dcmplt);
+ NAME_IF_ENTRY_IS(__aeabi_dcmple);
+ NAME_IF_ENTRY_IS(__aeabi_dcmpge);
+ NAME_IF_ENTRY_IS(__aeabi_dcmpgt);
+#undef NAME_IF_ENTRY_IS
+ return ""; // no helper matched
+}
+#else // !__SOFTFP__
+const char *Runtime1::pd_name_for_address(address entry) {
+ return "<unknown function>"; // no platform-specific helpers to name in this configuration
+}
+#endif // __SOFTFP__
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c1_globals_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_C1_GLOBALS_ARM_HPP
+#define CPU_ARM_VM_C1_GLOBALS_ARM_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+//
+// Sets the default values for platform dependent flags used by the client compiler.
+// (see c1_globals.hpp)
+//
+
+#ifndef COMPILER2 // avoid duplicated definitions, favoring C2 version
+define_pd_global(bool, BackgroundCompilation, true );
+define_pd_global(bool, UseTLAB, true );
+define_pd_global(bool, ResizeTLAB, true );
+define_pd_global(bool, InlineIntrinsics, false); // TODO: ARM - default is off here, reason not stated
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps, false);
+define_pd_global(bool, UseOnStackReplacement, true );
+define_pd_global(bool, TieredCompilation, false);
+define_pd_global(intx, CompileThreshold, 1500 );
+
+define_pd_global(intx, OnStackReplacePercentage, 933 );
+define_pd_global(intx, FreqInlineSize, 325 );
+define_pd_global(size_t, NewSizeThreadIncrease, 4*K );
+define_pd_global(size_t, InitialCodeCacheSize, 160*K);
+define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
+define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
+define_pd_global(size_t, ProfiledCodeHeapSize, 14*M );
+define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
+define_pd_global(bool, ProfileInterpreter, false);
+define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
+define_pd_global(uintx, CodeCacheMinBlockLength, 1);
+define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
+define_pd_global(size_t, MetaspaceSize, 12*M );
+define_pd_global(bool, NeverActAsServerClassMachine, true);
+define_pd_global(uint64_t, MaxRAM, 1ULL*G);
+define_pd_global(bool, CICompileOSR, true );
+#endif // !COMPILER2
+define_pd_global(bool, UseTypeProfile, false);
+define_pd_global(bool, RoundFPResults, false);
+
+// The flags from UseTypeProfile onwards are defined whether or not COMPILER2 is set.
+define_pd_global(bool, LIRFillDelaySlots, false);
+define_pd_global(bool, OptimizeSinglePrecision, true);
+define_pd_global(bool, CSEArrayLength, true);
+define_pd_global(bool, TwoOperandLIRForm, false);
+
+#endif // CPU_ARM_VM_C1_GLOBALS_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/c2_globals_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_C2_GLOBALS_ARM_HPP
+#define CPU_ARM_VM_C2_GLOBALS_ARM_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+//
+// Sets the default values for platform dependent flags used by the server compiler.
+// (see c2_globals.hpp). Intended to be alpha-sorted; several entries below are currently out of order.
+
+define_pd_global(bool, BackgroundCompilation, true);
+define_pd_global(bool, CICompileOSR, true);
+define_pd_global(bool, InlineIntrinsics, false);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps, true);
+define_pd_global(bool, UseOnStackReplacement, true);
+define_pd_global(bool, ProfileInterpreter, true);
+#ifdef AARCH64
+define_pd_global(bool, TieredCompilation, trueInTiered);
+#else // !AARCH64
+define_pd_global(bool, TieredCompilation, false);
+#endif // AARCH64
+define_pd_global(intx, CompileThreshold, 10000);
+
+define_pd_global(intx, OnStackReplacePercentage, 140);
+define_pd_global(intx, ConditionalMoveLimit, 4);
+// C2 gets to use all the float/double registers
+#ifdef AARCH64
+define_pd_global(intx, FLOATPRESSURE, 31);
+#else // !AARCH64
+define_pd_global(intx, FLOATPRESSURE, 30);
+#endif // AARCH64
+define_pd_global(intx, FreqInlineSize, 175);
+#ifdef AARCH64
+define_pd_global(intx, INTPRESSURE, 27);
+#else // !AARCH64
+define_pd_global(intx, INTPRESSURE, 12);
+#endif // AARCH64
+define_pd_global(intx, InteriorEntryAlignment, 16); // = CodeEntryAlignment
+define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
+// The default setting 16/16 seems to work best.
+// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
+//define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize
+define_pd_global(intx, RegisterCostAreaRatio, 16000);
+define_pd_global(bool, UseTLAB, true);
+define_pd_global(bool, ResizeTLAB, true);
+define_pd_global(intx, LoopUnrollLimit, 60); // Design center runs on 1.3.1
+define_pd_global(intx, LoopPercentProfileLimit, 10);
+define_pd_global(intx, PostLoopMultiversioning, false); // NOTE(review): declared intx but given the value 'false' - confirm the intended type
+define_pd_global(intx, MinJumpTableSize, 16);
+
+// Peephole and CISC spilling both break the graph, and so make the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole, false);
+define_pd_global(bool, UseCISCSpill, false);
+define_pd_global(bool, OptoBundling, false);
+define_pd_global(bool, OptoScheduling, true);
+define_pd_global(bool, OptoRegScheduling, false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
+define_pd_global(bool, IdealizeClearArrayNode, true);
+
+#ifdef _LP64
+// We need to make sure that all generated code is within
+// 2 gigs of the libjvm.so runtime routines so we can use
+// the faster "call" instruction rather than the expensive
+// sequence of instructions to load a 64 bit pointer.
+//
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(size_t, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize
+define_pd_global(size_t, ReservedCodeCacheSize, 48*M);
+define_pd_global(size_t, NonProfiledCodeHeapSize, 21*M);
+define_pd_global(size_t, ProfiledCodeHeapSize, 22*M);
+define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
+define_pd_global(size_t, CodeCacheExpansionSize, 64*K);
+
+// Ergonomics related flags
+define_pd_global(uint64_t, MaxRAM, 128ULL*G);
+#else // !_LP64
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(size_t, InitialCodeCacheSize, 1536*K); // Integral multiple of CodeCacheExpansionSize
+define_pd_global(size_t, ReservedCodeCacheSize, 32*M);
+define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M);
+define_pd_global(size_t, ProfiledCodeHeapSize, 14*M);
+define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
+define_pd_global(size_t, CodeCacheExpansionSize, 32*K);
+// Ergonomics related flags
+define_pd_global(uint64_t, MaxRAM, 4ULL*G);
+#endif // _LP64
+define_pd_global(uintx, CodeCacheMinBlockLength, 4);
+define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
+
+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed
+
+// Heap related flags
+define_pd_global(size_t, MetaspaceSize, ScaleForWordSize(16*M));
+
+// Ergonomics related flags
+define_pd_global(bool, NeverActAsServerClassMachine, false);
+
+#endif // CPU_ARM_VM_C2_GLOBALS_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/codeBuffer_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_CODEBUFFER_ARM_HPP
+#define CPU_ARM_VM_CODEBUFFER_ARM_HPP
+
+private:
+ void pd_initialize() {} // empty body: no platform-dependent initialization is performed
+
+public:
+ void flush_bundle(bool start_new_bundle) {} // empty body: the argument is ignored and nothing is flushed
+
+#endif // CPU_ARM_VM_CODEBUFFER_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/compiledIC_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nativeInst.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+
+// ----------------------------------------------------------------------------
+#if defined(COMPILER2) || INCLUDE_JVMCI
+#define __ _masm.
+// emit call stub, compiled java to interpreter
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
+ // Stub is fixed up when the corresponding call is converted from calling
+ // compiled code to calling interpreted code.
+ // ldr_literal Rmethod, <metadata literal, emitted as NULL>
+ // b <self> (near case) or indirect_jump via Rtemp through a literal holding -1 (far case)
+
+ if (mark == NULL) {
+ mark = cbuf.insts_mark(); // get mark within main instrs section
+ }
+
+ MacroAssembler _masm(&cbuf);
+
+ address base = __ start_a_stub(to_interp_stub_size()); // stub start; also the value returned on success
+ if (base == NULL) {
+ return NULL; // CodeBuffer::expand failed
+ }
+
+ // static stub relocation stores the instruction address of the call
+ __ relocate(static_stub_Relocation::spec(mark));
+
+ InlinedMetadata object_literal(NULL);
+ // single instruction, see NativeMovConstReg::next_instruction_address() in
+ // CompiledStaticCall::set_to_interpreted()
+ __ ldr_literal(Rmethod, object_literal);
+
+ __ set_inst_mark(); // Who uses this?
+
+ bool near_range = __ cache_fully_reachable(); // selects between the direct branch and the indirect jump below
+ InlinedAddress dest((address)-1);
+ address branch_site = __ pc();
+ if (near_range) {
+ __ b(branch_site); // special NativeJump -1 destination
+ } else {
+ // Can't trash LR, FP, or argument registers
+ __ indirect_jump(dest, Rtemp);
+ }
+ __ bind_literal(object_literal); // includes spec_for_immediate reloc
+ if (!near_range) {
+ __ bind_literal(dest); // special NativeJump -1 destination
+ }
+
+ assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size"); // emitted code must fit the reserved size
+
+ // Update current stubs pointer and restore code_end.
+ __ end_a_stub();
+ return base;
+}
+#undef __
+
+// Space reserved for the compiled-java-to-interpreter call stub: eight instruction slots.
+int CompiledStaticCall::to_interp_stub_size() {
+  return NativeInstruction::instruction_size * 8;
+}
+
+// Relocation entries for call stub, compiled java to interpreter.
+int CompiledStaticCall::reloc_to_interp_stub() {
+ return 10; // NOTE(review): earlier note read "4 in emit_to_interp_stub + 1 in Java_Static_Call" (= 5, not 10) - confirm the intended count
+}
+#endif // COMPILER2 || JVMCI
+
+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { // route this static call through its stub to 'entry'
+ address stub = find_stub(/*is_aot*/ false);
+ guarantee(stub != NULL, "stub not found");
+
+ if (TraceICs) {
+ ResourceMark rm;
+ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+ p2i(instruction_address()),
+ callee->name_and_sig_as_C_string());
+ }
+
+ // Creation also verifies the object.
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); // the jump follows the constant load in the stub
+
+#ifdef ASSERT
+ // read the value once
+ volatile intptr_t data = method_holder->data();
+ volatile address destination = jump->jump_destination();
+ assert(data == 0 || data == (intptr_t)callee(), // stub constant must be clean (0) or already this callee
+ "a) MT-unsafe modification of inline cache");
+ assert(destination == (address)-1 || destination == entry, // jump must be clean (-1) or already 'entry'
+ "b) MT-unsafe modification of inline cache");
+#endif
+
+ // Update stub: store the callee in the stub's constant and aim the stub's jump at 'entry'.
+ method_holder->set_data((intptr_t)callee());
+ jump->set_jump_destination(entry);
+
+ // Update jump to call: only after the stub is filled in is the call site pointed at it.
+ set_destination_mt_safe(stub);
+}
+
+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { // return the stub to its clean values (0 / -1)
+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
+ // Reset stub.
+ address stub = static_stub->addr();
+ assert(stub != NULL, "stub not found");
+ // Creation also verifies the object.
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+ method_holder->set_data(0); // 0 is the value set_to_interpreted() accepts as "clean" for the constant
+ jump->set_jump_destination((address)-1); // -1 is the value set_to_interpreted() accepts as "clean" for the jump
+}
+
+//-----------------------------------------------------------------------------
+// Non-product mode code
+#ifndef PRODUCT
+
+void CompiledDirectStaticCall::verify() { // non-product consistency check of the call, its stub and its state
+ // Verify call.
+ _call->verify();
+ if (os::is_MP()) {
+ _call->verify_alignment(); // alignment is only checked when os::is_MP() is true
+ }
+
+ // Verify stub.
+ address stub = find_stub(/*is_aot*/ false);
+ assert(stub != NULL, "no stub found for static call");
+ // Creation also verifies the object.
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); // result unused: constructed only for the verification noted above
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); // likewise unused afterwards
+
+ // Verify state.
+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
+}
+
+#endif // !PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/copy_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_COPY_ARM_HPP
+#define CPU_ARM_VM_COPY_ARM_HPP
+
+#include "utilities/macros.hpp"
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#include OS_CPU_HEADER_INLINE(copy)
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  juint* cur = (juint*)tohw;  // 'count' heap words are filled one juint at a time
+  juint* const limit = cur + count * (HeapWordSize / BytesPerInt);
+  for (; cur != limit; ++cur) {
+    *cur = value;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+ pd_fill_to_words(tohw, count, value); // no separate aligned variant: forwards unchanged to pd_fill_to_words
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+ memset(to, value, count); // sets 'count' bytes starting at 'to' to 'value'
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+ pd_fill_to_words(tohw, count, 0); // zeroing is a word fill with the value 0
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+ memset(to, 0, count); // clears 'count' bytes starting at 'to'
+}
+
+#endif // CPU_ARM_VM_COPY_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/debug_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/codeCache.hpp"
+#include "code/nmethod.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/init.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+
+void pd_ps(frame f) {} // empty body: the frame argument is ignored
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/depChecker_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "compiler/disassembler.hpp"
+#include "depChecker_arm.hpp"
+
+// Nothing to do
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/depChecker_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_DEPCHECKER_ARM_HPP
+#define CPU_ARM_VM_DEPCHECKER_ARM_HPP
+
+// Nothing to do
+
+#endif // CPU_ARM_VM_DEPCHECKER_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/disassembler_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_DISASSEMBLER_ARM_HPP
+#define CPU_ARM_VM_DISASSEMBLER_ARM_HPP
+
+ static int pd_instruction_alignment() {
+   return static_cast<int>(sizeof(int));  // alignment reported to the disassembler: the size of an int
+ }
+
+ static const char* pd_cpu_opts() {
+ return ""; // empty option string: no cpu-specific options are supplied
+ }
+
+#endif // CPU_ARM_VM_DISASSEMBLER_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/frame_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/markOop.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_arm.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+#include "prims/methodHandles.hpp"
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() { // compiled only under ASSERT; empty body, so no checks are performed
+}
+#endif
+
+
+// Profiling/safepoint support
+
+bool frame::safe_for_sender(JavaThread *thread) { // true only if every check below on sp/fp/pc and the would-be sender passes
+ address sp = (address)_sp;
+ address fp = (address)_fp;
+ address unextended_sp = (address)_unextended_sp;
+
+ static size_t stack_guard_size = os::uses_stack_guard_pages() ? // function-local static: computed on first call only
+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0;
+ size_t usable_stack_size = thread->stack_size() - stack_guard_size;
+
+ // sp must be within the usable part of the stack (not in guards)
+ bool sp_safe = (sp != NULL &&
+ (sp <= thread->stack_base()) &&
+ (sp >= thread->stack_base() - usable_stack_size));
+
+ if (!sp_safe) {
+ return false;
+ }
+
+ bool unextended_sp_safe = (unextended_sp != NULL && // must lie between sp and the stack base
+ (unextended_sp <= thread->stack_base()) &&
+ (unextended_sp >= sp));
+ if (!unextended_sp_safe) {
+ return false;
+ }
+
+ // We know sp/unextended_sp are safe. Only fp is questionable here.
+
+ bool fp_safe = (fp != NULL && // computed here, consulted only by the paths below that actually use fp
+ (fp <= thread->stack_base()) &&
+ fp >= sp);
+
+ if (_cb != NULL ) {
+
+ // First check if frame is complete and tester is reliable
+ // Unfortunately we can only check frame complete for runtime stubs and nmethod
+ // other generic buffer blobs are more problematic so we just assume they are
+ // ok. adapter blobs never have a frame complete and are never ok.
+
+ if (!_cb->is_frame_complete_at(_pc)) {
+ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
+ return false;
+ }
+ }
+
+ // Could just be some random pointer within the codeBlob
+ if (!_cb->code_contains(_pc)) {
+ return false;
+ }
+
+ // Entry frame checks
+ if (is_entry_frame()) {
+ // an entry frame must have a valid fp.
+ return fp_safe && is_entry_frame_valid(thread);
+ }
+
+ intptr_t* sender_sp = NULL;
+ address sender_pc = NULL;
+
+ if (is_interpreted_frame()) {
+ // fp must be safe
+ if (!fp_safe) {
+ return false;
+ }
+
+ sender_pc = (address) this->fp()[return_addr_offset];
+ sender_sp = (intptr_t*) addr_at(sender_sp_offset);
+
+ } else {
+ // must be some sort of compiled/runtime frame
+ // fp does not have to be safe (although it could be checked for c1?)
+
+ sender_sp = _unextended_sp + _cb->frame_size();
+ // Is sender_sp safe?
+ if ((address)sender_sp >= thread->stack_base()) {
+ return false;
+ }
+ // With our calling conventions, the return_address should
+ // end up being the word on the stack
+ sender_pc = (address) *(sender_sp - sender_sp_offset + return_addr_offset);
+ }
+
+ // We must always be able to find a recognizable pc
+ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); // lookup precedes the NULL test of sender_pc on the next line
+ if (sender_pc == NULL || sender_blob == NULL) {
+ return false;
+ }
+
+
+ // If the potential sender is the interpreter then we can do some more checking
+ if (Interpreter::contains(sender_pc)) {
+
+ // FP is always saved in a recognizable place in any code we generate. However
+ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP
+ // is really a frame pointer.
+
+ intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset + link_offset);
+ bool saved_fp_safe = ((address)saved_fp <= thread->stack_base()) && (saved_fp > sender_sp); // strictly above sender_sp here
+
+ if (!saved_fp_safe) {
+ return false;
+ }
+
+ // construct the potential sender
+
+ frame sender(sender_sp, saved_fp, sender_pc);
+
+ return sender.is_interpreted_frame_valid(thread);
+ }
+
+ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) {
+ return false;
+ }
+
+ // Could just be some random pointer within the codeBlob
+ if (!sender_blob->code_contains(sender_pc)) {
+ return false;
+ }
+
+ // We should never be able to see an adapter if the current frame is something from code cache
+ if (sender_blob->is_adapter_blob()) {
+ return false;
+ }
+
+ // Could be the call_stub
+ if (StubRoutines::returns_to_call_stub(sender_pc)) {
+ intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset + link_offset);
+ bool saved_fp_safe = ((address)saved_fp <= thread->stack_base()) && (saved_fp >= sender_sp); // equality with sender_sp is accepted here
+
+ if (!saved_fp_safe) {
+ return false;
+ }
+
+ // construct the potential sender
+
+ frame sender(sender_sp, saved_fp, sender_pc);
+
+ // Validate the JavaCallWrapper an entry frame must have
+ address jcw = (address)sender.entry_frame_call_wrapper();
+
+ bool jcw_safe = (jcw <= thread->stack_base()) && (jcw > (address)sender.fp()); // wrapper must sit above the sender's fp, within the stack
+
+ return jcw_safe;
+ }
+
+ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size
+ // because the return address counts against the callee's frame.
+
+ if (sender_blob->frame_size() <= 0) {
+ assert(!sender_blob->is_compiled(), "should count return address at least");
+ return false;
+ }
+
+ // We should never be able to see anything here except an nmethod. If something in the
+ // code cache (current frame) is called by an entity within the code cache that entity
+ // should not be anything but the call stub (already covered), the interpreter (already covered)
+ // or an nmethod.
+
+ if (!sender_blob->is_compiled()) {
+ return false;
+ }
+
+ // Could put some more validation for the potential non-interpreted sender
+ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
+
+ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
+
+ // We've validated the potential sender that would be created
+ return true;
+ }
+
+ // Must be native-compiled frame. Since sender will try and use fp to find
+ // linkages it must be safe
+
+ if (!fp_safe) {
+ return false;
+ }
+
+ // Will the pc we fetch be non-zero (which we'll find at the oldest frame)
+
+ if ((address) this->fp()[return_addr_offset] == NULL) return false;
+
+
+ // could try and do some more potential verification of native frame if we could think of some...
+
+ return true;
+}
+
+
+void frame::patch_pc(Thread* thread, address pc) {  // overwrite this frame's stored pc with 'pc'; 'thread' is not used here
+  address* pc_addr = &((address *)sp())[-sender_sp_offset+return_addr_offset];  // stack slot that gets patched
+  if (TracePcPatching) {
+    tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "] ",  // space after "address" keeps the value separate
+                  p2i(pc_addr), p2i(*pc_addr), p2i(pc));
+  }
+  *pc_addr = pc;
+  _cb = CodeCache::find_blob(pc);  // re-resolve the code blob for the new pc
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    assert(original_pc == _pc, "expected original PC to be stored before patching");
+    _deopt_state = is_deoptimized;
+    // leave _pc as is
+  } else {
+    _deopt_state = not_deoptimized;
+    _pc = pc;
+  }
+}
+
+bool frame::is_interpreted_frame() const {
+ return Interpreter::contains(pc()); // decided purely by whether pc() lies inside the interpreter
+}
+
+int frame::frame_size(RegisterMap* map) const {
+  frame caller = sender(map);  // computing the sender may update 'map'
+  return caller.sp() - sp();   // pointer difference between the caller's sp and this frame's sp
+}
+
+intptr_t* frame::entry_frame_argument_at(int offset) const {
+  assert(is_entry_frame(), "entry frame expected");
+  // turn the expression-stack byte offset into a word index
+  const int word_index = Interpreter::expr_offset_in_bytes(offset) / wordSize;
+  // arguments of an entry frame are addressed relative to unextended_sp()
+  return unextended_sp() + word_index;
+}
+
+// sender_sp: value held in the frame slot at interpreter_frame_sender_sp_offset
+intptr_t* frame::interpreter_frame_sender_sp() const {
+ assert(is_interpreted_frame(), "interpreted frame expected");
+ return (intptr_t*) at(interpreter_frame_sender_sp_offset);
+}
+
+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) {
+ assert(is_interpreted_frame(), "interpreted frame expected");
+ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); // writes the slot read by interpreter_frame_sender_sp()
+}
+
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); // address of the slot itself, not a value loaded from it
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+  BasicObjectLock* const monitor_top = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset);
+  // the loaded pointer has to lie inside this frame: sp() <= monitor_top < fp()
+  assert((intptr_t) monitor_top < (intptr_t) fp(), "result must < than frame pointer");
+  assert((intptr_t) monitor_top >= (intptr_t) sp(), "result must >= than stack pointer");
+  return monitor_top;
+}
+
+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
+ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; // writes the slot read by interpreter_frame_monitor_end()
+}
+
+#ifdef AARCH64
+
+// Used by template based interpreter deoptimization (AARCH64 only): stores stack_top in the slot at interpreter_frame_stack_top_offset
+void frame::interpreter_frame_set_stack_top(intptr_t* stack_top) {
+ *((intptr_t**)addr_at(interpreter_frame_stack_top_offset)) = stack_top;
+}
+
+// Used by template based interpreter deoptimization (AARCH64 only): stores sp in the slot at interpreter_frame_extended_sp_offset
+void frame::interpreter_frame_set_extended_sp(intptr_t* sp) {
+ *((intptr_t**)addr_at(interpreter_frame_extended_sp_offset)) = sp;
+}
+
+#else
+
+// Used by template based interpreter deoptimization (non-AARCH64 only): stores sp in the slot at interpreter_frame_last_sp_offset
+void frame::interpreter_frame_set_last_sp(intptr_t* sp) {
+ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp;
+}
+
+#endif // AARCH64
+
+frame frame::sender_for_entry_frame(RegisterMap* map) const { // sender is rebuilt from the JavaFrameAnchor of the call wrapper
+ assert(map != NULL, "map must be set");
+ // Java frame called from C; skip all C frames and return top C
+ // frame of that chunk as the sender
+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
+ assert(!entry_frame_is_first(), "next Java fp must be non zero");
+ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
+ map->clear();
+ assert(map->include_argument_oops(), "should be set by clear");
+#ifdef AARCH64
+ assert (jfa->last_Java_pc() != NULL, "pc should be stored"); // on AARCH64 the anchor pc is required
+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
+ return fr;
+#else
+ if (jfa->last_Java_pc() != NULL) { // a recorded pc is used when present
+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
+ return fr;
+ }
+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); // otherwise fall back to the (sp, fp) constructor
+ return fr;
+#endif // AARCH64
+}
+
+//------------------------------------------------------------------------------
+// frame::verify_deopt_original_pc
+//
+// Verifies the calculated original PC of a deoptimization PC for the
+// given unextended SP. The unextended SP might also be the saved SP
+// for MethodHandle call sites.
+#ifdef ASSERT
+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+ frame fr; // scratch frame: only _unextended_sp is filled in before use
+
+ // This is ugly but it's better than to change {get,set}_original_pc
+ // to take an SP value as argument. And it's only a debugging
+ // method anyway.
+ fr._unextended_sp = unextended_sp;
+
+ address original_pc = nm->get_original_pc(&fr);
+ assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
+ assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); // caller's flag must agree with nm
+}
+#endif
+
+//------------------------------------------------------------------------------
+// frame::adjust_unextended_sp
+void frame::adjust_unextended_sp() {
+ // same as on x86
+
+ // If we are returning to a compiled MethodHandle call site, the
+ // saved_fp will in fact be a saved value of the unextended SP. The
+ // simplest way to tell whether we are returning to such a call site
+ // is as follows:
+
+ CompiledMethod* sender_cm = (_cb == NULL) ? NULL : _cb->as_compiled_method_or_null(); // NULL when there is no blob or it is not a compiled method
+ if (sender_cm != NULL) {
+ // If the sender PC is a deoptimization point, get the original
+ // PC. For MethodHandle call site the unextended_sp is stored in
+ // saved_fp.
+ if (sender_cm->is_deopt_mh_entry(_pc)) {
+ DEBUG_ONLY(verify_deopt_mh_original_pc(sender_cm, _fp));
+ _unextended_sp = _fp;
+ }
+ else if (sender_cm->is_deopt_entry(_pc)) {
+ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); // verification only: _unextended_sp is left unchanged
+ }
+ else if (sender_cm->is_method_handle_return(_pc)) {
+ _unextended_sp = _fp;
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+// frame::update_map_with_saved_link
+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
+ // see x86 for comments
+ map->set_location(FP->as_VMReg(), (address) link_addr); // record link_addr as the location of FP in the map
+#ifdef AARCH64
+ // also adjust a high part of register
+ map->set_location(FP->as_VMReg()->next(), (address) link_addr); // same address is recorded for the next VMReg slot
+#endif // AARCH64
+}
+
+frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
+ // SP is the raw SP from the sender after adapter or interpreter
+ // extension.
+ intptr_t* sender_sp = this->sender_sp();
+
+ // This is the sp before any possible extension (adapter/locals).
+ intptr_t* unextended_sp = interpreter_frame_sender_sp();
+
+#ifdef COMPILER2
+ if (map->update_map()) { // the saved link is only recorded when the map asks for updates (COMPILER2 builds)
+ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
+ }
+#endif // COMPILER2
+
+ return frame(sender_sp, unextended_sp, link(), sender_pc()); // sender built from (sp, unextended sp, saved link, return pc)
+}
+
+frame frame::sender_for_compiled_frame(RegisterMap* map) const {
+ assert(map != NULL, "map must be set");
+
+ // frame owned by optimizing compiler
+ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); // NOTE(review): condition admits 0 although the message says non-zero - confirm intent
+ intptr_t* sender_sp = unextended_sp() + _cb->frame_size();
+ intptr_t* unextended_sp = sender_sp; // the sender's unextended sp starts out equal to its sp
+
+ address sender_pc = (address) *(sender_sp - sender_sp_offset + return_addr_offset);
+
+ // This is the saved value of FP which may or may not really be an FP.
+ // It is only an FP if the sender is an interpreter frame (or C1?).
+ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - sender_sp_offset + link_offset);
+
+ if (map->update_map()) {
+ // Tell GC to use argument oopmaps for some runtime stubs that need it.
+ // For C1, the runtime stub might not have oop maps, so set this flag
+ // outside of update_register_map.
+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+ if (_cb->oop_maps() != NULL) {
+ OopMapSet::update_register_map(this, map);
+ }
+
+ // Since the prolog does the save and restore of FP there is no oopmap
+ // for it so we must fill in its location as if there was an oopmap entry
+ // since if our caller was compiled code there could be live jvm state in it.
+ update_map_with_saved_link(map, saved_fp_addr);
+ }
+
+ assert(sender_sp != sp(), "must have changed");
+ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+}
+
+frame frame::sender(RegisterMap* map) const {
+  // By default we don't have to follow argument oops; the
+  // sender_for_xxx routine that is chosen adjusts the map as needed.
+  map->set_include_argument_oops(false);
+
+  if (is_entry_frame())       return sender_for_entry_frame(map);
+  if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
+
+  // Neither an entry nor an interpreted frame: dispatch on the code blob.
+  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+  if (_cb == NULL) {
+    assert(false, "should not be called for a C frame");
+    return frame();
+  }
+  return sender_for_compiled_frame(map);
+}
+
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const { // heuristic plausibility checks; false on the first one that fails
+ assert(is_interpreted_frame(), "Not an interpreted frame");
+ // These are reasonable sanity checks
+ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { // fp must be non-null and word aligned
+ return false;
+ }
+ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { // sp must be non-null and word aligned
+ return false;
+ }
+ if (fp() + interpreter_frame_initial_sp_offset < sp()) {
+ return false;
+ }
+ // These are hacks to keep us out of trouble.
+ // The problem with these is that they mask other problems
+ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above
+ return false;
+ }
+ // do some validation of frame elements
+
+ // first the method
+
+ Method* m = *interpreter_frame_method_addr();
+
+ // validate the method we'd find in this potential sender
+ if (!m->is_valid_method()) return false;
+
+ // stack frames shouldn't be much larger than max_stack elements
+
+ if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) {
+ return false;
+ }
+
+ // validate bci/bcp
+
+ address bcp = interpreter_frame_bcp();
+ if (m->validate_bci_from_bcp(bcp) < 0) {
+ return false;
+ }
+
+ // validate ConstantPoolCache*
+ ConstantPoolCache* cp = *interpreter_frame_cache_addr();
+ if (cp == NULL || !cp->is_metaspace_object()) return false;
+
+ // validate locals
+
+ address locals = (address) *interpreter_frame_locals_addr();
+
+ if (locals > thread->stack_base() || locals < (address) fp()) return false; // locals must lie between fp and the stack base
+
+ // We'd have to be pretty unlucky to be misled at this point
+
+ return true;
+}
+
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+ assert(is_interpreted_frame(), "interpreted frame expected");
+ Method* method = interpreter_frame_method();
+ BasicType type = method->result_type();
+ // Step 1: locate the word(s) holding the result; step 2 (the switch below) copies them out.
+ intptr_t* res_addr;
+ if (method->is_native()) {
+ // Prior to calling into the runtime to report the method_exit both of
+ // the possible return value registers are saved.
+#ifdef AARCH64
+ // Return value registers are saved into the frame
+ if (type == T_FLOAT || type == T_DOUBLE) {
+ res_addr = addr_at(interpreter_frame_fp_saved_result_offset);
+ } else {
+ res_addr = addr_at(interpreter_frame_gp_saved_result_offset);
+ }
+#else
+ // Return value registers are pushed to the native stack
+ res_addr = (intptr_t*)sp();
+#ifdef __ABI_HARD__
+ // FP result is pushed onto a stack along with integer result registers
+ if (type == T_FLOAT || type == T_DOUBLE) {
+ res_addr += 2; // step over two words to reach the FP result
+ }
+#endif // __ABI_HARD__
+#endif // AARCH64
+ } else {
+ res_addr = (intptr_t*)interpreter_frame_tos_address(); // non-native: result is read from the expression stack top
+ }
+ // Object/array results go to *oop_result, all other types to the matching jvalue member.
+ switch (type) {
+ case T_OBJECT :
+ case T_ARRAY : {
+ oop obj;
+ if (method->is_native()) {
+ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); // native: oop is taken from the oop_temp slot
+ } else {
+ obj = *(oop*)res_addr;
+ }
+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+ *oop_result = obj;
+ break;
+ }
+ case T_BOOLEAN : value_result->z = *(jboolean*)res_addr; break;
+ case T_BYTE : value_result->b = *(jbyte*)res_addr; break;
+ case T_CHAR : value_result->c = *(jchar*)res_addr; break;
+ case T_SHORT : value_result->s = *(jshort*)res_addr; break;
+ case T_INT : value_result->i = *(jint*)res_addr; break;
+ case T_LONG : value_result->j = *(jlong*)res_addr; break;
+ case T_FLOAT : value_result->f = *(jfloat*)res_addr; break;
+ case T_DOUBLE : value_result->d = *(jdouble*)res_addr; break;
+ case T_VOID : /* Nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+ // The method's result type is returned alongside the extracted value.
+ return type;
+}
+
+
+intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
+  // Convert the byte offset of expression-stack element `offset` to a word index from the tos address.
+  return interpreter_frame_tos_address() + (Interpreter::expr_offset_in_bytes(offset) / wordSize);
+}
+
+#ifndef PRODUCT
+
+// Labels the slot at fp() + frame::<name>_offset with the text "<name>" (non-PRODUCT builds only).
+#define DESCRIBE_FP_OFFSET(name) values.describe(frame_no, fp() + frame::name##_offset, #name)
+void frame::describe_pd(FrameValues& values, int frame_no) {
+  if (is_interpreted_frame()) {  // only interpreter frames have the fixed slots named below
+    DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+#ifdef AARCH64
+    DESCRIBE_FP_OFFSET(interpreter_frame_stack_top);
+    DESCRIBE_FP_OFFSET(interpreter_frame_extended_sp);
+#else
+    DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+#endif // AARCH64
+    DESCRIBE_FP_OFFSET(interpreter_frame_method);
+    DESCRIBE_FP_OFFSET(interpreter_frame_mirror);  // previously omitted; sits between method and mdp in the frame enum
+    DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_cache);
+    DESCRIBE_FP_OFFSET(interpreter_frame_locals);
+    DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+  }
+}
+
+// Untyped convenience constructor; its only user is pns() in debug.cpp.
+frame::frame(void* sp, void* fp, void* pc) {
+  init(static_cast<intptr_t*>(sp), static_cast<intptr_t*>(fp), static_cast<address>(pc));
+}
+#endif
+
+intptr_t* frame::initial_deoptimization_info() {
+  intptr_t* const current_fp = fp();  // handed out so that the saved FP can be reset
+  return current_fp;
+}
+
+intptr_t* frame::real_fp() const {
+  // Returns the "real" frame pointer of this frame, which is not always the
+  // value held in fp().
+#ifndef AARCH64
+  if (is_entry_frame()) {
+    // Work-around: FP (currently) does not conform to the ABI for entry
+    // frames (see generate_call_stub). Might be worth fixing as another CR.
+    // The adjustment below assumes (and asserts) this has not yet been fixed.
+    assert(frame::entry_frame_call_wrapper_offset == 0, "adjust this code");
+    int skipped_words = 5;  // saved R0,R1,R2,R4,R10
+#ifndef __SOFTFP__
+    skipped_words += 8*2;   // saved D8..D15
+#endif
+    return fp() + skipped_words;
+  }
+#endif // !AARCH64
+
+  // use the code blob's frame size if valid
+  const int size = (_cb != NULL) ? _cb->frame_size() : 0;
+  if (size > 0) {
+    return unextended_sp() + size;
+  }
+  // else rely on fp()
+  assert(! is_compiled_frame(), "unknown compiled frame size");
+  return fp();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/frame_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_FRAME_ARM_HPP
+#define CPU_ARM_VM_FRAME_ARM_HPP
+
+#include "runtime/synchronizer.hpp"
+
+ public:
+ enum {
+ pc_return_offset = 0,
+ // All frames
+ link_offset = 0,
+ return_addr_offset = 1,
+ // non-interpreter frames
+ sender_sp_offset = 2,
+
+ // Interpreter frames (the two saved-result slots exist on AArch64 only)
+#ifdef AARCH64
+ interpreter_frame_gp_saved_result_offset = 4, // for native calls only
+ interpreter_frame_fp_saved_result_offset = 3, // for native calls only
+#endif
+ interpreter_frame_oop_temp_offset = 2, // for native calls only
+ // Fixed interpreter slots follow: each one is placed one slot below its predecessor.
+ interpreter_frame_sender_sp_offset = -1,
+#ifdef AARCH64
+ interpreter_frame_stack_top_offset = interpreter_frame_sender_sp_offset - 1,
+ interpreter_frame_extended_sp_offset = interpreter_frame_stack_top_offset - 1,
+ interpreter_frame_method_offset = interpreter_frame_extended_sp_offset - 1,
+#else
+ // outgoing sp before a call to an invoked method
+ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
+ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
+#endif // AARCH64
+ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1,
+ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1,
+ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1,
+ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
+ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,
+ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1,
+ // Both ends of the monitor block are anchored at the initial_sp slot.
+ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset,
+ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset,
+
+ // Entry frames (the call wrapper slot is 2 on AArch64 and 0 on 32-bit ARM)
+ entry_frame_call_wrapper_offset = AARCH64_ONLY(2) NOT_AARCH64(0)
+ };
+
+ // Word-sized load from / store to the frame slot that ptr_at_addr(offset) designates.
+ intptr_t ptr_at(int offset) const { return *ptr_at_addr(offset); }
+
+ void ptr_at_put(int offset, intptr_t value) {
+   intptr_t* slot = ptr_at_addr(offset);
+   *slot = value;
+ }
+
+ private:
+ // an additional field beyond _sp and _pc:
+ intptr_t* _fp; // frame pointer
+ // The interpreter and adapters will extend the frame of the caller.
+ // Since oopMaps are based on the sp of the caller before extension
+ // we need to know that value. However in order to compute the address
+ // of the return address we need the real "raw" sp. Since sparc already
+ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
+ // original sp we use that convention.
+ // (The paragraph above describes _unextended_sp, declared next.)
+ intptr_t* _unextended_sp;
+ void adjust_unextended_sp();
+
+ intptr_t* ptr_at_addr(int offset) const {
+   return reinterpret_cast<intptr_t*>(addr_at(offset));  // address of the slot, as a word pointer
+ }
+
+#ifdef ASSERT
+ // Checks compiled only under ASSERT; used in frame::sender_for_{interpreter,compiled}_frame
+ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
+ static void verify_deopt_mh_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) {
+ verify_deopt_original_pc(nm, unextended_sp, true); // true selects is_method_handle_return
+ }
+#endif // ASSERT
+
+ public:
+ // Constructors
+
+ frame(intptr_t* sp, intptr_t* fp, address pc);
+
+ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc);
+
+#ifndef AARCH64
+ frame(intptr_t* sp, intptr_t* fp);
+#endif // !AARCH64
+
+ void init(intptr_t* sp, intptr_t* fp, address pc);
+
+ // accessors for the instance variables
+ // Note: fp() returns the stored _fp, not necessarily the real 'frame pointer' (see real_fp)
+ intptr_t* fp() const { return _fp; }
+
+ inline address* sender_pc_addr() const;
+
+#ifdef AARCH64
+ // Used by template based interpreter deoptimization
+ void interpreter_frame_set_stack_top(intptr_t* stack_top);
+ void interpreter_frame_set_extended_sp(intptr_t* sp);
+
+#else
+ // expression stack tos if we are nested in a java call
+ intptr_t* interpreter_frame_last_sp() const;
+
+ // deoptimization support
+ void interpreter_frame_set_last_sp(intptr_t* sp);
+#endif // AARCH64
+
+ // helper to update a map with callee-saved FP
+ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
+
+#endif // CPU_ARM_VM_FRAME_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/frame_arm.inline.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_FRAME_ARM_INLINE_HPP
+#define CPU_ARM_VM_FRAME_ARM_INLINE_HPP
+
+#include "code/codeCache.hpp"
+#include "code/vmreg.inline.hpp"
+
+// Inline functions for ARM frames:
+
+// Constructors:
+
+inline frame::frame() {
+  // An empty frame: every pointer field is NULL and the deoptimization
+  // state is left as unknown.
+  _sp = _unextended_sp = _fp = NULL;
+  _pc = NULL;
+  _cb = NULL;
+  _deopt_state = unknown;
+}
+
+inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) {
+  // Fills in the frame from a raw sp/fp/pc triple. The unextended sp starts
+  // out equal to sp; adjust_unextended_sp() then gets a chance to revise it.
+  assert(pc != NULL, "no pc?");
+  _sp = sp;
+  _unextended_sp = sp;
+  _fp = fp;
+  _pc = pc;
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  // A non-NULL original pc marks the frame as deoptimized and replaces _pc.
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  const bool deoptimized = (original_pc != NULL);
+  if (deoptimized) _pc = original_pc;
+  _deopt_state = deoptimized ? is_deoptimized : not_deoptimized;
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+ init(sp, fp, pc); // all of the setup is delegated to init()
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) {
+  // Variant in which the caller supplies the unextended sp explicitly.
+  assert(pc != NULL, "no pc?");
+  _sp = sp;
+  _unextended_sp = unextended_sp;
+  _fp = fp;
+  _pc = pc;
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc == NULL) {
+    _deopt_state = not_deoptimized;
+    return;
+  }
+  _pc = original_pc;
+  assert(_cb->as_compiled_method()->insts_contains(_pc), "original PC must be in CompiledMethod");
+  _deopt_state = is_deoptimized;
+}
+
+#ifndef AARCH64
+
+inline frame::frame(intptr_t* sp, intptr_t* fp) {
+  // No pc is passed in: it is read from the word just below sp.
+  assert(sp != NULL,"null SP ?");
+  _sp = sp;
+  _unextended_sp = sp;
+  _fp = fp;
+  _pc = (address)(sp[-1]);
+  // assert(_pc != NULL, "no pc?"); // see comments in x86
+  _cb = CodeCache::find_blob(_pc);
+  adjust_unextended_sp();
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc == NULL) {
+    _deopt_state = not_deoptimized;
+  } else {
+    _pc = original_pc;
+    _deopt_state = is_deoptimized;
+  }
+}
+
+#endif // !AARCH64
+
+// Accessors
+
+inline bool frame::equal(frame other) const {
+  // Two frames are equal when sp, unextended sp, fp and pc all match.
+  const bool same = (sp() == other.sp()) && (unextended_sp() == other.unextended_sp()) &&
+                    (fp() == other.fp()) && (pc() == other.pc());
+  // Equal frames are expected to agree on code blob and deopt state as well.
+  assert(!same || (cb() == other.cb() && _deopt_state == other._deopt_state), "inconsistent construction");
+  return same;
+}
+
+// Return unique id for this frame. The id must have a value where we can distinguish
+// identity and younger/older relationship. NULL represents an invalid (incomparable)
+// frame. In this port the unextended sp serves as the id.
+inline intptr_t* frame::id(void) const { return unextended_sp(); }
+
+// Relational test on frame ids: a younger (more recent) activation has the smaller id.
+inline bool frame::is_younger(intptr_t* id) const {
+  assert(this->id() != NULL && id != NULL, "NULL frame id"); return id > this->id();
+}
+
+inline bool frame::is_older(intptr_t* id) const {  // true if this is a less recent activation than the frame with this id
+  assert(this->id() != NULL && id != NULL, "NULL frame id"); return id < this->id();
+}
+
+
+
+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } // value stored in the link slot
+// Stack pointer value kept separately from sp() in the _unextended_sp field.
+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; }
+
+// Return address:
+// sender_pc_addr() is the slot that holds it, sender_pc() the value read from that slot.
+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); }
+inline address frame::sender_pc() const { return *sender_pc_addr(); }
+// Address of the slot at sender_sp_offset (the slot address itself, not a value loaded from it).
+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); }
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+ return (intptr_t**)addr_at(interpreter_frame_locals_offset);
+}
+// Unlike the *_addr accessors, this one loads the value stored in the last_sp slot.
+#ifndef AARCH64
+inline intptr_t* frame::interpreter_frame_last_sp() const {
+ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);
+}
+#endif // !AARCH64
+// Addresses of the bcp and mdp slots.
+inline intptr_t* frame::interpreter_frame_bcp_addr() const {
+ return (intptr_t*)addr_at(interpreter_frame_bcp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_mdp_addr() const {
+ return (intptr_t*)addr_at(interpreter_frame_mdp_offset);
+}
+
+
+// Constant pool cache
+
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
+ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset);
+}
+
+// Method
+
+inline Method** frame::interpreter_frame_method_addr() const {
+ return (Method**)addr_at(interpreter_frame_method_offset);
+}
+// Address of the mirror slot, typed as oop*.
+inline oop* frame::interpreter_frame_mirror_addr() const {
+ return (oop*)addr_at(interpreter_frame_mirror_offset);
+}
+
+// top of expression stack: the saved stack_top on AArch64; on 32-bit ARM last_sp, or sp() when none is recorded
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+#ifdef AARCH64
+ intptr_t* stack_top = (intptr_t*)*addr_at(interpreter_frame_stack_top_offset); // value held in the stack_top slot
+ assert(stack_top != NULL, "should be stored before call");
+ assert(stack_top <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
+ return stack_top;
+#else
+ intptr_t* last_sp = interpreter_frame_last_sp();
+ if (last_sp == NULL ) { // nothing recorded in the last_sp slot: use the current sp
+ return sp();
+ } else {
+ // sp() may have been extended or shrunk by an adapter. At least
+ // check that we don't fall behind the legal region.
+ // For top deoptimized frame last_sp == interpreter_frame_monitor_end.
+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
+ return last_sp;
+ }
+#endif // AARCH64
+}
+
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
+  return reinterpret_cast<oop*>(fp() + interpreter_frame_oop_temp_offset);  // oop_temp slot, addressed relative to fp
+}
+
+inline int frame::interpreter_frame_monitor_size() {
+ return BasicObjectLock::size(); // a monitor takes exactly the size reported by BasicObjectLock
+}
+
+
+// expression stack
+// (the max_stack arguments are used by the GC; see class FrameClosure)
+
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  // The expression stack starts one word below the end of the monitor area.
+  return ((intptr_t*) interpreter_frame_monitor_end()) - 1;
+}
+
+
+inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } // NOTE(review): presumably -1 means growth toward lower addresses - confirm
+
+
+// Entry frames
+
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
+  return reinterpret_cast<JavaCallWrapper**>(addr_at(entry_frame_call_wrapper_offset));  // slot holding the JavaCallWrapper*
+}
+
+
+// Compiled frames
+
+inline bool frame::volatile_across_calls(Register reg) {
+ return true; // the answer is the same for every register; reg is not inspected
+}
+
+inline oop frame::saved_oop_result(RegisterMap* map) const {
+  oop* const r0_save_slot = (oop*) map->location(R0->as_VMReg());  // where the map says R0 was saved
+  guarantee(r0_save_slot != NULL, "bad register save location");
+  return *r0_save_slot;
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+  oop* const r0_save_slot = (oop*) map->location(R0->as_VMReg());  // save location of R0 according to the map
+  guarantee(r0_save_slot != NULL, "bad register save location");
+  *r0_save_slot = obj;
+}
+
+#endif // CPU_ARM_VM_FRAME_ARM_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/globalDefinitions_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP
+#define CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP
+
+#ifdef AARCH64 // 64-bit build: AARCH64_ONLY keeps its argument, NOT_AARCH64 drops it
+#define AARCH64_ONLY(code) code
+#define AARCH64_ONLY_ARG(arg) , arg
+#define NOT_AARCH64(code)
+#define NOT_AARCH64_ARG(arg)
+#else // 32-bit build: the roles are swapped
+#define AARCH64_ONLY(code)
+#define AARCH64_ONLY_ARG(arg)
+#define NOT_AARCH64(code) code
+#define NOT_AARCH64_ARG(arg) , arg
+#endif // AARCH64
+
+const int StackAlignmentInBytes = AARCH64_ONLY(16) NOT_AARCH64(8); // 16 on AArch64, 8 on 32-bit ARM
+
+// Indicates whether the C calling conventions require that
+// 32-bit integer argument values are extended to 64 bits.
+const bool CCallingConventionRequiresIntsAsLongs = false;
+// HaveVFP is false only when the compiler defines __SOFTFP__.
+#ifdef __SOFTFP__
+const bool HaveVFP = false;
+#else
+const bool HaveVFP = true;
+#endif
+// __ABI_HARD__ is defined when __ARM_PCS_VFP is set, and always for AARCH64.
+#if defined(__ARM_PCS_VFP) || defined(AARCH64)
+#define __ABI_HARD__
+#endif
+// SUPPORTS_NATIVE_CX8 is defined only for __ARM_ARCH_7A__ and AARCH64 builds.
+#if defined(__ARM_ARCH_7A__) || defined(AARCH64)
+#define SUPPORTS_NATIVE_CX8
+#endif
+
+#define STUBROUTINES_MD_HPP "stubRoutines_arm.hpp" // file names of ARM-specific headers, published as macros
+#define INTERP_MASM_MD_HPP "interp_masm_arm.hpp"
+#define TEMPLATETABLE_MD_HPP "templateTable_arm.hpp"
+#ifdef AARCH64 // the generated AD headers differ between the 64-bit and 32-bit flavours
+#define ADGLOBALS_MD_HPP "adfiles/adGlobals_arm_64.hpp"
+#define AD_MD_HPP "adfiles/ad_arm_64.hpp"
+#else
+#define ADGLOBALS_MD_HPP "adfiles/adGlobals_arm_32.hpp"
+#define AD_MD_HPP "adfiles/ad_arm_32.hpp"
+#endif // AARCH64
+#define C1_LIRGENERATOR_MD_HPP "c1_LIRGenerator_arm.hpp"
+// With gcc on ARM32, BREAKPOINT is redefined to issue a bkpt instruction.
+#ifdef TARGET_COMPILER_gcc
+#ifdef ARM32
+#undef BREAKPOINT
+#define BREAKPOINT __asm__ volatile ("bkpt")
+#endif // ARM32
+#endif // TARGET_COMPILER_gcc
+
+#endif // CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/globals_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_GLOBALS_ARM_HPP
+#define CPU_ARM_VM_GLOBALS_ARM_HPP
+
+//
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+//
+
+define_pd_global(bool, ShareVtableStubs, true);
+
+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks
+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast
+define_pd_global(bool, TrapBasedNullChecks, false); // Not needed
+// Code layout defaults: the segment size is 64, plus another 64 inside TIERED_ONLY.
+define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment.
+define_pd_global(intx, CodeEntryAlignment, 16);
+define_pd_global(intx, OptoLoopAlignment, 16);
+
+define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this
+
+#define DEFAULT_STACK_YELLOW_PAGES (2)
+#define DEFAULT_STACK_RED_PAGES (1)
+#define DEFAULT_STACK_SHADOW_PAGES (5 DEBUG_ONLY(+1)) // one extra shadow page inside DEBUG_ONLY
+#define DEFAULT_STACK_RESERVED_PAGES (0)
+// The minimum for yellow, red and shadow pages equals the default; reserved pages have minimum 0.
+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES
+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES
+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
+#define MIN_STACK_RESERVED_PAGES (0)
+// The platform defaults of the four stack page flags are the DEFAULT_* values above.
+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES);
+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES);
+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES);
+
+define_pd_global(intx, InlineFrequencyCount, 50);
+#if defined(COMPILER1) || defined(COMPILER2)
+define_pd_global(intx, InlineSmallCode, 1500);
+#endif // COMPILER1 || COMPILER2
+// InlineSmallCode only gets a platform default when COMPILER1 or COMPILER2 is defined.
+define_pd_global(bool, RewriteBytecodes, true);
+define_pd_global(bool, RewriteFrequentPairs, true);
+
+define_pd_global(bool, UseMembar, true);
+
+define_pd_global(bool, PreserveFramePointer, false);
+
+// GC Ergo Flags
+define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // default max size of CMS young gen, per GC worker thread
+
+define_pd_global(uintx, TypeProfileLevel, 0);
+
+// No performance work done here yet.
+define_pd_global(bool, CompactStrings, false);
+
+define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
+
+#define ARCH_FLAGS(develop, \
+ product, \
+ diagnostic, \
+ experimental, \
+ notproduct, \
+ range, \
+ constraint, \
+ writeable) \
+ /* Port-specific flags; both are declared through the develop() hook. */ \
+ develop(bool, VerifyInterpreterStackTop, false, \
+ "Verify interpreter stack top at every stack expansion (AArch64 only)") \
+ \
+ develop(bool, ZapHighNonSignificantBits, false, \
+ "Zap high non-significant bits of values (AArch64 only)") \
+ /* The trailing continuation below ends the macro on the following empty line. */ \
+
+#endif // CPU_ARM_VM_GLOBALS_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/icBuffer_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_arm.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_arm.hpp"
+#include "oops/oop.inline.hpp"
+
+#define __ masm->
+
+int InlineCacheBuffer::ic_stub_code_size() {  // room for 8 (AArch64) or 4 (32-bit ARM) instructions
+  return Assembler::InstructionSize * (AARCH64_ONLY(8) NOT_AARCH64(4));
+}
+
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
+ ResourceMark rm;
+ CodeBuffer code(code_begin, ic_stub_code_size()); // code is emitted in place, starting at code_begin
+ MacroAssembler* masm = new MacroAssembler(&code);
+ // Stub layout: load cached_value into Ricklass, jump to entry_point, then the literal itself.
+ InlinedAddress oop_literal((address) cached_value);
+ __ ldr_literal(Ricklass, oop_literal);
+ // FIXME: OK to remove reloc here?
+ __ patchable_jump(entry_point, relocInfo::runtime_call_type, Rtemp);
+ __ bind_literal(oop_literal);
+ __ flush();
+}
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+  // The jump is decoded one instruction past the start of the stub; the
+  // first instruction is presumably the literal load emitted by assemble_ic_buffer_code.
+  NativeJump* jump = nativeJump_at(code_begin + NativeInstruction::instruction_size);
+  return jump->jump_destination();
+}
+
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
+  // The constant load is decoded right at the beginning of the stub.
+  return (void*) nativeMovConstReg_at(code_begin)->data();
+}
+
+#undef __
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/icache_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "assembler_arm.inline.hpp"
+#include "runtime/icache.hpp"
+
+#define __ _masm->
+
+#ifdef AARCH64
+
+static int icache_flush(address addr, int lines, int magic) {
+ // TODO-AARCH64 Figure out actual cache line size (mrs Xt, CTR_EL0)
+ // Pass 1: issue "dc cvau" for each of the `lines` lines, stepping by ICache::line_size.
+ address p = addr;
+ for (int i = 0; i < lines; i++, p += ICache::line_size) {
+ __asm__ volatile(
+ " dc cvau, %[p]"
+ :
+ : [p] "r" (p)
+ : "memory");
+ }
+ // Barrier between the data-cache pass and the instruction-cache pass.
+ __asm__ volatile(
+ " dsb ish"
+ : : : "memory");
+ // Pass 2: issue "ic ivau" over the same range.
+ p = addr;
+ for (int i = 0; i < lines; i++, p += ICache::line_size) {
+ __asm__ volatile(
+ " ic ivau, %[p]"
+ :
+ : [p] "r" (p)
+ : "memory");
+ }
+ // Final barriers: dsb ish followed by isb.
+ __asm__ volatile(
+ " dsb ish\n\t"
+ " isb\n\t"
+ : : : "memory");
+ // magic is returned unchanged.
+ return magic;
+}
+
+#else
+
+static int icache_flush(address addr, int lines, int magic) {
+ __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); // range covers lines * line_size bytes from addr
+ return magic; // handed back unchanged
+}
+
+#endif // AARCH64
+
+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
+ address start = (address)icache_flush; // this file's static icache_flush function serves as the "stub"
+ // It is published through the out-parameter instead of generated code.
+ *flush_icache_stub = (ICache::flush_icache_stub_t)start;
+
+ // ICache::invalidate_range() contains explicit condition that the first
+ // call is invoked on the generated icache flush stub code range.
+ ICache::invalidate_range(start, 0);
+ // The code emitted below is a placeholder that consists of a lone ret.
+ {
+ // dummy code mark to make the shared code happy
+ // (fields that would need to be modified to emulate the correct
+ // mark are not accessible)
+ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush");
+ __ ret();
+ }
+}
+
+#undef __
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/icache_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_ICACHE_ARM_HPP
+#define CPU_ARM_VM_ICACHE_ARM_HPP
+
+// Interface for updating the instruction cache. Whenever the VM modifies
+// code, part of the processor instruction cache potentially has to be flushed.
+// This class only supplies the ARM values of the three constants below.
+class ICache : public AbstractICache {
+ public:
+ enum {
+ stub_size = 32, // Size of the icache flush stub in bytes
+ line_size = BytesPerWord, // conservative: one machine word per "line"
+ log2_line_size = LogBytesPerWord // log2(line_size)
+ };
+};
+
+#endif // CPU_ARM_VM_ICACHE_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/interp_masm_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,2272 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/barrierSet.inline.hpp"
+#include "gc/shared/cardTableModRefBS.inline.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interp_masm_arm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "logging/log.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "oops/method.hpp"
+#include "oops/methodData.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif // INCLUDE_ALL_GCS
+
+//--------------------------------------------------------------------
+// Implementation of InterpreterMacroAssembler
+
+
+
+
+InterpreterMacroAssembler::InterpreterMacroAssembler(CodeBuffer* code)
+  : MacroAssembler(code) {}  // all state lives in the MacroAssembler base
+// Interpreter flavour of call_VM_helper: saves bcp, delegates to MacroAssembler, then restores bcp and method.
+void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+#if defined(ASSERT) && !defined(AARCH64)
+ // Debug builds, 32-bit ARM only: stop if the frame's last_sp slot is non-zero.
+ { Label L;
+ ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+ cbz(Rtemp, L);
+ stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL");
+ bind(L);
+ }
+#endif // ASSERT && !AARCH64
+
+ // Rbcp must be saved/restored since it may change due to GC.
+ save_bcp();
+
+#ifdef AARCH64
+ check_no_cached_stack_top(Rtemp);
+ save_stack_top();
+ check_extended_sp(Rtemp);
+ cut_sp_before_call();
+#endif // AARCH64
+
+ // Delegate to the base-class implementation, forwarding all arguments unchanged.
+ MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions);
+
+#ifdef AARCH64
+ // Restore SP to extended SP
+ restore_sp_after_call(Rtemp);
+ check_stack_top();
+ clear_cached_stack_top();
+#endif // AARCH64
+
+ // Restore interpreter specific registers.
+ restore_bcp();
+ restore_method();
+}
+
+void InterpreterMacroAssembler::jump_to_entry(address entry) {
+  assert(entry != NULL, "Entry must have been generated by now");
+  b(entry);  // unconditional branch to the already generated entry
+}
+// If pop-frame is possible, emits a test of the thread's popframe condition and a jump to the pop-frame entry.
+void InterpreterMacroAssembler::check_and_handle_popframe() {
+ if (can_pop_frame()) {
+ Label L;
+ const Register popframe_cond = R2_tmp;
+
+ // Initiate popframe handling only if it is not already being processed. If the flag
+ // has the popframe_processing bit set, it means that this code is called *during* popframe
+ // handling - we don't want to reenter.
+
+ ldr_s32(popframe_cond, Address(Rthread, JavaThread::popframe_condition_offset()));
+ tbz(popframe_cond, exact_log2(JavaThread::popframe_pending_bit), L); // pending bit clear: skip
+ tbnz(popframe_cond, exact_log2(JavaThread::popframe_processing_bit), L); // processing bit set: skip
+
+ // Call Interpreter::remove_activation_preserving_args_entry() to get the
+ // address of the same-named entrypoint in the generated interpreter code.
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
+
+ // Call indirectly to avoid generation ordering problem.
+ jump(R0); // R0 carries the entry address returned by the leaf call
+
+ bind(L);
+ }
+}
+
+// Loads the early-return value saved in the thread's JvmtiThreadState into the TOS registers, then clears the saved slots.
+// Blows R2, Rtemp. Sets TOS cached value.
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
+ const Register thread_state = R2_tmp;
+
+ ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset()));
+
+ const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset());
+ const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset());
+ const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset());
+#ifndef AARCH64
+ const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset()
+ + in_ByteSize(wordSize)); // second word of the value slot
+#endif // !AARCH64
+
+ Register zero = zero_register(Rtemp); // presumably materializes 0 in Rtemp on 32-bit ARM - confirm in zero_register()
+
+ switch (state) {
+ case atos: ldr(R0_tos, oop_addr);
+ str(zero, oop_addr); // clear the saved oop slot
+ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
+ break;
+
+#ifdef AARCH64
+ case ltos: ldr(R0_tos, val_addr); break;
+#else
+ case ltos: ldr(R1_tos_hi, val_addr_hi); // fall through
+#endif // AARCH64
+ case btos: // fall through
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: // fall through
+ case itos: ldr_s32(R0_tos, val_addr); break;
+#ifdef __SOFTFP__
+ case dtos: ldr(R1_tos_hi, val_addr_hi); // fall through
+ case ftos: ldr(R0_tos, val_addr); break;
+#else
+ case ftos: ldr_float (S0_tos, val_addr); break;
+ case dtos: ldr_double(D0_tos, val_addr); break;
+#endif // __SOFTFP__
+ case vtos: /* nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+ // Clean up tos value in the thread object
+ str(zero, val_addr);
+#ifndef AARCH64
+ str(zero, val_addr_hi);
+#endif // !AARCH64
+
+ mov(Rtemp, (int) ilgl); // reset the saved tos state to ilgl
+ str_32(Rtemp, tos_addr);
+}
+
+// If forced early return is possible, emits a test of the JVMTI earlyret state and a jump to the early-return entry.
+// Blows R2, Rtemp.
+void InterpreterMacroAssembler::check_and_handle_earlyret() {
+ if (can_force_early_return()) {
+ Label L;
+ const Register thread_state = R2_tmp;
+
+ ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset()));
+ cbz(thread_state, L); // if (thread->jvmti_thread_state() == NULL) exit;
+
+ // Initiate earlyret handling only if it is not already being processed.
+ // If the flag has the earlyret_processing bit set, it means that this code
+ // is called *during* earlyret handling - we don't want to reenter.
+
+ ldr_s32(Rtemp, Address(thread_state, JvmtiThreadState::earlyret_state_offset()));
+ cmp(Rtemp, JvmtiThreadState::earlyret_pending);
+ b(L, ne); // state is not earlyret_pending: nothing to do
+
+ // Call Interpreter::remove_activation_early_entry() to get the address of the
+ // same-named entrypoint in the generated interpreter code.
+
+ ldr_s32(R0, Address(thread_state, JvmtiThreadState::earlyret_tos_offset())); // argument: saved tos state
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), R0);
+
+ jump(R0); // continue at the address returned in R0
+
+ bind(L);
+ }
+}
+
+// Loads the 16-bit value at Rbcp + bcp_offset; the first byte becomes bits 15..8, the second bits 7..0.
+// Sets reg. Blows Rtemp.
+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) {
+ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
+ assert(reg != Rtemp, "should be different registers");
+
+ ldrb(Rtemp, Address(Rbcp, bcp_offset)); // first byte -> high half
+ ldrb(reg, Address(Rbcp, bcp_offset+1)); // second byte -> low half
+ orr(reg, reg, AsmOperand(Rtemp, lsl, BitsPerByte));
+}
+// Assembles the 1-, 2- or 4-byte index at Rbcp + bcp_offset bytewise, lowest address least significant; tmp_reg is scratch.
+void InterpreterMacroAssembler::get_index_at_bcp(Register index, int bcp_offset, Register tmp_reg, size_t index_size) {
+ assert_different_registers(index, tmp_reg);
+ if (index_size == sizeof(u2)) {
+ // load bytes of index separately to avoid unaligned access
+ ldrb(index, Address(Rbcp, bcp_offset+1));
+ ldrb(tmp_reg, Address(Rbcp, bcp_offset));
+ orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); // index = byte0 | (byte1 << 8)
+ } else if (index_size == sizeof(u4)) {
+ // TODO-AARCH64: consider using unaligned access here
+ ldrb(index, Address(Rbcp, bcp_offset+3));
+ ldrb(tmp_reg, Address(Rbcp, bcp_offset+2));
+ orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
+ ldrb(tmp_reg, Address(Rbcp, bcp_offset+1));
+ orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
+ ldrb(tmp_reg, Address(Rbcp, bcp_offset));
+ orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); // byte0 ends up in bits 7..0
+ // Check if the secondary index definition is still ~x, otherwise
+ // we have to change the following assembler code to calculate the
+ // plain index.
+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
+ mvn_32(index, index); // convert to plain index
+ } else if (index_size == sizeof(u1)) {
+ ldrb(index, Address(Rbcp, bcp_offset));
+ } else {
+ ShouldNotReachHere();
+ }
+}
+// Loads the index at Rbcp + bcp_offset, scaled by 4 (entry size in words), and the frame's constant pool cache pointer.
+// Sets cache, index.
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size) {
+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+ assert_different_registers(cache, index);
+
+ get_index_at_bcp(index, bcp_offset, cache, index_size); // cache doubles as the scratch register here
+
+ // load constant pool cache pointer
+ ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));
+
+ // convert from field index to ConstantPoolCacheEntry index
+ assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
+ // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called
+ logical_shift_left(index, index, 2);
+}
+// As get_cache_and_index_at_bcp, and also loads byte (1 + byte_no) of the entry's indices field with load ordering.
+// Sets cache, index, bytecode.
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size) {
+ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+ // caution index and bytecode can be the same
+ add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord)); // index is in words; convert to a byte address
+#ifdef AARCH64
+ add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+ ldarb(bytecode, bytecode);
+#else
+ ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())));
+ TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true); // order later loads after this one
+#endif // AARCH64
+}
+// Computes the address of the ConstantPoolCacheEntry selected by the index at Rbcp + bcp_offset.
+// Sets cache. Blows reg_tmp.
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register reg_tmp, int bcp_offset, size_t index_size) {
+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+ assert_different_registers(cache, reg_tmp);
+
+ get_index_at_bcp(reg_tmp, bcp_offset, cache, index_size); // cache doubles as the scratch register here
+
+ // load constant pool cache pointer
+ ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));
+
+ // skip past the header
+ add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
+ // convert from field index to ConstantPoolCacheEntry index
+ // and from word offset to byte offset
+ assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
+ add(cache, cache, AsmOperand(reg_tmp, lsl, 2 + LogBytesPerWord));
+}
+
+// Load object from cpool->resolved_references(index) into result; result is also used as scratch, index is only read.
+void InterpreterMacroAssembler::load_resolved_reference_at_index(
+ Register result, Register index) {
+ assert_different_registers(result, index);
+ get_constant_pool(result);
+
+ Register cache = result; // alias: every intermediate pointer lives in result
+ // load pointer for resolved_references[] objArray
+ ldr(cache, Address(result, ConstantPool::resolved_references_offset_in_bytes()));
+ // JNIHandles::resolve(result)
+ ldr(cache, Address(cache, 0));
+ // Add in the index
+ // convert from field index to resolved_references() index and from
+ // word index to byte offset. Since this is a java object, it can be compressed
+ add(cache, cache, AsmOperand(index, lsl, LogBytesPerHeapOop));
+ load_heap_oop(result, Address(cache, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+}
+
+// Generate a subtype check: branch to not_subtype if sub_klass is
+// not a subtype of super_klass.
+// Profiling code for the subtype check failure (profile_typecheck_failed)
+// should be explicitly generated by the caller in the not_subtype case.
+// Blows Rtemp, tmp1, tmp2. Falls through when the check succeeds.
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
+                                                  Register Rsuper_klass,
+                                                  Label& not_subtype,
+                                                  Register tmp1,
+                                                  Register tmp2) {
+
+  assert_different_registers(Rsub_klass, Rsuper_klass, tmp1, tmp2, Rtemp);
+  Label ok_is_subtype, loop, update_cache;
+
+  const Register super_check_offset = tmp1;
+  const Register cached_super = tmp2;
+
+  // Profile the not-null value's klass.
+  profile_typecheck(tmp1, Rsub_klass);
+
+  // Load the super-klass's check offset into super_check_offset (tmp1).
+  ldr_u32(super_check_offset, Address(Rsuper_klass, Klass::super_check_offset_offset()));
+
+  // Check for self
+  cmp(Rsub_klass, Rsuper_klass);
+
+  // Load from the sub-klass's super-class display list, or a 1-word cache of
+  // the secondary superclass list, or a failing value with a sentinel offset
+  // if the super-klass is an interface or exceptionally deep in the Java
+  // hierarchy and we have to scan the secondary superclass list the hard way.
+  // See if we get an immediate positive hit
+  ldr(cached_super, Address(Rsub_klass, super_check_offset));
+
+  cond_cmp(Rsuper_klass, cached_super, ne);  // second compare only if the self check failed
+  b(ok_is_subtype, eq);
+
+  // Check for immediate negative hit
+  cmp(super_check_offset, in_bytes(Klass::secondary_super_cache_offset()));
+  b(not_subtype, ne);
+
+  // Now do a linear scan of the secondary super-klass chain.
+  const Register supers_arr = tmp1;  // tmp1/tmp2 are reused; their earlier roles are dead here
+  const Register supers_cnt = tmp2;
+  const Register cur_super = Rtemp;
+
+  // Load objArrayOop of secondary supers.
+  ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset()));
+
+  ldr_u32(supers_cnt, Address(supers_arr, Array<Klass*>::length_offset_in_bytes())); // Load the array length
+#ifdef AARCH64
+  cbz(supers_cnt, not_subtype);
+  add(supers_arr, supers_arr, Array<Klass*>::base_offset_in_bytes());
+#else
+  cmp(supers_cnt, 0);
+
+  // Skip to the start of array elements and prefetch the first super-klass.
+  ldr(cur_super, Address(supers_arr, Array<Klass*>::base_offset_in_bytes(), pre_indexed), ne);
+  b(not_subtype, eq);
+#endif // AARCH64
+
+  bind(loop);
+
+#ifdef AARCH64
+  ldr(cur_super, Address(supers_arr, wordSize, post_indexed));
+#endif // AARCH64
+
+  cmp(cur_super, Rsuper_klass);
+  b(update_cache, eq);
+
+  subs(supers_cnt, supers_cnt, 1);
+
+#ifndef AARCH64
+  ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);  // fetch next element only if any remain
+#endif // !AARCH64
+
+  b(loop, ne);
+
+  b(not_subtype);
+
+  bind(update_cache);
+  // Must be equal but missed in cache. Update cache.
+  str(Rsuper_klass, Address(Rsub_klass, Klass::secondary_super_cache_offset()));
+
+  bind(ok_is_subtype);
+}
+
+
+// The 1st part of the store check: loads the card table base address as a literal embedded in the code.
+// Sets card_table_base register.
+void InterpreterMacroAssembler::store_check_part1(Register card_table_base) {
+ // Check barrier set type (should be card table) and element size
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ assert(bs->kind() == BarrierSet::CardTableForRS ||
+ bs->kind() == BarrierSet::CardTableExtension,
+ "Wrong barrier set kind");
+
+ CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "Adjust store check code");
+
+ // Load card table base address.
+
+ /* Performance note.
+
+ There is an alternative way of loading card table base address
+ from thread descriptor, which may look more efficient:
+
+ ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset()));
+
+ However, performance measurements of micro benchmarks and specJVM98
+ showed that loading of card table base from thread descriptor is
+ 7-18% slower compared to loading of literal embedded into the code.
+ Possible cause is a cache miss (card table base address resides in a
+ rarely accessed area of thread descriptor).
+ */
+ // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64
+ mov_address(card_table_base, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
+}
+
+// The 2nd part of the store check: marks the card covering obj; on AArch64 card_table_base is overwritten with the card address.
+void InterpreterMacroAssembler::store_check_part2(Register obj, Register card_table_base, Register tmp) {
+ assert_different_registers(obj, card_table_base, tmp);
+
+ assert(CardTableModRefBS::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations.");
+#ifdef AARCH64
+ add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTableModRefBS::card_shift));
+ Address card_table_addr(card_table_base);
+#else
+ Address card_table_addr(card_table_base, obj, lsr, CardTableModRefBS::card_shift); // base + (obj >> card_shift)
+#endif
+
+ if (UseCondCardMark) {
+ if (UseConcMarkSweepGC) {
+ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg);
+ }
+ Label already_dirty;
+
+ ldrb(tmp, card_table_addr);
+ cbz(tmp, already_dirty); // 0 is the dirty value (asserted above): skip the store
+
+ set_card(card_table_base, card_table_addr, tmp);
+ bind(already_dirty);
+
+ } else {
+ if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
+ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
+ }
+ set_card(card_table_base, card_table_addr, tmp); // unconditional card store
+ }
+}
+
+void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) {
+#ifdef AARCH64
+  strb(ZR, card_table_addr);  // byte store from ZR
+#else
+  CardTableModRefBS* card_table = barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set());
+  const bool base_low_byte_nonzero = ((uintptr_t)card_table->byte_map_base & 0xff) != 0;
+  if (base_low_byte_nonzero) {
+    mov(tmp, 0);
+    strb(tmp, card_table_addr);
+  } else {
+    // The table base has a zero low byte, so a byte store of the base register
+    // writes 0. Valid only while this code is not reused with a different base.
+    strb(card_table_base, card_table_addr);
+  }
+#endif // AARCH64
+}
+
+//////////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+// G1 pre-barrier: while marking is active, logs the non-null previous value in the thread's SATB buffer (runtime call if no room).
+// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// If store_addr != noreg, then previous value is loaded from [store_addr];
+// in such case store_addr and new_val registers are preserved;
+// otherwise pre_val register is preserved.
+void InterpreterMacroAssembler::g1_write_barrier_pre(Register store_addr,
+ Register new_val,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ Label runtime;
+
+ if (store_addr != noreg) {
+ assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
+ } else {
+ assert (new_val == noreg, "should be");
+ assert_different_registers(pre_val, tmp1, tmp2, noreg);
+ }
+
+ Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_active()));
+ Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_index()));
+ Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_buf()));
+
+ // Is marking active?
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
+ ldrb(tmp1, in_progress);
+ cbz(tmp1, done); // not active: no barrier work
+
+ // Do we need to load the previous value?
+ if (store_addr != noreg) {
+ load_heap_oop(pre_val, Address(store_addr, 0));
+ }
+
+ // Is the previous value null?
+ cbz(pre_val, done);
+
+ // Can we store original value in the thread's buffer?
+ // Is index == 0?
+ // (The index field is typed as size_t.)
+
+ ldr(tmp1, index); // tmp1 := *index_adr
+ ldr(tmp2, buffer);
+
+ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
+ b(runtime, lt); // If negative, goto runtime
+
+ str(tmp1, index); // *index_adr := tmp1
+
+ // Record the previous value
+ str(pre_val, Address(tmp2, tmp1)); // slot at buffer + new index
+ b(done);
+
+ bind(runtime);
+
+ // save the live input values
+#ifdef AARCH64
+ if (store_addr != noreg) {
+ raw_push(store_addr, new_val);
+ } else {
+ raw_push(pre_val, ZR);
+ }
+#else
+ if (store_addr != noreg) {
+ // avoid raw_push to support any ordering of store_addr and new_val
+ push(RegisterSet(store_addr) | RegisterSet(new_val));
+ } else {
+ push(pre_val);
+ }
+#endif // AARCH64
+
+ if (pre_val != R0) {
+ mov(R0, pre_val);
+ }
+ mov(R1, Rthread);
+
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1); // args: previous value, thread
+
+#ifdef AARCH64
+ if (store_addr != noreg) {
+ raw_pop(store_addr, new_val);
+ } else {
+ raw_pop(pre_val, ZR);
+ }
+#else
+ if (store_addr != noreg) {
+ pop(RegisterSet(store_addr) | RegisterSet(new_val));
+ } else {
+ pop(pre_val);
+ }
+#endif // AARCH64
+
+ bind(done);
+}
+
+// G1 post-barrier: for a region-crossing, non-NULL store, dirties the card and logs it (runtime call if no room).
+// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+void InterpreterMacroAssembler::g1_write_barrier_post(Register store_addr,
+                                                      Register new_val,
+                                                      Register tmp1,
+                                                      Register tmp2,
+                                                      Register tmp3) {
+
+  Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                        DirtyCardQueue::byte_offset_of_index()));
+  Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                   DirtyCardQueue::byte_offset_of_buf()));
+
+  // Checked downcast (as in store_check_part1/set_card) instead of a C-style cast.
+  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set());
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  eor(tmp1, store_addr, new_val);
+#ifdef AARCH64
+  logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
+  cbz(tmp1, done);
+#else
+  movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
+  b(done, eq);
+#endif
+
+  // crosses regions, storing NULL?
+
+  cbz(new_val, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+  const Register card_addr = tmp1;
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+  mov_address(tmp2, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
+  add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTableModRefBS::card_shift));
+
+  ldrb(tmp2, Address(card_addr));
+  cmp(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+  b(done, eq);  // young card: nothing to log
+
+  membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
+
+  assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code");
+  ldrb(tmp2, Address(card_addr));  // re-read the card after the barrier
+  cbz(tmp2, done);                 // 0 == dirty: already handled
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  strb(zero_register(tmp2), Address(card_addr));
+
+  ldr(tmp2, queue_index);
+  ldr(tmp3, buffer);
+
+  subs(tmp2, tmp2, wordSize);
+  b(runtime, lt); // go to runtime if now negative
+
+  str(tmp2, queue_index);
+
+  str(card_addr, Address(tmp3, tmp2));  // slot at buffer + new index
+  b(done);
+
+  bind(runtime);
+
+  if (card_addr != R0) {
+    mov(R0, card_addr);
+  }
+  mov(R1, Rthread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);  // args: card address, thread
+
+  bind(done);
+}
+
+#endif // INCLUDE_ALL_GCS
+//////////////////////////////////////////////////////////////////////////////////
+
+
+// Java Expression Stack
+
+void InterpreterMacroAssembler::pop_ptr(Register r) { // pop one word from the expression stack into r
+ assert(r != Rstack_top, "unpredictable instruction");
+ ldr(r, Address(Rstack_top, wordSize, post_indexed)); // load, then advance Rstack_top by one word
+}
+
+void InterpreterMacroAssembler::pop_i(Register r) { // pop a 32-bit int from the expression stack into r
+ assert(r != Rstack_top, "unpredictable instruction");
+ ldr_s32(r, Address(Rstack_top, wordSize, post_indexed)); // the int occupies one stack word
+ zap_high_non_significant_bits(r);
+}
+
+#ifdef AARCH64
+void InterpreterMacroAssembler::pop_l(Register r) { // pop a long: one load, two stack words released
+ assert(r != Rstack_top, "unpredictable instruction");
+ ldr(r, Address(Rstack_top, 2*wordSize, post_indexed));
+}
+#else
+void InterpreterMacroAssembler::pop_l(Register lo, Register hi) { // pop a long into the register pair lo/hi
+ assert_different_registers(lo, hi);
+ assert(lo < hi, "lo must be < hi");
+ pop(RegisterSet(lo) | RegisterSet(hi)); // both halves popped as one register set
+}
+#endif // AARCH64
+
+void InterpreterMacroAssembler::pop_f(FloatRegister fd) { // pop a float into fd
+#ifdef AARCH64
+ ldr_s(fd, Address(Rstack_top, wordSize, post_indexed)); // one stack word
+#else
+ fpops(fd);
+#endif // AARCH64
+}
+
+void InterpreterMacroAssembler::pop_d(FloatRegister fd) { // pop a double into fd
+#ifdef AARCH64
+ ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed)); // two stack words
+#else
+ fpopd(fd);
+#endif // AARCH64
+}
+
+
+// Transition vtos -> state: pops the value for 'state' into its TOS register(s). Blows R0, R1. Sets TOS cached value.
+void InterpreterMacroAssembler::pop(TosState state) {
+ switch (state) {
+ case atos: pop_ptr(R0_tos); break;
+ case btos: // fall through
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: // fall through
+ case itos: pop_i(R0_tos); break;
+#ifdef AARCH64
+ case ltos: pop_l(R0_tos); break;
+#else
+ case ltos: pop_l(R0_tos_lo, R1_tos_hi); break;
+#endif // AARCH64
+#ifdef __SOFTFP__
+ case ftos: pop_i(R0_tos); break; // soft-float: floats travel in core registers
+ case dtos: pop_l(R0_tos_lo, R1_tos_hi); break;
+#else
+ case ftos: pop_f(S0_tos); break;
+ case dtos: pop_d(D0_tos); break;
+#endif // __SOFTFP__
+ case vtos: /* nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
+}
+
+void InterpreterMacroAssembler::push_ptr(Register r) { // push one word from r onto the expression stack
+ assert(r != Rstack_top, "unpredictable instruction");
+ str(r, Address(Rstack_top, -wordSize, pre_indexed)); // move Rstack_top down one word, then store
+ check_stack_top_on_expansion();
+}
+
+void InterpreterMacroAssembler::push_i(Register r) { // push a 32-bit int from r onto the expression stack
+ assert(r != Rstack_top, "unpredictable instruction");
+ str_32(r, Address(Rstack_top, -wordSize, pre_indexed)); // the int occupies one stack word
+ check_stack_top_on_expansion();
+}
+
+#ifdef AARCH64
+void InterpreterMacroAssembler::push_l(Register r) { // push a long: r plus ZR fill two stack words
+ assert(r != Rstack_top, "unpredictable instruction");
+ stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed));
+ check_stack_top_on_expansion();
+}
+#else
+void InterpreterMacroAssembler::push_l(Register lo, Register hi) { // push a long held in the register pair lo/hi
+ assert_different_registers(lo, hi);
+ assert(lo < hi, "lo must be < hi");
+ push(RegisterSet(lo) | RegisterSet(hi)); // both halves pushed as one register set
+}
+#endif // AARCH64
+
+void InterpreterMacroAssembler::push_f() { // push the cached float TOS (S0_tos)
+#ifdef AARCH64
+ str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed)); // one stack word
+ check_stack_top_on_expansion();
+#else
+ fpushs(S0_tos);
+#endif // AARCH64
+}
+
+void InterpreterMacroAssembler::push_d() { // push the cached double TOS (D0_tos)
+#ifdef AARCH64
+ str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed)); // two stack words
+ check_stack_top_on_expansion();
+#else
+ fpushd(D0_tos);
+#endif // AARCH64
+}
+
+// Transition state -> vtos: pushes the TOS register(s) holding 'state' onto the expression stack. Blows Rtemp.
+void InterpreterMacroAssembler::push(TosState state) {
+ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
+ switch (state) {
+ case atos: push_ptr(R0_tos); break;
+ case btos: // fall through
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: // fall through
+ case itos: push_i(R0_tos); break;
+#ifdef AARCH64
+ case ltos: push_l(R0_tos); break;
+#else
+ case ltos: push_l(R0_tos_lo, R1_tos_hi); break;
+#endif // AARCH64
+#ifdef __SOFTFP__
+ case ftos: push_i(R0_tos); break; // soft-float: floats travel in core registers
+ case dtos: push_l(R0_tos_lo, R1_tos_hi); break;
+#else
+ case ftos: push_f(); break;
+ case dtos: push_d(); break;
+#endif // __SOFTFP__
+ case vtos: /* nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+}
+
+
+#ifndef AARCH64
+
+// Moves a floating-point result from the core return registers (R0/R1) into the TOS cache (S0/D0).
+void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) {
+#if (!defined __SOFTFP__ && !defined __ABI_HARD__)
+  // The interpreter calling convention hands results back in R0/R1,
+  // whereas the templates keep ftos in S0 and dtos in D0.
+  switch (state) {
+    case ftos: fmsr(S0_tos, R0);      break;
+    case dtos: fmdrr(D0_tos, R0, R1); break;
+    default:                          break;  // other states need no conversion
+  }
+#endif // !__SOFTFP__ && !__ABI_HARD__
+}
+
+// Moves a floating-point TOS value (S0/D0) into the core return registers (R0/R1).
+void InterpreterMacroAssembler::convert_tos_to_retval(TosState state) {
+#if (!defined __SOFTFP__ && !defined __ABI_HARD__)
+  // The interpreter calling convention expects results in R0/R1,
+  // so a cached ftos (S0) or dtos (D0) has to be copied over.
+  switch (state) {
+    case ftos: fmrs(R0, S0_tos);       break;
+    case dtos: fmrrd(R0, R1, D0_tos);  break;
+    default:                           break;  // other states need no conversion
+  }
+#endif // !__SOFTFP__ && !__ABI_HARD__
+}
+
+#endif // !AARCH64
+
+
+// Helpers for swap and dup
+void InterpreterMacroAssembler::load_ptr(int n, Register val) { // val := expression stack slot n; stack pointer unchanged
+ ldr(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n)));
+}
+
+void InterpreterMacroAssembler::store_ptr(int n, Register val) { // expression stack slot n := val; stack pointer unchanged
+ str(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n)));
+}
+
+// Sets Rsender_sp from SP before an interpreted call (plus stack-top bookkeeping on AArch64, last_sp on 32-bit ARM).
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
+#ifdef AARCH64
+ check_no_cached_stack_top(Rtemp);
+ save_stack_top();
+ cut_sp_before_call();
+ mov(Rparams, Rstack_top);
+#endif // AARCH64
+
+ // set sender sp
+ mov(Rsender_sp, SP);
+
+#ifndef AARCH64
+ // record last_sp
+ str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // !AARCH64
+}
+
+// Jump to from_interpreted entry of a call unless single stepping is possible
+// in this thread in which case we must call the i2i entry. Blows Rtemp.
+void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
+ assert_different_registers(method, Rtemp);
+
+ prepare_to_jump_from_interpreted();
+
+ if (can_post_interpreter_events()) {
+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+ // compiled code in threads for which the event is enabled. Check here for
+ // interp_only_mode if these events CAN be enabled.
+
+ ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
+#ifdef AARCH64
+ {
+ Label not_interp_only_mode;
+
+ cbz(Rtemp, not_interp_only_mode);
+ indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp);
+
+ bind(not_interp_only_mode);
+ }
+#else
+ cmp(Rtemp, 0);
+ ldr(PC, Address(method, Method::interpreter_entry_offset()), ne); // conditional load into PC: jump if interp_only_mode != 0
+#endif // AARCH64
+ }
+
+ indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp); // normal path
+}
+
+
+void InterpreterMacroAssembler::restore_dispatch() { // reload RdispatchTable with the vtos dispatch table address
+ mov_slow(RdispatchTable, (address)Interpreter::dispatch_table(vtos));
+}
+
+
+// The following two routines provide a hook so that an implementation
+// can schedule the dispatch in two parts.
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
+ // Nothing ARM-specific to be done here; the whole dispatch is emitted by dispatch_epilog.
+}
+
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { // second half of the hook pair
+ dispatch_next(state, step); // performs the complete dispatch
+}
+
+void InterpreterMacroAssembler::dispatch_base(TosState state, // emits the indirect jump to the handler of R3_bytecode
+ DispatchTableMode table_mode,
+ bool verifyoop) {
+ if (VerifyActivationFrameSize) { // optional check: FP - SP must be at least the minimal frame size
+ Label L;
+#ifdef AARCH64
+ mov(Rtemp, SP);
+ sub(Rtemp, FP, Rtemp);
+#else
+ sub(Rtemp, FP, SP);
+#endif // AARCH64
+ int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize;
+ cmp(Rtemp, min_frame_size);
+ b(L, ge);
+ stop("broken stack frame");
+ bind(L);
+ }
+
+ if (verifyoop) {
+ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
+ }
+
+ if((state == itos) || (state == btos) || (state == ztos) || (state == ctos) || (state == stos)) {
+ zap_high_non_significant_bits(R0_tos);
+ }
+
+#ifdef ASSERT
+ Label L; // debug builds: RdispatchTable must still hold the vtos table address
+ mov_slow(Rtemp, (address)Interpreter::dispatch_table(vtos));
+ cmp(Rtemp, RdispatchTable);
+ b(L, eq);
+ stop("invalid RdispatchTable");
+ bind(L);
+#endif
+
+ if (table_mode == DispatchDefault) {
+ if (state == vtos) {
+ indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp);
+ } else {
+#ifdef AARCH64
+ sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) -
+ Interpreter::distance_from_dispatch_table(state))); // adjust the index (in entries)
+ indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp);
+#else
+ // on 32-bit ARM this method is faster than the one above.
+ sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) -
+ Interpreter::distance_from_dispatch_table(state)) * wordSize); // adjust the table base (in bytes)
+ indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
+#endif
+ }
+ } else {
+ assert(table_mode == DispatchNormal, "invalid dispatch table mode");
+ address table = (address) Interpreter::normal_table(state);
+ mov_slow(Rtemp, table);
+ indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
+ }
+
+ nop(); // to avoid filling CPU pipeline with invalid instructions
+ nop();
+}
+
+void InterpreterMacroAssembler::dispatch_only(TosState state) { // dispatch on R3_bytecode without loading a new bytecode
+ dispatch_base(state, DispatchDefault); // verifyoop left at its declared default
+}
+
+
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { // as dispatch_only, but through Interpreter::normal_table
+ dispatch_base(state, DispatchNormal); // verifyoop left at its declared default
+}
+
+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { // DispatchNormal dispatch with oop verification switched off.
+ dispatch_base(state, DispatchNormal, false); // verifyoop == false
+}
+
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { // Fetch the bytecode 'step' bytes ahead of Rbcp and dispatch to it.
+ // load next bytecode into R3_bytecode and advance Rbcp by 'step' (pre-indexed addressing)
+ ldrb(R3_bytecode, Address(Rbcp, step, pre_indexed));
+ dispatch_base(state, DispatchDefault);
+}
+
+void InterpreterMacroAssembler::narrow(Register result) {
+ // Narrow the integer in 'result' to the current method's result type. Uses R2 as scratch.
+ const Register Rtmp = R2;
+
+ // Load the result type byte: Rmethod -> ConstMethod -> result_type.
+ ldr(Rtmp, Address(Rmethod, Method::const_offset()));
+ ldrb(Rtmp, Address(Rtmp, ConstMethod::result_type_offset()));
+
+ Label notBool, notByte, notChar, done;
+ cmp(Rtmp, T_INT);
+ b(done, eq);
+
+ cmp(Rtmp, T_BOOLEAN);
+ b(notBool, ne);
+ and_32(result, result, 1); // boolean: keep only bit 0
+ b(done);
+
+ bind(notBool);
+ cmp(Rtmp, T_BYTE);
+ b(notByte, ne);
+ sign_extend(result, result, 8); // byte
+ b(done);
+
+ bind(notByte);
+ cmp(Rtmp, T_CHAR);
+ b(notChar, ne);
+ zero_extend(result, result, 16); // char
+ b(done);
+
+ bind(notChar);
+ // Every remaining type is handled as T_SHORT; the explicit test is disabled:
+ // cmp(Rtmp, T_SHORT); b(done, ne);
+ sign_extend(result, result, 16);
+
+ // T_INT results branch straight here, unmodified.
+ bind(done);
+}
+
+// remove activation
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+// If throw_monitor_exception
+// throws IllegalMonitorStateException
+// Else if install_monitor_exception
+// installs IllegalMonitorStateException
+// Else
+// no error processing
+void InterpreterMacroAssembler::remove_activation(TosState state, Register ret_addr,
+ bool throw_monitor_exception,
+ bool install_monitor_exception,
+ bool notify_jvmdi) {
+ Label unlock, unlocked, no_unlock;
+
+ // Note: Registers R0, R1, S0 and D0 (TOS cached value) may be in use for the result.
+
+ const Address do_not_unlock_if_synchronized(Rthread,
+ JavaThread::do_not_unlock_if_synchronized_offset());
+
+ const Register Rflag = R2;
+ const Register Raccess_flags = R3;
+
+ restore_method();
+
+ ldrb(Rflag, do_not_unlock_if_synchronized); // remember the flag before it is cleared below
+
+ // get method access flags
+ ldr_u32(Raccess_flags, Address(Rmethod, Method::access_flags_offset()));
+
+ strb(zero_register(Rtemp), do_not_unlock_if_synchronized); // reset the flag
+
+ // check if method is synchronized; if not, skip the receiver unlock
+
+ tbz(Raccess_flags, JVM_ACC_SYNCHRONIZED_BIT, unlocked);
+
+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set.
+ cbnz(Rflag, no_unlock);
+
+ // unlock monitor
+ push(state); // save result
+
+ // BasicObjectLock will be first in list, since this is a synchronized method. However, need
+ // to check that the object has not been unlocked by an explicit monitorexit bytecode.
+
+ const Register Rmonitor = R1; // fixed in unlock_object()
+ const Register Robj = R2;
+
+ // address of first monitor
+ sub(Rmonitor, FP, - frame::interpreter_frame_monitor_block_bottom_offset * wordSize + (int)sizeof(BasicObjectLock));
+
+ ldr(Robj, Address(Rmonitor, BasicObjectLock::obj_offset_in_bytes()));
+ cbnz(Robj, unlock); // non-null obj: the entry is still in use, go unlock it
+
+ pop(state);
+
+ if (throw_monitor_exception) {
+ // Entry already unlocked, need to throw exception
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
+ should_not_reach_here();
+ } else {
+ // Monitor already unlocked during a stack unroll.
+ // If requested, install an illegal_monitor_state_exception.
+ // Continue with stack unrolling.
+ if (install_monitor_exception) {
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
+ }
+ b(unlocked);
+ }
+
+
+ // Exception case for the check that all monitors are unlocked.
+ const Register Rcur = R2;
+ Label restart_check_monitors_unlocked, exception_monitor_is_still_locked;
+
+ bind(exception_monitor_is_still_locked);
+ // Monitor entry is still locked, need to throw exception.
+ // Rcur: monitor entry.
+
+ if (throw_monitor_exception) {
+ // Throw exception
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
+ should_not_reach_here();
+ } else {
+ // Stack unrolling. Unlock object and install illegal_monitor_exception
+ // Unlock does not block, so don't have to worry about the frame
+
+ push(state);
+ mov(R1, Rcur); // unlock_object() requires its argument in R1
+ unlock_object(R1);
+
+ if (install_monitor_exception) {
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
+ }
+
+ pop(state);
+ b(restart_check_monitors_unlocked); // rescan the monitor block from the top
+ }
+
+ bind(unlock);
+ unlock_object(Rmonitor);
+ pop(state);
+
+ // Check for block-structured locking (i.e., that all locked objects have been unlocked)
+ bind(unlocked);
+
+ // Check that all monitors are unlocked
+ {
+ Label loop;
+
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+ const Register Rbottom = R3;
+ const Register Rcur_obj = Rtemp;
+
+ bind(restart_check_monitors_unlocked);
+
+ ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+ // points to current entry, starting with top-most entry
+ sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize);
+ // points to word before bottom of monitor block
+
+ cmp(Rcur, Rbottom); // check if there are no monitors
+#ifndef AARCH64
+ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
+ // prefetch monitor's object (conditional load, only when Rcur != Rbottom)
+#endif // !AARCH64
+ b(no_unlock, eq);
+
+ bind(loop);
+#ifdef AARCH64
+ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
+#endif // AARCH64
+ // check if current entry is used
+ cbnz(Rcur_obj, exception_monitor_is_still_locked);
+
+ add(Rcur, Rcur, entry_size); // otherwise advance to next entry
+ cmp(Rcur, Rbottom); // check if bottom reached
+#ifndef AARCH64
+ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
+ // prefetch monitor's object (conditional load, only when Rcur != Rbottom)
+#endif // !AARCH64
+ b(loop, ne); // if not at bottom then check this entry
+ }
+
+ bind(no_unlock);
+
+ // jvmti support
+ if (notify_jvmdi) {
+ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA
+ } else {
+ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
+ }
+
+ // remove activation: reload FP and LR from the frame and set SP to the saved sender SP
+#ifdef AARCH64
+ ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
+ ldp(FP, LR, Address(FP));
+ mov(SP, Rtemp);
+#else
+ mov(Rtemp, FP); // keep the old FP: it is overwritten by the ldmia below
+ ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
+ ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize));
+#endif
+
+ if (ret_addr != LR) { // hand the return address back in the register the caller named
+ mov(ret_addr, LR);
+ }
+}
+
+
+// At certain points in the method invocation the monitor of
+// synchronized methods hasn't been entered yet.
+// To correctly handle exceptions at these points, we set the thread local
+// variable _do_not_unlock_if_synchronized to true. remove_activation() reads
+// this flag and resets it. 'tmp' is used as scratch for the stored value.
+void InterpreterMacroAssembler::set_do_not_unlock_if_synchronized(bool flag, Register tmp) {
+ const Address do_not_unlock_if_synchronized(Rthread,
+ JavaThread::do_not_unlock_if_synchronized_offset());
+ if (flag) {
+ mov(tmp, 1);
+ strb(tmp, do_not_unlock_if_synchronized); // byte store of 1
+ } else {
+ strb(zero_register(tmp), do_not_unlock_if_synchronized); // byte store of 0
+ }
+}
+
+// Lock object
+//
+// Argument: R1 : Points to BasicObjectLock to be used for locking.
+// Must be initialized with object to lock.
+// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
+void InterpreterMacroAssembler::lock_object(Register Rlock) {
+ assert(Rlock == R1, "the second argument");
+
+ if (UseHeavyMonitors) { // no inline fast path: always go through the runtime
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock);
+ } else {
+ Label done;
+
+ const Register Robj = R2;
+ const Register Rmark = R3;
+ assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);
+
+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
+ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes();
+
+ Label already_locked, slow_case;
+
+ // Load object pointer
+ ldr(Robj, Address(Rlock, obj_offset));
+
+ if (UseBiasedLocking) {
+ biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case);
+ }
+
+#ifdef AARCH64
+ assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
+ ldr(Rmark, Robj);
+
+ // Test if object is already locked
+ assert(markOopDesc::unlocked_value == 1, "adjust this code");
+ tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked);
+
+#else // AARCH64
+
+ // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
+ // That would be acceptable as either the CAS or the slow case path is taken in that case.
+ // Exception to that is if the object is locked by the calling thread, then the recursive test will pass (guaranteed as
+ // loads are satisfied from a store queue if performed on the same processor).
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
+ ldr(Rmark, Address(Robj, oopDesc::mark_offset_in_bytes()));
+
+ // Test if object is already locked
+ tst(Rmark, markOopDesc::unlocked_value);
+ b(already_locked, eq);
+
+#endif // !AARCH64
+ // Save old object->mark() into BasicLock's displaced header
+ str(Rmark, Address(Rlock, mark_offset));
+
+ cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case);
+
+#ifndef PRODUCT
+ if (PrintBiasedLockingStatistics) {
+ cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr());
+ }
+#endif //!PRODUCT
+
+ b(done);
+
+ // If we got here that means the object is locked by either the calling thread or another thread.
+ bind(already_locked);
+ // Handling of locked objects: recursive locks and slow case.
+
+ // Fast check for recursive lock.
+ //
+ // Can apply the optimization only if this is a stack lock
+ // allocated in this thread. For efficiency, we can focus on
+ // recently allocated stack locks (instead of reading the stack
+ // base and checking whether 'mark' points inside the current
+ // thread stack):
+ // 1) (mark & 3) == 0
+ // 2) SP <= mark < SP + os::pagesize()
+ //
+ // Warning: SP + os::pagesize can overflow the stack base. We must
+ // neither apply the optimization for an inflated lock allocated
+ // just above the thread stack (this is why condition 1 matters)
+ // nor apply the optimization if the stack lock is inside the stack
+ // of another thread. The latter is avoided even in case of overflow
+ // because we have guard pages at the end of all stacks. Hence, if
+ // we go over the stack base and hit the stack of another thread,
+ // this should not be in a writeable area that could contain a
+ // stack lock allocated by that thread. As a consequence, a stack
+ // lock less than page size away from SP is guaranteed to be
+ // owned by the current thread.
+ //
+ // Note: assuming SP is aligned, we can check the low bits of
+ // (mark-SP) instead of the low bits of mark. In that case,
+ // assuming page size is a power of 2, we can merge the two
+ // conditions into a single test:
+ // => ((mark - SP) & (3 - os::pagesize())) == 0
+
+#ifdef AARCH64
+ // Use the single check since the immediate is OK for AARCH64
+ sub(R0, Rmark, Rstack_top);
+ intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
+ Assembler::LogicalImmediate imm(mask, false);
+ ands(R0, R0, imm);
+
+ // For recursive case store 0 into lock record.
+ // It is harmless to store it unconditionally as lock record contains some garbage
+ // value in its _displaced_header field by this moment.
+ str(ZR, Address(Rlock, mark_offset));
+
+#else // AARCH64
+ // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand.
+ // Check independently the low bits and the distance to SP.
+ // -1- test low 2 bits
+ movs(R0, AsmOperand(Rmark, lsl, 30));
+ // -2- test (mark - SP) if the low two bits are 0
+ sub(R0, Rmark, SP, eq);
+ movs(R0, AsmOperand(R0, lsr, exact_log2(os::vm_page_size())), eq);
+ // If still 'eq' then recursive locking OK: store 0 into lock record
+ str(R0, Address(Rlock, mark_offset), eq);
+
+#endif // AARCH64
+
+#ifndef PRODUCT
+ if (PrintBiasedLockingStatistics) {
+ cond_atomic_inc32(eq, BiasedLocking::fast_path_entry_count_addr());
+ }
+#endif // !PRODUCT
+
+ b(done, eq); // 'eq' here means the recursive-lock test above succeeded
+
+ bind(slow_case);
+
+ // Call the runtime routine for slow case
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock);
+
+ bind(done);
+ }
+}
+
+
+// Unlocks an object. Used in monitorexit bytecode and remove_activation.
+//
+// Argument: R1: Points to BasicObjectLock structure for lock
+// Throw an IllegalMonitorStateException if object is not locked by current thread
+// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
+void InterpreterMacroAssembler::unlock_object(Register Rlock) {
+ assert(Rlock == R1, "the second argument");
+
+ if (UseHeavyMonitors) { // no inline fast path: always go through the runtime
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock);
+ } else {
+ Label done, slow_case;
+
+ const Register Robj = R2;
+ const Register Rmark = R3;
+ const Register Rresult = R0;
+ assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);
+
+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
+ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes();
+
+ const Register Rzero = zero_register(Rtemp);
+
+ // Load oop into Robj
+ ldr(Robj, Address(Rlock, obj_offset));
+
+ // Free entry (the obj field is restored below if the slow case is taken)
+ str(Rzero, Address(Rlock, obj_offset));
+
+ if (UseBiasedLocking) {
+ biased_locking_exit(Robj, Rmark, done);
+ }
+
+ // Load the old header from BasicLock structure
+ ldr(Rmark, Address(Rlock, mark_offset));
+
+ // Test for recursion (zero mark in BasicLock)
+ cbz(Rmark, done);
+
+ bool allow_fallthrough_on_failure = true;
+
+ cas_for_lock_release(Rlock, Rmark, Robj, Rtemp, slow_case, allow_fallthrough_on_failure);
+
+ b(done, eq); // presumably 'eq' signals a successful CAS; otherwise fall into slow_case - confirm against cas_for_lock_release
+
+ bind(slow_case);
+
+ // Call the runtime routine for slow case.
+ str(Robj, Address(Rlock, obj_offset)); // restore obj
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock);
+
+ bind(done);
+ }
+}
+
+
+// Load the method data pointer from the interpreter frame into 'mdp'. If it is null, continue at the specified label.
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ ldr(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
+ cbz(mdp, zero_continue);
+}
+
+
+// Set the method data pointer for the current bcp.
+// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ Label set_mdp;
+
+ // Test MDO to avoid the call if it is NULL.
+ ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
+ cbz(Rtemp, set_mdp); // no MDO: Rtemp is 0 and is stored as the mdp below
+
+ mov(R0, Rmethod);
+ mov(R1, Rbcp);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), R0, R1);
+ // R0/W0: mdi
+
+ // mdo is guaranteed to be non-zero here, we checked for it before the call.
+ ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
+ add(Rtemp, Rtemp, in_bytes(MethodData::data_offset()));
+ add_ptr_scaled_int32(Rtemp, Rtemp, R0, 0); // presumably Rtemp += mdi with shift 0 - confirm against the helper
+
+ bind(set_mdp);
+ str(Rtemp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::verify_method_data_pointer() { // ASSERT-only check that the frame's mdp agrees with Rbcp; emits nothing otherwise.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+#ifdef ASSERT
+ Label verify_continue;
+ save_caller_save_registers();
+
+ const Register Rmdp = R2;
+ test_method_data_pointer(Rmdp, verify_continue); // If mdp is zero, continue
+
+ // If the mdp is valid, it will point to a DataLayout header which is
+ // consistent with the bcp. The converse is highly probable also.
+
+ ldrh(R3, Address(Rmdp, DataLayout::bci_offset()));
+ ldr(Rtemp, Address(Rmethod, Method::const_offset()));
+ add(R3, R3, Rtemp);
+ add(R3, R3, in_bytes(ConstMethod::codes_offset())); // R3 = ConstMethod + codes_offset + bci from the DataLayout header
+ cmp(R3, Rbcp);
+ b(verify_continue, eq);
+
+ mov(R0, Rmethod); // mismatch: hand method, bcp and mdp to the runtime verifier
+ mov(R1, Rbcp);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), R0, R1, Rmdp);
+
+ bind(verify_continue);
+ restore_caller_save_registers();
+#endif // ASSERT
+}
+
+
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int offset, Register value) { // Store 'value' at [mdp_in + offset].
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ assert_different_registers(mdp_in, value);
+ str(value, Address(mdp_in, offset));
+}
+
+
+// Increments (or, if 'decrement' is set, decrements) the mdp counter at [mdp_in + offset].
+// Sets bumped_count register to adjusted counter. Forwards to the Address overload.
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+ int offset,
+ Register bumped_count,
+ bool decrement) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+
+ // Counter address
+ Address data(mdp_in, offset);
+ assert_different_registers(mdp_in, bumped_count);
+
+ increment_mdp_data_at(data, bumped_count, decrement);
+}
+
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, int flag_byte_constant) { // OR flag_byte_constant into the DataLayout flags byte at mdp_in. Blows Rtemp.
+ assert_different_registers(mdp_in, Rtemp);
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ assert((0 < flag_byte_constant) && (flag_byte_constant < (1 << BitsPerByte)), "flag mask is out of range");
+
+ // Set the flag: read-modify-write of the single flags byte
+ ldrb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset())));
+ orr(Rtemp, Rtemp, (unsigned)flag_byte_constant);
+ strb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset())));
+}
+
+
+// Increments (or, if 'decrement' is set, decrements) the counter at 'data'. Sets bumped_count register to adjusted counter.
+void InterpreterMacroAssembler::increment_mdp_data_at(Address data,
+ Register bumped_count,
+ bool decrement) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+
+ ldr(bumped_count, data);
+ if (decrement) {
+ // Decrement the register. Set condition codes.
+ subs(bumped_count, bumped_count, DataLayout::counter_increment);
+ // Avoid overflow: if the result is not negative ('pl'), add the increment back.
+#ifdef AARCH64
+ assert(DataLayout::counter_increment == 1, "required for cinc");
+ cinc(bumped_count, bumped_count, pl);
+#else
+ add(bumped_count, bumped_count, DataLayout::counter_increment, pl);
+#endif // AARCH64
+ } else {
+ // Increment the register. Set condition codes.
+ adds(bumped_count, bumped_count, DataLayout::counter_increment);
+ // Avoid overflow: if the result turned negative ('mi'), undo the increment.
+#ifdef AARCH64
+ assert(DataLayout::counter_increment == 1, "required for cinv");
+ cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff
+#else
+ sub(bumped_count, bumped_count, DataLayout::counter_increment, mi);
+#endif // AARCH64
+ }
+ str(bumped_count, data);
+}
+
+
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, // Load [mdp_in + offset] into test_value_out and branch to not_equal_continue if it differs from 'value'.
+ int offset,
+ Register value,
+ Register test_value_out,
+ Label& not_equal_continue) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ assert_different_registers(mdp_in, test_value_out, value);
+
+ ldr(test_value_out, Address(mdp_in, offset)); // the loaded value stays in test_value_out on both paths
+ cmp(test_value_out, value);
+
+ b(not_equal_continue, ne);
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp, Register reg_temp) { // mdp_in += [mdp_in + offset_of_disp]; the new mdp is also saved in the frame.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ assert_different_registers(mdp_in, reg_temp);
+
+ ldr(reg_temp, Address(mdp_in, offset_of_disp)); // displacement stored in the profile data
+ add(mdp_in, mdp_in, reg_temp);
+ str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, Register reg_offset, Register reg_tmp) { // mdp_in += [mdp_in + reg_offset]; the new mdp is also saved in the frame.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ assert_different_registers(mdp_in, reg_offset, reg_tmp);
+
+ ldr(reg_tmp, Address(mdp_in, reg_offset)); // displacement stored in the profile data
+ add(mdp_in, mdp_in, reg_tmp);
+ str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) { // mdp_in += constant; the new mdp is also saved in the frame.
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ add(mdp_in, mdp_in, constant);
+ str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+
+// Calls InterpreterRuntime::update_mdp_for_ret with return_bci. Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp);
+
+ mov(R1, return_bci); // the argument is passed to the runtime in R1
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), R1);
+}
+
+
+// Profile a taken branch. Sets mdp, bumped_count registers, blows Rtemp. Emits nothing unless ProfileInterpreter.
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) {
+ assert_different_registers(mdp, bumped_count);
+
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ // Otherwise, assign to mdp
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are taking a branch. Increment the taken count.
+ increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()), bumped_count);
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()), Rtemp);
+
+ bind (profile_continue);
+ }
+}
+
+
+// Profile a branch that is not taken. Sets mdp, blows Rtemp. Emits nothing unless ProfileInterpreter.
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
+ assert_different_registers(mdp, Rtemp);
+
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are not taking the branch. Increment the not taken count.
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Rtemp);
+
+ // The method data pointer needs to be updated to correspond to the next bytecode
+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
+
+ bind (profile_continue);
+ }
+}
+
+
+// Profile a call: bump the CounterData count. Sets mdp, blows Rtemp. Emits nothing unless ProfileInterpreter.
+void InterpreterMacroAssembler::profile_call(Register mdp) {
+ assert_different_registers(mdp, Rtemp);
+
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are making a call. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);
+
+ // Advance the method data pointer past this CounterData entry.
+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
+
+ bind (profile_continue);
+ }
+}
+
+
+// Like profile_call(), but advances mdp by the VirtualCallData size. Sets mdp, blows Rtemp.
+void InterpreterMacroAssembler::profile_final_call(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are making a call. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);
+
+ // Advance the method data pointer past this VirtualCallData entry.
+ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+
+ bind (profile_continue);
+ }
+}
+
+
+// Profile a virtual call by recording 'receiver' in the VirtualCallData rows. Sets mdp, blows Rtemp.
+void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register receiver, bool receiver_can_be_null) {
+ assert_different_registers(mdp, receiver, Rtemp);
+
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ Label skip_receiver_profile;
+ if (receiver_can_be_null) {
+ Label not_null;
+ cbnz(receiver, not_null);
+ // We are making a call. Increment the count for null receiver.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);
+ b(skip_receiver_profile);
+ bind(not_null);
+ }
+
+ // Record the receiver type.
+ record_klass_in_profile(receiver, mdp, Rtemp, true);
+ bind(skip_receiver_profile);
+
+ // Advance the method data pointer past this VirtualCallData entry.
+ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+ bind(profile_continue);
+ }
+}
+
+
+void InterpreterMacroAssembler::record_klass_in_profile_helper( // Emits the receiver-row search starting at start_row; recurses for later rows.
+ Register receiver, Register mdp,
+ Register reg_tmp,
+ int start_row, Label& done, bool is_virtual_call) {
+ if (TypeProfileWidth == 0) // nothing is emitted in this configuration
+ return;
+
+ assert_different_registers(receiver, mdp, reg_tmp);
+
+ int last_row = VirtualCallData::row_limit() - 1;
+ assert(start_row <= last_row, "must be work left to do");
+ // Test this row for both the receiver and for null.
+ // Take any of three different outcomes:
+ // 1. found receiver => increment count and goto done
+ // 2. found null => keep looking for case 1, maybe allocate this cell
+ // 3. found something else => keep looking for cases 1 and 2
+ // Case 3 is handled by a recursive call.
+ for (int row = start_row; row <= last_row; row++) {
+ Label next_test;
+
+ // See if the receiver is receiver[n].
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
+
+ test_mdp_data_at(mdp, recvr_offset, receiver, reg_tmp, next_test);
+
+ // The receiver is receiver[n]. Increment count[n].
+ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+ increment_mdp_data_at(mdp, count_offset, reg_tmp);
+ b(done);
+
+ bind(next_test);
+ // reg_tmp now contains the receiver from the CallData.
+
+ if (row == start_row) {
+ Label found_null;
+ // Failed the equality check on receiver[n]... Test for null.
+ if (start_row == last_row) {
+ // The only thing left to do is handle the null case.
+ if (is_virtual_call) {
+ cbz(reg_tmp, found_null);
+ // Receiver did not match any saved receiver and there is no empty row for it.
+ // Increment total counter to indicate polymorphic case.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), reg_tmp);
+ b(done);
+ bind(found_null);
+ } else {
+ cbnz(reg_tmp, done); // not a virtual call: a full table is left untouched
+ }
+ break;
+ }
+ // Since null is rare, make it be the branch-taken case.
+ cbz(reg_tmp, found_null);
+
+ // Put all the "Case 3" tests here.
+ record_klass_in_profile_helper(receiver, mdp, reg_tmp, start_row + 1, done, is_virtual_call);
+
+ // Found a null. Keep searching for a matching receiver,
+ // but remember that this is an empty (unused) slot.
+ bind(found_null);
+ }
+ }
+
+ // In the fall-through case, we found no matching receiver, but we
+ // observed the receiver[start_row] is NULL.
+
+ // Fill in the receiver field and set the count to one increment.
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
+ set_mdp_data_at(mdp, recvr_offset, receiver);
+ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+ mov(reg_tmp, DataLayout::counter_increment);
+ set_mdp_data_at(mdp, count_offset, reg_tmp);
+ if (start_row > 0) { // the outermost call (row 0) simply falls through instead of branching
+ b(done);
+ }
+}
+
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, // Entry point for the receiver-row search; binds the shared 'done' label.
+ Register mdp,
+ Register reg_tmp,
+ bool is_virtual_call) {
+ assert(ProfileInterpreter, "must be profiling");
+ assert_different_registers(receiver, mdp, reg_tmp);
+
+ Label done;
+
+ record_klass_in_profile_helper(receiver, mdp, reg_tmp, 0, done, is_virtual_call); // start at row 0
+
+ bind (done);
+}
+
+// Profile a ret bytecode returning to return_bci. Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) {
+ assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3);
+
+ if (ProfileInterpreter) {
+ Label profile_continue;
+ uint row;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Update the total ret count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);
+
+ for (row = 0; row < RetData::row_limit(); row++) { // one compare-and-update sequence is emitted per RetData row
+ Label next_test;
+
+ // See if return_bci is equal to bci[n]:
+ test_mdp_data_at(mdp, in_bytes(RetData::bci_offset(row)), return_bci,
+ Rtemp, next_test);
+
+ // return_bci is equal to bci[n]. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)), Rtemp);
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row)), Rtemp);
+ b(profile_continue);
+ bind(next_test);
+ }
+
+ update_mdp_for_ret(return_bci); // no row matched: let the runtime handle it
+
+ bind(profile_continue);
+ }
+}
+
+
+// Set the null_seen flag in the current profile entry and advance mdp. Sets mdp; Rtemp is blown by set_mdp_flag_at().
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
+ // The method data pointer needs to be updated.
+ int mdp_delta = in_bytes(BitData::bit_data_size());
+ if (TypeProfileCasts) { // with TypeProfileCasts the step is the VirtualCallData size instead
+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+ }
+ update_mdp_by_constant(mdp, mdp_delta);
+
+ bind (profile_continue);
+ }
+}
+
+
+// Decrement the count of the type-check entry that mdp has already been moved past. Sets mdp, blows Rtemp.
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
+ assert_different_registers(mdp, Rtemp);
+
+ if (ProfileInterpreter && TypeProfileCasts) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ int count_offset = in_bytes(CounterData::count_offset());
+ // Back up the address, since we have already bumped the mdp.
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
+
+ // *Decrement* the counter. We expect to see zero or small negatives.
+ increment_mdp_data_at(mdp, count_offset, Rtemp, true);
+
+ bind (profile_continue);
+ }
+}
+
+
+// Profile a type check against 'klass' and advance mdp. Sets mdp, blows Rtemp.
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass)
+{
+ assert_different_registers(mdp, klass, Rtemp);
+
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // The method data pointer needs to be updated.
+ int mdp_delta = in_bytes(BitData::bit_data_size());
+ if (TypeProfileCasts) {
+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+
+ // Record the object type.
+ record_klass_in_profile(klass, mdp, Rtemp, false);
+ }
+ update_mdp_by_constant(mdp, mdp_delta);
+
+ bind(profile_continue);
+ }
+}
+
+
+// Profile a switch that takes its default case. Sets mdp, blows Rtemp.
+void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
+ assert_different_registers(mdp, Rtemp);
+
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Update the default case count
+ increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset()), Rtemp);
+
+ // The method data pointer needs to be updated.
+ update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset()), Rtemp);
+
+ bind(profile_continue);
+ }
+}
+
+
+// Profile a switch that takes case number 'index'. Sets mdp. Blows reg_tmp1, reg_tmp2. Index could be the same as reg_tmp2.
+void InterpreterMacroAssembler::profile_switch_case(Register mdp, Register index, Register reg_tmp1, Register reg_tmp2) {
+ assert_different_registers(mdp, reg_tmp1, reg_tmp2);
+ assert_different_registers(mdp, reg_tmp1, index);
+
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ const int count_offset = in_bytes(MultiBranchData::case_array_offset()) +
+ in_bytes(MultiBranchData::relative_count_offset());
+
+ const int displacement_offset = in_bytes(MultiBranchData::case_array_offset()) +
+ in_bytes(MultiBranchData::relative_displacement_offset());
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Build the base (index * per_case_size_in_bytes()); the shift requires the size to be a power of two
+ logical_shift_left(reg_tmp1, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));
+
+ // Update the case count
+ add(reg_tmp1, reg_tmp1, count_offset);
+ increment_mdp_data_at(Address(mdp, reg_tmp1), reg_tmp2);
+
+ // The method data pointer needs to be updated (reg_tmp1 is moved from the count to the displacement slot).
+ add(reg_tmp1, reg_tmp1, displacement_offset - count_offset);
+ update_mdp_by_offset(mdp, reg_tmp1, reg_tmp2);
+
+ bind (profile_continue);
+ }
+}
+
+
+void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) { // Emits code that reverses the byte order of the 32-bit value in r, in place
+#ifdef AARCH64
+ rev_w(r, r); // single instruction; rtmp1 and rtmp2 are not touched on this path
+#else
+ if (VM_Version::supports_rev()) {
+ rev(r, r); // single instruction; rtmp1 and rtmp2 are not touched on this path
+ } else {
+ eor(rtmp1, r, AsmOperand(r, ror, 16)); // fallback without rev, blows rtmp1 and rtmp2: rtmp1 = r ^ (r ror 16)
+ mvn(rtmp2, 0x0000ff00); // rtmp2 = ~0x0000ff00, used as a mask
+ andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8)); // rtmp1 = rtmp2 & (rtmp1 >> 8)
+ eor(r, rtmp1, AsmOperand(r, ror, 8)); // r = rtmp1 ^ (r ror 8), i.e. the byte-reversed value
+ }
+#endif // AARCH64
+}
+
+
+void InterpreterMacroAssembler::inc_global_counter(address address_of_counter, int offset, Register tmp1, Register tmp2, bool avoid_overflow) {
+  const intx addr = (intx) (address_of_counter + offset);
+  // Emits an increment of the 32-bit counter at address_of_counter + offset. Blows tmp1, tmp2.
+  assert ((addr & 0x3) == 0, "address of counter should be aligned");
+  // Split the address into a base (materialized with mov_slow) and a low-bits
+  // displacement for the load/store: 12 + 2 bits on AArch64, 12 bits on 32-bit ARM.
+  const intx disp_bits = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12));
+  const address counter_base = (address) (addr & ~disp_bits);
+  const int     counter_disp = (int) (addr & disp_bits);
+  mov_slow(tmp1, counter_base);
+  ldr_s32(tmp2, Address(tmp1, counter_disp));
+
+  if (!avoid_overflow) {
+    // Plain increment, stored back unconditionally.
+    add_32(tmp2, tmp2, 1);
+    str_32(tmp2, Address(tmp1, counter_disp));
+    return;
+  }
+
+  // Overflow-avoiding increment: the new value is stored only if it is not negative.
+  adds_32(tmp2, tmp2, 1);
+#ifdef AARCH64
+  Label skip_store;
+  b(skip_store, mi);
+  str_32(tmp2, Address(tmp1, counter_disp));
+  bind(skip_store);
+#else
+  str(tmp2, Address(tmp1, counter_disp), pl);
+#endif // AARCH64
+}
+
+void InterpreterMacroAssembler::interp_verify_oop(Register reg, TosState state, const char *file, int line) { // Verifies reg as an oop, but only when the tos state is atos
+ if (state == atos) { MacroAssembler::_verify_oop(reg, "broken oop", file, line); } // file and line are forwarded unchanged; other states emit nothing
+}
+
+// Emits the method-entry notifications. The generated code corresponds to:
+//
+// if (thread is in interp_only_mode) {
+// InterpreterRuntime::post_method_entry();
+// }
+// if (DTraceMethodProbes) {
+// SharedRuntime::dtrace_method_entry(thread, method);
+// }
+// if (log_is_enabled(Trace, redefine, class, obsolete)) {
+// SharedRuntime::rc_trace_method_entry(thread, method);
+// }
+
+void InterpreterMacroAssembler::notify_method_entry() {
+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
+ // track stack depth. If it is possible to enter interp_only_mode we add
+ // the code to check if the event should be sent.
+ if (can_post_interpreter_events()) {
+ Label L;
+ // Skip the VM call when the thread's interp_only_mode field is zero. Blows Rtemp.
+ ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
+ cbz(Rtemp, L);
+
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry));
+
+ bind(L);
+ }
+ // DTrace probe. The code below is only generated when DTraceMethodProbes is set at
+ // code generation time; the generated code then re-tests the flag byte at run time.
+ if (DTraceMethodProbes) {
+ Label Lcontinue;
+
+ ldrb_global(Rtemp, (address)&DTraceMethodProbes);
+ cbz(Rtemp, Lcontinue);
+ // Leaf call with (thread, method) in R0, R1.
+ mov(R0, Rthread);
+ mov(R1, Rmethod);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), R0, R1);
+
+ bind(Lcontinue);
+ }
+ // RedefineClasses() tracing support for obsolete method entry (decided at code generation time, no run-time test)
+ if (log_is_enabled(Trace, redefine, class, obsolete)) {
+ mov(R0, Rthread);
+ mov(R1, Rmethod);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+ R0, R1);
+ }
+}
+
+
+void InterpreterMacroAssembler::notify_method_exit(
+ TosState state, NotifyMethodExitMode mode,
+ bool native, Register result_lo, Register result_hi, FloatRegister result_fp) { // Emits the method-exit notifications, saving and restoring the method result around each call
+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
+ // track stack depth. If it is possible to enter interp_only_mode we add
+ // the code to check if the event should be sent.
+ if (mode == NotifyJVMTI && can_post_interpreter_events()) {
+ Label L;
+ // Note: frame::interpreter_frame_result has a dependency on how the
+ // method result is saved across the call to post_method_exit. If this
+ // is changed then the interpreter_frame_result implementation will
+ // need to be updated too.
+ // Skip the VM call when the thread's interp_only_mode field is zero. Blows Rtemp.
+ ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
+ cbz(Rtemp, L);
+
+ if (native) {
+ // Native method: the tos state is not known here, so the raw result
+ // registers are saved around the VM call instead of a typed tos value.
+ // On AArch64 result registers are stored into the frame at known locations.
+ // See frame::interpreter_frame_result for code that gets the result values from here.
+ assert(result_lo != noreg, "result registers should be defined");
+
+#ifdef AARCH64
+ assert(result_hi == noreg, "result_hi is not used on AArch64");
+ assert(result_fp != fnoreg, "FP result register must be defined");
+ // Save the FP and the GP result into their dedicated frame slots.
+ str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
+ str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
+#else
+ assert(result_hi != noreg, "result registers should be defined");
+ // With the hard-float ABI the FP result is first spilled to a new two-word stack slot.
+#ifdef __ABI_HARD__
+ assert(result_fp != fnoreg, "FP result register must be defined");
+ sub(SP, SP, 2 * wordSize);
+ fstd(result_fp, Address(SP));
+#endif // __ABI_HARD__
+ // Then both integer result registers are pushed.
+ push(RegisterSet(result_lo) | RegisterSet(result_hi));
+#endif // AARCH64
+
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
+ // Restore the results, undoing the saves above in reverse order.
+#ifdef AARCH64
+ ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
+ ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
+#else
+ pop(RegisterSet(result_lo) | RegisterSet(result_hi));
+#ifdef __ABI_HARD__
+ fldd(result_fp, Address(SP));
+ add(SP, SP, 2 * wordSize);
+#endif // __ABI_HARD__
+#endif // AARCH64
+
+ } else {
+ // Non-native method: the tos state is known, so the cached tos value is
+ // pushed before and popped after the VM call. (c++ interpreter calls jvmti somewhere else).
+ push(state);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
+ pop(state);
+ }
+
+ bind(L);
+ }
+ // DTrace probe. The code below is only generated when DTraceMethodProbes is set at
+ // code generation time; the generated code then re-tests the flag byte at run time.
+ if (DTraceMethodProbes) {
+ Label Lcontinue;
+
+ ldrb_global(Rtemp, (address)&DTraceMethodProbes);
+ cbz(Rtemp, Lcontinue);
+ // The cached tos value is pushed before and popped after the leaf call.
+ push(state);
+
+ mov(R0, Rthread);
+ mov(R1, Rmethod);
+
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), R0, R1);
+
+ pop(state);
+
+ bind(Lcontinue);
+ }
+}
+
+
+#ifndef PRODUCT
+
+void InterpreterMacroAssembler::trace_state(const char* msg) { // Debug aid (compiled only when PRODUCT is not defined): emits a printf of msg, FP and SP
+ int push_size = save_caller_save_registers();
+ // push_size is used below to recover the SP value from before the registers were saved.
+ Label Lcontinue;
+ InlinedString Lmsg0("%s: FP=" INTPTR_FORMAT ", SP=" INTPTR_FORMAT "\n");
+ InlinedString Lmsg(msg);
+ InlinedAddress Lprintf((address)printf);
+ // printf(format, msg, FP, original SP), with the arguments in R0-R3.
+ ldr_literal(R0, Lmsg0);
+ ldr_literal(R1, Lmsg);
+ mov(R2, FP);
+ add(R3, SP, push_size); // original SP (without saved registers)
+ ldr_literal(Rtemp, Lprintf);
+ call(Rtemp);
+ // Branch over the literals, which are bound right here after the call.
+ b(Lcontinue);
+
+ bind_literal(Lmsg0);
+ bind_literal(Lmsg);
+ bind_literal(Lprintf);
+
+
+ bind(Lcontinue);
+
+ restore_caller_save_registers();
+}
+
+#endif
+
+// Emits: *counter_addr += increment; then jump to *where if (new counter & *mask_addr) satisfies cond. Blows scratch, scratch2.
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
+ int increment, Address mask_addr,
+ Register scratch, Register scratch2,
+ AsmCondition cond, Label* where) {
+ // caution: scratch2 and base address of counter_addr can be the same
+ assert_different_registers(scratch, scratch2);
+ ldr_u32(scratch, counter_addr);
+ add(scratch, scratch, increment);
+ str_32(scratch, counter_addr);
+ // scratch2 is written only from here on, after the last use of counter_addr (see the caution above).
+#ifdef AARCH64
+ ldr_u32(scratch2, mask_addr);
+ ands_w(ZR, scratch, scratch2); // only the flags are needed; the result goes to ZR
+#else
+ ldr(scratch2, mask_addr);
+ andrs(scratch, scratch, scratch2); // sets the flags; scratch receives the masked value
+#endif // AARCH64
+ b(*where, cond);
+}
+
+void InterpreterMacroAssembler::get_method_counters(Register method,
+ Register Rcounters,
+ Label& skip) { // Loads method's counters field into Rcounters, building it through the runtime if it is zero; jumps to skip if it is still zero afterwards
+ const Address method_counters(method, Method::method_counters_offset());
+ Label has_counters;
+ // Fast path: the counters field is already non-zero.
+ ldr(Rcounters, method_counters);
+ cbnz(Rcounters, has_counters);
+ // Slow path: save registers, call InterpreterRuntime::build_method_counters, restore, reload.
+#ifdef AARCH64
+ const Register tmp = Rcounters;
+ const int saved_regs_size = 20*wordSize; // ten register pairs, stored with stp below
+
+ // Note: call_VM will cut SP according to Rstack_top value before call, and restore SP to
+ // extended_sp value from frame after the call.
+ // So make sure there is enough stack space to save registers and adjust Rstack_top accordingly.
+ {
+ Label enough_stack_space;
+ check_extended_sp(tmp);
+ sub(Rstack_top, Rstack_top, saved_regs_size);
+ cmp(SP, Rstack_top);
+ b(enough_stack_space, ls);
+ // SP is above the new Rstack_top: move SP down to the aligned Rstack_top and record it as the extended SP.
+ align_reg(tmp, Rstack_top, StackAlignmentInBytes);
+ mov(SP, tmp);
+ str(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
+
+ bind(enough_stack_space);
+ check_stack_top();
+ // Save R0-R18 and LR in the area starting at Rstack_top.
+ int offset = 0;
+ stp(R0, R1, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R2, R3, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R4, R5, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R6, R7, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R8, R9, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize;
+ stp(R18, LR, Address(Rstack_top, offset)); offset += 2*wordSize;
+ assert (offset == saved_regs_size, "should be");
+ }
+#else
+ push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14));
+#endif // AARCH64
+ // The method is passed to the runtime in R1.
+ mov(R1, method);
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::build_method_counters), R1);
+
+#ifdef AARCH64
+ {
+ int offset = 0;
+ ldp(R0, R1, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R2, R3, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R4, R5, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R6, R7, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R8, R9, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize;
+ ldp(R18, LR, Address(Rstack_top, offset)); offset += 2*wordSize;
+ assert (offset == saved_regs_size, "should be");
+ // Release the save area again.
+ add(Rstack_top, Rstack_top, saved_regs_size);
+ }
+#else
+ pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14));
+#endif // AARCH64
+ // Reload after the registers were restored; a zero value means the runtime did not create the counters.
+ ldr(Rcounters, method_counters);
+ cbz(Rcounters, skip); // No MethodCounters created, OutOfMemory
+
+ bind(has_counters);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/interp_masm_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_INTERP_MASM_ARM_HPP
+#define CPU_ARM_VM_INTERP_MASM_ARM_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/invocationCounter.hpp"
+#include "runtime/frame.hpp"
+#include "prims/jvmtiExport.hpp"
+
+// This file specializes the assembler with interpreter-specific macros
+
+
+class InterpreterMacroAssembler: public MacroAssembler {
+ // MacroAssembler extended with the helper macros used by the ARM template interpreter.
+ public:
+
+ // allow JvmtiExport checks to be extended
+ bool can_force_early_return() { return JvmtiExport::can_force_early_return(); }
+ bool can_post_interpreter_events() { return JvmtiExport::can_post_interpreter_events(); }
+ bool can_pop_frame() { return JvmtiExport::can_pop_frame(); }
+ bool can_post_breakpoint() { return JvmtiExport::can_post_breakpoint(); }
+ bool can_post_field_access() { return JvmtiExport::can_post_field_access(); }
+ bool can_post_field_modification() { return JvmtiExport::can_post_field_modification(); }
+ // flags controlled by JVMTI settings
+ bool rewrite_frequent_pairs() { return RewriteFrequentPairs; }
+
+ protected:
+
+ // Template interpreter specific version of call_VM_helper
+ virtual void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
+
+ virtual void check_and_handle_popframe();
+ virtual void check_and_handle_earlyret();
+
+ // base routine for all dispatches
+ typedef enum { DispatchDefault, DispatchNormal } DispatchTableMode;
+ void dispatch_base(TosState state, DispatchTableMode table_mode, bool verifyoop = true);
+
+ public:
+ InterpreterMacroAssembler(CodeBuffer* code);
+
+ // Interpreter-specific registers
+#if defined(AARCH64) && defined(ASSERT)
+
+#define check_stack_top() _check_stack_top("invalid Rstack_top at " __FILE__ ":" XSTR(__LINE__))
+#define check_stack_top_on_expansion() _check_stack_top("invalid Rstack_top at " __FILE__ ":" XSTR(__LINE__), VerifyInterpreterStackTop)
+#define check_extended_sp(tmp) _check_extended_sp(tmp, "SP does not match extended SP in frame at " __FILE__ ":" XSTR(__LINE__))
+#define check_no_cached_stack_top(tmp) _check_no_cached_stack_top(tmp, "stack_top is already cached in frame at " __FILE__ ":" XSTR(__LINE__))
+
+ void _check_stack_top(const char* msg, bool enabled = true) {
+ if (enabled) {
+ Label L;
+ cmp(SP, Rstack_top);
+ b(L, ls);
+ stop(msg);
+ bind(L);
+ }
+ }
+
+ void _check_extended_sp(Register tmp, const char* msg) {
+ Label L;
+ ldr(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
+ cmp(SP, tmp);
+ b(L, eq);
+ stop(msg);
+ bind(L);
+ }
+
+ void _check_no_cached_stack_top(Register tmp, const char* msg) {
+ Label L;
+ ldr(tmp, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
+ cbz(tmp, L);
+ stop(msg);
+ bind(L);
+ }
+
+#else
+
+ inline void check_stack_top() {}
+ inline void check_stack_top_on_expansion() {}
+ inline void check_extended_sp(Register tmp) {}
+ inline void check_no_cached_stack_top(Register tmp) {}
+
+#endif // AARCH64 && ASSERT
+
+ void save_bcp() { str(Rbcp, Address(FP, frame::interpreter_frame_bcp_offset * wordSize)); }
+ void restore_bcp() { ldr(Rbcp, Address(FP, frame::interpreter_frame_bcp_offset * wordSize)); }
+ void restore_locals() { ldr(Rlocals, Address(FP, frame::interpreter_frame_locals_offset * wordSize)); }
+ void restore_method() { ldr(Rmethod, Address(FP, frame::interpreter_frame_method_offset * wordSize)); }
+ void restore_dispatch();
+
+#ifdef AARCH64
+ void save_stack_top() { check_stack_top(); str(Rstack_top, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); }
+ void clear_cached_stack_top() { str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); }
+ void restore_stack_top() { ldr(Rstack_top, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); clear_cached_stack_top(); check_stack_top(); }
+ void cut_sp_before_call() { align_reg(SP, Rstack_top, StackAlignmentInBytes); }
+ void restore_sp_after_call(Register tmp) { ldr(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); mov(SP, tmp); }
+#endif
+
+ // Helpers for runtime call arguments/results
+ void get_const(Register reg) { ldr(reg, Address(Rmethod, Method::const_offset())); }
+ void get_constant_pool(Register reg) { get_const(reg); ldr(reg, Address(reg, ConstMethod::constants_offset())); }
+ void get_constant_pool_cache(Register reg) { get_constant_pool(reg); ldr(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); }
+ void get_cpool_and_tags(Register cpool, Register tags) { get_constant_pool(cpool); ldr(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); }
+
+ // Sets reg. Blows Rtemp.
+ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
+
+ // Sets index. Blows reg_tmp.
+ void get_index_at_bcp(Register index, int bcp_offset, Register reg_tmp, size_t index_size = sizeof(u2));
+ // Sets cache, index.
+ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
+ // Sets cache. Blows reg_tmp.
+ void get_cache_entry_pointer_at_bcp(Register cache, Register reg_tmp, int bcp_offset, size_t index_size = sizeof(u2));
+
+ // Load object from cpool->resolved_references(*bcp+1)
+ void load_resolved_reference_at_index(Register result, Register tmp);
+
+ void store_check_part1(Register card_table_base); // Sets card_table_base register.
+ void store_check_part2(Register obj, Register card_table_base, Register tmp);
+
+ void set_card(Register card_table_base, Address card_table_addr, Register tmp);
+
+#if INCLUDE_ALL_GCS
+ // G1 pre-barrier.
+ // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+ // If store_addr != noreg, then previous value is loaded from [store_addr];
+ // in such case store_addr and new_val registers are preserved;
+ // otherwise pre_val register is preserved.
+ void g1_write_barrier_pre(Register store_addr,
+ Register new_val,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2);
+
+ // G1 post-barrier.
+ // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+ void g1_write_barrier_post(Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3);
+#endif // INCLUDE_ALL_GCS
+
+ void pop_ptr(Register r);
+ void pop_i(Register r = R0_tos);
+#ifdef AARCH64
+ void pop_l(Register r = R0_tos);
+#else
+ void pop_l(Register lo = R0_tos_lo, Register hi = R1_tos_hi);
+#endif
+ void pop_f(FloatRegister fd);
+ void pop_d(FloatRegister fd);
+
+ void push_ptr(Register r);
+ void push_i(Register r = R0_tos);
+#ifdef AARCH64
+ void push_l(Register r = R0_tos);
+#else
+ void push_l(Register lo = R0_tos_lo, Register hi = R1_tos_hi);
+#endif
+ void push_f();
+ void push_d();
+
+ // Transition vtos -> state. Blows R0, R1. Sets TOS cached value.
+ void pop(TosState state);
+ // Transition state -> vtos. Blows Rtemp.
+ void push(TosState state);
+
+#ifndef AARCH64
+ // The following methods are overridden to allow overloaded calls to
+ // MacroAssembler::push/pop(Register)
+ // MacroAssembler::push/pop(RegisterSet)
+ // InterpreterMacroAssembler::push/pop(TosState)
+ void push(Register rd, AsmCondition cond = al) { MacroAssembler::push(rd, cond); }
+ void pop(Register rd, AsmCondition cond = al) { MacroAssembler::pop(rd, cond); }
+
+ void push(RegisterSet reg_set, AsmCondition cond = al) { MacroAssembler::push(reg_set, cond); }
+ void pop(RegisterSet reg_set, AsmCondition cond = al) { MacroAssembler::pop(reg_set, cond); }
+
+ // Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value.
+ void convert_retval_to_tos(TosState state);
+ // Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions).
+ void convert_tos_to_retval(TosState state);
+#endif
+
+ // JVMTI ForceEarlyReturn support
+ void load_earlyret_value(TosState state);
+
+ void jump_to_entry(address entry);
+
+ // Blows Rtemp.
+ void empty_expression_stack() {
+ ldr(Rstack_top, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+ check_stack_top();
+#ifdef AARCH64
+ clear_cached_stack_top();
+#else
+ // NULL last_sp until next java call
+ str(zero_register(Rtemp), Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // AARCH64
+ }
+
+ // Helpers for swap and dup
+ void load_ptr(int n, Register val);
+ void store_ptr(int n, Register val);
+
+ // Generate a subtype check: branch to not_subtype if sub_klass is
+ // not a subtype of super_klass.
+ // Profiling code for the subtype check failure (profile_typecheck_failed)
+ // should be explicitly generated by the caller in the not_subtype case.
+ // Blows Rtemp, tmp1, tmp2.
+ void gen_subtype_check(Register Rsub_klass, Register Rsuper_klass,
+ Label &not_subtype, Register tmp1, Register tmp2);
+
+ // Dispatching
+ void dispatch_prolog(TosState state, int step = 0);
+ void dispatch_epilog(TosState state, int step = 0);
+ void dispatch_only(TosState state); // dispatch by R3_bytecode
+ void dispatch_only_normal(TosState state); // dispatch normal table by R3_bytecode
+ void dispatch_only_noverify(TosState state);
+ void dispatch_next(TosState state, int step = 0); // load R3_bytecode from [Rbcp + step] and dispatch by R3_bytecode
+
+ // jump to an invoked target
+ void prepare_to_jump_from_interpreted();
+ void jump_from_interpreted(Register method);
+
+ void narrow(Register result);
+
+ // Returning from interpreted functions
+ //
+ // Removes the current activation (incl. unlocking of monitors)
+ // and sets up the return address. This code is also used for
+ // exception unwinding. In that case, we do not want to throw
+ // IllegalMonitorStateExceptions, since that might get us into an
+ // infinite rethrow exception loop.
+ // Additionally this code is used for popFrame and earlyReturn.
+ // In popFrame case we want to skip throwing an exception,
+ // installing an exception, and notifying JVMTI.
+ // In earlyReturn case we only want to skip throwing an exception
+ // and installing an exception.
+ void remove_activation(TosState state, Register ret_addr,
+ bool throw_monitor_exception = true,
+ bool install_monitor_exception = true,
+ bool notify_jvmdi = true);
+
+ // At certain points in the method invocation the monitor of
+ // synchronized methods hasn't been entered yet.
+ // To correctly handle exceptions at these points, we set the thread local
+ // variable _do_not_unlock_if_synchronized to true. The remove_activation will
+ // check this flag.
+ void set_do_not_unlock_if_synchronized(bool flag, Register tmp);
+
+ // Debugging
+ void interp_verify_oop(Register reg, TosState state, const char* file, int line); // only if +VerifyOops && state == atos
+
+ void verify_FPU(int stack_depth, TosState state = ftos) {
+ // No VFP state verification is required for ARM
+ }
+
+ // Object locking
+ void lock_object (Register lock_reg);
+ void unlock_object(Register lock_reg);
+
+ // Interpreter profiling operations
+ void set_method_data_pointer_for_bcp(); // Blows R0-R3/R0-R18, Rtemp, LR
+ void test_method_data_pointer(Register mdp, Label& zero_continue);
+ void verify_method_data_pointer();
+
+ void set_mdp_data_at(Register mdp_in, int offset, Register value);
+
+ // Increments mdp data. Sets bumped_count register to adjusted counter.
+ void increment_mdp_data_at(Address data, Register bumped_count, bool decrement = false);
+ // Increments mdp data. Sets bumped_count register to adjusted counter.
+ void increment_mdp_data_at(Register mdp_in, int offset, Register bumped_count, bool decrement = false);
+ void increment_mask_and_jump(Address counter_addr,
+ int increment, Address mask_addr,
+ Register scratch, Register scratch2,
+ AsmCondition cond, Label* where);
+ void set_mdp_flag_at(Register mdp_in, int flag_constant);
+
+ void test_mdp_data_at(Register mdp_in, int offset, Register value,
+ Register test_value_out,
+ Label& not_equal_continue);
+
+ void record_klass_in_profile(Register receiver, Register mdp,
+ Register reg_tmp, bool is_virtual_call);
+ void record_klass_in_profile_helper(Register receiver, Register mdp,
+ Register reg_tmp,
+ int start_row, Label& done, bool is_virtual_call);
+
+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset, Register reg_tmp);
+ void update_mdp_by_offset(Register mdp_in, Register reg_offset, Register reg_tmp);
+ void update_mdp_by_constant(Register mdp_in, int constant);
+ void update_mdp_for_ret(Register return_bci); // Blows R0-R3/R0-R18, Rtemp, LR
+
+ void profile_taken_branch(Register mdp, Register bumped_count); // Sets mdp, bumped_count registers, blows Rtemp.
+ void profile_not_taken_branch(Register mdp); // Sets mdp, blows Rtemp.
+
+ void profile_call(Register mdp); // Sets mdp, blows Rtemp.
+ void profile_final_call(Register mdp); // Sets mdp, blows Rtemp.
+ void profile_virtual_call(Register mdp, Register receiver, // Sets mdp, blows Rtemp.
+ bool receiver_can_be_null = false);
+ void profile_ret(Register mdp, Register return_bci); // Sets mdp, blows R0-R3/R0-R18, Rtemp, LR
+ void profile_null_seen(Register mdp); // Sets mdp.
+ void profile_typecheck(Register mdp, Register klass); // Sets mdp, blows Rtemp.
+
+ void profile_typecheck_failed(Register mdp); // Sets mdp, blows Rtemp.
+ void profile_switch_default(Register mdp); // Sets mdp, blows Rtemp.
+
+ // Sets mdp. Blows reg_tmp1, reg_tmp2. Index could be the same as reg_tmp2.
+ void profile_switch_case(Register mdp, Register index, Register reg_tmp1, Register reg_tmp2);
+
+ void byteswap_u32(Register r, Register rtmp1, Register rtmp2);
+
+ void inc_global_counter(address address_of_counter, int offset_in_bytes, Register tmp1, Register tmp2, bool avoid_overflow);
+
+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
+
+ // support for jvmti
+ void notify_method_entry();
+ void notify_method_exit(TosState state, NotifyMethodExitMode mode,
+ bool native = false, Register result_lo = noreg, Register result_hi = noreg, FloatRegister result_fp = fnoreg);
+
+ void trace_state(const char* msg) PRODUCT_RETURN;
+
+ void get_method_counters(Register method, Register Rcounters, Label& skip);
+};
+
+#endif // CPU_ARM_VM_INTERP_MASM_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/interpreterRT_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,449 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/signature.hpp"
+
+#define __ _masm->
+
+#ifdef SHARING_FAST_NATIVE_FINGERPRINTS
+// Maps each SignatureIterator parameter kind (table index = kind - bool_parm) to the (common) kind it is parsed as
+static const u1 shared_type[] = {
+ (u1) SignatureIterator::int_parm, // bool
+ (u1) SignatureIterator::int_parm, // byte
+ (u1) SignatureIterator::int_parm, // char
+ (u1) SignatureIterator::int_parm, // short
+ (u1) SignatureIterator::int_parm, // int
+ (u1) SignatureIterator::long_parm, // long
+#ifndef __ABI_HARD__
+ (u1) SignatureIterator::int_parm, // float, passed as int
+ (u1) SignatureIterator::long_parm, // double, passed as long
+#else
+ (u1) SignatureIterator::float_parm, // float, keeps its own kind
+ (u1) SignatureIterator::double_parm, // double, keeps its own kind
+#endif
+ (u1) SignatureIterator::obj_parm, // obj
+ (u1) SignatureIterator::done_parm // done
+};
+
+uint64_t InterpreterRuntime::normalize_fast_native_fingerprint(uint64_t fingerprint) {
+  // UCONST64(-1) is the special fingerprint used when the argument list
+  // cannot be encoded in a 64 bits value; it is handed back untouched.
+  if (fingerprint == UCONST64(-1)) return fingerprint;
+
+  // 'remaining' holds the not yet consumed input fields (lowest field first);
+  // 'pos' is the bit position of the next field written to 'normalized'.
+  uint64_t remaining = fingerprint;
+  int pos = SignatureIterator::static_feature_size;
+  uint64_t normalized = remaining & ((1 << pos) - 1); // static feature, copied as is
+  remaining >>= pos;
+
+  // For ARM, the fast signature handler only needs to know whether the
+  // return value must be unboxed, so T_OBJECT and T_ARRAY both become
+  // T_OBJECT and every other return type becomes T_INT.
+  const BasicType raw_ret = (BasicType) (remaining & SignatureIterator::result_feature_mask);
+  const BasicType ret_type = ((raw_ret == T_OBJECT) || (raw_ret == T_ARRAY)) ? T_OBJECT : T_INT;
+  normalized |= ((uint64_t) ret_type) << pos;
+  pos += SignatureIterator::result_feature_size;
+  remaining >>= SignatureIterator::result_feature_size;
+
+  // Replace every parameter kind by its shared kind, up to the terminator.
+  for (;;) {
+    const uint32_t type = (uint32_t) (remaining & SignatureIterator::parameter_feature_mask);
+    if (type == SignatureIterator::done_parm) {
+      break;
+    }
+    assert((type >= SignatureIterator::bool_parm) && (type <= SignatureIterator::obj_parm), "check fingerprint encoding");
+    const int shared = shared_type[type - SignatureIterator::bool_parm];
+    normalized |= ((uint64_t) shared) << pos;
+    pos += SignatureIterator::parameter_feature_size;
+    remaining >>= SignatureIterator::parameter_feature_size;
+  }
+
+  normalized |= ((uint64_t) SignatureIterator::done_parm) << pos;
+  return normalized;
+}
+#endif // SHARING_FAST_NATIVE_FINGERPRINTS
+
+// Implementation of SignatureHandlerGenerator
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { // Emits code passing the current Java int argument to the native callee.
+ if (_ireg < GPR_PARAMS) { // an integer argument register is still free
+ Register dst = as_Register(_ireg);
+ __ ldr_s32(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); // load the local directly into that register
+ _ireg++;
+ } else { // registers used up: copy the local into the next outgoing stack word
+ __ ldr_s32(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ str_32(Rtemp, Address(SP, _abi_offset * wordSize));
+ _abi_offset++;
+ }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { // Emits code passing the current Java long argument to the native callee.
+#ifdef AARCH64
+ if (_ireg < GPR_PARAMS) { // one register takes the whole value
+ Register dst = as_Register(_ireg);
+ __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); // the value is read at local slot offset()+1
+ _ireg++;
+ } else { // registers used up: one outgoing stack word
+ __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1)));
+ __ str(Rtemp, Address(SP, _abi_offset * wordSize));
+ _abi_offset++;
+ }
+#else
+ if (_ireg <= 2) { // a pair of argument registers may still be available
+#if (ALIGN_WIDE_ARGUMENTS == 1)
+ if ((_ireg & 1) != 0) {
+ // 64-bit values should be 8-byte aligned: skip the odd register so the pair starts on an even one
+ _ireg++;
+ }
+#endif
+ Register dst1 = as_Register(_ireg);
+ Register dst2 = as_Register(_ireg+1);
+ __ ldr(dst1, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1))); // first register of the pair <- local slot offset()+1
+ __ ldr(dst2, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); // second register of the pair <- local slot offset()
+ _ireg += 2;
+#if (ALIGN_WIDE_ARGUMENTS == 0)
+ } else if (_ireg == 3) {
+ // uses R3 + one stack slot (only when wide arguments need no alignment)
+ Register dst1 = as_Register(_ireg);
+ __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ ldr(dst1, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1)));
+ __ str(Rtemp, Address(SP, _abi_offset * wordSize));
+ _ireg += 1;
+ _abi_offset += 1;
+#endif
+ } else { // no register is used for any part of the value: two outgoing stack words
+#if (ALIGN_WIDE_ARGUMENTS == 1)
+ if(_abi_offset & 1) _abi_offset++; // keep the stack copy on an even word index
+#endif
+ __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1)));
+ __ str(Rtemp, Address(SP, (_abi_offset) * wordSize));
+ __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ str(Rtemp, Address(SP, (_abi_offset+1) * wordSize));
+ _abi_offset += 2;
+ _ireg = 4; // makes the "_ireg < 4" tests used for later arguments fail, so they go to the stack as well
+ }
+#endif // AARCH64
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { // Emits code passing the current object argument: 0 if the local holds 0, otherwise the address of the local slot.
+#ifdef AARCH64
+ __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ cmp(Rtemp, 0); // is the value held in the local 0?
+ __ sub(Rtemp, Rlocals, -Interpreter::local_offset_in_bytes(offset())); // Rtemp = address of the local slot (subtracting the negated offset adds it)
+ if (_ireg < GPR_PARAMS) {
+ Register dst = as_Register(_ireg);
+ __ csel(dst, ZR, Rtemp, eq); // presumably ZR when the compare found 0, else the slot address (AArch64 CSEL operand order assumed)
+ _ireg++;
+ } else { // registers used up: the selected value goes to the next outgoing stack word
+ __ csel(Rtemp, ZR, Rtemp, eq);
+ __ str(Rtemp, Address(SP, _abi_offset * wordSize));
+ _abi_offset++;
+ }
+#else
+ if (_ireg < 4) {
+ Register dst = as_Register(_ireg);
+ __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ cmp(dst, 0);
+ __ sub(dst, Rlocals, -Interpreter::local_offset_in_bytes(offset()), ne); // conditional on ne: a non-zero value is replaced by the slot address, 0 is kept
+ _ireg++;
+ } else { // registers used up: same selection done in Rtemp, then stored to the next outgoing stack word
+ __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ cmp(Rtemp, 0);
+ __ sub(Rtemp, Rlocals, -Interpreter::local_offset_in_bytes(offset()), ne);
+ __ str(Rtemp, Address(SP, _abi_offset * wordSize));
+ _abi_offset++;
+ }
+#endif // AARCH64
+}
+
+#ifndef __ABI_HARD__
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { // Variant built when __ABI_HARD__ is not defined: the float travels through the integer argument path.
+ if (_ireg < 4) {
+ Register dst = as_Register(_ireg);
+ __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); // the value of the local is loaded into an integer register
+ _ireg++;
+ } else { // registers used up: copy the local into the next outgoing stack word
+ __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ str(Rtemp, Address(SP, _abi_offset * wordSize));
+ _abi_offset++;
+ }
+}
+
+#else
+#ifndef __SOFTFP__
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { // Variant built with __ABI_HARD__ and without __SOFTFP__: floats use FP registers while any are free.
+#ifdef AARCH64
+ if (_freg < FPR_PARAMS) {
+ FloatRegister dst = as_FloatRegister(_freg);
+ __ ldr_s(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ _freg++;
+ } else { // FP argument registers used up: copy the local into the next outgoing stack word
+ __ ldr_u32(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ str_32(Rtemp, Address(SP, _abi_offset * wordSize));
+ _abi_offset++;
+ }
+#else
+ if((_fp_slot < 16) || (_single_fpr_slot & 1)) { // an untouched register pair remains, or the second half of an earlier pair is still free
+ if ((_single_fpr_slot & 1) == 0) { // no half-used pair: open a new pair at _fp_slot
+ _single_fpr_slot = _fp_slot;
+ _fp_slot += 2;
+ }
+ __ flds(as_FloatRegister(_single_fpr_slot), Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ _single_fpr_slot++; // odd afterwards: second half of the pair is free; even afterwards: the pair is full
+ } else { // no FP register available: copy the local into the next outgoing stack word
+ __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+ __ str(Rtemp, Address(SP, _abi_offset * wordSize));
+ _abi_offset++;
+ }
+#endif // AARCH64
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { // Emits code passing the current Java double: FP register if one is free, else the outgoing stack.
+#ifdef AARCH64
+  if (_freg < FPR_PARAMS) {
+    __ ldr_d(as_FloatRegister(_freg), Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); // value is read at local slot offset()+1
+    _freg++;
+  } else { // FP argument registers used up: one outgoing stack word
+    __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1)));
+    __ str(Rtemp, Address(SP, _abi_offset * wordSize));
+    _abi_offset++;
+  }
+#else
+  if(_fp_slot <= 14) { // an untouched pair of single-precision registers is left for the double
+    __ fldd(as_FloatRegister(_fp_slot), Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1)));
+    _fp_slot += 2;
+  } else {
+    if (_abi_offset & 1) _abi_offset++; // 64-bit values should be 8-byte aligned, as in pass_long and SlowSignatureHandler::pass_double
+    __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1)));
+    __ str(Rtemp, Address(SP, (_abi_offset) * wordSize));
+    __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
+    __ str(Rtemp, Address(SP, (_abi_offset+1) * wordSize));
+    _abi_offset += 2;
+    _single_fpr_slot = 16; // even value: a later float can no longer back-fill a half-used register pair
+  }
+#endif // AARCH64
+}
+#endif // __SOFTFP__
+#endif // __ABI_HARD__
+
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { // Emits the complete signature handler for 'fingerprint'.
+ iterate(fingerprint); // NOTE(review): presumably invokes the pass_* emitters above once per argument - confirm in NativeSignatureIterator
+
+ BasicType result_type = SignatureIterator::return_type(fingerprint);
+
+ address result_handler = Interpreter::result_handler(result_type);
+
+#ifdef AARCH64
+ __ mov_slow(R0, (address)result_handler);
+#else
+ // Check that result handlers are not real handlers on ARM (only 0 or -1, i.e. 0xffffffff, is accepted).
+ // This ensures the signature handlers do not need symbolic information.
+ assert((result_handler == NULL)||(result_handler==(address)0xffffffff),"");
+ __ mov_slow(R0, (intptr_t)result_handler);
+#endif
+
+ __ ret(); // the emitted handler returns with the result handler value in R0
+}
+
+
+// Implementation of SignatureHandlerLibrary
+
+void SignatureHandlerLibrary::pd_set_handler(address handler) {} // no-op in this port: the handler address is ignored
+
+class SlowSignatureHandler: public NativeSignatureIterator { // Copies the arguments of a native call from the interpreter locals into the buffer given to the constructor.
+ private:
+  address _from; // base the current argument is read from (via local_offset_in_bytes); moved down after each argument
+  intptr_t* _to; // next free word for an argument passed on the stack
+
+#ifndef __ABI_HARD__ // soft-float ABI: every argument, floating point included, is appended at _to
+  virtual void pass_int() {
+    *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    _from -= Interpreter::stackElementSize;
+  }
+
+  virtual void pass_float() { // the 32-bit pattern is copied exactly like an int
+    *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    _from -= Interpreter::stackElementSize;
+  }
+
+  virtual void pass_long() {
+#if (ALIGN_WIDE_ARGUMENTS == 1)
+    if (((intptr_t)_to & 7) != 0) {
+      // 64-bit values should be 8-byte aligned
+      _to++;
+    }
+#endif
+    _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); // first word comes from local slot 1,
+    _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); // second word from local slot 0
+    _to += 2;
+    _from -= 2*Interpreter::stackElementSize; // a long consumes two interpreter stack elements
+  }
+
+  virtual void pass_object() { // passes 0 when the local holds 0, otherwise the address of the local
+    intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
+    *_to++ = (*(intptr_t*)from_addr == 0) ? (intptr_t)NULL : from_addr;
+    _from -= Interpreter::stackElementSize;
+  }
+
+#else
+
+  intptr_t* _toFP; // FP area of the buffer (presumably loaded into the FP argument registers by the caller - confirm)
+  intptr_t* _toGP; // integer area of the buffer (presumably loaded into the integer argument registers by the caller - confirm)
+  int _last_gp; // index of the next free entry in _toGP
+  int _last_fp; // index of the next free entry in _toFP
+#ifndef AARCH64
+  int _last_single_fp; // _toFP entry for the next float; odd while the second half of a pair is still free
+#endif // !AARCH64
+
+  virtual void pass_int() {
+    if(_last_gp < GPR_PARAMS) { // integer area not full yet
+      _toGP[_last_gp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    } else { // otherwise the value goes to the stack area
+      *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    }
+    _from -= Interpreter::stackElementSize;
+  }
+
+  virtual void pass_long() {
+#ifdef AARCH64
+    if(_last_gp < GPR_PARAMS) { // a single entry holds the whole value, read at local slot 1
+      _toGP[_last_gp++] = *(jlong *)(_from+Interpreter::local_offset_in_bytes(1));
+    } else {
+      *_to++ = *(jlong *)(_from+Interpreter::local_offset_in_bytes(1));
+    }
+#else
+    assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
+    if (_last_gp <= 2) { // a pair of integer entries may still be available
+      if(_last_gp & 1) _last_gp++; // start the pair on an even entry
+      _toGP[_last_gp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(1));
+      _toGP[_last_gp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    } else {
+      if (((intptr_t)_to & 7) != 0) {
+        // 64-bit values should be 8-byte aligned
+        _to++;
+      }
+      _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
+      _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0));
+      _to += 2;
+      _last_gp = 4; // the integer area is treated as full from now on
+    }
+#endif // AARCH64
+    _from -= 2*Interpreter::stackElementSize; // a long consumes two interpreter stack elements
+  }
+
+  virtual void pass_object() { // passes 0 when the local holds 0, otherwise the address of the local
+    intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
+    if(_last_gp < GPR_PARAMS) {
+      _toGP[_last_gp++] = (*(intptr_t*)from_addr == 0) ? (intptr_t)NULL : from_addr; // cast keeps both ?: operands intptr_t, as in the soft-float variant
+    } else {
+      *_to++ = (*(intptr_t*)from_addr == 0) ? (intptr_t)NULL : from_addr;
+    }
+    _from -= Interpreter::stackElementSize;
+  }
+
+  virtual void pass_float() {
+#ifdef AARCH64
+    if(_last_fp < FPR_PARAMS) { // FP area not full yet
+      _toFP[_last_fp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    } else {
+      *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    }
+#else
+    if((_last_fp < 16) || (_last_single_fp & 1)) { // an untouched pair remains, or the second half of an earlier pair is free
+      if ((_last_single_fp & 1) == 0) { // no half-used pair: open a new one at _last_fp
+        _last_single_fp = _last_fp;
+        _last_fp += 2;
+      }
+
+      _toFP[_last_single_fp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    } else { // FP area full: the float goes to the stack area
+      *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    }
+#endif // AARCH64
+    _from -= Interpreter::stackElementSize;
+  }
+
+  virtual void pass_double() {
+#ifdef AARCH64
+    if(_last_fp < FPR_PARAMS) { // a single entry holds the whole value, read at local slot 1
+      _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
+    } else {
+      *_to++ = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
+    }
+#else
+    assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
+    if(_last_fp <= 14) { // an untouched pair of FP entries is left
+      _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
+      _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0));
+    } else {
+      if (((intptr_t)_to & 7) != 0) { // 64-bit values should be 8-byte aligned
+        _to++;
+      }
+      _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
+      _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0));
+      _to += 2;
+      _last_single_fp = 16; // even value: a later float can no longer back-fill a half-used pair
+    }
+#endif // AARCH64
+    _from -= 2*Interpreter::stackElementSize; // a double consumes two interpreter stack elements
+  }
+
+#endif // !__ABI_HARD__
+
+ public:
+  SlowSignatureHandler(methodHandle method, address from, intptr_t* to) : // 'from': where the locals are read; 'to': start of the output buffer
+    NativeSignatureIterator(method) {
+    _from = from;
+
+#ifdef __ABI_HARD__
+    _toGP = to; // buffer layout: GPR_PARAMS integer words first,
+    _toFP = _toGP + GPR_PARAMS; // then the FP area (FPR_PARAMS words on AARCH64, 8*2 words otherwise),
+    _to = _toFP + AARCH64_ONLY(FPR_PARAMS) NOT_AARCH64(8*2); // then the words for stack arguments
+    _last_gp = (is_static() ? 2 : 1); // NOTE(review): the skipped leading entries presumably belong to JNIEnv and the class/receiver - confirm
+    _last_fp = 0;
+#ifndef AARCH64
+    _last_single_fp = 0;
+#endif // !AARCH64
+#else
+    _to = to + (is_static() ? 2 : 1); // same skip of leading entries as _last_gp in the hard-float layout
+#endif // __ABI_HARD__
+  }
+};
+
+IRT_ENTRY(address, InterpreterRuntime::slow_signature_handler(JavaThread* thread, Method* method, intptr_t* from, intptr_t* to)) // Runtime entry that marshals the arguments of native 'method' from 'from' into 'to' in C++.
+ methodHandle m(thread, (Method*)method);
+ assert(m->is_native(), "sanity check");
+ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); // UCONST64(-1) is the "cannot be encoded in 64 bits" fingerprint; presumably the iterator then walks the method's signature itself - confirm
+ return Interpreter::result_handler(m->result_type()); // the caller receives the result handler for the method's return type
+IRT_END
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/interpreterRT_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_INTERPRETERRT_ARM_HPP
+#define CPU_ARM_VM_INTERPRETERRT_ARM_HPP
+
+#include "memory/allocation.hpp"
+
+// native method calls
+
+class SignatureHandlerGenerator: public NativeSignatureIterator { // Emits, through _masm, code that moves a native method's arguments from the interpreter locals to registers and stack.
+ private:
+ MacroAssembler* _masm; // assembler all pass_* emitters write to
+ int _abi_offset; // word index, relative to SP, of the next outgoing stack argument
+ int _ireg; // number of the next integer argument register to use
+
+#ifdef __ABI_HARD__
+#ifdef AARCH64
+ int _freg; // number of the next FP argument register to use
+#else
+ int _fp_slot; // number of FPR's with arguments loaded, counted in single-precision registers (advances by 2)
+ int _single_fpr_slot; // single-precision register for the next float; odd while the second half of a pair is still free
+#endif
+#endif
+
+ void move(int from_offset, int to_offset); // NOTE(review): no definition of move() or box() appears next to the pass_* definitions - confirm they are still needed
+ void box(int from_offset, int to_offset);
+
+ void pass_int(); // the pass_* members each emit the code for one argument of the named kind
+ void pass_long();
+ void pass_float();
+ void pass_object();
+#ifdef __ABI_HARD__
+ void pass_double(); // declared only for the hard-float ABI
+#endif
+ public:
+ // Creation
+ SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
+ _masm = new MacroAssembler(buffer); // code is emitted into 'buffer'
+ _abi_offset = 0;
+ _ireg = is_static() ? 2 : 1; // NOTE(review): the lower-numbered registers presumably carry JNIEnv and the class/receiver - confirm
+#ifdef __ABI_HARD__
+#ifdef AARCH64
+ _freg = 0;
+#else
+ _fp_slot = 0;
+ _single_fpr_slot = 0;
+#endif
+#endif
+ }
+
+ // Code generation: emits the handler for the given signature fingerprint
+ void generate(uint64_t fingerprint);
+
+};
+
+#ifndef AARCH64
+// ARM provides a normalized fingerprint for native calls (to increase
+// sharing). See normalize_fast_native_fingerprint
+#define SHARING_FAST_NATIVE_FINGERPRINTS
+#endif
+
+#endif // CPU_ARM_VM_INTERPRETERRT_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/javaFrameAnchor_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_JAVAFRAMEANCHOR_ARM_HPP
+#define CPU_ARM_VM_JAVAFRAMEANCHOR_ARM_HPP
+
+private:
+
+ // FP value associated with _last_Java_sp:
+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to
+
+public:
+ // Each arch must define reset, save, restore
+ // These are used by objects that only care about:
+ // 1 - initializing a new state (thread creation, javaCalls)
+ // 2 - saving a current state (javaCalls)
+ // 3 - restoring an old state (javaCalls)
+
+ void clear(void) { // Resets the anchor: sp, fp and pc of the last Java frame all become NULL.
+ // clearing _last_Java_sp must be first
+ _last_Java_sp = NULL;
+ // fence? (open question left by the author; no barrier is issued here)
+ _last_Java_fp = NULL;
+ _last_Java_pc = NULL;
+ }
+
+ void copy(JavaFrameAnchor* src) { // Copies fp, pc and sp of 'src' into this anchor; sp is written last.
+ // In order to make sure the transition state is valid for "this",
+ // we must clear _last_Java_sp before copying the rest of the new data.
+ //
+ // Hack Alert: Temporary bugfix for 4717480/4721647
+ // To act like the previous version (pd_cache_state), don't NULL _last_Java_sp
+ // unless the value is changing.
+ //
+ if (_last_Java_sp != src->_last_Java_sp)
+ _last_Java_sp = NULL;
+
+ _last_Java_fp = src->_last_Java_fp;
+ _last_Java_pc = src->_last_Java_pc;
+ // Must be last so the profiler will always see a valid frame if has_last_frame() is true
+ _last_Java_sp = src->_last_Java_sp;
+ }
+
+ // Always walkable
+ bool walkable(void) { return true; }
+ // Never anything to do since we are always walkable and can find the address of return addresses
+ void make_walkable(JavaThread* thread) { }
+
+ intptr_t* last_Java_sp(void) const { return _last_Java_sp; }
+
+ address last_Java_pc(void) { return _last_Java_pc; }
+
+private:
+
+ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } // byte offset of _last_Java_fp within JavaFrameAnchor
+
+public:
+
+ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; }
+
+ intptr_t* last_Java_fp(void) { return _last_Java_fp; }
+ // Expected precondition, not checked here: last_Java_sp == NULL || fp == NULL
+ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; }
+
+#endif // CPU_ARM_VM_JAVAFRAMEANCHOR_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/jniFastGetField_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "assembler_arm.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+#include "runtime/safepoint.hpp"
+
+#define __ masm->
+
+#define BUFFER_SIZE 96
+
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { // Generates the fast-path stub of jni Get<Type>Field for primitive 'type' and returns its entry address.
+ const char* name = NULL;
+ address slow_case_addr = NULL;
+ switch (type) { // selects the stub name and the function jumped to on the slow path
+ case T_BOOLEAN:
+ name = "jni_fast_GetBooleanField";
+ slow_case_addr = jni_GetBooleanField_addr();
+ break;
+ case T_BYTE:
+ name = "jni_fast_GetByteField";
+ slow_case_addr = jni_GetByteField_addr();
+ break;
+ case T_CHAR:
+ name = "jni_fast_GetCharField";
+ slow_case_addr = jni_GetCharField_addr();
+ break;
+ case T_SHORT:
+ name = "jni_fast_GetShortField";
+ slow_case_addr = jni_GetShortField_addr();
+ break;
+ case T_INT:
+ name = "jni_fast_GetIntField";
+ slow_case_addr = jni_GetIntField_addr();
+ break;
+ case T_LONG:
+ name = "jni_fast_GetLongField";
+ slow_case_addr = jni_GetLongField_addr();
+ break;
+ case T_FLOAT:
+ name = "jni_fast_GetFloatField";
+ slow_case_addr = jni_GetFloatField_addr();
+ break;
+ case T_DOUBLE:
+ name = "jni_fast_GetDoubleField";
+ slow_case_addr = jni_GetDoubleField_addr();
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ // Incoming arguments of the stub:
+ // R0 - jni env
+ // R1 - object handle
+ // R2 - jfieldID
+
+ const Register Rsafepoint_counter_addr = AARCH64_ONLY(R4) NOT_AARCH64(R3);
+ const Register Robj = AARCH64_ONLY(R5) NOT_AARCH64(R1); // on 32-bit ARM the object handle register is reused for the object itself
+ const Register Rres = AARCH64_ONLY(R6) NOT_AARCH64(R0); // on 32-bit ARM the result is produced directly in R0
+#ifndef AARCH64
+ const Register Rres_hi = R1; // second register of a 64-bit result
+#endif // !AARCH64
+ const Register Rsafept_cnt = Rtemp; // first read of the safepoint counter
+ const Register Rsafept_cnt2 = Rsafepoint_counter_addr; // second read of the counter, overwrites its own address register
+ const Register Rtmp1 = AARCH64_ONLY(R7) NOT_AARCH64(R3); // same as Rsafepoint_counter_addr on 32-bit ARM
+ const Register Rtmp2 = AARCH64_ONLY(R8) NOT_AARCH64(R2); // same as jfieldID on 32-bit ARM
+
+#ifdef AARCH64
+ assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Robj, Rres, Rtmp1, Rtmp2, R0, R1, R2, LR);
+ assert_different_registers(Rsafept_cnt2, Rsafept_cnt, Rres, R0, R1, R2, LR);
+#else
+ assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Robj, Rres, LR);
+ assert_different_registers(Rsafept_cnt, R1, R2, Rtmp1, LR);
+ assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Rres, Rres_hi, Rtmp2, LR);
+ assert_different_registers(Rsafept_cnt2, Rsafept_cnt, Rres, Rres_hi, LR);
+#endif // AARCH64
+
+ address fast_entry;
+
+ ResourceMark rm;
+ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); // NOTE(review): the result is used without a check - confirm BufferBlob::create cannot fail here
+ CodeBuffer cbuf(blob);
+ MacroAssembler* masm = new MacroAssembler(&cbuf);
+ fast_entry = __ pc(); // the stub starts at the current emission position
+
+ // Safepoint check
+ InlinedAddress safepoint_counter_addr(SafepointSynchronize::safepoint_counter_addr());
+ Label slow_case;
+ __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr); // the literal itself is bound at the end of the stub
+
+#ifndef AARCH64
+ __ push(RegisterSet(R0, R3)); // save incoming arguments for slow case
+#endif // !AARCH64
+
+ __ ldr_s32(Rsafept_cnt, Address(Rsafepoint_counter_addr)); // first read of the safepoint counter
+ __ tbnz(Rsafept_cnt, 0, slow_case); // bit 0 of the counter set: take the slow path
+
+ if (os::is_MP()) {
+ // Address dependency restricts memory access ordering. It's cheaper than explicit LoadLoad barrier
+ __ andr(Rtmp1, Rsafept_cnt, (unsigned)1); // bit 0 was just tested clear, so Rtmp1 is 0 but derived from the counter load
+ __ ldr(Robj, Address(R1, Rtmp1)); // Robj = contents of the object handle
+ } else {
+ __ ldr(Robj, Address(R1));
+ }
+
+#ifdef AARCH64
+ __ add(Robj, Robj, AsmOperand(R2, lsr, 2)); // field address = object + (jfieldID >> 2)
+ Address field_addr = Address(Robj);
+#else
+ Address field_addr;
+ if (type != T_BOOLEAN
+ && type != T_INT
+#ifndef __ABI_HARD__
+ && type != T_FLOAT
+#endif // !__ABI_HARD__
+ ) {
+ // Only ldr and ldrb support embedded shift, other loads do not
+ __ add(Robj, Robj, AsmOperand(R2, lsr, 2));
+ field_addr = Address(Robj);
+ } else {
+ field_addr = Address(Robj, R2, lsr, 2); // the shift of the jfieldID is folded into the load's address
+ }
+#endif // AARCH64
+ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
+ speculative_load_pclist[count] = __ pc(); // records the pc of the field load emitted next
+
+ switch (type) { // the field load itself, by field type
+ case T_BOOLEAN:
+ __ ldrb(Rres, field_addr);
+ break;
+ case T_BYTE:
+ __ ldrsb(Rres, field_addr);
+ break;
+ case T_CHAR:
+ __ ldrh(Rres, field_addr);
+ break;
+ case T_SHORT:
+ __ ldrsh(Rres, field_addr);
+ break;
+ case T_INT:
+#ifndef __ABI_HARD__
+ case T_FLOAT:
+#endif
+ __ ldr_s32(Rres, field_addr);
+ break;
+ case T_LONG:
+#ifndef __ABI_HARD__
+ case T_DOUBLE:
+#endif
+#ifdef AARCH64
+ __ ldr(Rres, field_addr);
+#else
+ // Safe to use ldrd since long and double fields are 8-byte aligned
+ __ ldrd(Rres, field_addr);
+#endif // AARCH64
+ break;
+#ifdef __ABI_HARD__
+ case T_FLOAT:
+ __ ldr_float(S0, field_addr); // hard-float ABI: the result is produced in S0
+ break;
+ case T_DOUBLE:
+ __ ldr_double(D0, field_addr); // hard-float ABI: the result is produced in D0
+ break;
+#endif // __ABI_HARD__
+ default:
+ ShouldNotReachHere();
+ }
+
+ if(os::is_MP()) {
+ // Address dependency restricts memory access ordering. It's cheaper than explicit LoadLoad barrier
+#if defined(__ABI_HARD__) && !defined(AARCH64)
+ if (type == T_FLOAT || type == T_DOUBLE) {
+ __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr);
+ __ fmrrd(Rres, Rres_hi, D0); // copy the loaded FP value to core registers so that the eor below depends on it
+ __ eor(Rtmp2, Rres, Rres);
+ __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr, Rtmp2));
+ } else
+#endif // __ABI_HARD__ && !AARCH64
+ {
+#ifndef AARCH64
+ __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr); // reload: Rtmp1 shares R3 with this register on 32-bit ARM and was written by the andr above
+#endif // !AARCH64
+ __ eor(Rtmp2, Rres, Rres); // always 0, but derived from the loaded value. NOTE(review): if __ABI_HARD__ can be defined together with AARCH64, T_FLOAT/T_DOUBLE get here with Rres not written by the field load - confirm
+ __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr, Rtmp2)); // second read of the safepoint counter
+ }
+ } else {
+ __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr));
+ }
+ __ cmp(Rsafept_cnt2, Rsafept_cnt); // did the counter change while the field was read?
+#ifdef AARCH64
+ __ b(slow_case, ne);
+ __ mov(R0, Rres);
+ __ ret();
+#else
+ // discards saved R0 R1 R2 R3
+ __ add(SP, SP, 4 * wordSize, eq);
+ __ bx(LR, eq); // counter unchanged: return; otherwise execution continues into the slow case below
+#endif // AARCH64
+
+ slowcase_entry_pclist[count++] = __ pc(); // records where the slow case of this stub starts
+
+ __ bind(slow_case);
+#ifndef AARCH64
+ __ pop(RegisterSet(R0, R3)); // restore the registers pushed at the start of the stub
+#endif // !AARCH64
+ // thumb mode switch handled by MacroAssembler::jump if needed
+ __ jump(slow_case_addr, relocInfo::none, Rtemp);
+
+ __ bind_literal(safepoint_counter_addr);
+
+ __ flush();
+
+ guarantee((__ pc() - fast_entry) <= BUFFER_SIZE, "BUFFER_SIZE too small");
+
+ return fast_entry;
+}
+
+address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { // Must not be called in this port: the float and double getters in this file use generate_fast_get_int_field0.
+ ShouldNotReachHere();
+ return NULL; // only reached if ShouldNotReachHere() returns
+}
+
+address JNI_FastGetField::generate_fast_get_boolean_field() { // Each generator below forwards to the common generator with its field type.
+ return generate_fast_get_int_field0(T_BOOLEAN);
+}
+
+address JNI_FastGetField::generate_fast_get_byte_field() {
+ return generate_fast_get_int_field0(T_BYTE);
+}
+
+address JNI_FastGetField::generate_fast_get_char_field() {
+ return generate_fast_get_int_field0(T_CHAR);
+}
+
+address JNI_FastGetField::generate_fast_get_short_field() {
+ return generate_fast_get_int_field0(T_SHORT);
+}
+
+address JNI_FastGetField::generate_fast_get_int_field() {
+ return generate_fast_get_int_field0(T_INT);
+}
+
+address JNI_FastGetField::generate_fast_get_long_field() {
+ return generate_fast_get_int_field0(T_LONG);
+}
+
+address JNI_FastGetField::generate_fast_get_float_field() {
+ return generate_fast_get_int_field0(T_FLOAT); // floats also use the common generator, not generate_fast_get_float_field0
+}
+
+address JNI_FastGetField::generate_fast_get_double_field() {
+ return generate_fast_get_int_field0(T_DOUBLE); // doubles also use the common generator, not generate_fast_get_float_field0
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/jniTypes_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_JNITYPES_ARM_HPP
+#define CPU_ARM_VM_JNITYPES_ARM_HPP
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+#include "prims/jni.h"
+
+// This file holds platform-dependent routines used to write primitive jni
+// types to the array of arguments passed into JavaCalls::call
+
+class JNITypes : AllStatic {
+ // These functions write a java primitive type (in native format)
+ // to a java stack slot array to be passed as an argument to JavaCalls::call.
+ // I.e., they are functionally 'push' operations if they have a 'pos'
+ // formal parameter. Note that jlong's and jdouble's are written
+ // _in reverse_ of the order in which they appear in the interpreter
+ // stack. This is because call stubs (see stubGenerator_arm.cpp)
+ // reverse the argument list constructed by JavaCallArguments (see
+ // javaCalls.hpp).
+
+private:
+
+#ifndef AARCH64
+ // 32-bit helper routines: store the two jint words found at 'from' into two slots, in swapped order.
+ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; // to[0] <- from[1]
+ *(jint *)(to ) = from[0]; } // to[1] <- from[0]
+ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } // same, at slots pos and pos+1; advances pos by two
+#endif
+
+public:
+ // Ints are stored in native format in one JavaCallArgument slot at *to.
+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; }
+ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; }
+ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
+
+#ifdef AARCH64
+ // Longs are stored in native format in one JavaCallArgument slot at *(to+1); pos still advances by two slots.
+ static inline void put_long(jlong from, intptr_t *to) { *(jlong *)(to + 1 + 0) = from; }
+ static inline void put_long(jlong from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = from; pos += 2; }
+ static inline void put_long(jlong *from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = *from; pos += 2; }
+#else
+ // Longs are stored in big-endian word format in two JavaCallArgument slots at *to.
+ // The high half is in *to and the low half in *(to+1).
+ static inline void put_long(jlong from, intptr_t *to) { put_int2r((jint *)&from, to); }
+ static inline void put_long(jlong from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); }
+ static inline void put_long(jlong *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); }
+#endif
+
+ // Oops are stored in native format in one JavaCallArgument slot at *to.
+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; }
+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; }
+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
+
+ // Floats are stored in native format in one JavaCallArgument slot at *to.
+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; }
+ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; }
+ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
+
+#ifdef AARCH64
+ // Doubles are stored in native word format in one JavaCallArgument slot at *(to+1); pos still advances by two slots.
+ static inline void put_double(jdouble from, intptr_t *to) { *(jdouble *)(to + 1 + 0) = from; }
+ static inline void put_double(jdouble from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = from; pos += 2; }
+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = *from; pos += 2; }
+#else
+ // Doubles are stored in big-endian word format in two JavaCallArgument slots at *to.
+ // The high half is in *to and the low half in *(to+1).
+ static inline void put_double(jdouble from, intptr_t *to) { put_int2r((jint *)&from, to); }
+ static inline void put_double(jdouble from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); }
+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); }
+#endif
+
+};
+
+#endif // CPU_ARM_VM_JNITYPES_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/jni_arm.h Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef _JAVASOFT_JNI_MD_H_
+#define _JAVASOFT_JNI_MD_H_
+
+// Note: please do not change these definitions without also changing jni_md.h
+// in the JDK repository; the two copies are expected to stay in agreement.
+#ifndef __has_attribute // fallback when the compiler lacks __has_attribute: every query evaluates to 0
+ #define __has_attribute(x) 0
+#endif
+#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility)
+ #define JNIEXPORT __attribute__((externally_visible,visibility("default")))
+ #define JNIIMPORT __attribute__((externally_visible,visibility("default")))
+#else // neither GCC newer than 4.2 nor a compiler reporting the visibility attribute: expand to nothing
+ #define JNIEXPORT
+ #define JNIIMPORT
+#endif
+
+#define JNICALL
+
+typedef int jint;
+#if defined(_LP64) // jlong is 'long' when _LP64 is defined, 'long long' otherwise
+ typedef long jlong;
+#else
+ typedef long long jlong;
+#endif
+typedef signed char jbyte;
+
+#endif /* !_JAVASOFT_JNI_MD_H_ */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/jvmciCodeInstaller_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "jvmci/jvmciCodeInstaller.hpp"
+#include "jvmci/jvmciRuntime.hpp"
+#include "jvmci/jvmciCompilerToVM.hpp"
+#include "jvmci/jvmciJavaClasses.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_arm.inline.hpp"
+
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
+ Unimplemented(); // stub: this file provides no real implementation for this hook
+ return 0; // placeholder result following Unimplemented()
+}
+
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
+ Unimplemented(); // stub: oop constant patching is not implemented in this file
+}
+
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
+ Unimplemented(); // stub: metaspace constant patching is not implemented in this file
+}
+
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
+ Unimplemented(); // stub: data section reference patching is not implemented in this file
+}
+
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
+ Unimplemented(); // stub: foreign call relocation is not implemented in this file
+}
+
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
+ Unimplemented(); // stub: Java method call relocation is not implemented in this file
+}
+
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
+ Unimplemented(); // stub: poll relocation is not implemented in this file
+}
+
+// convert JVMCI register indices (as used in oop maps) to HotSpot registers
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
+ return NULL; // stub: no mapping is performed here, every index yields NULL
+}
+
+bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
+ return false; // stub: the argument is ignored and no register is reported as general purpose
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/macroAssembler_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,3120 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "ci/ciEnv.hpp"
+#include "code/nativeInst.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif
+
+// Implementation of AddressLiteral
+
+void AddressLiteral::set_rspec(relocInfo::relocType rtype) { // picks the relocation spec (_rspec) matching rtype
+ switch (rtype) {
+ case relocInfo::oop_type:
+ // Oops are a special case. Normally they would be their own section
+ // but in cases like icBuffer they are literals in the code stream that
+ // we don't have a section for. We use none so that we get a literal address
+ // which is always patchable.
+ break; // _rspec is deliberately left untouched for oops
+ case relocInfo::external_word_type:
+ _rspec = external_word_Relocation::spec(_target);
+ break;
+ case relocInfo::internal_word_type:
+ _rspec = internal_word_Relocation::spec(_target);
+ break;
+ case relocInfo::opt_virtual_call_type:
+ _rspec = opt_virtual_call_Relocation::spec();
+ break;
+ case relocInfo::static_call_type:
+ _rspec = static_call_Relocation::spec();
+ break;
+ case relocInfo::runtime_call_type:
+ _rspec = runtime_call_Relocation::spec();
+ break;
+ case relocInfo::poll_type: // both poll flavours share the simple spec built from rtype itself
+ case relocInfo::poll_return_type:
+ _rspec = Relocation::spec_simple(rtype);
+ break;
+ case relocInfo::none:
+ break; // nothing to record
+ default:
+ ShouldNotReachHere(); // any other relocation type is not expected by this function
+ break;
+ }
+}
+
+// Initially added to the Assembler interface as a pure virtual:
+// RegisterConstant delayed_value(..)
+// for:
+// 6812678 macro assembler needs delayed binding of a few constants (for 6655638)
+// this was subsequently modified to its present name and return type.
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+ Register tmp,
+ int offset) {
+ ShouldNotReachHere(); // this port does not expect the hook to be called; all arguments are ignored
+ return RegisterOrConstant(-1); // placeholder result following ShouldNotReachHere()
+}
+
+
+#ifdef AARCH64
+// Emits a brk that is reached only when cond holds. Note: ARM32 version is OS dependent
+void MacroAssembler::breakpoint(AsmCondition cond) {
+ if (cond == al) {
+ brk(); // unconditional case: just the brk
+ } else {
+ Label L;
+ b(L, inverse(cond)); // branch around the brk when cond does not hold
+ brk();
+ bind(L);
+ }
+}
+#endif // AARCH64
+
+
+// virtual method calling: loads method_result from the vtable entry selected by vtable_index (recv_klass is overwritten)
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+ Register vtable_index,
+ Register method_result) {
+ const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
+ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+ add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord)); // recv_klass += vtable_index << LogBytesPerWord
+ ldr(method_result, Address(recv_klass, base_offset)); // load from the adjusted klass pointer plus base_offset
+}
+
+
+// Simplified, combined version, good for typical uses.
+// Branches to L_success when the check succeeds; falls through on failure.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp_reg,
+                                         Register temp_reg2,
+                                         Register temp_reg3,
+                                         Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);  // NULL slow path: undecided cases fall through
+  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);   // NULL failure: misses fall through
+  bind(L_failure);
+}
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp_reg2,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path) {
+ // A NULL label means "fall through" for that outcome; at most one of the three labels may be NULL.
+ assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
+ const Register super_check_offset = temp_reg2;
+
+ Label L_fallthrough;
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+ assert(label_nulls <= 1, "at most one NULL in the batch");
+
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
+ Address super_check_offset_addr(super_klass, sco_offset);
+
+ // If the pointers are equal, we are done (e.g., String[] elements).
+ // This self-check enables sharing of secondary supertype arrays among
+ // non-primary types such as array-of-interface. Otherwise, each such
+ // type would need its own customized SSA.
+ // We move this check to the front of the fast path because many
+ // type checks are in fact trivially successful in this manner,
+ // so we get a nicely predicted branch right at the start of the check.
+ cmp(sub_klass, super_klass);
+ b(*L_success, eq);
+
+ // Check the supertype display:
+ ldr_u32(super_check_offset, super_check_offset_addr);
+
+ Address super_check_addr(sub_klass, super_check_offset);
+ ldr(temp_reg, super_check_addr); // load displayed supertype
+ cmp(super_klass, temp_reg); // compare it with the super we are looking for
+
+ // This check has worked decisively for primary supers.
+ // Secondary supers are sought in the super_cache ('super_cache_addr').
+ // (Secondary supers are interfaces and very deeply nested subtypes.)
+ // This works in the same check above because of a tricky aliasing
+ // between the super_cache and the primary super display elements.
+ // (The 'super_check_addr' can address either, as the case requires.)
+ // Note that the cache is updated below if it does not help us find
+ // what we need immediately.
+ // So if it was a primary super, we can just fail immediately.
+ // Otherwise, it's the slow path for us (no success at this point).
+
+ b(*L_success, eq);
+ cmp_32(super_check_offset, sc_offset); // equal means the compared slot was the secondary super cache
+ if (L_failure == &L_fallthrough) {
+ b(*L_slow_path, eq); // failure falls through, so only the slow-path branch is needed
+ } else {
+ b(*L_failure, ne);
+ if (L_slow_path != &L_fallthrough) {
+ b(*L_slow_path);
+ }
+ }
+
+ bind(L_fallthrough);
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Register temp3_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   bool set_cond_codes) {
+#ifdef AARCH64
+  NOT_IMPLEMENTED();
+#else
+  // Note: if used by code that expects a register to be 0 on success,
+  // this register must be temp_reg and set_cond_codes must be true
+  // A NULL label means "fall through" for that outcome (at most one may be NULL).
+  Register saved_reg = noreg;
+
+  // get additional tmp registers: borrow LR (saved on the stack) when no third temp is supplied
+  if (temp3_reg == noreg) {
+    saved_reg = temp3_reg = LR;
+    push(saved_reg);
+  }
+
+  assert(temp2_reg != noreg, "need all the temporary registers");
+  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
+
+  Register cmp_temp   = temp_reg;   // element being compared; 0 after a hit
+  Register scan_temp  = temp3_reg;  // cursor into the secondary supers array
+  Register count_temp = temp2_reg;  // remaining element count (plus one)
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr(     sub_klass, sc_offset);
+
+#ifndef PRODUCT
+  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
+#endif
+
+  // We will consult the secondary-super array.
+  ldr(scan_temp, secondary_supers_addr);
+
+  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
+  // Without compressed oops the search key is simply super_klass itself.
+  Register search_key = super_klass;
+
+  // Load the array length.
+  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
+  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
+
+  add(count_temp, count_temp, 1);
+
+  Label L_loop, L_setnz_and_fail, L_fail;
+
+  // Top of search loop
+  bind(L_loop);
+  // Notes:
+  //  scan_temp starts at the array elements
+  //  count_temp is 1+size
+  subs(count_temp, count_temp, 1);
+  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
+    // direct jump to L_failure if failed and no cleanup needed
+    b(*L_failure, eq); // not found and no cleanup needed
+  } else {
+    b(L_fail, eq); // not found in the array
+  }
+
+  // Load next super to check
+  // In the array of super classes elements are pointer sized.
+  int element_size = wordSize;
+  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  subs(cmp_temp, cmp_temp, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  b(L_loop, ne);
+
+  // Falling out the bottom means we found a hit; we ARE a subtype
+
+  // Note: temp_reg/cmp_temp is already 0 and flag Z is set
+
+  // Success. Cache the super we found and proceed in triumph.
+  str(super_klass, super_cache_addr);
+
+  if (saved_reg != noreg) {
+    // Return success
+    pop(saved_reg);
+  }
+
+  b(*L_success);
+
+  bind(L_fail);
+  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
+  if (set_cond_codes) {
+    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
+  }
+  if (saved_reg != noreg) {
+    pop(saved_reg);
+  }
+  if (L_failure != &L_fallthrough) {
+    b(*L_failure);
+  }
+
+  bind(L_fallthrough);
+#endif
+}
+
+// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
+Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
+ assert_different_registers(params_base, params_count);
+ add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize)); // tmp = params_base + (params_count << logStackElementSize)
+ return Address(tmp, -Interpreter::stackElementSize); // one stack element below the address computed in tmp
+}
+
+
+void MacroAssembler::align(int modulus) {
+  // Pad with nops until the current code offset is a multiple of modulus.
+  bool aligned = (offset() % modulus == 0);
+  for (; !aligned; aligned = (offset() % modulus == 0)) { nop(); }
+}
+
+int MacroAssembler::set_last_Java_frame(Register last_java_sp,
+ Register last_java_fp,
+ bool save_last_java_pc,
+ Register tmp) {
+ int pc_offset; // code offset associated with the saved PC, or -1 when no PC is saved
+ if (last_java_fp != noreg) {
+ // optional
+ str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
+ _fp_saved = true;
+ } else {
+ _fp_saved = false;
+ }
+ if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
+#ifdef AARCH64
+ pc_offset = mov_pc_to(tmp);
+ str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
+#else
+ str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
+ pc_offset = offset() + VM_Version::stored_pc_adjustment();
+#endif
+ _pc_saved = true;
+ } else {
+ _pc_saved = false;
+ pc_offset = -1;
+ }
+ // According to the comment in javaFrameAnchor, SP must be saved last, so that other
+ // entries are valid when SP is set.
+
+ // However, this is probably not a strong constraint since for instance PC is
+ // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
+ // we now write the fields in the expected order but we have not added a StoreStore
+ // barrier.
+
+ // XXX: if the ordering is really important, PC should always be saved (without forgetting
+ // to update oop_map offsets) and a StoreStore barrier might be needed.
+
+ if (last_java_sp == noreg) {
+ last_java_sp = SP; // always saved
+ }
+#ifdef AARCH64
+ if (last_java_sp == SP) {
+ mov(tmp, SP); // SP is copied through tmp before being stored
+ str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+ } else {
+ str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+ }
+#else
+ str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+#endif
+
+ return pc_offset; // for oopmaps
+}
+
+void MacroAssembler::reset_last_Java_frame(Register tmp) { // undoes set_last_Java_frame by storing Rzero into the anchor fields
+ const Register Rzero = zero_register(tmp);
+ str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset())); // the sp field is always reset, and first
+ if (_fp_saved) { // fp field was written by the preceding set_last_Java_frame
+ str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
+ }
+ if (_pc_saved) { // pc field was written by the preceding set_last_Java_frame
+ str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
+ }
+}
+
+
+// Implementation of call_VM versions
+
+void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
+ assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+ assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
+
+#ifndef AARCH64
+ // Safer to save R9 here since callers may have been written
+ // assuming R9 survives. This is suboptimal but is not worth
+ // optimizing for the few platforms where R9 is scratched.
+ push(RegisterSet(R4) | R9ifScratched);
+ mov(R4, SP); // R4 keeps the pre-alignment SP across the call
+ bic(SP, SP, StackAlignmentInBytes - 1); // clear the low bits of SP (StackAlignmentInBytes - 1 mask)
+#endif // !AARCH64
+ call(entry_point, relocInfo::runtime_call_type);
+#ifndef AARCH64
+ mov(SP, R4); // restore the SP saved before alignment
+ pop(RegisterSet(R4) | R9ifScratched);
+#endif // !AARCH64
+}
+
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+ assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+ assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
+
+ const Register tmp = Rtemp;
+ assert_different_registers(oop_result, tmp);
+
+ set_last_Java_frame(SP, FP, true, tmp);
+
+#ifdef ASSERT
+ AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
+#endif // ASSERT
+
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+ // Safer to save R9 here since callers may have been written
+ // assuming R9 survives. This is suboptimal but is not worth
+ // optimizing for the few platforms where R9 is scratched.
+
+ // Note: cannot save R9 above the saved SP (some calls expect for
+ // instance the Java stack top at the saved SP)
+ // => once saved (with set_last_Java_frame), decrease SP before rounding to
+ // ensure the slot at SP will be free for R9.
+ sub(SP, SP, 4);
+ bic(SP, SP, StackAlignmentInBytes - 1);
+ str(R9, Address(SP, 0));
+#else
+ bic(SP, SP, StackAlignmentInBytes - 1);
+#endif // R9_IS_SCRATCHED
+#endif // !AARCH64
+
+ mov(R0, Rthread); // the current thread is passed in R0
+ call(entry_point, relocInfo::runtime_call_type);
+
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+ ldr(R9, Address(SP, 0));
+#endif // R9_IS_SCRATCHED
+ ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset())); // reload SP from the value saved by set_last_Java_frame
+#endif // !AARCH64
+
+ reset_last_Java_frame(tmp);
+
+ // C++ interp handles this in the interpreter
+ check_and_handle_popframe();
+ check_and_handle_earlyret();
+
+ if (check_exceptions) {
+ // check for pending exceptions
+ ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
+#ifdef AARCH64
+ Label L;
+ cbz(tmp, L); // nothing pending: skip the forwarding sequence
+ mov_pc_to(Rexception_pc);
+ b(StubRoutines::forward_exception_entry());
+ bind(L);
+#else
+ cmp(tmp, 0);
+ mov(Rexception_pc, PC, ne); // both instructions are conditional on a non-zero pending exception
+ b(StubRoutines::forward_exception_entry(), ne);
+#endif // AARCH64
+ }
+
+ // get oop result if there is one and reset the value in the thread
+ if (oop_result->is_valid()) {
+ get_vm_result(oop_result, tmp);
+ }
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
+ call_VM_helper(oop_result, entry_point, 0, check_exceptions); // no explicit arguments
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
+ assert (arg_1 == R1, "fixed register for arg_1");
+ call_VM_helper(oop_result, entry_point, 1, check_exceptions); // the argument is not moved: it must already be in R1
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+ assert (arg_1 == R1, "fixed register for arg_1");
+ assert (arg_2 == R2, "fixed register for arg_2");
+ call_VM_helper(oop_result, entry_point, 2, check_exceptions); // arguments are not moved: they must already be in R1, R2
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+ assert (arg_1 == R1, "fixed register for arg_1");
+ assert (arg_2 == R2, "fixed register for arg_2");
+ assert (arg_3 == R3, "fixed register for arg_3");
+ call_VM_helper(oop_result, entry_point, 3, check_exceptions); // arguments are not moved: they must already be in R1..R3
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
+ // Not used on ARM (explicit last_java_sp variant)
+ Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
+ // Not used on ARM (explicit last_java_sp variant)
+ Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+ // Not used on ARM (explicit last_java_sp variant)
+ Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+ // Not used on ARM (explicit last_java_sp variant)
+ Unimplemented();
+}
+
+// Raw call, without saving/restoring registers, exception handling, etc.
+// Mainly used from various stubs. R9 (with R10) is preserved only on request and only if R9_IS_SCRATCHED.
+void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
+ const Register tmp = Rtemp; // Rtemp free since scratched by call
+ set_last_Java_frame(SP, FP, true, tmp);
+#if R9_IS_SCRATCHED
+ if (save_R9_if_scratched) {
+ // Note: Saving also R10 for alignment.
+ push(RegisterSet(R9, R10));
+ }
+#endif // R9_IS_SCRATCHED
+ mov(R0, Rthread); // the current thread is passed in R0
+ call(entry_point, relocInfo::runtime_call_type);
+#if R9_IS_SCRATCHED
+ if (save_R9_if_scratched) {
+ pop(RegisterSet(R9, R10));
+ }
+#endif // R9_IS_SCRATCHED
+ reset_last_Java_frame(tmp);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point) {
+ call_VM_leaf_helper(entry_point, 0); // no arguments
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
+ assert (arg_1 == R0, "fixed register for arg_1");
+ call_VM_leaf_helper(entry_point, 1); // the argument is not moved: it must already be in R0
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
+ assert (arg_1 == R0, "fixed register for arg_1");
+ assert (arg_2 == R1, "fixed register for arg_2");
+ call_VM_leaf_helper(entry_point, 2); // arguments are not moved: they must already be in R0, R1
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+ assert (arg_1 == R0, "fixed register for arg_1");
+ assert (arg_2 == R1, "fixed register for arg_2");
+ assert (arg_3 == R2, "fixed register for arg_3");
+ call_VM_leaf_helper(entry_point, 3); // arguments are not moved: they must already be in R0..R2
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
+ assert (arg_1 == R0, "fixed register for arg_1");
+ assert (arg_2 == R1, "fixed register for arg_2");
+ assert (arg_3 == R2, "fixed register for arg_3");
+ assert (arg_4 == R3, "fixed register for arg_4");
+ call_VM_leaf_helper(entry_point, 4); // arguments are not moved: they must already be in R0..R3
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
+ assert_different_registers(oop_result, tmp);
+ ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset())); // fetch the thread's vm_result field
+ str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset())); // then reset the field in the thread
+ verify_oop(oop_result);
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
+ assert_different_registers(metadata_result, tmp);
+ ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset())); // fetch the thread's vm_result_2 field
+ str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset())); // then reset the field in the thread
+}
+
+void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
+  if (!arg2.is_register()) {
+    add(dst, arg1, arg2.as_constant());  // dst = arg1 + constant
+  } else {
+    add(dst, arg1, arg2.as_register());  // dst = arg1 + register
+  }
+}
+
+void MacroAssembler::add_slow(Register rd, Register rn, int c) { // rd = rn + c for constants that may not fit one instruction
+#ifdef AARCH64
+ if (c == 0) {
+ if (rd != rn) {
+ mov(rd, rn);
+ }
+ return;
+ }
+ if (c < 0) {
+ sub_slow(rd, rn, -c); // negative constants are handled as a subtraction
+ return;
+ }
+ if (c > right_n_bits(24)) {
+ guarantee(rd != rn, "no large add_slow with only one register");
+ mov_slow(rd, c); // materialize the constant in rd first
+ add(rd, rn, rd);
+ } else {
+ int lo = c & right_n_bits(12); // low 12 bits
+ int hi = (c >> 12) & right_n_bits(12); // next 12 bits, added with a 12-bit shift
+ if (lo != 0) {
+ add(rd, rn, lo, lsl0);
+ }
+ if (hi != 0) {
+ add(rd, (lo == 0) ? rn : rd, hi, lsl12);
+ }
+ }
+#else
+ // This function is used in compiler for handling large frame offsets
+ if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
+ return sub(rd, rn, (-c)); // small negative constant: a single sub is enough
+ }
+ int low = c & 0x3fc; // bits 2..9 of c
+ if (low != 0) {
+ add(rd, rn, low);
+ rn = rd; // the remaining part is added on top of the partial sum
+ }
+ if (c & ~0x3fc) {
+ assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
+ add(rd, rn, c & ~0x3fc);
+ } else if (rd != rn) {
+ assert(c == 0, "");
+ mov(rd, rn); // need to generate at least one move!
+ }
+#endif // AARCH64
+}
+
+void MacroAssembler::sub_slow(Register rd, Register rn, int c) { // rd = rn - c for constants that may not fit one instruction
+#ifdef AARCH64
+ if (c <= 0) {
+ add_slow(rd, rn, -c); // zero and negative constants are handled by add_slow
+ return;
+ }
+ if (c > right_n_bits(24)) {
+ guarantee(rd != rn, "no large sub_slow with only one register");
+ mov_slow(rd, c); // materialize the constant in rd first
+ sub(rd, rn, rd);
+ } else {
+ int lo = c & right_n_bits(12); // low 12 bits
+ int hi = (c >> 12) & right_n_bits(12); // next 12 bits, subtracted with a 12-bit shift
+ if (lo != 0) {
+ sub(rd, rn, lo, lsl0);
+ }
+ if (hi != 0) {
+ sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
+ }
+ }
+#else
+ // This function is used in compiler for handling large frame offsets
+ if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
+ return add(rd, rn, (-c)); // small negative constant: a single add is enough
+ }
+ int low = c & 0x3fc; // bits 2..9 of c
+ if (low != 0) {
+ sub(rd, rn, low);
+ rn = rd; // the remaining part is subtracted from the partial result
+ }
+ if (c & ~0x3fc) {
+ assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
+ sub(rd, rn, c & ~0x3fc);
+ } else if (rd != rn) {
+ assert(c == 0, "");
+ mov(rd, rn); // need to generate at least one move!
+ }
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_slow(Register rd, address addr) {
+ // do *not* call the non relocated mov_related_address
+ mov_slow(rd, (intptr_t)addr); // the address is loaded as a plain integer constant
+}
+
+void MacroAssembler::mov_slow(Register rd, const char *str) {
+ mov_slow(rd, (intptr_t)str); // the pointer value itself is loaded as a plain integer constant
+}
+
+#ifdef AARCH64
+
+// Common code for mov_slow and instr_count_for_mov_slow.
+// Returns number of instructions of mov_slow pattern,
+// generating it if non-null MacroAssembler is given.
+int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
+ // This code pattern is matched in NativeInstruction::is_mov_slow.
+ // Update it at modifications.
+
+ const intx mask = right_n_bits(16);
+ // 1 movz instruction: every bit of c outside one 16-bit chunk is zero
+ for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+ if ((c & ~(mask << base_shift)) == 0) {
+ if (masm != NULL) {
+ masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
+ }
+ return 1;
+ }
+ }
+ // 1 movn instruction: every bit of ~c outside one 16-bit chunk is zero
+ for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+ if (((~c) & ~(mask << base_shift)) == 0) {
+ if (masm != NULL) {
+ masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
+ }
+ return 1;
+ }
+ }
+ // 1 orr instruction: c is encodable as a logical immediate
+ {
+ LogicalImmediate imm(c, false);
+ if (imm.is_encoded()) {
+ if (masm != NULL) {
+ masm->orr(rd, ZR, imm);
+ }
+ return 1;
+ }
+ }
+ // 1 movz/movn + up to 3 movk instructions
+ int zeroes = 0; // number of 16-bit chunks equal to 0x0000
+ int ones = 0; // number of 16-bit chunks equal to 0xffff
+ for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+ int part = (c >> base_shift) & mask;
+ if (part == 0) {
+ ++zeroes;
+ } else if (part == mask) {
+ ++ones;
+ }
+ }
+ int def_bits = 0; // chunk value that needs no instruction of its own
+ if (ones > zeroes) {
+ def_bits = mask;
+ }
+ int inst_count = 0;
+ for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+ int part = (c >> base_shift) & mask;
+ if (part != def_bits) {
+ if (masm != NULL) {
+ if (inst_count > 0) {
+ masm->movk(rd, part, base_shift); // later chunks are patched in with movk
+ } else {
+ if (def_bits == 0) {
+ masm->movz(rd, part, base_shift); // first chunk, default chunk value is all zeroes
+ } else {
+ masm->movn(rd, ~part & mask, base_shift); // first chunk, default chunk value is all ones
+ }
+ }
+ }
+ inst_count++;
+ }
+ }
+ assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
+ return inst_count;
+}
+
+void MacroAssembler::mov_slow(Register rd, intptr_t c) {
+#ifdef ASSERT
+ int off = offset(); // start offset, only needed for the size check below
+#endif
+ (void) mov_slow_helper(rd, c, this); // emit the pattern; the returned count is not needed here
+ assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
+}
+
+// Number of instructions mov_slow(rd, c) would emit; the NULL assembler means nothing is generated.
+int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
+  return mov_slow_helper(noreg, c, NULL);
+}
+
+int MacroAssembler::instr_count_for_mov_slow(address c) {
+  return instr_count_for_mov_slow((intptr_t)c);
+}
+
+#else
+
+void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
+ if (AsmOperand::is_rotated_imm(c)) {
+ mov(rd, c, cond); // c itself is a rotated immediate: single mov
+ } else if (AsmOperand::is_rotated_imm(~c)) {
+ mvn(rd, ~c, cond); // the complement is a rotated immediate: single mvn
+ } else if (VM_Version::supports_movw()) {
+ movw(rd, c & 0xffff, cond); // low 16 bits
+ if ((unsigned int)c >> 16) {
+ movt(rd, (unsigned int)c >> 16, cond); // high 16 bits, only emitted when non-zero
+ }
+ } else {
+ // Find first non-zero bit
+ int shift = 0;
+ while ((c & (3 << shift)) == 0) {
+ shift += 2; // advance in steps of two bits
+ }
+ // Put the least significant part of the constant
+ int mask = 0xff << shift;
+ mov(rd, c & mask, cond);
+ // Add up to 3 other parts of the constant;
+ // each of them can be represented as rotated_imm
+ if (c & (mask << 8)) {
+ orr(rd, rd, c & (mask << 8), cond);
+ }
+ if (c & (mask << 16)) {
+ orr(rd, rd, c & (mask << 16), cond);
+ }
+ if (c & (mask << 24)) {
+ orr(rd, rd, c & (mask << 24), cond);
+ }
+ }
+}
+
+#endif // AARCH64
+
+void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
+#ifdef AARCH64
+ bool patchable
+#else
+ AsmCondition cond
+#endif
+ ) {
+ // A NULL jobject needs no relocation: rd is simply set to zero.
+ if (o == NULL) {
+#ifdef AARCH64
+ if (patchable) {
+ nop(); // patchable form carries one extra nop
+ }
+ mov(rd, ZR);
+#else
+ mov(rd, 0, cond);
+#endif
+ return;
+ }
+
+ if (oop_index == 0) {
+ oop_index = oop_recorder()->allocate_oop_index(o); // index 0 means "not allocated yet"
+ }
+ relocate(oop_Relocation::spec(oop_index));
+
+#ifdef AARCH64
+ if (patchable) {
+ nop(); // patchable form carries one extra nop
+ }
+ ldr(rd, pc());
+#else
+ if (VM_Version::supports_movw()) {
+ movw(rd, 0, cond); // zero placeholders are emitted here, not the value of o
+ movt(rd, 0, cond);
+ } else {
+ ldr(rd, Address(PC), cond);
+ // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
+ nop();
+ }
+#endif
+}
+
+void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
+ if (o == NULL) { // NULL metadata needs no relocation: rd is simply set to zero
+#ifdef AARCH64
+ if (patchable) {
+ nop(); // patchable form carries one extra nop
+ }
+#endif
+ mov(rd, 0);
+ return;
+ }
+
+ if (metadata_index == 0) {
+ metadata_index = oop_recorder()->allocate_metadata_index(o); // index 0 means "not allocated yet"
+ }
+ relocate(metadata_Relocation::spec(metadata_index));
+
+#ifdef AARCH64
+ if (patchable) {
+ nop(); // patchable form carries one extra nop
+ }
+#ifdef COMPILER2
+ if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
+ mov_slow(rd, (address)o); // materialize the pointer with moves instead of a literal load
+ return;
+ }
+#endif
+ ldr(rd, pc());
+#else
+ if (VM_Version::supports_movw()) {
+ movw(rd, ((int)o) & 0xffff); // low 16 bits of the pointer value
+ movt(rd, (unsigned int)o >> 16); // high 16 bits of the pointer value
+ } else {
+ ldr(rd, Address(PC));
+ // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
+ nop();
+ }
+#endif // AARCH64
+}
+
+// Loads the float constant `c` into `fd` from a 32-bit literal emitted inline in
+// the instruction stream; a branch skips over the literal.
+void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
+ Label skip_constant;
+ // reinterpret the float's bit pattern as an int so it can be emitted as data
+ union {
+ jfloat f;
+ jint i;
+ } accessor;
+ accessor.f = c;
+
+#ifdef AARCH64
+ // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
+ Label L;
+ ldr_s(fd, target(L));
+ b(skip_constant);
+ bind(L);
+ emit_int32(accessor.i);
+ bind(skip_constant);
+#else
+ // NOTE(review): presumably relies on PC reading two instructions ahead on
+ // ARM, i.e. at the literal emitted after the branch - confirm.
+ // Only the load is conditional; the branch is always emitted unconditionally.
+ flds(fd, Address(PC), cond);
+ b(skip_constant);
+ emit_int32(accessor.i);
+ bind(skip_constant);
+#endif // AARCH64
+}
+
+// Loads the double constant `c` into `fd` from a 64-bit literal (two 32-bit
+// words) emitted inline in the instruction stream; a branch skips the literal.
+void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
+ Label skip_constant;
+ // reinterpret the double's bit pattern as two ints so it can be emitted as data
+ union {
+ jdouble d;
+ jint i[2];
+ } accessor;
+ accessor.d = c;
+
+#ifdef AARCH64
+ // TODO-AARCH64 - try to optimize loading of double constants with fmov
+ Label L;
+ ldr_d(fd, target(L));
+ b(skip_constant);
+ // the literal is aligned to wordSize before being bound
+ align(wordSize);
+ bind(L);
+ emit_int32(accessor.i[0]);
+ emit_int32(accessor.i[1]);
+ bind(skip_constant);
+#else
+ // NOTE(review): presumably relies on PC reading two instructions ahead on
+ // ARM, i.e. at the literal emitted after the branch - confirm.
+ // Only the load is conditional; the branch is always emitted unconditionally.
+ fldd(fd, Address(PC), cond);
+ b(skip_constant);
+ emit_int32(accessor.i[0]);
+ emit_int32(accessor.i[1]);
+ bind(skip_constant);
+#endif // AARCH64
+}
+
+// Loads the word stored at `address_of_global` into `reg`.
+// The upper address bits are materialized in `reg` with mov_slow and the low
+// bits are used as the immediate offset of the load (ldrsw on AArch64, ldr on
+// 32-bit ARM), so `reg` serves as both base and destination.
+void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
+ intptr_t addr = (intptr_t) address_of_global;
+#ifdef AARCH64
+ assert((addr & 0x3) == 0, "address should be aligned");
+
+ // FIXME: TODO
+ // The adrp path below is disabled by the constant `false` condition.
+ if (false && page_reachable_from_cache(address_of_global)) {
+ assert(false,"TODO: relocate");
+ //relocate();
+ adrp(reg, address_of_global);
+ ldrsw(reg, Address(reg, addr & 0xfff));
+ } else {
+ // split: upper bits via mov_slow, low 14 bits as load offset
+ mov_slow(reg, addr & ~0x3fff);
+ ldrsw(reg, Address(reg, addr & 0x3fff));
+ }
+#else
+ // split: upper bits via mov_slow, low 12 bits as load offset
+ mov_slow(reg, addr & ~0xfff);
+ ldr(reg, Address(reg, addr & 0xfff));
+#endif
+}
+
+// Loads the pointer-sized value stored at `address_of_global` into `reg`.
+// On 32-bit ARM this simply delegates to ldr_global_s32; on AArch64 the upper
+// address bits are built with mov_slow and the low 15 bits become the load offset.
+void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
+#ifndef AARCH64
+  ldr_global_s32(reg, address_of_global);
+#else
+  intptr_t addr = (intptr_t) address_of_global;
+  assert ((addr & 0x7) == 0, "address should be aligned");
+  const intptr_t upper_bits = addr & ~0x7fff;
+  const intptr_t low_offset = addr & 0x7fff;
+  mov_slow(reg, upper_bits);
+  ldr(reg, Address(reg, low_offset));
+#endif
+}
+
+// Loads the byte stored at `address_of_global` into `reg`: the address with its
+// low 12 bits cleared is built in `reg`, and those low bits form the ldrb offset.
+void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
+  const intptr_t global = (intptr_t) address_of_global;
+  const intptr_t base_part = global & ~0xfff;
+  const intptr_t offset_part = global & 0xfff;
+  mov_slow(reg, base_part);
+  ldrb(reg, Address(reg, offset_part));
+}
+
+// Zero-extends the low `bits` bits of `rn` into `rd`.
+// AArch64 accepts only 8, 16 and 32. 32-bit ARM picks between masking with
+// andr (bits <= 8), clearing the upper bits with bic (bits >= 24), and a
+// shift-left / logical-shift-right pair for everything in between.
+void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
+#ifdef AARCH64
+ switch (bits) {
+ case 8: uxtb(rd, rn); break;
+ case 16: uxth(rd, rn); break;
+ case 32: mov_w(rd, rn); break;
+ default: ShouldNotReachHere();
+ }
+#else
+ if (bits <= 8) {
+ andr(rd, rn, (1 << bits) - 1);
+ } else if (bits >= 24) {
+ // NOTE(review): `-1 << bits` left-shifts a negative value, and bits == 32
+ // would shift by the full width - confirm callers never pass 32 here.
+ bic(rd, rn, -1 << bits);
+ } else {
+ mov(rd, AsmOperand(rn, lsl, 32 - bits));
+ mov(rd, AsmOperand(rd, lsr, 32 - bits));
+ }
+#endif
+}
+
+// Sign-extends the low `bits` bits of `rn` into `rd`.
+// AArch64 supports widths 8, 16 and 32 only; 32-bit ARM shifts the value to the
+// top of the register and arithmetically shifts it back down.
+void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
+#ifdef AARCH64
+  if (bits == 8) {
+    sxtb(rd, rn);
+  } else if (bits == 16) {
+    sxth(rd, rn);
+  } else if (bits == 32) {
+    sxtw(rd, rn);
+  } else {
+    ShouldNotReachHere();
+  }
+#else
+  const int shift_amount = 32 - bits;
+  mov(rd, AsmOperand(rn, lsl, shift_amount));
+  mov(rd, AsmOperand(rd, asr, shift_amount));
+#endif
+}
+
+#ifndef AARCH64
+
+// Moves the register pair rn_hi:rn_lo into rd_hi:rd_lo under condition `cond`.
+// The copy order is chosen so that no source half is overwritten before it is
+// read; when the pairs are exactly crossed the halves are exchanged with
+// three eor instructions. Moves whose source and destination coincide are omitted.
+void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
+                               Register rn_lo, Register rn_hi,
+                               AsmCondition cond) {
+  const bool lo_overwrites_src_hi = (rd_lo == rn_hi);
+  const bool hi_overwrites_src_lo = (rd_hi == rn_lo);
+
+  if (lo_overwrites_src_hi && hi_overwrites_src_lo) {
+    // crossed pairs: swap in place
+    eor(rd_lo, rd_hi, rd_lo, cond);
+    eor(rd_hi, rd_lo, rd_hi, cond);
+    eor(rd_lo, rd_hi, rd_lo, cond);
+    return;
+  }
+
+  if (lo_overwrites_src_hi) {
+    // high half first, so rn_hi is consumed before rd_lo is written
+    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
+    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
+    return;
+  }
+
+  // low half first
+  if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
+  if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
+}
+
+// Shifts the 64-bit value rn_hi:rn_lo by the amount held in register `count`
+// and writes the result to rd_hi:rd_lo. `shift` selects the shift kind; lsl is
+// handled by the first branch, every other kind by the second.
+// One of the destination registers is used as a scratch register, and the
+// asserts below restrict which destination/source/count registers may coincide.
+void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
+ Register rn_lo, Register rn_hi,
+ AsmShift shift, Register count) {
+ // pick a scratch register: rd_lo if it aliases no input, otherwise rd_hi
+ Register tmp;
+ if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
+ tmp = rd_lo;
+ } else {
+ tmp = rd_hi;
+ }
+ assert_different_registers(tmp, count, rn_lo, rn_hi);
+
+ // tmp = count - 32; the flags then select the pl (count >= 32) or
+ // mi (count < 32) instructions below
+ subs(tmp, count, 32);
+ if (shift == lsl) {
+ assert_different_registers(rd_hi, rn_lo);
+ assert_different_registers(count, rd_hi);
+ // count >= 32: high word comes from the low word shifted by count - 32
+ mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
+ // count < 32: tmp = 32 - count, then combine both source words
+ rsb(tmp, count, 32, mi);
+ if (rd_hi == rn_hi) {
+ mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
+ orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
+ } else {
+ mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
+ orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
+ }
+ mov(rd_lo, AsmOperand(rn_lo, shift, count));
+ } else {
+ assert_different_registers(rd_lo, rn_hi);
+ assert_different_registers(rd_lo, count);
+ // count >= 32: low word comes from the high word shifted by count - 32
+ mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
+ // count < 32: tmp = 32 - count, then combine both source words
+ rsb(tmp, count, 32, mi);
+ if (rd_lo == rn_lo) {
+ mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
+ orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
+ } else {
+ mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
+ orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
+ }
+ mov(rd_hi, AsmOperand(rn_hi, shift, count));
+ }
+}
+
+// Shifts the 64-bit value rn_hi:rn_lo by the compile-time constant `count`
+// (asserted to be in 1..63) and writes the result to rd_hi:rd_lo.
+// `shift` selects the shift kind; lsl is handled by the first branch, every
+// other kind by the second, where asr is distinguished from the rest.
+void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
+ Register rn_lo, Register rn_hi,
+ AsmShift shift, int count) {
+ assert(count != 0 && (count & ~63) == 0, "must be");
+
+ if (shift == lsl) {
+ assert_different_registers(rd_hi, rn_lo);
+ if (count >= 32) {
+ // the whole low word moves into the high word; the low word becomes zero
+ mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
+ mov(rd_lo, 0);
+ } else {
+ mov(rd_hi, AsmOperand(rn_hi, lsl, count));
+ orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
+ mov(rd_lo, AsmOperand(rn_lo, lsl, count));
+ }
+ } else {
+ assert_different_registers(rd_lo, rn_hi);
+ if (count >= 32) {
+ if (count == 32) {
+ mov(rd_lo, rn_hi);
+ } else {
+ mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
+ }
+ if (shift == asr) {
+ // NOTE(review): presumably an immediate asr of 0 encodes "asr #32",
+ // filling rd_hi with the sign bit - confirm against AsmOperand.
+ mov(rd_hi, AsmOperand(rn_hi, asr, 0));
+ } else {
+ mov(rd_hi, 0);
+ }
+ } else {
+ mov(rd_lo, AsmOperand(rn_lo, lsr, count));
+ orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
+ mov(rd_hi, AsmOperand(rn_hi, shift, count));
+ }
+ }
+}
+#endif // !AARCH64
+
+// Emits a call to the verify_oop stub for the oop held in `reg`; emits nothing
+// unless VerifyOops is set.
+// The emitted code saves all registers, passes the message in R0, the register
+// save area (SP) in R1 and the oop in R2, calls the stub indirectly, restores
+// the registers and branches over the inlined message literal.
+// `s`, `file` and `line` are only used to build the message text.
+void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
+ // This code pattern is matched in NativeIntruction::skip_verify_oop.
+ // Update it at modifications.
+ if (!VerifyOops) return;
+
+ char buffer[64];
+#ifdef COMPILER1
+ if (CommentedAssembly) {
+ snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
+ block_comment(buffer);
+ }
+#endif
+ const char* msg_buffer = NULL;
+ {
+ // the formatted text is copied with code_string() before the ResourceMark
+ // goes out of scope
+ ResourceMark rm;
+ stringStream ss;
+ ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
+ msg_buffer = code_string(ss.as_string());
+ }
+
+ save_all_registers();
+
+ if (reg != R2) {
+ mov(R2, reg); // oop to verify
+ }
+ mov(R1, SP); // register save area
+
+ Label done;
+ InlinedString Lmsg(msg_buffer);
+ ldr_literal(R0, Lmsg); // message
+
+ // call indirectly to solve generation ordering problem
+ ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
+ call(Rtemp);
+
+ restore_all_registers();
+
+ // skip the inlined literal that follows
+ b(done);
+#ifdef COMPILER2
+ int off = offset();
+#endif
+ bind_literal(Lmsg);
+#ifdef COMPILER2
+ if (offset() - off == 1 * wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+#endif
+ bind(done);
+}
+
+// Emits a call to the verify_oop stub for the oop stored at `addr`; emits
+// nothing unless VerifyOops is set.
+// The emitted code saves all registers, loads the oop from `addr` into R2
+// (compensating SP-based addresses for the register push), passes the message
+// in R0 and the register save area in R1, calls the stub indirectly, restores
+// the registers and branches over the inlined message literal.
+void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
+ if (!VerifyOops) return;
+
+ const char* msg_buffer = NULL;
+ {
+ // the formatted text is copied with code_string() before the ResourceMark
+ // goes out of scope
+ ResourceMark rm;
+ stringStream ss;
+ if ((addr.base() == SP) && (addr.index()==noreg)) {
+ ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
+ } else {
+ ss.print("verify_oop_addr: %s", s);
+ }
+ ss.print(" (%s:%d)", file, line);
+ msg_buffer = code_string(ss.as_string());
+ }
+
+ int push_size = save_all_registers();
+
+ if (addr.base() == SP) {
+ // computes an addr that takes into account the push
+ if (addr.index() != noreg) {
+ Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
+ add(new_base, SP, push_size);
+ addr = addr.rebase(new_base);
+ } else {
+ addr = addr.plus_disp(push_size);
+ }
+ }
+
+ ldr(R2, addr); // oop to verify
+ mov(R1, SP); // register save area
+
+ Label done;
+ InlinedString Lmsg(msg_buffer);
+ ldr_literal(R0, Lmsg); // message
+
+ // call indirectly to solve generation ordering problem
+ ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
+ call(Rtemp);
+
+ restore_all_registers();
+
+ // skip the inlined literal that follows
+ b(done);
+ bind_literal(Lmsg);
+ bind(done);
+}
+
+// Emits an explicit null check of `reg` when needs_explicit_null_check(offset)
+// says one is required: a load from address `reg` whose result is discarded
+// (into ZR on AArch64, into `tmp` on 32-bit ARM).
+// On 32-bit ARM, passing noreg as `tmp` makes the code fall back to Rtemp.
+void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
+ if (needs_explicit_null_check(offset)) {
+#ifdef AARCH64
+ ldr(ZR, Address(reg));
+#else
+ assert_different_registers(reg, tmp);
+ if (tmp == noreg) {
+ tmp = Rtemp;
+ // the assert fires only for a compiler thread with a current task compiling
+ // at CompLevel_full_optimization
+ assert((! Thread::current()->is_Compiler_thread()) ||
+ (! (ciEnv::current()->task() == NULL)) ||
+ (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
+ "Rtemp not available in C2"); // explicit tmp register required
+ // XXX: could we mark the code buffer as not compatible with C2 ?
+ }
+ ldr(tmp, Address(reg));
+#endif
+ }
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+// Emits an inline allocation from the shared heap: the heap top is read, the
+// requested size is added, and the new top is published with an atomic
+// update that is retried on failure. Control goes to `slow_case` when the heap
+// does not support inline contiguous allocation, when the end address wraps
+// around, or when it exceeds the heap end. `tmp1` and `tmp2` are scratched.
+void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
+ RegisterOrConstant size_expression, Label& slow_case) {
+ if (!Universe::heap()->supports_inline_contig_alloc()) {
+ b(slow_case);
+ return;
+ }
+
+ CollectedHeap* ch = Universe::heap();
+
+ const Register top_addr = tmp1;
+ const Register heap_end = tmp2;
+
+ if (size_expression.is_register()) {
+ assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
+ } else {
+ assert_different_registers(obj, obj_end, top_addr, heap_end);
+ }
+
+ // top_addr receives the address of the heap top variable, either as an
+ // embedded constant or loaded from the thread
+ bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
+ if (load_const) {
+ mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
+ } else {
+ ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
+ }
+ // Calculate new heap_top by adding the size of the object
+ Label retry;
+ bind(retry);
+
+#ifdef AARCH64
+ ldxr(obj, top_addr);
+#else
+ ldr(obj, Address(top_addr));
+#endif // AARCH64
+
+ // the heap end is read relative to top_addr, using the distance between
+ // end_addr() and top_addr() as the displacement
+ ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
+ add_rc(obj_end, obj, size_expression);
+ // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
+ cmp(obj_end, obj);
+ b(slow_case, lo);
+ // Update heap_top if allocation succeeded
+ cmp(obj_end, heap_end);
+ b(slow_case, hi);
+
+#ifdef AARCH64
+ stxr(heap_end/*scratched*/, obj_end, top_addr);
+ cbnz_w(heap_end, retry);
+#else
+ atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
+ b(retry, ne);
+#endif // AARCH64
+}
+
+// Allocates from the current thread's TLAB.
+// On fall-through `obj` holds the start of the new object and `obj_end` its
+// end, and the TLAB top has been advanced to `obj_end`. Branches to `slow_case`
+// when the new end would lie above the TLAB end. `tmp1` is scratched.
+void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
+                                   RegisterOrConstant size_expression, Label& slow_case) {
+  assert_different_registers(obj, obj_end, tmp1);
+
+  const Address top_slot(Rthread, JavaThread::tlab_top_offset());
+  const Address end_slot(Rthread, JavaThread::tlab_end_offset());
+
+  ldr(obj, top_slot);
+  ldr(tmp1, end_slot);
+  add_rc(obj_end, obj, size_expression);
+
+  // no room left in the TLAB -> slow path
+  cmp(obj_end, tmp1);
+  b(slow_case, hi);
+
+  str(obj_end, top_slot);
+}
+
+// Emits the TLAB refill slow path.
+// If the free space left in the current TLAB exceeds the thread's refill waste
+// limit, the TLAB is kept, the waste limit is raised and control goes to
+// `try_eden`. Otherwise the TLAB is discarded: its unused tail is formatted as
+// an int array, a new TLAB is obtained with eden_allocate (branching to
+// `slow_case` on failure) and the thread's TLAB start/top/end are updated.
+// Branches straight to `slow_case` when the heap does not support inline
+// contiguous allocation. `top` and tmp1..tmp4 are all written.
+void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2,
+ Register tmp3, Register tmp4,
+ Label& try_eden, Label& slow_case) {
+ if (!Universe::heap()->supports_inline_contig_alloc()) {
+ b(slow_case);
+ return;
+ }
+
+ InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr());
+ Label discard_tlab, do_refill;
+ ldr(top, Address(Rthread, JavaThread::tlab_top_offset()));
+ ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
+ ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
+
+ // Calculate amount of free space
+ sub(tmp1, tmp1, top);
+ // Retain tlab and allocate in shared space
+ // if the amount of free space in tlab is too large to discard
+ cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize));
+ b(discard_tlab, ge);
+
+ // Increment waste limit to prevent getting stuck on this slow path
+ mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment());
+ add(tmp2, tmp2, tmp3);
+ str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
+ if (TLABStats) {
+ ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
+ add_32(tmp2, tmp2, 1);
+ str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
+ }
+ b(try_eden);
+ // the literal is placed right after the unconditional branch above
+ bind_literal(intArrayKlass_addr);
+
+ bind(discard_tlab);
+ if (TLABStats) {
+ ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
+ ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
+ add_32(tmp2, tmp2, 1);
+ add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize));
+ str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
+ str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
+ }
+ // If tlab is currently allocated (top or end != null)
+ // then fill [top, end + alignment_reserve) with array object
+ cbz(top, do_refill);
+
+ // Set up the mark word
+ mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
+ str(tmp2, Address(top, oopDesc::mark_offset_in_bytes()));
+ // Set klass to intArrayKlass and the length to the remaining space
+ ldr_literal(tmp2, intArrayKlass_addr);
+ // tmp1 = free bytes + alignment reserve - int array header size
+ add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() -
+ typeArrayOopDesc::header_size(T_INT) * HeapWordSize);
+ Register klass = tmp2;
+ ldr(klass, Address(tmp2));
+ logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint)
+ str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes()));
+ store_klass(klass, top); // blows klass:
+ klass = noreg;
+
+ ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset()));
+ sub(tmp1, top, tmp1); // size of tlab's allocated portion
+ incr_allocated_bytes(tmp1, tmp2);
+
+ bind(do_refill);
+ // Refill the tlab with an eden allocation
+ ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset()));
+ logical_shift_left(tmp4, tmp1, LogHeapWordSize);
+ // afterwards top = start of the new TLAB and tmp1 = its end; tmp2/tmp3 are scratch
+ eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case);
+ str(top, Address(Rthread, JavaThread::tlab_start_offset()));
+ str(top, Address(Rthread, JavaThread::tlab_top_offset()));
+
+#ifdef ASSERT
+ // Verify that tmp1 contains tlab_end
+ ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset()));
+ add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize));
+ cmp(tmp1, tmp2);
+ breakpoint(ne);
+#endif
+
+ // the recorded TLAB end excludes the alignment reserve
+ sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+ str(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
+
+ if (ZeroTLAB) {
+ // clobbers start and tmp
+ // top must be preserved!
+ // the reserve is added back so the whole allocated range is zeroed
+ add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+ ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset()));
+ zero_memory(tmp2, tmp1, tmp3);
+ }
+}
+
+// Fills the memory region [start, end) with zeroes (the word at `end` itself is
+// not written). Clobbers `start` and `tmp` registers.
+// AArch64 zeroes two words per iteration with a single trailing word when
+// needed; 32-bit ARM stores one word per iteration while ptr is below `end`.
+void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
+ Label loop;
+ const Register ptr = start;
+
+#ifdef AARCH64
+ // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
+ const Register size = tmp;
+ Label remaining, done;
+
+ sub(size, end, start);
+
+#ifdef ASSERT
+ { Label L;
+ tst(size, wordSize - 1);
+ b(L, eq);
+ stop("size is not a multiple of wordSize");
+ bind(L);
+ }
+#endif // ASSERT
+
+ // size is biased by one word so that the flags at `remaining` tell whether
+ // exactly one word is left (eq) or nothing (ne)
+ subs(size, size, wordSize);
+ b(remaining, le);
+
+ // Zero by 2 words per iteration.
+ bind(loop);
+ subs(size, size, 2*wordSize);
+ stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
+ b(loop, gt);
+
+ bind(remaining);
+ b(done, ne);
+ str(ZR, Address(ptr));
+ bind(done);
+#else
+ mov(tmp, 0);
+ bind(loop);
+ cmp(ptr, end);
+ // store and loop only while ptr < end (unsigned)
+ str(tmp, Address(ptr, wordSize, post_indexed), lo);
+ b(loop, lo);
+#endif // AARCH64
+}
+
+// Adds `size_in_bytes` to the current thread's allocated-bytes counter.
+// AArch64 does a plain load/add/store through `tmp`. 32-bit ARM first updates
+// only the low word; if that addition carries, it redoes the update on the full
+// 64-bit counter with ldrd/strd using a temporarily pushed register pair.
+void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
+#ifdef AARCH64
+ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+ add_rc(tmp, tmp, size_in_bytes);
+ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+#else
+ // Bump total bytes allocated by this thread
+ Label done;
+
+ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+ adds(tmp, tmp, size_in_bytes);
+ // no carry out of the low word: store it and finish
+ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
+ b(done, cc);
+
+ // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated)
+ // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by
+ // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself.
+ Register low, high;
+ // Select either R0/R1 or R2/R3
+
+ // the pair must not overlap the register that holds the size
+ if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
+ low = R2;
+ high = R3;
+ } else {
+ low = R0;
+ high = R1;
+ }
+ push(RegisterSet(low, high));
+
+ ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+ adds(low, low, size_in_bytes);
+ adc(high, high, 0);
+ strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+
+ pop(RegisterSet(low, high));
+
+ bind(done);
+#endif // AARCH64
+}
+
+// Emits stack banging for a frame of compile-time known size (only when
+// UseStackBanging is set): one byte store at SP minus the shadow zone size,
+// followed by further byte stores at decreasing addresses, one per loop
+// iteration while the remaining frame size is at least one page.
+// `tmp` is clobbered; the value stored is whatever R0 holds.
+void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
+ // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
+ if (UseStackBanging) {
+ const int page_size = os::vm_page_size();
+
+ sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
+ strb(R0, Address(tmp));
+#ifdef AARCH64
+ for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
+ sub(tmp, tmp, page_size);
+ strb(R0, Address(tmp));
+ }
+#else
+ // steps of 0xff0 bytes, applied with a pre-indexed store that also updates tmp
+ for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
+ strb(R0, Address(tmp, -0xff0, pre_indexed));
+ }
+#endif // AARCH64
+ }
+}
+
+// Emits stack banging for a frame whose size is held in register `Rsize` (only
+// when UseStackBanging is set): a run-time loop of byte stores below SP that
+// continues while the remaining size, after each subtraction, is still
+// positive (condition hi). Both `Rsize` and `tmp` are clobbered.
+void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
+ if (UseStackBanging) {
+ Label loop;
+
+ mov(tmp, SP);
+ // total distance to bang: frame size + shadow zone - one page
+ add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
+#ifdef AARCH64
+ sub(tmp, tmp, Rsize);
+ bind(loop);
+ subs(Rsize, Rsize, os::vm_page_size());
+ strb(ZR, Address(tmp, Rsize));
+#else
+ bind(loop);
+ subs(Rsize, Rsize, 0xff0);
+ strb(R0, Address(tmp, -0xff0, pre_indexed));
+#endif // AARCH64
+ b(loop, hi);
+ }
+}
+
+// Emits code that saves all registers and transfers control to
+// MacroAssembler::debug with the message in R0 and the address of the register
+// save area in R1. The message and the address of debug() are emitted as
+// literals right after the jump.
+void MacroAssembler::stop(const char* msg) {
+ // This code pattern is matched in NativeIntruction::is_stop.
+ // Update it at modifications.
+#ifdef COMPILER1
+ if (CommentedAssembly) {
+ block_comment("stop");
+ }
+#endif
+
+ InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
+ InlinedString Lmsg(msg);
+
+ // save all registers for further inspection
+ save_all_registers();
+
+ ldr_literal(R0, Lmsg); // message
+ mov(R1, SP); // register save area
+
+#ifdef AARCH64
+ ldr_literal(Rtemp, Ldebug);
+ br(Rtemp);
+#else
+ ldr_literal(PC, Ldebug); // call MacroAssembler::debug
+#endif // AARCH64
+
+ // no branch over the literals is emitted: control has already left via the
+ // jump above
+#if defined(COMPILER2) && defined(AARCH64)
+ int off = offset();
+#endif
+ bind_literal(Lmsg);
+ bind_literal(Ldebug);
+#if defined(COMPILER2) && defined(AARCH64)
+ if (offset() - off == 2 * wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+#endif
+}
+
+// Emits a call to warning() with `msg` as its message argument. The
+// caller-save registers are saved around the call, and execution continues
+// after the inlined literals once the call returns.
+void MacroAssembler::warn(const char* msg) {
+#ifdef COMPILER1
+ if (CommentedAssembly) {
+ block_comment("warn");
+ }
+#endif
+
+ InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
+ InlinedString Lmsg(msg);
+ Label done;
+
+ int push_size = save_caller_save_registers();
+
+#ifdef AARCH64
+ // TODO-AARCH64 - get rid of extra debug parameters
+ mov(R1, LR);
+ mov(R2, FP);
+ add(R3, SP, push_size);
+#endif
+
+ ldr_literal(R0, Lmsg); // message
+ ldr_literal(LR, Lwarn); // call warning
+
+ call(LR);
+
+ restore_caller_save_registers();
+
+ // skip the inlined literals that follow
+ b(done);
+ bind_literal(Lmsg);
+ bind_literal(Lwarn);
+ bind(done);
+}
+
+
+// Emits code that pushes the full register set onto the stack and returns the
+// size of the pushed area in bytes (32 words on AArch64, 15 words on 32-bit
+// ARM: R0-R12, LR and PC).
+int MacroAssembler::save_all_registers() {
+ // This code pattern is matched in NativeIntruction::is_save_all_registers.
+ // Update it at modifications.
+#ifdef AARCH64
+ const Register tmp = Rtemp;
+ raw_push(R30, ZR);
+ for (int i = 28; i >= 0; i -= 2) {
+ raw_push(as_Register(i), as_Register(i+1));
+ }
+ // store the current pc into slot 31 (pushed as ZR above), then reload tmp
+ // from its own save slot since it was used as scratch
+ mov_pc_to(tmp);
+ str(tmp, Address(SP, 31*wordSize));
+ ldr(tmp, Address(SP, tmp->encoding()*wordSize));
+ return 32*wordSize;
+#else
+ push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
+ return 15*wordSize;
+#endif // AARCH64
+}
+
+// Emits code that undoes save_all_registers(): pops the saved registers and
+// discards the slot that held the saved PC (popped into ZR on AArch64, skipped
+// by adjusting SP on 32-bit ARM).
+void MacroAssembler::restore_all_registers() {
+#ifdef AARCH64
+ for (int i = 0; i <= 28; i += 2) {
+ raw_pop(as_Register(i), as_Register(i+1));
+ }
+ raw_pop(R30, ZR);
+#else
+ pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
+ add(SP, SP, wordSize); // discard saved PC
+#endif // AARCH64
+}
+
+// Emits code that pushes the caller-save registers and returns the size of the
+// pushed area in bytes. AArch64 pushes R0-R17 in pairs plus R18/LR (20 words);
+// 32-bit ARM pushes R0-R3, R12 and LR (6 words), plus R9/R10 when
+// R9_IS_SCRATCHED (8 words).
+int MacroAssembler::save_caller_save_registers() {
+#ifdef AARCH64
+ for (int i = 0; i <= 16; i += 2) {
+ raw_push(as_Register(i), as_Register(i+1));
+ }
+ raw_push(R18, LR);
+ return 20*wordSize;
+#else
+#if R9_IS_SCRATCHED
+ // Save also R10 to preserve alignment
+ push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
+ return 8*wordSize;
+#else
+ push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
+ return 6*wordSize;
+#endif
+#endif // AARCH64
+}
+
+// Emits code that pops the registers pushed by save_caller_save_registers(),
+// in the reverse order of the pushes.
+void MacroAssembler::restore_caller_save_registers() {
+#ifdef AARCH64
+ raw_pop(R18, LR);
+ for (int i = 16; i >= 0; i -= 2) {
+ raw_pop(as_Register(i), as_Register(i+1));
+ }
+#else
+#if R9_IS_SCRATCHED
+ pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
+#else
+ pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
+#endif
+#endif // AARCH64
+}
+
+// Runtime entry point reached from the code emitted by MacroAssembler::stop().
+//   msg       - message describing why execution was stopped
+//   registers - register save area written by save_all_registers(): one slot
+//               per saved register, in the order documented below
+// With ShowMessageBoxOnError set, a message box is shown, the saved registers
+// are optionally printed, and a breakpoint is hit; otherwise the message is
+// printed to tty. In both cases the function then asserts and calls fatal(),
+// so it never returns to the generated code.
+void MacroAssembler::debug(const char* msg, const intx* registers) {
+  // In order to get locks to work, we need to fake a in_VM state
+  JavaThread* thread = JavaThread::current();
+  thread->set_thread_state(_thread_in_vm);
+
+  if (ShowMessageBoxOnError) {
+    ttyLocker ttyl;
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      BytecodeCounter::print();
+    }
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+#ifdef AARCH64
+      // saved registers: R0-R30, PC
+      const int nregs = 32;
+#else
+      // saved registers: R0-R12, LR, PC
+      const int nregs = 15;
+      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
+#endif // AARCH64
+
+      // On AArch64 the last slot (PC) has no Register object and is printed
+      // separately below, hence the "- 1" on the loop bound there.
+      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
+        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
+      }
+
+#ifdef AARCH64
+      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
+#endif // AARCH64
+
+      // derive original SP value from the address of register save area:
+      // the slot just past the last saved register is where SP pointed
+      // before the registers were pushed
+      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
+    }
+    BREAKPOINT;
+  } else {
+    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
+  }
+  assert(false, "DEBUG MESSAGE: %s", msg);
+  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
+}
+
+// Emits a stop() whose message is "unimplemented: " followed by `what`.
+// The formatted text is copied with code_string() before the resource-area
+// buffer is released.
+void MacroAssembler::unimplemented(const char* what) {
+  const char* message = NULL;
+  {
+    ResourceMark rm;
+    stringStream text;
+    text.print("unimplemented: %s", what);
+    message = code_string(text.as_string());
+  }
+  stop(message);
+}
+
+
+// Implementation of FixedSizeCodeBlock
+
+// Records the assembler, its current pc as the start of the block, the block
+// size in instructions and whether padding is enabled; the destructor uses
+// these to pad the block with nops up to the requested size.
+FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
+_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
+}
+
+// Pads the code emitted since construction with nops so that the block spans
+// exactly _size_in_instrs instructions. Does nothing when the block is
+// disabled; a block that is already too long trips the guarantee.
+FixedSizeCodeBlock::~FixedSizeCodeBlock() {
+  if (!_enabled) {
+    return;
+  }
+
+  address curr_pc = _masm->pc();
+
+  assert(_start < curr_pc, "invalid current pc");
+  guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
+
+  // instructions still missing = requested size - instructions already emitted
+  int remaining = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
+  while (remaining > 0) {
+    _masm->nop();
+    remaining--;
+  }
+}
+
+#ifdef AARCH64
+
+// Serializes memory.
+// tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM
+// Emits nothing when os::is_MP() is false. Otherwise emits a single dmb:
+// DMB_st for exactly StoreStore, DMB_ld when only LoadLoad and/or LoadStore are
+// requested, and DMB_all for every other combination.
+void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
+ if (!os::is_MP()) return;
+
+ // TODO-AARCH64 investigate dsb vs dmb effects
+ if (order_constraint == StoreStore) {
+ dmb(DMB_st);
+ } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
+ dmb(DMB_ld);
+ } else {
+ dmb(DMB_all);
+ }
+}
+
+#else
+
+// Serializes memory. Potentially blows flags and reg.
+// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
+// preserve_flags takes a longer path in LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
+// load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
+// Emits nothing when os::is_MP() is false.
+void MacroAssembler::membar(Membar_mask_bits order_constraint,
+ Register tmp,
+ bool preserve_flags,
+ Register load_tgt) {
+ if (!os::is_MP()) return;
+
+ if (order_constraint == StoreStore) {
+ dmb(DMB_st, tmp);
+ } else if ((order_constraint & StoreLoad) ||
+ (order_constraint & LoadLoad) ||
+ (order_constraint & StoreStore) ||
+ (load_tgt == noreg) ||
+ preserve_flags) {
+ // anything other than a pure LoadStore with a usable load target gets a full dmb
+ dmb(DMB_all, tmp);
+ } else {
+ // LoadStore: speculative stores reordering is prohibited
+
+ // By providing an ordered load target register, we avoid an extra memory load reference
+ // The compare of load_tgt with itself always yields "equal", so the
+ // branch is never taken; this sequence overwrites the status flags.
+ Label not_taken;
+ bind(not_taken);
+ cmp(load_tgt, load_tgt);
+ b(not_taken, ne);
+ }
+}
+
+#endif // AARCH64
+
+// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
+// on failure, so fall-through can only mean success.
+// "one_shot" controls whether we loop and retry to mitigate spurious failures.
+// This is only needed for C2, which for some reason does not retry,
+// while C1/interpreter does.
+// TODO: measure if it makes a difference
+
+// Emits a compare-and-swap of the mark word of the object at `base`: if it
+// equals `oldval` it is replaced by `newval`, with acquire ordering for lock
+// acquisition. `tmp` is scratched. When fall-through on failure is allowed,
+// the condition flags tell the outcome (ne means failure).
+void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
+ Register base, Register tmp, Label &slow_case,
+ bool allow_fallthrough_on_failure, bool one_shot)
+{
+
+ // set when the emitted code can only fall through after a successful CAS,
+ // which makes the trailing conditional branch unnecessary
+ bool fallthrough_is_success = false;
+
+ // ARM Litmus Test example does prefetching here.
+ // TODO: investigate if it helps performance
+
+ // The last store was to the displaced header, so to prevent
+ // reordering we must issue a StoreStore or Release barrier before
+ // the CAS store.
+
+#ifdef AARCH64
+
+ // NOTE(review): these four aliases are not referenced below
+ Register Rscratch = tmp;
+ Register Roop = base;
+ Register mark = oldval;
+ Register Rbox = newval;
+ Label loop;
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
+
+ // Instead of StoreStore here, we use store-release-exclusive below
+
+ bind(loop);
+
+ ldaxr(tmp, base); // acquire
+ cmp(tmp, oldval);
+ b(slow_case, ne);
+ stlxr(tmp, newval, base); // release
+ if (one_shot) {
+ // leave the store status in the flags for the conditional branch below
+ cmp_w(tmp, 0);
+ } else {
+ cbnz_w(tmp, loop);
+ fallthrough_is_success = true;
+ }
+
+ // MemBarAcquireLock would normally go here, but
+ // we already do ldaxr+stlxr above, which has
+ // Sequential Consistency
+
+#else
+ membar(MacroAssembler::StoreStore, noreg);
+
+ if (one_shot) {
+ // single attempt: the store and the status check only run when the
+ // loaded value matched (eq)
+ ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
+ cmp(tmp, oldval);
+ strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
+ cmp(tmp, 0, eq);
+ } else {
+ atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
+ }
+
+ // MemBarAcquireLock barrier
+ // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
+ // but that doesn't prevent a load or store from floating up between
+ // the load and store in the CAS sequence, so play it safe and
+ // do a full fence.
+ membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
+#endif
+ if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
+ b(slow_case, ne);
+ }
+}
+
+// Emits a compare-and-swap of the mark word of the object at `base`: if it
+// equals `oldval` it is replaced by `newval`, with release ordering for lock
+// release, followed by a StoreLoad barrier. `tmp` is scratched.
+// `allow_fallthrough_on_failure` controls whether a failed CAS branches to
+// `slow_case` or falls through with the flags set to ne; `one_shot` selects a
+// single attempt instead of a retry loop.
+void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
+ Register base, Register tmp, Label &slow_case,
+ bool allow_fallthrough_on_failure, bool one_shot)
+{
+
+ // set when the emitted code can only fall through after a successful CAS,
+ // which makes the trailing conditional branch unnecessary
+ bool fallthrough_is_success = false;
+
+ assert_different_registers(oldval,newval,base,tmp);
+
+#ifdef AARCH64
+ Label loop;
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
+
+ bind(loop);
+ ldxr(tmp, base);
+ cmp(tmp, oldval);
+ b(slow_case, ne);
+ // MemBarReleaseLock barrier
+ stlxr(tmp, newval, base);
+ if (one_shot) {
+ // leave the store status in the flags for the conditional branch below
+ cmp_w(tmp, 0);
+ } else {
+ cbnz_w(tmp, loop);
+ fallthrough_is_success = true;
+ }
+#else
+ // MemBarReleaseLock barrier
+ // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
+ // but that doesn't prevent a load or store from floating down between
+ // the load and store in the CAS sequence, so play it safe and
+ // do a full fence.
+ membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
+
+ if (one_shot) {
+ // single attempt: the store and the status check only run when the
+ // loaded value matched (eq)
+ ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
+ cmp(tmp, oldval);
+ strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
+ cmp(tmp, 0, eq);
+ } else {
+ atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
+ }
+#endif
+ if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
+ b(slow_case, ne);
+ }
+
+ // ExitEnter
+ // According to JSR-133 Cookbook, this should be StoreLoad, the same
+ // barrier that follows volatile store.
+ // TODO: Should be able to remove on armv8 if volatile loads
+ // use the load-acquire instruction.
+ membar(StoreLoad, noreg);
+}
+
+#ifndef PRODUCT
+
+// Preserves flags and all registers.
+// On SMP the updated value might not be visible to external observers without a synchronization barrier
+// Emits an atomic increment of the 32-bit counter at `counter_addr`, executed
+// only when `cond` holds. Nothing is emitted when `counter_addr` is NULL.
+void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
+ if (counter_addr != NULL) {
+ InlinedAddress counter_addr_literal((address)counter_addr);
+ Label done, retry;
+ // skip the whole sequence when the condition does not hold
+ if (cond != al) {
+ b(done, inverse(cond));
+ }
+
+#ifdef AARCH64
+ raw_push(R0, R1);
+ raw_push(R2, ZR);
+
+ ldr_literal(R0, counter_addr_literal);
+
+ // load-exclusive / store-exclusive loop, retried while the store status is non-zero
+ bind(retry);
+ ldxr_w(R1, R0);
+ add_w(R1, R1, 1);
+ stxr_w(R2, R1, R0);
+ cbnz_w(R2, retry);
+
+ raw_pop(R2, ZR);
+ raw_pop(R0, R1);
+#else
+ push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
+ ldr_literal(R0, counter_addr_literal);
+
+ // the status flags are kept in Rtemp across the CAS loop and written back after it
+ mrs(CPSR, Rtemp);
+
+ bind(retry);
+ ldr_s32(R1, Address(R0));
+ add(R2, R1, 1);
+ atomic_cas_bool(R1, R2, R0, 0, R3);
+ b(retry, ne);
+
+ msr(CPSR_fsxc, Rtemp);
+
+ pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
+#endif // AARCH64
+
+ // skip the inlined literal that follows
+ b(done);
+ bind_literal(counter_addr_literal);
+
+ bind(done);
+ }
+}
+
+#endif // !PRODUCT
+
+
+// Building block for CAS cases of biased locking: makes CAS and records statistics.
+// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
+// The CAS replaces the mark word of `obj_reg` with `new_mark_reg` if it equals
+// `old_mark_reg`; `tmp` is scratched. In non-product builds the counter at
+// `counter_addr` is incremented after a successful CAS when it is not NULL.
+void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
+ Register tmp, Label& slow_case, int* counter_addr) {
+
+ // presumably relies on the default arguments of cas_for_lock_acquire to
+ // branch to slow_case on failure - TODO confirm against the declaration
+ cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
+#ifdef ASSERT
+ breakpoint(ne); // Fallthrough only on success
+#endif
+#ifndef PRODUCT
+ if (counter_addr != NULL) {
+ cond_atomic_inc32(al, counter_addr);
+ }
+#endif // !PRODUCT
+}
+
+ int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
+ bool swap_reg_contains_mark,
+ Register tmp2,
+ Label& done, Label& slow_case,
+ BiasedLockingCounters* counters) { // returns the code offset taken just before the optional mark load
+ // obj_reg must be preserved (at least) if the bias locking fails
+ // tmp_reg is a temporary register
+ // swap_reg was used as a temporary but contained a value
+ // that was used afterwards in some call paths. Callers
+ // have been fixed so that swap_reg no longer needs to be
+ // saved.
+ // Rtemp is no longer scratched
+
+ assert(UseBiasedLocking, "why call this otherwise?");
+ assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
+ guarantee(swap_reg!=tmp_reg, "invariant");
+ assert(tmp_reg != noreg, "must supply tmp_reg");
+
+ #ifndef PRODUCT
+ if (PrintBiasedLockingStatistics && (counters == NULL)) {
+ counters = BiasedLocking::counters(); // fall back to the global counters when none were supplied
+ }
+ #endif
+
+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+ Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
+
+ // Biased locking
+ // See whether the lock is currently biased toward our thread and
+ // whether the epoch is still valid
+ // Note that the runtime guarantees sufficient alignment of JavaThread
+ // pointers to allow age to be placed into low bits
+ // First check to see whether biasing is even enabled for this object
+ Label cas_label;
+
+ // The null check applies to the mark loading, if we need to load it.
+ // If the mark has already been loaded in swap_reg then it has already
+ // been performed and the offset is irrelevant.
+ int null_check_offset = offset();
+ if (!swap_reg_contains_mark) {
+ ldr(swap_reg, mark_addr);
+ }
+
+ // On MP platform loads could return 'stale' values in some cases.
+ // That is acceptable since either CAS or slow case path is taken in the worst case.
+
+ andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+ cmp(tmp_reg, markOopDesc::biased_lock_pattern);
+
+ b(cas_label, ne); // no bias pattern in the mark: fall out to the caller's regular locking code
+
+ // The bias pattern is present in the object's header. Need to check
+ // whether the bias owner and the epoch are both still current.
+ load_klass(tmp_reg, obj_reg);
+ ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+ orr(tmp_reg, tmp_reg, Rthread);
+ eor(tmp_reg, tmp_reg, swap_reg); // tmp_reg = (prototype header | Rthread) ^ mark
+
+ #ifdef AARCH64
+ ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
+ #else
+ bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
+ #endif // AARCH64
+
+ #ifndef PRODUCT
+ if (counters != NULL) {
+ cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr()); // counted only when the xor result (age ignored) is zero
+ }
+ #endif // !PRODUCT
+
+ b(done, eq); // mark already matches prototype | Rthread (age ignored): nothing more to do
+
+ Label try_revoke_bias;
+ Label try_rebias;
+
+ // At this point we know that the header has the bias pattern and
+ // that we are not the bias owner in the current epoch. We need to
+ // figure out more details about the state of the header in order to
+ // know what operations can be legally performed on the object's
+ // header.
+
+ // If the low three bits in the xor result aren't clear, that means
+ // the prototype header is no longer biased and we have to revoke
+ // the bias on this object.
+ tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+ b(try_revoke_bias, ne);
+
+ // Biasing is still enabled for this data type. See whether the
+ // epoch of the current bias is still valid, meaning that the epoch
+ // bits of the mark word are equal to the epoch bits of the
+ // prototype header. (Note that the prototype header's epoch bits
+ // only change at a safepoint.) If not, attempt to rebias the object
+ // toward the current thread. Note that we must be absolutely sure
+ // that the current epoch is invalid in order to do this because
+ // otherwise the manipulations it performs on the mark word are
+ // illegal.
+ tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
+ b(try_rebias, ne);
+
+ // tmp_reg has the age, epoch and pattern bits cleared
+ // The remaining (owner) bits are (Thread ^ current_owner)
+
+ // The epoch of the current bias is still valid but we know nothing
+ // about the owner; it might be set or it might be clear. Try to
+ // acquire the bias of the object using an atomic operation. If this
+ // fails we will go in to the runtime to revoke the object's bias.
+ // Note that we first construct the presumed unbiased header so we
+ // don't accidentally blow away another thread's valid bias.
+
+ // Note that we know the owner is not ourself. Hence, success can
+ // only happen when the owner bits are 0
+
+ #ifdef AARCH64
+ // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
+ // cleared bit in the middle (cms bit). So it is loaded with separate instruction.
+ mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+ andr(swap_reg, swap_reg, tmp2);
+ #else
+ // until the assembler can be made smarter, we need to make some assumptions about the values
+ // so we can optimize this:
+ assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
+
+ mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); // shift left then right by 23: keeps only the low 9 bits (the 0x1ff mask asserted above)
+ mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
+ #endif // AARCH64
+
+ orr(tmp_reg, swap_reg, Rthread); // new mark
+
+ biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
+ (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
+
+ // If the biasing toward our thread failed, this means that
+ // another thread succeeded in biasing it toward itself and we
+ // need to revoke that bias. The revocation will occur in the
+ // interpreter runtime in the slow case.
+
+ b(done);
+
+ bind(try_rebias);
+
+ // At this point we know the epoch has expired, meaning that the
+ // current "bias owner", if any, is actually invalid. Under these
+ // circumstances _only_, we are allowed to use the current header's
+ // value as the comparison value when doing the cas to acquire the
+ // bias in the current epoch. In other words, we allow transfer of
+ // the bias from one thread to another directly in this situation.
+
+ // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
+
+ eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
+
+ // owner bits 'random'. Set them to Rthread.
+ #ifdef AARCH64
+ mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+ andr(tmp_reg, tmp_reg, tmp2);
+ #else
+ mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); // same low-9-bit masking trick as above
+ mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
+ #endif // AARCH64
+
+ orr(tmp_reg, tmp_reg, Rthread); // new mark
+
+ biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
+ (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
+
+ // If the biasing toward our thread failed, then another thread
+ // succeeded in biasing it toward itself and we need to revoke that
+ // bias. The revocation will occur in the runtime in the slow case.
+
+ b(done);
+
+ bind(try_revoke_bias);
+
+ // The prototype mark in the klass doesn't have the bias bit set any
+ // more, indicating that objects of this data type are not supposed
+ // to be biased any more. We are going to try to reset the mark of
+ // this object to the prototype value and fall through to the
+ // CAS-based locking scheme. Note that if our CAS fails, it means
+ // that another thread raced us for the privilege of revoking the
+ // bias of this particular object, so it's okay to continue in the
+ // normal locking code.
+
+ // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
+
+ eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
+
+ // owner bits 'random'. Clear them
+ #ifdef AARCH64
+ mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+ andr(tmp_reg, tmp_reg, tmp2);
+ #else
+ mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); // same low-9-bit masking trick as above
+ mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
+ #endif // AARCH64
+
+ biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
+ (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); // a failed CAS also continues at cas_label
+
+ // Fall through to the normal CAS-based lock, because no matter what
+ // the result of the above CAS, some thread must have succeeded in
+ // removing the bias bit from the object's header.
+
+ bind(cas_label);
+
+ return null_check_offset;
+ }
+
+
+ void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { // clobbers tmp_reg and the flags
+ assert(UseBiasedLocking, "why call this otherwise?");
+
+ // Check for biased locking unlock case, which is a no-op
+ // Note: we do not have to check the thread ID for two reasons.
+ // First, the interpreter checks for IllegalMonitorStateException at
+ // a higher level. Second, if the bias was revoked while we held the
+ // lock, the object could not be rebiased toward another thread, so
+ // the bias bit would be clear.
+ ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); // tmp_reg = mark word of the object
+
+ andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+ cmp(tmp_reg, markOopDesc::biased_lock_pattern);
+ b(done, eq); // still biased: unlocking is a no-op; otherwise fall through to the caller's unlock code
+ }
+
+#ifdef AARCH64
+
+ // Loads a value of size_in_bytes (1, 2, 4 or 8) from src into dst. For the sizes below 8,
+ // is_signed selects the signed or the unsigned load variant. Any other size is a fatal error.
+ void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
+   if (size_in_bytes == 8)      { ldr(dst, src); }
+   else if (size_in_bytes == 4) { if (is_signed) { ldr_s32(dst, src); } else { ldr_u32(dst, src); } }
+   else if (size_in_bytes == 2) { if (is_signed) { ldrsh(dst, src); } else { ldrh(dst, src); } }
+   else if (size_in_bytes == 1) { if (is_signed) { ldrsb(dst, src); } else { ldrb(dst, src); } }
+   else                         { ShouldNotReachHere(); }
+ }
+
+ // Stores the low size_in_bytes (1, 2, 4 or 8) bytes of src to dst using the store
+ // instruction of matching width. Any other size is a fatal error.
+ void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
+   if (size_in_bytes == 8)      { str(src, dst); }
+   else if (size_in_bytes == 4) { str_32(src, dst); }
+   else if (size_in_bytes == 2) { strh(src, dst); }
+   else if (size_in_bytes == 1) { strb(src, dst); }
+   else                         { ShouldNotReachHere(); }
+ }
+
+#else
+
+ // Loads a value of size_in_bytes (1, 2 or 4) from src into dst, predicated on cond. For the
+ // sub-word sizes is_signed selects the signed or the unsigned load. Other sizes are a fatal error.
+ void MacroAssembler::load_sized_value(Register dst, Address src,
+                                       size_t size_in_bytes, bool is_signed, AsmCondition cond) {
+   if (size_in_bytes == 4)      { ldr(dst, src, cond); }
+   else if (size_in_bytes == 2) { if (is_signed) { ldrsh(dst, src, cond); } else { ldrh(dst, src, cond); } }
+   else if (size_in_bytes == 1) { if (is_signed) { ldrsb(dst, src, cond); } else { ldrb(dst, src, cond); } }
+   else                         { ShouldNotReachHere(); }
+ }
+
+
+ // Stores the low size_in_bytes (1, 2 or 4) bytes of src to dst, predicated on cond.
+ // Any other size is a fatal error.
+ void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
+   if (size_in_bytes == 4)      { str(src, dst, cond); }
+   else if (size_in_bytes == 2) { strh(src, dst, cond); }
+   else if (size_in_bytes == 1) { strb(src, dst, cond); }
+   else                         { ShouldNotReachHere(); }
+ }
+#endif // AARCH64
+
+ // Look up the method for a megamorphic invokeinterface call.
+ // The target method is determined by <Rinterf, Rindex>.
+ // The receiver klass is in Rklass.
+ // On success, the result will be in method_result, and execution falls through.
+ // On failure, execution transfers to the given label. temp_reg1 and temp_reg2 are clobbered.
+ void MacroAssembler::lookup_interface_method(Register Rklass,
+ Register Rinterf,
+ Register Rindex,
+ Register method_result,
+ Register temp_reg1,
+ Register temp_reg2,
+ Label& L_no_such_interface) {
+
+ assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex);
+
+ Register Ritable = temp_reg1; // temp_reg1 walks the itableOffsetEntry array
+
+ // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+ const int base = in_bytes(Klass::vtable_start_offset());
+ const int scale = exact_log2(vtableEntry::size_in_bytes());
+ ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
+ add(Ritable, Rklass, base);
+ add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale)); // Ritable = Rklass + base + (vtable length << scale)
+
+ Label entry, search;
+
+ b(entry); // the first entry is examined without advancing
+
+ bind(search);
+ add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize); // step to the next itableOffsetEntry
+
+ bind(entry);
+
+ // Check that the entry is non-null. A null entry means that the receiver
+ // class doesn't implement the interface, and wasn't the same as the
+ // receiver class checked when the interface was resolved.
+
+ ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes()));
+ cbz(temp_reg2, L_no_such_interface);
+
+ cmp(Rinterf, temp_reg2);
+ b(search, ne); // a different interface: keep scanning
+
+ ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes()));
+ add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass*
+ assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
+ assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
+
+ ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex)); // load the entry selected by Rindex (word-sized entries, see asserts above)
+ }
+
+#ifdef COMPILER2
+ // TODO: 8 bytes at a time? pre-fetch?
+ // Compare char[] arrays aligned to 4 bytes. Branches to Ldone with result set on a mismatch (0) or a single equal char (1); otherwise falls through.
+ void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
+ Register limit, Register result,
+ Register chr1, Register chr2, Label& Ldone) {
+ Label Lvector, Lloop;
+
+ // Note: limit contains number of bytes (2*char_elements) != 0.
+ tst(limit, 0x2); // trailing character ?
+ b(Lvector, eq);
+
+ // compare the trailing char
+ sub(limit, limit, sizeof(jchar)); // limit is now the byte offset of the last char
+ ldrh(chr1, Address(ary1, limit));
+ ldrh(chr2, Address(ary2, limit));
+ cmp(chr1, chr2);
+ mov(result, 0, ne); // not equal
+ b(Ldone, ne);
+
+ // only one char ?
+ tst(limit, limit);
+ mov(result, 1, eq);
+ b(Ldone, eq);
+
+ // word by word compare, don't need alignment check
+ bind(Lvector);
+
+ // Shift ary1 and ary2 to the end of the arrays, negate limit
+ add(ary1, limit, ary1);
+ add(ary2, limit, ary2);
+ neg(limit, limit);
+
+ bind(Lloop);
+ ldr_u32(chr1, Address(ary1, limit)); // two chars per iteration, indexed by the negative offset in limit
+ ldr_u32(chr2, Address(ary2, limit));
+ cmp_32(chr1, chr2);
+ mov(result, 0, ne); // not equal
+ b(Ldone, ne);
+ adds(limit, limit, 2*sizeof(jchar)); // advance by one 32-bit word; the loop ends when limit reaches zero
+ b(Lloop, ne);
+
+ // Caller should set it:
+ // mov(result_reg, 1); //equal
+ }
+#endif
+
+ void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) { // plain load/add/store increment; clobbers both temps
+ mov_slow(tmpreg1, counter_addr); // tmpreg1 = address of the counter
+ ldr_s32(tmpreg2, tmpreg1); // tmpreg2 = current 32-bit value
+ add_32(tmpreg2, tmpreg2, 1);
+ str_32(tmpreg2, tmpreg1); // write the incremented value back (no atomic instruction is used here)
+ }
+
+ void MacroAssembler::floating_cmp(Register dst) { // turns the outcome of a preceding FP compare into an integer in dst
+ #ifdef AARCH64
+ NOT_TESTED();
+ cset(dst, gt); // 1 if '>', else 0
+ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
+ #else
+ vmrs(dst, FPSCR); // copy the FP status register into dst
+ orr(dst, dst, 0x08000000); // NOTE(review): the bit manipulation below presumably yields the same -1/0/1 encoding as the AArch64 path - confirm
+ eor(dst, dst, AsmOperand(dst, lsl, 3));
+ mov(dst, AsmOperand(dst, asr, 30)); // arithmetic shift: only the (sign-extended) top two bits remain
+ #endif
+ }
+
+ void MacroAssembler::restore_default_fp_mode() { // writes zero to the FP control register; emits nothing on arm32 soft-float builds
+ #ifdef AARCH64
+ msr(SysReg_FPCR, ZR); // FPCR = 0
+ #else
+ #ifndef __SOFTFP__
+ // Round to Near mode, IEEE compatible, masked exceptions
+ mov(Rtemp, 0); // clobbers Rtemp
+ vmsr(FPSCR, Rtemp); // FPSCR = 0
+ #endif // !__SOFTFP__
+ #endif // AARCH64
+ }
+
+#ifndef AARCH64
+ // 24-bit word range == 26-bit byte range
+ // Returns true when 'offset' (in bytes) survives being encoded into, and decoded from,
+ // the signed 24-bit word-offset field of a branch instruction.
+ bool check26(int offset) {
+   // That holds exactly when the offset is 4-byte aligned and fits in 26 signed bits. Test this
+   // directly: the former shift round trip left-shifted negative values, which is undefined.
+   const int limit = 1 << 25;
+   return ((offset & 3) == 0) && (offset >= -limit) && (offset < limit);
+ }
+#endif // !AARCH64
+
+ // Perform some slight adjustments so the default 32MB code cache
+ // is fully reachable. Lowest address considered by the reachability checks.
+ static inline address first_cache_address() {
+ return CodeCache::low_bound() + sizeof(HeapBlock::Header); // skip a HeapBlock::Header-sized prefix at the low bound
+ }
+ static inline address last_cache_address() { // highest address considered by the reachability checks
+ return CodeCache::high_bound() - Assembler::InstructionSize; // one instruction below the high bound
+ }
+
+#ifdef AARCH64
+ // Can ADRP reach the 4K page of target from both ends of the code cache?
+ bool MacroAssembler::page_reachable_from_cache(address target) {
+   const intptr_t page_mask = ~0xfff;
+   const intptr_t target_page = (intptr_t)target & page_mask;
+   // Page distances from the lowest and from the highest code cache page.
+   const intptr_t pages_from_low = (target_page - ((intptr_t)first_cache_address() & page_mask)) >> 12;
+   const intptr_t pages_from_high = (target_page - ((intptr_t)last_cache_address() & page_mask)) >> 12;
+   // Both distances have to pass the is_imm_in_range(..., 21, 0) check.
+   return is_imm_in_range(pages_from_low, 21, 0) && is_imm_in_range(pages_from_high, 21, 0);
+ }
+#endif
+
+ // Can we reach target using unconditional branch or call from anywhere
+ // in the code cache (because code can be relocated)?
+ bool MacroAssembler::_reachable_from_cache(address target) {
+ #ifdef __thumb__
+ if ((1 & (intptr_t)target) != 0) {
+ // Return false to avoid 'b' if we need switching to THUMB mode.
+ return false;
+ }
+ #endif
+
+ address cl = first_cache_address(); // lowest code cache address considered
+ address ch = last_cache_address(); // highest code cache address considered
+
+ if (ForceUnreachable) {
+ // Only addresses from CodeCache can be treated as reachable.
+ if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
+ return false;
+ }
+ }
+
+ intptr_t loffset = (intptr_t)target - (intptr_t)cl; // displacement from the low end
+ intptr_t hoffset = (intptr_t)target - (intptr_t)ch; // displacement from the high end
+
+ #ifdef AARCH64
+ return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
+ #else
+ return check26(loffset - 8) && check26(hoffset - 8); // NOTE(review): -8 is presumably the ARM branch PC bias - confirm
+ #endif
+ }
+
+ bool MacroAssembler::reachable_from_cache(address target) { // checked wrapper around _reachable_from_cache
+ assert(CodeCache::contains(pc()), "not supported"); // only valid while emitting into the code cache
+ return _reachable_from_cache(target);
+ }
+
+ // Is every code cache address reachable from anywhere else in the code cache? Checked on both ends.
+ bool MacroAssembler::_cache_fully_reachable() {
+   const bool low_end_ok = _reachable_from_cache(first_cache_address());
+   const bool both_ends_ok = low_end_ok && _reachable_from_cache(last_cache_address());
+   return both_ends_ok;
+ }
+
+ bool MacroAssembler::cache_fully_reachable() { // checked wrapper around _cache_fully_reachable
+ assert(CodeCache::contains(pc()), "not supported"); // only valid while emitting into the code cache
+ return _cache_fully_reachable();
+ }
+
+ void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { // near branch if reachable, else absolute jump
+ assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
+ if (reachable_from_cache(target)) { // near case: a single relocated branch
+ relocate(rtype);
+ b(target NOT_AARCH64_ARG(cond));
+ return;
+ }
+
+ // Note: relocate is not needed for the code below,
+ // encoding targets in absolute format.
+ if (ignore_non_patchable_relocations()) {
+ rtype = relocInfo::none;
+ }
+
+ #ifdef AARCH64
+ assert (scratch != noreg, "should be specified");
+ InlinedAddress address_literal(target, rtype);
+ ldr_literal(scratch, address_literal); // far case: load the absolute target from an inlined literal
+ br(scratch);
+ int off = offset();
+ bind_literal(address_literal); // the literal is emitted right after the branch
+ #ifdef COMPILER2
+ if (offset() - off == wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+ #endif
+ #else
+ if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
+ // Note: this version cannot be (atomically) patched
+ mov_slow(scratch, (intptr_t)target, cond);
+ bx(scratch, cond);
+ } else {
+ Label skip;
+ InlinedAddress address_literal(target);
+ if (cond != al) {
+ b(skip, inverse(cond)); // conditional jump: hop over the sequence when cond does not hold
+ }
+ relocate(rtype);
+ ldr_literal(PC, address_literal); // load the target straight into PC
+ bind_literal(address_literal);
+ bind(skip);
+ }
+ #endif // AARCH64
+ }
+
+ // Similar to jump except that:
+ // - near calls are valid only if any destination in the cache is near
+ // - no movt/movw (not atomically patchable)
+ void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+ assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
+ if (cache_fully_reachable()) {
+ // Note: this assumes that all possible targets (the initial one
+ // and the addresses patched to) are all in the code cache.
+ assert(CodeCache::contains(target), "target might be too far");
+ relocate(rtype);
+ b(target NOT_AARCH64_ARG(cond));
+ return;
+ }
+
+ // Discard the relocation information if not needed for CacheCompiledCode
+ // since the next encodings are all in absolute format.
+ if (ignore_non_patchable_relocations()) {
+ rtype = relocInfo::none;
+ }
+
+ #ifdef AARCH64
+ assert (scratch != noreg, "should be specified");
+ InlinedAddress address_literal(target);
+ relocate(rtype);
+ ldr_literal(scratch, address_literal); // far case: load the absolute target from an inlined literal
+ br(scratch);
+ int off = offset();
+ bind_literal(address_literal); // the literal is emitted right after the branch
+ #ifdef COMPILER2
+ if (offset() - off == wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+ #endif
+ #else
+ {
+ Label skip;
+ InlinedAddress address_literal(target);
+ if (cond != al) {
+ b(skip, inverse(cond)); // conditional jump: hop over the sequence when cond does not hold
+ }
+ relocate(rtype);
+ ldr_literal(PC, address_literal); // load the target straight into PC
+ bind_literal(address_literal);
+ bind(skip);
+ }
+ #endif // AARCH64
+ }
+
+ void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) { // bl if reachable, else an absolute call
+ Register scratch = LR; // LR doubles as the scratch register for the far forms
+ assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
+ if (reachable_from_cache(target)) { // near case: a single relocated bl
+ relocate(rspec);
+ bl(target NOT_AARCH64_ARG(cond));
+ return;
+ }
+
+ // Note: relocate is not needed for the code below,
+ // encoding targets in absolute format.
+ if (ignore_non_patchable_relocations()) {
+ // This assumes the information was needed only for relocating the code.
+ rspec = RelocationHolder::none;
+ }
+
+ #ifndef AARCH64
+ if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
+ // Note: this version cannot be (atomically) patched
+ mov_slow(scratch, (intptr_t)target, cond);
+ blx(scratch, cond);
+ return;
+ }
+ #endif
+
+ {
+ Label ret_addr;
+ #ifndef AARCH64
+ if (cond != al) {
+ b(ret_addr, inverse(cond)); // conditional call: skip the sequence when cond does not hold
+ }
+ #endif
+
+
+ #ifdef AARCH64
+ // TODO-AARCH64: make more optimal implementation
+ // [ Keep in sync with MacroAssembler::call_size ]
+ assert(rspec.type() == relocInfo::none, "call reloc not implemented");
+ mov_slow(scratch, target);
+ blr(scratch);
+ #else
+ InlinedAddress address_literal(target);
+ relocate(rspec);
+ adr(LR, ret_addr); // set the return address by hand, then jump by loading PC
+ ldr_literal(PC, address_literal);
+
+ bind_literal(address_literal);
+ bind(ret_addr);
+ #endif
+ }
+ }
+
+#if defined(AARCH64) && defined(COMPILER2)
+ int MacroAssembler::call_size(address target, bool far, bool patchable) {
+   // FIXME: mov_slow is variable-length
+   if (far && !patchable) return instr_count_for_mov_slow((intptr_t)target) + 1; // mov_slow sequence plus the call itself
+   return far ? 2   // ldr; blr
+              : 1;  // bl
+ }
+#endif
+
+ int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) { // returns the code offset of the return address
+ assert(rspec.type() == relocInfo::static_call_type ||
+ rspec.type() == relocInfo::none ||
+ rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
+
+ // Always generate the relocation information, needed for patching
+ relocate(rspec); // used by NativeCall::is_call_before()
+ if (cache_fully_reachable()) {
+ // Note: this assumes that all possible targets (the initial one
+ // and the addresses patched to) are all in the code cache.
+ assert(CodeCache::contains(target), "target might be too far");
+ bl(target);
+ } else {
+ #if defined(AARCH64) && defined(COMPILER2)
+ if (c2) {
+ // return address needs to match call_size().
+ // no need to trash Rtemp
+ int off = offset();
+ Label skip_literal;
+ InlinedAddress address_literal(target);
+ ldr_literal(LR, address_literal);
+ blr(LR);
+ int ret_addr_offset = offset(); // the return address is right after the blr, before the literal
+ assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
+ b(skip_literal); // on return, jump over the inlined literal
+ int off2 = offset();
+ bind_literal(address_literal);
+ if (offset() - off2 == wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+ bind(skip_literal);
+ return ret_addr_offset;
+ }
+ #endif
+ Label ret_addr;
+ InlinedAddress address_literal(target);
+ #ifdef AARCH64
+ ldr_literal(Rtemp, address_literal); // clobbers Rtemp
+ adr(LR, ret_addr);
+ br(Rtemp);
+ #else
+ adr(LR, ret_addr); // set the return address by hand, then jump by loading PC
+ ldr_literal(PC, address_literal);
+ #endif
+ bind_literal(address_literal);
+ bind(ret_addr); // the return address follows the inlined literal
+ }
+ return offset();
+ }
+
+
+ void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { // mirror = java mirror of the method's pool holder; clobbers tmp
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+ ldr(tmp, Address(method, Method::const_offset())); // tmp = field at Method::const_offset()
+ ldr(tmp, Address(tmp, ConstMethod::constants_offset())); // tmp = field at ConstMethod::constants_offset()
+ ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes())); // tmp = pool holder
+ ldr(mirror, Address(tmp, mirror_offset)); // mirror = field at Klass::java_mirror_offset() of the holder
+ }
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Compressed pointers
+
+#ifdef AARCH64
+
+ void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
+   if (!UseCompressedClassPointers) {
+     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));    // full-width load of the klass field
+   } else {
+     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));  // 32-bit load of the klass field ...
+     decode_klass_not_null(dst_klass);                                      // ... decoded in place
+   }
+ }
+
+#else
+
+ void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) { // arm32 variant, predicated on cond
+ ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond); // dst_klass = klass field of src_oop
+ }
+
+#endif // AARCH64
+
+ // Blows src_klass.
+ void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
+   const Address klass_field(dst_oop, oopDesc::klass_offset_in_bytes());
+ #ifdef AARCH64
+   if (!UseCompressedClassPointers) { str(src_klass, klass_field); return; }
+   assert(src_klass != dst_oop, "not enough registers");
+   encode_klass_not_null(src_klass);  // encoded in place, which is what blows src_klass
+   str_w(src_klass, klass_field);
+ #else
+   str(src_klass, klass_field);
+ #endif // AARCH64
+ }
+
+#ifdef AARCH64
+
+ void MacroAssembler::store_klass_gap(Register dst) {
+   if (!UseCompressedClassPointers) return;  // nothing is stored unless class pointers are compressed
+   // Zero the 32-bit word at the klass gap offset of the object in dst.
+   str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
+ }
+
+#endif // AARCH64
+
+
+ // Loads the oop stored at src into dst; with UseCompressedOops (AArch64 only) a 32-bit value is loaded and decoded.
+ void MacroAssembler::load_heap_oop(Register dst, Address src) {
+ #ifdef AARCH64
+   if (!UseCompressedOops) { ldr(dst, src); return; }
+   ldr_w(dst, src);
+   decode_heap_oop(dst);
+ #else
+   ldr(dst, src);
+ #endif // AARCH64
+ }
+
+ // Stores the oop in src to dst; with UseCompressedOops (AArch64 only) src is first encoded in place.
+ // Blows src and flags.
+ void MacroAssembler::store_heap_oop(Register src, Address dst) {
+ #ifdef AARCH64
+   if (!UseCompressedOops) { str(src, dst); return; }
+   assert(!dst.uses(src), "not enough registers");
+   encode_heap_oop(src);
+   str_w(src, dst);
+ #else
+   str(src, dst);
+ #endif // AARCH64
+ }
+
+ // Stores src to dst without any encoding: 32 bits wide with UseCompressedOops (AArch64 only), full width otherwise.
+ void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
+ #ifdef AARCH64
+   if (!UseCompressedOops) { str(src, dst); return; }
+   str_w(src, dst);
+ #else
+   str(src, dst);
+ #endif // AARCH64
+ }
+
+
+#ifdef AARCH64
+
+ // Algorithm must match oop.inline.hpp encode_heap_oop.
+ void MacroAssembler::encode_heap_oop(Register dst, Register src) {
+ // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
+ // Update that matcher whenever this sequence is modified.
+ assert (UseCompressedOops, "must be compressed");
+ assert (Universe::heap() != NULL, "java heap should be initialized");
+ #ifdef ASSERT
+ verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+ #endif
+ verify_oop(src);
+ if (Universe::narrow_oop_base() == NULL) { // zero-based: only the optional shift is needed
+ if (Universe::narrow_oop_shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ _lsr(dst, src, Universe::narrow_oop_shift());
+ } else if (dst != src) {
+ mov(dst, src);
+ }
+ } else {
+ tst(src, src);
+ csel(dst, Rheap_base, src, eq); // a zero src is replaced by Rheap_base so that it encodes to 0
+ sub(dst, dst, Rheap_base);
+ if (Universe::narrow_oop_shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ _lsr(dst, dst, Universe::narrow_oop_shift());
+ }
+ }
+ }
+
+ // Same algorithm as oop.inline.hpp decode_heap_oop.
+ void MacroAssembler::decode_heap_oop(Register dst, Register src) {
+ #ifdef ASSERT
+ verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+ #endif
+ assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ if (Universe::narrow_oop_base() != NULL) {
+ tst(src, src);
+ add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); // dst = Rheap_base + (src << shift)
+ csel(dst, dst, ZR, ne); // a zero src decodes to zero rather than to the heap base
+ } else {
+ _lsl(dst, src, Universe::narrow_oop_shift()); // zero-based: only the shift is needed
+ }
+ verify_oop(dst);
+ }
+
+#ifdef COMPILER2
+ // Algorithm must match oop.inline.hpp encode_heap_oop.
+ // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
+ // must be changed.
+ void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
+ assert (UseCompressedOops, "must be compressed");
+ assert (Universe::heap() != NULL, "java heap should be initialized");
+ #ifdef ASSERT
+ verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+ #endif
+ verify_oop(src);
+ if (Universe::narrow_oop_base() == NULL) { // zero-based: only the optional shift is needed
+ if (Universe::narrow_oop_shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ _lsr(dst, src, Universe::narrow_oop_shift());
+ } else if (dst != src) {
+ mov(dst, src);
+ }
+ } else {
+ sub(dst, src, Rheap_base); // no zero check here, unlike encode_heap_oop
+ if (Universe::narrow_oop_shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ _lsr(dst, dst, Universe::narrow_oop_shift());
+ }
+ }
+ }
+
+ // Same algorithm as oop.inline.hpp decode_heap_oop.
+ // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
+ // must be changed.
+ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+ #ifdef ASSERT
+ verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+ #endif
+ assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ if (Universe::narrow_oop_base() != NULL) {
+ add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); // dst = Rheap_base + (src << shift), no zero check
+ } else {
+ _lsl(dst, src, Universe::narrow_oop_shift()); // zero-based: only the shift is needed
+ }
+ verify_oop(dst);
+ }
+
+ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { // dst = encoded (narrow) form of k, emitted under a metadata relocation
+ assert(UseCompressedClassPointers, "should only be used for compressed header");
+ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+ int klass_index = oop_recorder()->find_index(k);
+ RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+
+ // Relocation with special format (see relocInfo_arm.hpp).
+ relocate(rspec);
+ narrowKlass encoded_k = Klass::encode_klass(k);
+ movz(dst, encoded_k & 0xffff, 0); // low 16 bits of the encoded klass
+ movk(dst, (encoded_k >> 16) & 0xffff, 16); // next 16 bits, inserted at bit position 16
+ }
+
+ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { // emits a movz/movk pair under an oop relocation for obj
+ assert(UseCompressedOops, "should only be used for compressed header");
+ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+ int oop_index = oop_recorder()->find_index(obj);
+ RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+ relocate(rspec);
+ movz(dst, 0xffff, 0); // NOTE(review): 0xffff looks like a placeholder presumably patched via the relocation - confirm
+ movk(dst, 0xffff, 16);
+ }
+
+#endif // COMPILER2
+
+ // Must preserve condition codes, or C2 encodeKlass_not_null rule
+ // must be changed. Encodes the Klass* in r in place.
+ void MacroAssembler::encode_klass_not_null(Register r) {
+ if (Universe::narrow_klass_base() != NULL) {
+ // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
+ assert(r != Rheap_base, "Encoding a klass in Rheap_base");
+ mov_slow(Rheap_base, Universe::narrow_klass_base());
+ sub(r, r, Rheap_base); // r = r - narrow_klass_base
+ }
+ if (Universe::narrow_klass_shift() != 0) {
+ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+ _lsr(r, r, Universe::narrow_klass_shift());
+ }
+ if (Universe::narrow_klass_base() != NULL) {
+ reinit_heapbase(); // restore Rheap_base after its use as scratch above
+ }
+ }
+
+ // Must preserve condition codes, or C2 encodeKlass_not_null rule
+ // must be changed. Encodes the Klass* in src into dst.
+ void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+ if (dst == src) {
+ encode_klass_not_null(src); // same register: use the in-place variant
+ return;
+ }
+ if (Universe::narrow_klass_base() != NULL) {
+ mov_slow(dst, (int64_t)Universe::narrow_klass_base()); // dst serves as scratch for the base, so Rheap_base is untouched
+ sub(dst, src, dst); // dst = src - narrow_klass_base
+ if (Universe::narrow_klass_shift() != 0) {
+ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+ _lsr(dst, dst, Universe::narrow_klass_shift());
+ }
+ } else {
+ if (Universe::narrow_klass_shift() != 0) {
+ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+ _lsr(dst, src, Universe::narrow_klass_shift());
+ } else {
+ mov(dst, src); // no base and no shift: plain copy
+ }
+ }
+ }
+
+// Function instr_count_for_decode_klass_not_null() counts the instructions
+// generated by decode_klass_not_null(register r) and reinit_heapbase(),
+// when (Universe::heap() != NULL). Hence, if the instructions they
+// generate change, then this method needs to be updated.
+ int MacroAssembler::instr_count_for_decode_klass_not_null() {
+   assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+   assert(Universe::heap() != NULL, "java heap should be initialized");
+   if (Universe::narrow_klass_base() == NULL) {
+     // Without a base, decoding is at most a single shift instruction.
+     return (Universe::narrow_klass_shift() != 0) ? 1 : 0;
+   }
+   // With a base: load the base, one add, then restore the heap base register.
+   const int load_base_count = instr_count_for_mov_slow(Universe::narrow_klass_base());      // mov_slow
+   const int reinit_heapbase_count = instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow
+   const int add_count = 1;                                                                  // add
+   // Keep in step with decode_klass_not_null(Register), which asserts against this count.
+   return load_base_count + add_count + reinit_heapbase_count;
+ }
+
+// Decodes the narrow klass in r in place: r = narrow_klass_base + (r << narrow_klass_shift).
+// Must preserve condition codes, or C2 decodeKlass_not_null rule must be changed.
+void MacroAssembler::decode_klass_not_null(Register r) {
+ int off = offset(); // code offset before emission; used by the size check at the end
+ assert(UseCompressedClassPointers, "should only be used for compressed headers");
+ assert(Universe::heap() != NULL, "java heap should be initialized");
+ assert(r != Rheap_base, "Decoding a klass in Rheap_base");
+ // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
+ // Also do not verify_oop as this is called by verify_oop.
+ if (Universe::narrow_klass_base() != NULL) {
+ // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
+ mov_slow(Rheap_base, Universe::narrow_klass_base());
+ add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
+ reinit_heapbase(); // Rheap_base was used as scratch above; reload it
+ } else {
+ if (Universe::narrow_klass_shift() != 0) {
+ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+ _lsl(r, r, Universe::narrow_klass_shift());
+ }
+ }
+ assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
+}
+
+// Two-register decode: dst = narrow_klass_base + (src << narrow_klass_shift).
+// Must preserve condition codes, or C2 decodeKlass_not_null rule must be changed.
+void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+  if (src == dst) {
+    decode_klass_not_null(src);
+    return;
+  }
+  assert(UseCompressedClassPointers, "should only be used for compressed headers");
+  assert(Universe::heap() != NULL, "java heap should be initialized");
+  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
+  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
+  // Also do not verify_oop as this is called by verify_oop.
+  const int shift = Universe::narrow_klass_shift();
+  if (Universe::narrow_klass_base() == NULL) {
+    _lsl(dst, src, shift);
+  } else {
+    mov_slow(dst, Universe::narrow_klass_base());
+    add(dst, dst, AsmOperand(src, lsl, shift));
+  }
+}
+
+
+void MacroAssembler::reinit_heapbase() {
+  // (Re)loads Rheap_base with the narrow pointers base; emits nothing unless a compressed mode is on.
+  if (!UseCompressedOops && !UseCompressedClassPointers) return;
+  if (Universe::heap() == NULL) {
+    ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());  // heap not set up yet: go through the base's address
+  } else {
+    mov_slow(Rheap_base, Universe::narrow_ptrs_base());  // heap initialized: use the current base value as a constant
+  }
+}
+
+#ifdef ASSERT
+void MacroAssembler::verify_heapbase(const char* msg) {
+ // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
+ // Keep both in sync when modifying either.
+ assert (UseCompressedOops, "should be compressed");
+ assert (Universe::heap() != NULL, "java heap should be initialized");
+ if (CheckCompressedOops) {
+ Label ok;
+ str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); // set for the duration of the check; cleared with ZR below
+ raw_push(Rtemp, ZR); // save Rtemp (pushed together with ZR)
+ mrs(Rtemp, Assembler::SysReg_NZCV);
+ str(Rtemp, Address(SP, 1 * wordSize)); // stash NZCV at SP + wordSize; the cmp below overwrites the flags
+ mov_slow(Rtemp, Universe::narrow_ptrs_base());
+ cmp(Rheap_base, Rtemp);
+ b(ok, eq);
+ stop(msg); // Rheap_base does not hold the narrow pointers base
+ bind(ok);
+ ldr(Rtemp, Address(SP, 1 * wordSize));
+ msr(Assembler::SysReg_NZCV, Rtemp); // restore NZCV
+ raw_pop(Rtemp, ZR); // restore Rtemp
+ str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
+ }
+}
+#endif // ASSERT
+
+#endif // AARCH64
+
+#ifdef COMPILER2
+void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
+{
+ assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+ // Inline lock fast path (COMPILER2 only) for the object in Roop, using the BasicLock at Rbox.
+ Register Rmark = Rscratch2; // receives the object's mark word
+
+ assert(Roop != Rscratch, "");
+ assert(Roop != Rmark, "");
+ assert(Rbox != Rscratch, "");
+ assert(Rbox != Rmark, "");
+
+ Label fast_lock, done;
+
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
+ Label failed;
+#ifdef AARCH64
+ biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
+#else
+ biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
+#endif
+ bind(failed); // 'failed' is bound right here: continue with the regular fast path
+ }
+
+ ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
+ tst(Rmark, markOopDesc::unlocked_value);
+ b(fast_lock, ne); // unlocked bit set: try to take the lock with a CAS
+
+ // Check for recursive lock
+ // See comments in InterpreterMacroAssembler::lock_object for
+ // explanations on the fast recursive locking check.
+#ifdef AARCH64
+ intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); // = ~(page_size - 1) | 3 for a power-of-two page size
+ Assembler::LogicalImmediate imm(mask, false);
+ mov(Rscratch, SP);
+ sub(Rscratch, Rmark, Rscratch); // Rscratch = mark - SP
+ ands(Rscratch, Rscratch, imm);
+ b(done, ne); // exit with failure
+ str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
+ b(done);
+
+#else
+ // -1- test low 2 bits
+ movs(Rscratch, AsmOperand(Rmark, lsl, 30));
+ // -2- test (hdr - SP) if the low two bits are 0
+ sub(Rscratch, Rmark, SP, eq);
+ movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
+ // If still 'eq' then recursive locking OK
+ str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
+ b(done);
+#endif
+
+ bind(fast_lock);
+ str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // save the mark word in the box as the displaced header
+
+ bool allow_fallthrough_on_failure = true;
+ bool one_shot = true;
+ cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
+
+ bind(done);
+
+}
+
+void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
+{
+  // Inline unlock fast path (COMPILER2 only) for the object in Roop, using the BasicLock at Rbox.
+  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+  Register Rmark = Rscratch2;  // receives the displaced header saved in the box
+  assert(Roop != Rscratch, "");
+  assert(Roop != Rmark, "");
+  assert(Rbox != Rscratch, "");
+  assert(Rbox != Rmark, "");
+
+  Label done;
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    biased_locking_exit(Roop, Rscratch, done);
+  }
+
+  // If hdr is NULL, we've got recursive locking and there's nothing more to do
+  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+  cmp(Rmark, 0);
+  b(done, eq);
+
+  // Restore the object header. 'done' is also passed as the slow-case label,
+  // with both fall-through-on-failure and one-shot requested.
+  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done,
+                       true /* allow_fallthrough_on_failure */,
+                       true /* one_shot */);
+
+  bind(done);
+
+}
+#endif // COMPILER2
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/macroAssembler_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,1390 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_MACROASSEMBLER_ARM_HPP
+#define CPU_ARM_VM_MACROASSEMBLER_ARM_HPP
+
+#include "code/relocInfo.hpp"
+#include "code/relocInfo_ext.hpp"
+
+class BiasedLockingCounters;
+
+// AddressLiteral and its subclasses were introduced to ease porting from
+// x86 and to avoid relocation issues.
+class AddressLiteral VALUE_OBJ_CLASS_SPEC {
+  RelocationHolder _rspec;
+  // An AddressLiteral is typically used for its rval. In some situations
+  // the lval (effective address) of the item is wanted instead; addr() is
+  // the factory that makes such an lval.
+  bool _is_lval;
+
+  address _target;
+
+ private:
+  // Relocation type for ExternalAddress, or when the type is not specified.
+  // ExternalAddress is sometimes used for values which are not exactly
+  // addresses, like the card table base. external_word_type cannot be used
+  // for values in the first page, so the relocation is skipped in that case.
+  static relocInfo::relocType reloc_for_target(address target) {
+    if (external_word_Relocation::can_be_relocated(target)) {
+      return relocInfo::external_word_type;
+    }
+    return relocInfo::none;
+  }
+
+  void set_rspec(relocInfo::relocType rtype);
+
+ protected:
+  // creation
+  AddressLiteral() {
+    _is_lval = false;
+    _target = NULL;
+  }
+
+ public:
+
+  AddressLiteral(address target, relocInfo::relocType rtype)
+    : _is_lval(false), _target(target) {
+    set_rspec(rtype);
+  }
+
+  AddressLiteral(address target, RelocationHolder const& rspec)
+    : _rspec(rspec), _is_lval(false), _target(target) {}
+
+  // No relocation type given: it is chosen by reloc_for_target().
+  AddressLiteral(address target)
+    : _is_lval(false), _target(target) {
+    set_rspec(reloc_for_target(target));
+  }
+
+  // Returns a copy of this literal flagged as an lval.
+  AddressLiteral addr() {
+    AddressLiteral lval_copy = *this;
+    lval_copy._is_lval = true;
+    return lval_copy;
+  }
+
+ private:
+
+  // Accessors, reachable only from this class and the friends listed below.
+  address target() { return _target; }
+  bool is_lval() { return _is_lval; }
+
+  relocInfo::relocType reloc() const { return _rspec.type(); }
+  const RelocationHolder& rspec() const { return _rspec; }
+
+  friend class Assembler;
+  friend class MacroAssembler;
+  friend class Address;
+  friend class LIR_Assembler;
+  friend class InlinedAddress;
+};
+
+// AddressLiteral whose relocation type is derived from the target itself:
+// it delegates to AddressLiteral(address).
+class ExternalAddress: public AddressLiteral {
+ public:
+  ExternalAddress(address target)
+    : AddressLiteral(target) {}
+};
+
+// AddressLiteral that always carries an internal_word_type relocation,
+// whatever the target is.
+class InternalAddress: public AddressLiteral {
+ public:
+  InternalAddress(address target)
+    : AddressLiteral(target, relocInfo::internal_word_type) {}
+};
+
+// Inlined constants, for use with ldr_literal / bind_literal.
+// Note: InlinedInteger is not supported (use mov_slow(Register, int[, cond])).
+class InlinedLiteral: StackObj {
+ public:
+  // Must be public: it is accessed directly with &.
+  Label label;
+  InlinedLiteral() {}
+};
+
+// Inlined literal that carries a Metadata pointer.
+class InlinedMetadata: public InlinedLiteral {
+ private:
+  Metadata *_data;
+
+ public:
+  InlinedMetadata(Metadata *data): InlinedLiteral(), _data(data) {}
+
+  Metadata *data() { return _data; }
+};
+
+// Currently unused
+// class InlinedOop: public InlinedLiteral {
+// private:
+// jobject _jobject;
+//
+// public:
+// InlinedOop(jobject target): InlinedLiteral() {
+// _jobject = target;
+// }
+// jobject jobject() { return _jobject; }
+// };
+
+// Inlined literal that carries an address together with its relocation.
+class InlinedAddress: public InlinedLiteral {
+ private:
+  AddressLiteral _literal;
+
+ public:
+  // Not to be used for oops: use mov_oop (or implement InlinedOop).
+  InlinedAddress(jobject object): InlinedLiteral(), _literal((address)object, relocInfo::oop_type) {
+    ShouldNotReachHere();
+  }
+
+  // Not to be used for metadata: use InlinedMetadata or mov_metadata.
+  InlinedAddress(Metadata *data): InlinedLiteral(), _literal((address)data, relocInfo::metadata_type) {
+    ShouldNotReachHere();
+  }
+
+  InlinedAddress(address target, const RelocationHolder &rspec): InlinedLiteral(), _literal(target, rspec) {
+    assert(rspec.type() != relocInfo::oop_type, "Do not use InlinedAddress for oops");
+    assert(rspec.type() != relocInfo::metadata_type, "Do not use InlinedAddress for metadatas");
+  }
+
+  InlinedAddress(address target, relocInfo::relocType rtype): InlinedLiteral(), _literal(target, rtype) {
+    assert(rtype != relocInfo::oop_type, "Do not use InlinedAddress for oops");
+    assert(rtype != relocInfo::metadata_type, "Do not use InlinedAddress for metadatas");
+  }
+
+  // Note: default is relocInfo::none for InlinedAddress
+  InlinedAddress(address target): InlinedLiteral(), _literal(target, relocInfo::none) {}
+
+  address target() { return _literal.target(); }
+  const RelocationHolder& rspec() const { return _literal.rspec(); }
+};
+
+// Inlined literal that carries a C string (message) pointer.
+class InlinedString: public InlinedLiteral {
+ private:
+  const char* _msg;
+
+ public:
+  InlinedString(const char* msg): InlinedLiteral(), _msg(msg) {}
+
+  const char* msg() { return _msg; }
+};
+
+class MacroAssembler: public Assembler {
+protected:
+
+ // Support for VM calls
+ //
+
+ // This is the base routine called by the different versions of call_VM_leaf.
+ void call_VM_leaf_helper(address entry_point, int number_of_arguments);
+
+ // This is the base routine called by the different versions of call_VM. The interpreter
+ // may customize this version by overriding it for its purposes (e.g., to save/restore
+ // additional registers when doing a VM call).
+ virtual void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
+
+ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+ // The implementation is only non-empty for the InterpreterMacroAssembler,
+ // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+ virtual void check_and_handle_popframe() {}
+ virtual void check_and_handle_earlyret() {}
+
+public:
+
+ MacroAssembler(CodeBuffer* code) : Assembler(code) {}
+
+ // By default, we do not need relocation information for non
+ // patchable absolute addresses. However, when needed by some
+ // extensions, ignore_non_patchable_relocations can be modified,
+ // returning false to preserve all relocation information.
+ inline bool ignore_non_patchable_relocations() { return true; }
+
+ // Initially added to the Assembler interface as a pure virtual:
+ // RegisterConstant delayed_value(..)
+ // for:
+ // 6812678 macro assembler needs delayed binding of a few constants (for 6655638)
+ // this was subsequently modified to its present name and return type
+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset);
+
+#ifdef AARCH64
+# define NOT_IMPLEMENTED() unimplemented("NYI at " __FILE__ ":" XSTR(__LINE__))
+# define NOT_TESTED() warn("Not tested at " __FILE__ ":" XSTR(__LINE__))
+#endif
+
+ void align(int modulus);
+
+ // Support for VM calls
+ //
+ // It is imperative that all calls into the VM are handled via the call_VM methods.
+ // They make sure that the stack linkage is setup correctly. call_VM's correspond
+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
+
+ void call_VM(Register oop_result, address entry_point, bool check_exceptions = true);
+ void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
+ void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+ void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+
+ // The following methods are required by templateTable.cpp,
+ // but not used on ARM.
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+
+ // Note: The super_call_VM calls are not used on ARM
+
+ // Raw call, without saving/restoring registers, exception handling, etc.
+ // Mainly used from various stubs.
+ // Note: if 'save_R9_if_scratched' is true, call_VM may on some
+ // platforms save values on the stack. Set it to false (and handle
+ // R9 in the callers) if the top of the stack must not be modified
+ // by call_VM.
+ void call_VM(address entry_point, bool save_R9_if_scratched);
+
+ void call_VM_leaf(address entry_point);
+ void call_VM_leaf(address entry_point, Register arg_1);
+ void call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+ void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+ void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
+
+ void get_vm_result(Register oop_result, Register tmp);
+ void get_vm_result_2(Register metadata_result, Register tmp);
+
+ // Always sets/resets sp, which defaults to SP if (last_sp == noreg)
+ // Optionally sets/resets fp (use noreg to avoid setting it)
+ // Always sets/resets pc on AArch64; optionally sets/resets pc on 32-bit ARM depending on save_last_java_pc flag
+ // Note: when saving PC, set_last_Java_frame returns PC's offset in the code section
+ // (for oop_maps offset computation)
+ int set_last_Java_frame(Register last_sp, Register last_fp, bool save_last_java_pc, Register tmp);
+ void reset_last_Java_frame(Register tmp);
+ // status set in set_last_Java_frame for reset_last_Java_frame
+ bool _fp_saved;
+ bool _pc_saved;
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) __ stop(error)
+#else // NOTE(review): the non-PRODUCT STOP below expands to two statements; keep it out of unbraced if/else bodies
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define STOP(error) __ block_comment(error); __ stop(error)
+#endif
+
+ void lookup_virtual_method(Register recv_klass,
+ Register vtable_index,
+ Register method_result);
+
+ // Test sub_klass against super_klass, with fast and slow paths.
+
+ // The fast path produces a tri-state answer: yes / no / maybe-slow.
+ // One of the three labels can be NULL, meaning take the fall-through.
+ // No registers are killed, except temp_regs.
+ void check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp_reg2,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path);
+
+ // The rest of the type check; must be wired to a corresponding fast path.
+ // It does not repeat the fast path logic, so don't use it standalone.
+ // temp_reg3 can be noreg, if no temps are available.
+ // Updates the sub's secondary super cache as necessary.
+ // If set_cond_codes:
+ // - condition codes will be Z on success, NZ on failure.
+ // - temp_reg will be 0 on success, non-0 on failure
+ void check_klass_subtype_slow_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp_reg2,
+ Register temp_reg3, // auto assigned if noreg
+ Label* L_success,
+ Label* L_failure,
+ bool set_cond_codes = false);
+
+ // Simplified, combined version, good for typical uses.
+ // temp_reg3 can be noreg, if no temps are available. It is used only on slow path.
+ // Falls through on failure.
+ void check_klass_subtype(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp_reg2,
+ Register temp_reg3, // auto assigned on slow path if noreg
+ Label& L_success);
+
+ // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
+ Address receiver_argument_address(Register params_base, Register params_count, Register tmp);
+
+ void _verify_oop(Register reg, const char* s, const char* file, int line);
+ void _verify_oop_addr(Address addr, const char * s, const char* file, int line);
+
+ // TODO: verify method and klass metadata (compare against vptr?)
+ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
+ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}
+
+#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__)
+#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop " #addr, __FILE__, __LINE__) // #addr names the checked operand, as #reg does in the sibling macros
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+ void null_check(Register reg, Register tmp, int offset = -1);
+ inline void null_check(Register reg) { null_check(reg, noreg, -1); } // for C1 lir_null_check
+
+ // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+ void eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
+ RegisterOrConstant size_expression, Label& slow_case);
+ void tlab_allocate(Register obj, Register obj_end, Register tmp1,
+ RegisterOrConstant size_expression, Label& slow_case);
+
+ void tlab_refill(Register top, Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+ Label& try_eden, Label& slow_case);
+ void zero_memory(Register start, Register end, Register tmp);
+
+ void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp);
+
+ static bool needs_explicit_null_check(intptr_t offset);
+
+ void arm_stack_overflow_check(int frame_size_in_bytes, Register tmp);
+ void arm_stack_overflow_check(Register Rsize, Register tmp);
+
+ void bang_stack_with_offset(int offset) {
+ ShouldNotReachHere();
+ }
+
+ // Biased locking support
+ // lock_reg and obj_reg must be loaded up with the appropriate values.
+ // swap_reg must be supplied.
+ // tmp_reg must be supplied.
+ // Optional slow case is for implementations (interpreter and C1) which branch to
+ // slow case directly. If slow_case is NULL, then leaves condition
+ // codes set (for C2's Fast_Lock node) and jumps to done label.
+ // Falls through for the fast locking attempt.
+ // Returns offset of first potentially-faulting instruction for null
+ // check info (currently consumed only by C1). If
+ // swap_reg_contains_mark is true then returns -1 as it is assumed
+ // the calling code has already passed any potential faults.
+ // Notes:
+ // - swap_reg and tmp_reg are scratched
+ // - Rtemp was (implicitly) scratched and can now be specified as the tmp2
+ int biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
+ bool swap_reg_contains_mark,
+ Register tmp2,
+ Label& done, Label& slow_case,
+ BiasedLockingCounters* counters = NULL);
+ void biased_locking_exit(Register obj_reg, Register temp_reg, Label& done);
+
+ // Building block for CAS cases of biased locking: makes CAS and records statistics.
+ // Optional slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
+ void biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
+ Register tmp, Label& slow_case, int* counter_addr);
+
+#ifndef AARCH64
+ void nop() { mov(R0, R0); }  // emitted as a move of R0 onto itself
+
+ // Stores rd at SP - wordSize, pre-indexed (the store updates SP).
+ void push(Register rd, AsmCondition cond = al) {
+   assert(rd != SP, "unpredictable instruction");
+   str(rd, Address(SP, -wordSize, pre_indexed), cond);
+ }
+
+ // Stores the registers of reg_set with stmdb on SP, with writeback.
+ void push(RegisterSet reg_set, AsmCondition cond = al) {
+   assert(!reg_set.contains(SP), "unpredictable instruction");
+   stmdb(SP, reg_set, writeback, cond);
+ }
+
+ // Loads rd from SP, post-indexed by wordSize.
+ void pop(Register rd, AsmCondition cond = al) {
+   assert(rd != SP, "unpredictable instruction");
+   ldr(rd, Address(SP, wordSize, post_indexed), cond);
+ }
+
+ // Loads the registers of reg_set with ldmia on SP, with writeback.
+ void pop(RegisterSet reg_set, AsmCondition cond = al) {
+   assert(!reg_set.contains(SP), "unpredictable instruction");
+   ldmia(SP, reg_set, writeback, cond);
+ }
+
+ // Single FloatRegister push/pop on SP; the d/s suffix selects the fstmdbd/fldmiad or fstmdbs/fldmias form.
+ void fpushd(FloatRegister fd, AsmCondition cond = al) {
+   fstmdbd(SP, FloatRegisterSet(fd), writeback, cond);
+ }
+ void fpushs(FloatRegister fd, AsmCondition cond = al) {
+   fstmdbs(SP, FloatRegisterSet(fd), writeback, cond);
+ }
+ void fpopd(FloatRegister fd, AsmCondition cond = al) {
+   fldmiad(SP, FloatRegisterSet(fd), writeback, cond);
+ }
+ void fpops(FloatRegister fd, AsmCondition cond = al) {
+   fldmias(SP, FloatRegisterSet(fd), writeback, cond);
+ }
+#endif // !AARCH64
+
+ // Order access primitives: one bit per ordering constraint.
+ enum Membar_mask_bits {
+   LoadLoad   = 1 << 0,
+   StoreLoad  = 1 << 1,
+   LoadStore  = 1 << 2,
+   StoreStore = 1 << 3
+ };
+
+#ifdef AARCH64
+ // tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM
+ void membar(Membar_mask_bits order_constraint, Register tmp = noreg);
+#else
+ void membar(Membar_mask_bits mask,
+ Register tmp,
+ bool preserve_flags = true,
+ Register load_tgt = noreg);
+#endif
+
+ void breakpoint(AsmCondition cond = al);
+ void stop(const char* msg);
+ // prints msg and continues
+ void warn(const char* msg);
+ void unimplemented(const char* what = "");
+ void should_not_reach_here() { stop("should not reach here"); }
+ static void debug(const char* msg, const intx* registers);
+
+ // Creates a walkable frame (to help track down the caller), then stops. Returns the frame size in words.
+ int should_not_call_this() {
+   const int frame_size_in_words = 2;  // FP + LR, pushed just below
+   raw_push(FP, LR);
+   should_not_reach_here();
+   flush();
+   return frame_size_in_words;
+ }
+
+ int save_all_registers();
+ void restore_all_registers();
+ int save_caller_save_registers();
+ void restore_caller_save_registers();
+
+ void add_rc(Register dst, Register arg1, RegisterOrConstant arg2);
+
+ // add_slow and mov_slow are used to manipulate offsets larger than 1024,
+ // these functions are not expected to handle all possible constants,
+ // only those that can really occur during compilation
+ void add_slow(Register rd, Register rn, int c);
+ void sub_slow(Register rd, Register rn, int c);
+
+#ifdef AARCH64
+ static int mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm /* optional */);
+#endif
+
+ void mov_slow(Register rd, intptr_t c NOT_AARCH64_ARG(AsmCondition cond = al));
+ void mov_slow(Register rd, const char *string);
+ void mov_slow(Register rd, address addr);
+
+ void patchable_mov_oop(Register rd, jobject o, int oop_index) {
+ mov_oop(rd, o, oop_index AARCH64_ONLY_ARG(true));
+ }
+ void mov_oop(Register rd, jobject o, int index = 0
+ AARCH64_ONLY_ARG(bool patchable = false)
+ NOT_AARCH64_ARG(AsmCondition cond = al));
+
+
+ void patchable_mov_metadata(Register rd, Metadata* o, int index) {
+ mov_metadata(rd, o, index AARCH64_ONLY_ARG(true));
+ }
+ void mov_metadata(Register rd, Metadata* o, int index = 0 AARCH64_ONLY_ARG(bool patchable = false));
+
+ void mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond = al));
+ void mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond = al));
+
+#ifdef AARCH64
+ int mov_pc_to(Register rd) {
+   Label after_adr;
+   adr(rd, after_adr);  // rd = address of the label bound just below
+   bind(after_adr);
+   return offset();     // code offset at that label
+ }
+#endif
+
+ // Note: this variant of mov_address assumes the address moves with
+ // the code. Do *not* implement it with non-relocated instructions,
+ // unless PC-relative.
+#ifdef AARCH64
+ void mov_relative_address(Register rd, address addr) {
+ adr(rd, addr);
+ }
+#else
+ void mov_relative_address(Register rd, address addr, AsmCondition cond = al) {
+   int offset = addr - pc() - 8;  // relative to PC as read by the emitted instruction (its address + 8)
+   assert((offset & 3) == 0, "bad alignment");
+   if (offset < 0) {
+     assert(AsmOperand::is_rotated_imm(-offset), "addr too far");
+     sub(rd, PC, -offset, cond);
+   } else {
+     assert(AsmOperand::is_rotated_imm(offset), "addr too far");
+     add(rd, PC, offset, cond);
+   }
+ }
+#endif // AARCH64
+
+ // Runtime address that may vary from one execution to another. The
+ // symbolic_reference describes what the address is, allowing
+ // the address to be resolved in a different execution context.
+ // Warning: do not implement as a PC relative address.
+ void mov_address(Register rd, address addr, symbolic_Relocation::symbolic_reference t) {
+ mov_address(rd, addr, RelocationHolder::none); // t is not used here: the address is emitted without relocation
+ }
+
+ // rspec can be RelocationHolder::none (for ignored symbolic_Relocation).
+ // In that case, the address is absolute and the generated code need
+ // not be relocatable.
+ void mov_address(Register rd, address addr, RelocationHolder const& rspec) {
+ assert(rspec.type() != relocInfo::runtime_call_type, "do not use mov_address for runtime calls");
+ assert(rspec.type() != relocInfo::static_call_type, "do not use mov_address for relocable calls");
+ if (rspec.type() == relocInfo::none) {
+ // absolute address, relocation not needed
+ mov_slow(rd, (intptr_t)addr);
+ return;
+ }
+#ifndef AARCH64
+ if (VM_Version::supports_movw()) {
+ relocate(rspec);
+ int c = (int)addr;
+ movw(rd, c & 0xffff); // low 16 bits
+ if ((unsigned int)c >> 16) {
+ movt(rd, (unsigned int)c >> 16); // high 16 bits, emitted only when non-zero
+ }
+ return;
+ }
+#endif
+ Label skip_literal; // fallback: load the address from a literal emitted inline, then branch over it
+ InlinedAddress addr_literal(addr, rspec);
+ ldr_literal(rd, addr_literal);
+ b(skip_literal);
+ bind_literal(addr_literal);
+ // AARCH64 WARNING: because of alignment padding, extra padding
+ // may be required to get a consistent size for C2, or rules must
+ // overestimate size see MachEpilogNode::size
+ bind(skip_literal);
+ }
+
+ // Note: Do not define mov_address for a Label
+ //
+ // Loads from addresses potentially within the code are now handled by
+ // InlinedLiteral subclasses (to allow more flexibility on how the
+ // ldr_literal is performed).
+
+ void ldr_literal(Register rd, InlinedAddress& L) {
+   assert(L.rspec().type() != relocInfo::runtime_call_type, "avoid ldr_literal for calls");
+   assert(L.rspec().type() != relocInfo::static_call_type, "avoid ldr_literal for calls");
+   relocate(L.rspec());  // the relocation is attached to the load instruction
+#ifndef AARCH64
+   ldr(rd, Address(PC, target(L.label) - pc() - 8));
+#else
+   ldr(rd, target(L.label));
+#endif
+ }
+
+ void ldr_literal(Register rd, InlinedString& L) {
+ const char* msg = L.msg();
+ if (code()->consts()->contains((address)msg)) {
+ // string address moves with the code
+#ifdef AARCH64
+ ldr(rd, (address)msg);
+#else
+ ldr(rd, Address(PC, ((address)msg) - pc() - 8));
+#endif
+ return; // NOTE(review): this path loads from msg itself, the path below loads the pointer emitted by bind_literal - confirm intended
+ }
+ // Warning: use external strings with care. They are not relocated
+ // if the code moves. If needed, use code_string to move them
+ // to the consts section.
+#ifdef AARCH64
+ ldr(rd, target(L.label));
+#else
+ ldr(rd, Address(PC, target(L.label) - pc() - 8));
+#endif
+ }
+
+ void ldr_literal(Register rd, InlinedMetadata& L) {
+   // relocation done in the bind_literal for metadatas
+#ifndef AARCH64
+   ldr(rd, Address(PC, target(L.label) - pc() - 8));
+#else
+   ldr(rd, target(L.label));
+#endif
+ }
+
+ void bind_literal(InlinedAddress& L) {
+ AARCH64_ONLY(align(wordSize)); // AArch64 only: align the literal to wordSize
+ bind(L.label);
+ assert(L.rspec().type() != relocInfo::metadata_type, "Must use InlinedMetadata");
+ // We currently do not use oop 'bound' literals.
+ // If the code evolves and the following assert is triggered,
+ // we need to implement InlinedOop (see InlinedMetadata).
+ assert(L.rspec().type() != relocInfo::oop_type, "Inlined oops not supported");
+ // Note: relocation is handled by relocate calls in ldr_literal
+ AbstractAssembler::emit_address((address)L.target()); // the literal itself: the target address, emitted at L.label
+ }
+
+ void bind_literal(InlinedString& L) {
+   address str_addr = (address)L.msg();
+   // For a string located in the consts section nothing is emitted. The Label
+   // should then not be used, so it is left unbound to detect errors.
+   if (!code()->consts()->contains(str_addr)) {
+     // Otherwise emit the string's address as the literal.
+     AARCH64_ONLY(align(wordSize));
+     bind(L.label);
+     AbstractAssembler::emit_address(str_addr);
+   }
+ }
+
+ void bind_literal(InlinedMetadata& L) {
+ AARCH64_ONLY(align(wordSize)); // AArch64 only: align the literal to wordSize
+ bind(L.label);
+ relocate(metadata_Relocation::spec_for_immediate()); // unlike InlinedAddress, the relocation is recorded here, on the literal
+ AbstractAssembler::emit_address((address)L.data());
+ }
+
+ void load_mirror(Register mirror, Register method, Register tmp);
+
+ // Porting layer between 32-bit ARM and AArch64
+
+#define COMMON_INSTR_1(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg_type) \
+ void common_mnemonic(arg_type arg) { \
+ AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg); \
+ }
+ // COMMON_INSTR_n defines a method common_mnemonic forwarding its n arguments to aarch64_mnemonic on AArch64, arm32_mnemonic otherwise.
+#define COMMON_INSTR_2(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg1_type, arg2_type) \
+ void common_mnemonic(arg1_type arg1, arg2_type arg2) { \
+ AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg1, arg2); \
+ }
+ // Three-argument form.
+#define COMMON_INSTR_3(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg1_type, arg2_type, arg3_type) \
+ void common_mnemonic(arg1_type arg1, arg2_type arg2, arg3_type arg3) { \
+ AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg1, arg2, arg3); \
+ }
+
+ COMMON_INSTR_1(jump, br, bx, Register)  // register-indirect jump: br on AArch64, bx on 32-bit ARM
+ COMMON_INSTR_1(call, blr, blx, Register)  // register-indirect call: blr on AArch64, blx on 32-bit ARM
+ // 32-bit integer helpers: each *_32 name maps to the AArch64 "_w" form (ldrsw for ldr_s32) and to the unsuffixed 32-bit ARM mnemonic.
+ COMMON_INSTR_2(cbz_32, cbz_w, cbz, Register, Label&)
+ COMMON_INSTR_2(cbnz_32, cbnz_w, cbnz, Register, Label&)
+
+ COMMON_INSTR_2(ldr_u32, ldr_w, ldr, Register, Address)  // the u32/s32 distinction only selects a different instruction on AArch64;
+ COMMON_INSTR_2(ldr_s32, ldrsw, ldr, Register, Address)  // on 32-bit ARM both map to a plain ldr
+ COMMON_INSTR_2(str_32, str_w, str, Register, Address)
+
+ COMMON_INSTR_2(mvn_32, mvn_w, mvn, Register, Register)
+ COMMON_INSTR_2(cmp_32, cmp_w, cmp, Register, Register)
+ COMMON_INSTR_2(neg_32, neg_w, neg, Register, Register)
+ COMMON_INSTR_2(clz_32, clz_w, clz, Register, Register)
+ COMMON_INSTR_2(rbit_32, rbit_w, rbit, Register, Register)
+
+ COMMON_INSTR_2(cmp_32, cmp_w, cmp, Register, int)
+ COMMON_INSTR_2(cmn_32, cmn_w, cmn, Register, int)
+ // Three-operand forms with a register as the last operand
+ COMMON_INSTR_3(add_32, add_w, add, Register, Register, Register)
+ COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, Register)
+ COMMON_INSTR_3(subs_32, subs_w, subs, Register, Register, Register)
+ COMMON_INSTR_3(mul_32, mul_w, mul, Register, Register, Register)
+ COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, Register)
+ COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, Register)
+ COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, Register)
+ // Three-operand forms with an AsmOperand as the last operand
+ COMMON_INSTR_3(add_32, add_w, add, Register, Register, AsmOperand)
+ COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, AsmOperand)
+ COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, AsmOperand)
+ COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, AsmOperand)
+ COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, AsmOperand)
+
+ // Three-operand forms with an int immediate as the last operand
+ COMMON_INSTR_3(add_32, add_w, add, Register, Register, int)
+ COMMON_INSTR_3(adds_32, adds_w, adds, Register, Register, int)
+ COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, int)
+ COMMON_INSTR_3(subs_32, subs_w, subs, Register, Register, int)
+
+ COMMON_INSTR_2(tst_32, tst_w, tst, Register, unsigned int)
+ COMMON_INSTR_2(tst_32, tst_w, tst, Register, AsmOperand)
+ // Three-operand logical forms with an unsigned immediate as the last operand
+ COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, uint)
+ COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, uint)
+ COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, uint)
+
+ COMMON_INSTR_1(cmp_zero_float, fcmp0_s, fcmpzs, FloatRegister)  // floating-point helpers: AArch64 mnemonic first, 32-bit ARM mnemonic second
+ COMMON_INSTR_1(cmp_zero_double, fcmp0_d, fcmpzd, FloatRegister)
+ // "_float" helpers (the "_s" / "...s" instruction variants)
+ COMMON_INSTR_2(ldr_float, ldr_s, flds, FloatRegister, Address)
+ COMMON_INSTR_2(str_float, str_s, fsts, FloatRegister, Address)
+ COMMON_INSTR_2(mov_float, fmov_s, fcpys, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(neg_float, fneg_s, fnegs, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(abs_float, fabs_s, fabss, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(sqrt_float, fsqrt_s, fsqrts, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(cmp_float, fcmp_s, fcmps, FloatRegister, FloatRegister)
+
+ COMMON_INSTR_3(add_float, fadd_s, fadds, FloatRegister, FloatRegister, FloatRegister)
+ COMMON_INSTR_3(sub_float, fsub_s, fsubs, FloatRegister, FloatRegister, FloatRegister)
+ COMMON_INSTR_3(mul_float, fmul_s, fmuls, FloatRegister, FloatRegister, FloatRegister)
+ COMMON_INSTR_3(div_float, fdiv_s, fdivs, FloatRegister, FloatRegister, FloatRegister)
+ // "_double" helpers (the "_d" / "...d" instruction variants)
+ COMMON_INSTR_2(ldr_double, ldr_d, fldd, FloatRegister, Address)
+ COMMON_INSTR_2(str_double, str_d, fstd, FloatRegister, Address)
+ COMMON_INSTR_2(mov_double, fmov_d, fcpyd, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(neg_double, fneg_d, fnegd, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(cmp_double, fcmp_d, fcmpd, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(abs_double, fabs_d, fabsd, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(sqrt_double, fsqrt_d, fsqrtd, FloatRegister, FloatRegister)
+
+ COMMON_INSTR_3(add_double, fadd_d, faddd, FloatRegister, FloatRegister, FloatRegister)
+ COMMON_INSTR_3(sub_double, fsub_d, fsubd, FloatRegister, FloatRegister, FloatRegister)
+ COMMON_INSTR_3(mul_double, fmul_d, fmuld, FloatRegister, FloatRegister, FloatRegister)
+ COMMON_INSTR_3(div_double, fdiv_d, fdivd, FloatRegister, FloatRegister, FloatRegister)
+
+ COMMON_INSTR_2(convert_f2d, fcvt_ds, fcvtds, FloatRegister, FloatRegister)
+ COMMON_INSTR_2(convert_d2f, fcvt_sd, fcvtsd, FloatRegister, FloatRegister)
+
+ COMMON_INSTR_2(mov_fpr2gpr_float, fmov_ws, fmrs, Register, FloatRegister)  // destination is a general-purpose Register, source a FloatRegister
+
+#undef COMMON_INSTR_1
+#undef COMMON_INSTR_2
+#undef COMMON_INSTR_3
+
+
+#ifdef AARCH64
+
+ void mov(Register dst, Register src, AsmCondition cond) {
+ if (cond != al) {
+ csel(dst, src, dst, cond);  // dst = cond ? src : dst
+ return;
+ }
+ mov(dst, src);  // the "always" condition needs no conditional select
+ }
+
+ // Propagate the two-operand "mov" overloads from Assembler so that they sit alongside the conditional forms declared in this class.
+ void mov(Register dst, Register src) { Assembler::mov(dst, src); }
+ void mov(Register rd, int imm) { Assembler::mov(rd, imm); }
+
+ void mov(Register dst, int imm, AsmCondition cond) {
+ assert(imm == 0 || imm == 1 || imm == -1, "only 0, 1 and -1 are supported");  // must agree with the cases handled below
+ if (imm == 0) {
+ mov(dst, ZR, cond);  // dst = cond ? 0 : dst
+ } else if (imm == 1) {
+ csinc(dst, dst, ZR, inverse(cond));  // dst = !cond ? dst : ZR + 1
+ } else if (imm == -1) {
+ csinv(dst, dst, ZR, inverse(cond));  // dst = !cond ? dst : ~ZR
+ } else {
+ fatal("illegal mov(R%d,%d,cond)", dst->encoding(), imm);  // product-build backstop for any other immediate
+ }
+ }
+
+ void movs(Register dst, Register src) { adds(dst, src, 0); }  // AArch64 "movs": implemented as adds with a zero immediate
+
+#else // AARCH64
+
+ void tbz(Register rt, int bit, Label& L) {
+ assert(0 <= bit && bit < BitsPerWord, "bit number is out of range");
+ tst(rt, 1 << bit);  // 32-bit ARM stand-in for tbz: a tst of the selected bit (this goes through the condition flags) ...
+ b(L, eq);  // ... followed by a branch taken when the bit is clear
+ }
+
+ void tbnz(Register rt, int bit, Label& L) {
+ assert(0 <= bit && bit < BitsPerWord, "bit number is out of range");
+ tst(rt, 1 << bit);  // same single-bit test as in tbz
+ b(L, ne);  // branch taken when the bit is set
+ }
+
+ void cbz(Register rt, Label& L) {
+ cmp(rt, 0);  // 32-bit ARM stand-in for compare-and-branch-on-zero: compare rt with 0 ...
+ b(L, eq);  // ... then branch if equal
+ }
+
+ void cbz(Register rt, address target) {
+ cmp(rt, 0);
+ b(target, eq);  // same sequence, branching to a code address instead of a Label
+ }
+
+ void cbnz(Register rt, Label& L) {
+ cmp(rt, 0);
+ b(L, ne);  // branch if rt is non-zero
+ }
+
+ void ret(Register dst = LR) {
+ bx(dst);  // return is a bx through dst (LR unless the caller passes another register)
+ }
+
+#endif // AARCH64
+
+ Register zero_register(Register tmp) {
+ // Yields a register whose value is zero.
+ // AArch64: returns ZR; no code is emitted and tmp is not written.
+ // 32-bit ARM: emits a move of 0 into tmp and returns tmp.
+ // Callers must therefore treat tmp as potentially clobbered.
+ NOT_AARCH64(mov(tmp, 0));
+ return AARCH64_ONLY(ZR) NOT_AARCH64(tmp);
+ }
+
+ void logical_shift_left(Register dst, Register src, int shift) {
+ // dst = src shifted left by the constant 'shift'.
+ // AArch64 goes through _lsl; 32-bit ARM uses a mov whose source
+ // operand is src with an lsl shift applied.
+ AARCH64_ONLY(_lsl(dst, src, shift));
+ NOT_AARCH64(mov(dst, AsmOperand(src, lsl, shift)));
+ }
+
+ void logical_shift_left_32(Register dst, Register src, int shift) {
+ // 32-bit flavour of logical_shift_left.
+ // AArch64 goes through _lsl_w; the 32-bit ARM sequence is the same
+ // mov with an lsl-shifted operand as in logical_shift_left.
+ AARCH64_ONLY(_lsl_w(dst, src, shift));
+ NOT_AARCH64(mov(dst, AsmOperand(src, lsl, shift)));
+ }
+
+ void logical_shift_right(Register dst, Register src, int shift) {
+ // dst = src shifted right (logical) by the constant 'shift'.
+ // AArch64 goes through _lsr; 32-bit ARM uses a mov whose source
+ // operand is src with an lsr shift applied.
+ AARCH64_ONLY(_lsr(dst, src, shift));
+ NOT_AARCH64(mov(dst, AsmOperand(src, lsr, shift)));
+ }
+
+ void arith_shift_right(Register dst, Register src, int shift) {
+ // dst = src shifted right (arithmetic) by the constant 'shift'.
+ // AArch64 goes through _asr; 32-bit ARM uses a mov whose source
+ // operand is src with an asr shift applied.
+ AARCH64_ONLY(_asr(dst, src, shift));
+ NOT_AARCH64(mov(dst, AsmOperand(src, asr, shift)));
+ }
+
+ void asr_32(Register dst, Register src, int shift) {
+ // 32-bit flavour of arith_shift_right.
+ // AArch64 goes through _asr_w; the 32-bit ARM sequence is the same
+ // mov with an asr-shifted operand as in arith_shift_right.
+ AARCH64_ONLY(_asr_w(dst, src, shift));
+ NOT_AARCH64(mov(dst, AsmOperand(src, asr, shift)));
+ }
+
+ // Compares r1 with r2 only when <cond> currently holds; when it does not, the flags end up such that <cond> still does not hold.
+ void cond_cmp(Register r1, Register r2, AsmCondition cond) {
+ // AArch64: ccmp, with flags_for_condition(inverse(cond)) as the flag
+ // value used when cond fails.
+ // 32-bit ARM: a cmp that is itself executed under cond.
+ AARCH64_ONLY(ccmp(r1, r2, flags_for_condition(inverse(cond)), cond));
+ NOT_AARCH64(cmp(r1, r2, cond));
+ }
+
+ // Compares r with imm only when <cond> currently holds; when it does not, the flags end up such that <cond> still does not hold.
+ void cond_cmp(Register r, int imm, AsmCondition cond) {
+ // AArch64: ccmp, with flags_for_condition(inverse(cond)) as the flag
+ // value used when cond fails.
+ // 32-bit ARM: a cmp that is itself executed under cond.
+ AARCH64_ONLY(ccmp(r, imm, flags_for_condition(inverse(cond)), cond));
+ NOT_AARCH64(cmp(r, imm, cond));
+ }
+
+ void align_reg(Register dst, Register src, int align) {
+ // dst = src with its low bits cleared, i.e. rounded down to a multiple of align.
+ assert (is_power_of_2(align), "should be");
+ const int low_bits = align - 1;
+ // AArch64 ANDs with the inverted mask; 32-bit ARM bit-clears with the mask itself.
+ AARCH64_ONLY(andr(dst, src, ~(uintx)low_bits));
+ NOT_AARCH64(bic(dst, src, low_bits));
+ }
+
+ void prefetch_read(Address addr) {
+ // Emits a single prefetch instruction for addr.
+ // AArch64: prfm with the pldl1keep operation.
+ // 32-bit ARM: pld.
+ AARCH64_ONLY(prfm(pldl1keep, addr));
+ NOT_AARCH64(pld(addr));
+ }
+
+ void raw_push(Register r1, Register r2) {  // pushes the pair r1, r2 onto the stack
+#ifdef AARCH64
+ stp(r1, r2, Address(SP, -2*wordSize, pre_indexed));  // pre-indexed store pair: SP moves down two words, then both are stored
+#else
+ assert(r1->encoding() < r2->encoding(), "should be ordered");  // 32-bit ARM path requires ascending register numbers
+ push(RegisterSet(r1) | RegisterSet(r2));
+#endif
+ }
+
+ void raw_pop(Register r1, Register r2) {  // counterpart of raw_push(r1, r2)
+#ifdef AARCH64
+ ldp(r1, r2, Address(SP, 2*wordSize, post_indexed));  // post-indexed load pair: both are loaded, then SP moves up two words
+#else
+ assert(r1->encoding() < r2->encoding(), "should be ordered");
+ pop(RegisterSet(r1) | RegisterSet(r2));
+#endif
+ }
+
+ void raw_push(Register r1, Register r2, Register r3) {
+#ifdef AARCH64
+ raw_push(r1, r2);
+ raw_push(r3, ZR);  // third register is pushed as a pair with ZR, so four words are used in total
+#else
+ assert(r1->encoding() < r2->encoding() && r2->encoding() < r3->encoding(), "should be ordered");
+ push(RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3));
+#endif
+ }
+
+ void raw_pop(Register r1, Register r2, Register r3) {
+#ifdef AARCH64
+ raw_pop(r3, ZR);  // pairs are popped in the reverse order of raw_push(r1, r2, r3); the ZR slot is discarded
+ raw_pop(r1, r2);
+#else
+ assert(r1->encoding() < r2->encoding() && r2->encoding() < r3->encoding(), "should be ordered");
+ pop(RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3));
+#endif
+ }
+
+ // Undoes raw_push(r1, r2, ret_addr): restores r1 and r2 and returns to the saved return address. LR is overwritten on AArch64.
+ void raw_pop_and_ret(Register r1, Register r2) {
+ // 32-bit ARM pops the saved return address straight into PC, so the pop
+ // is itself the return. AArch64 pops it into LR and follows up with an
+ // explicit ret.
+ const Register ret_reg = AARCH64_ONLY(LR) NOT_AARCH64(PC);
+ raw_pop(r1, r2, ret_reg);
+ AARCH64_ONLY(ret());
+ }
+
+ void indirect_jump(Address addr, Register scratch) {
+ // Jumps to the address stored at addr.
+ // 32-bit ARM loads it straight into PC and leaves scratch unused;
+ // AArch64 loads it into scratch and branches through that register.
+ const Register dest = AARCH64_ONLY(scratch) NOT_AARCH64(PC);
+ ldr(dest, addr);
+ AARCH64_ONLY(br(scratch));
+ }
+
+ void indirect_jump(InlinedAddress& literal, Register scratch) {
+ // Jumps to the address held in an inlined literal, fetched with
+ // ldr_literal. 32-bit ARM loads it straight into PC and leaves scratch
+ // unused; AArch64 loads it into scratch and branches through it.
+ const Register dest = AARCH64_ONLY(scratch) NOT_AARCH64(PC);
+ ldr_literal(dest, literal);
+ AARCH64_ONLY(br(scratch));
+ }
+
+#ifndef AARCH64
+ void neg(Register dst, Register src) {  // 32-bit ARM only; supplies the "neg" that COMMON_INSTR_2(neg_32, ...) forwards to
+ rsb(dst, src, 0);  // reverse subtract with immediate 0, i.e. dst = 0 - src
+ }
+#endif
+
+ void branch_if_negative_32(Register r, Label& L) {
+ // Branches to L when the 32-bit value in r is negative.
+ // AArch64 note (also applies to branch_if_any_negative_32): tst + b.mi is used rather than tbnz because the
+ // destination may be out of tbnz range (+-32KB); these methods are used in LIR_Assembler::emit_arraycopy() to jump to a stub entry.
+ tst_32(r, r);  // flags from r & r, i.e. from r itself
+ b(L, mi);  // mi: taken when the tested value was negative
+ }
+
+ void branch_if_any_negative_32(Register r1, Register r2, Register tmp, Label& L) {
+#ifdef AARCH64
+ orr_32(tmp, r1, r2);  // the OR has its sign bit set iff r1 or r2 has; tmp is clobbered
+ tst_32(tmp, tmp);
+#else
+ orrs(tmp, r1, r2);  // flag-setting OR, so no separate test is needed
+#endif
+ b(L, mi);
+ }
+
+ void branch_if_any_negative_32(Register r1, Register r2, Register r3, Register tmp, Label& L) {
+ orr_32(tmp, r1, r2);  // accumulate the three values into tmp with OR; tmp is clobbered
+#ifdef AARCH64
+ orr_32(tmp, tmp, r3);
+ tst_32(tmp, tmp);
+#else
+ orrs(tmp, tmp, r3);
+#endif
+ b(L, mi);  // taken when any of r1, r2, r3 is negative
+ }
+
+ void add_ptr_scaled_int32(Register dst, Register r1, Register r2, int shift) {
+ // dst = r1 + (r2 scaled by a left shift of 'shift').
+ // AArch64 adds r2 through the ex_sxtw extend mode; 32-bit ARM adds
+ // r2 as an lsl-shifted operand.
+ AARCH64_ONLY(add(dst, r1, r2, ex_sxtw, shift));
+ NOT_AARCH64(add(dst, r1, AsmOperand(r2, lsl, shift)));
+ }
+
+ void sub_ptr_scaled_int32(Register dst, Register r1, Register r2, int shift) {
+ // dst = r1 - (r2 scaled by a left shift of 'shift').
+ // AArch64 subtracts r2 through the ex_sxtw extend mode; 32-bit ARM
+ // subtracts r2 as an lsl-shifted operand.
+ AARCH64_ONLY(sub(dst, r1, r2, ex_sxtw, shift));
+ NOT_AARCH64(sub(dst, r1, AsmOperand(r2, lsl, shift)));
+ }
+
+
+ // klass oop manipulations if compressed
+
+#ifdef AARCH64
+ void load_klass(Register dst_klass, Register src_oop);
+#else
+ void load_klass(Register dst_klass, Register src_oop, AsmCondition cond = al);
+#endif // AARCH64
+
+ void store_klass(Register src_klass, Register dst_oop);
+
+#ifdef AARCH64
+ void store_klass_gap(Register dst);
+#endif // AARCH64
+
+ // oop manipulations
+
+ void load_heap_oop(Register dst, Address src);
+ void store_heap_oop(Register src, Address dst);
+ void store_heap_oop(Address dst, Register src) {
+ store_heap_oop(src, dst);
+ }
+ void store_heap_oop_null(Register src, Address dst);
+
+#ifdef AARCH64
+ void encode_heap_oop(Register dst, Register src);
+ void encode_heap_oop(Register r) {
+ encode_heap_oop(r, r);
+ }
+ void decode_heap_oop(Register dst, Register src);
+ void decode_heap_oop(Register r) {
+ decode_heap_oop(r, r);
+ }
+
+#ifdef COMPILER2
+ void encode_heap_oop_not_null(Register dst, Register src);
+ void decode_heap_oop_not_null(Register dst, Register src);
+
+ void set_narrow_klass(Register dst, Klass* k);
+ void set_narrow_oop(Register dst, jobject obj);
+#endif
+
+ void encode_klass_not_null(Register r);
+ void encode_klass_not_null(Register dst, Register src);
+ void decode_klass_not_null(Register r);
+ void decode_klass_not_null(Register dst, Register src);
+
+ void reinit_heapbase();
+
+#ifdef ASSERT
+ void verify_heapbase(const char* msg);
+#endif // ASSERT
+
+ static int instr_count_for_mov_slow(intptr_t c);
+ static int instr_count_for_mov_slow(address addr);
+ static int instr_count_for_decode_klass_not_null();
+#endif // AARCH64
+
+ void ldr_global_ptr(Register reg, address address_of_global);
+ void ldr_global_s32(Register reg, address address_of_global);
+ void ldrb_global(Register reg, address address_of_global);
+
+ // address_placeholder_instruction is an invalid instruction that is used
+ // as a placeholder in the code for the address of a label
+ enum { address_placeholder_instruction = 0xFFFFFFFF };
+
+ void emit_address(Label& L) {
+ assert(!L.is_bound(), "otherwise address will not be patched");
+ target(L); // creates relocation which will be patched later
+ // The placeholder emitted below is the pattern pd_patch_instruction() recognizes and overwrites with the real address.
+ assert ((offset() & (wordSize-1)) == 0, "should be aligned by word size");
+ // AArch64 emits two 32-bit placeholder words; 32-bit ARM emits the placeholder as a single address.
+#ifdef AARCH64
+ emit_int32(address_placeholder_instruction);
+ emit_int32(address_placeholder_instruction);
+#else
+ AbstractAssembler::emit_address((address)address_placeholder_instruction);
+#endif
+ }
+
+ void b(address target, AsmCondition cond = al) {
+ Assembler::b(target, cond);  // branch to a code address, optionally under cond
+ }
+ void b(Label& L, AsmCondition cond = al) {
+ // internal jumps: the Label is turned into a code address by target(L)
+ Assembler::b(target(L), cond);
+ }
+
+ void bl(address target NOT_AARCH64_ARG(AsmCondition cond = al)) {  // the cond parameter exists only in 32-bit ARM builds
+ Assembler::bl(target NOT_AARCH64_ARG(cond));
+ }
+ void bl(Label& L NOT_AARCH64_ARG(AsmCondition cond = al)) {
+ // internal calls: the Label is turned into a code address by target(L)
+ Assembler::bl(target(L) NOT_AARCH64_ARG(cond));
+ }
+
+#ifndef AARCH64
+ void adr(Register dest, Label& L, AsmCondition cond = al) {
+ const int pc_rel = target(L) - pc() - 8;  // distance from the current position to L, biased by 8
+ if (pc_rel < 0) {
+ sub(dest, PC, -pc_rel, cond);  // L lies before PC: subtract the magnitude
+ } else {
+ add(dest, PC, pc_rel, cond);  // L lies at or after PC
+ }
+ }
+#endif // !AARCH64
+
+ // Variable-length jumps and calls. We now distinguish only the
+ // patchable case from the other cases. Patchable must be
+ // distinguished from relocatable. Relocatable means the generated code
+ // containing the jump/call may move. Patchable means that the
+ // targeted address may be changed later.
+
+ // Non patchable versions.
+ // - used only for relocInfo::runtime_call_type and relocInfo::none
+ // - may use relative or absolute format (do not use relocInfo::none
+ // if the generated code may move)
+ // - the implementation takes into account switch to THUMB mode if the
+ // destination is a THUMB address
+ // - the implementation supports far targets
+ //
+ // To reduce regression risk, scratch still defaults to noreg on
+ // arm32. This results in patchable instructions. However, if
+ // patching really matters, the call sites should be modified to
+ // use patchable_call or patchable_jump. If patching is not required
+ // and if a register can be clobbered, it should be explicitly
+ // specified to allow future optimizations.
+ void jump(address target,
+ relocInfo::relocType rtype = relocInfo::runtime_call_type,
+ Register scratch = AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg)
+#ifndef AARCH64
+ , AsmCondition cond = al
+#endif
+ );
+
+ void call(address target,
+ RelocationHolder rspec
+ NOT_AARCH64_ARG(AsmCondition cond = al));
+
+ void call(address target,
+ relocInfo::relocType rtype = relocInfo::runtime_call_type
+ NOT_AARCH64_ARG(AsmCondition cond = al)) {
+ call(target, Relocation::spec_simple(rtype) NOT_AARCH64_ARG(cond));  // wraps rtype in a simple relocation spec and forwards to the RelocationHolder overload
+ }
+
+ void jump(AddressLiteral dest) {
+ jump(dest.target(), dest.reloc());  // scratch (and cond on 32-bit ARM) take the defaults of the address overload
+ }
+#ifndef AARCH64
+ void jump(address dest, relocInfo::relocType rtype, AsmCondition cond) {
+ jump(dest, rtype, Rtemp, cond);  // note: passes Rtemp as scratch, not the noreg default of the four-argument overload
+ }
+#endif
+
+ void call(AddressLiteral dest) {
+ call(dest.target(), dest.reloc());  // unpacks the literal into target and relocation type
+ }
+
+ // Patchable version:
+ // - set_destination can be used to atomically change the target
+ //
+ // The targets for patchable_jump and patchable_call must be in the
+ // code cache.
+ // [ including possible extensions of the code cache, like AOT code ]
+ //
+ // To reduce regression risk, scratch still defaults to noreg on
+ // arm32. If a register can be clobbered, it should be explicitly
+ // specified to allow future optimizations.
+ void patchable_jump(address target,
+ relocInfo::relocType rtype = relocInfo::runtime_call_type,
+ Register scratch = AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg)
+#ifndef AARCH64
+ , AsmCondition cond = al
+#endif
+ );
+
+ // patchable_call may scratch Rtemp
+ int patchable_call(address target,
+ RelocationHolder const& rspec,
+ bool c2 = false);
+
+ int patchable_call(address target,
+ relocInfo::relocType rtype,
+ bool c2 = false) {
+ return patchable_call(target, Relocation::spec_simple(rtype), c2);
+ }
+
+#if defined(AARCH64) && defined(COMPILER2)
+ static int call_size(address target, bool far, bool patchable);
+#endif
+
+#ifdef AARCH64
+ static bool page_reachable_from_cache(address target);
+#endif
+ static bool _reachable_from_cache(address target);
+ static bool _cache_fully_reachable();
+ bool cache_fully_reachable();
+ bool reachable_from_cache(address target);
+
+ void zero_extend(Register rd, Register rn, int bits);
+ void sign_extend(Register rd, Register rn, int bits);
+
+ inline void zap_high_non_significant_bits(Register r) {  // AArch64 only; emits nothing on 32-bit ARM
+#ifdef AARCH64
+ if(ZapHighNonSignificantBits) {  // no code is emitted unless this flag is set
+ movk(r, 0xBAAD, 48);  // overwrites the 16-bit field at bit 48 with a marker value ...
+ movk(r, 0xF00D, 32);  // ... and the 16-bit field at bit 32; the low 32 bits of r are not touched
+ }
+#endif
+ }
+
+#ifndef AARCH64
+ void long_move(Register rd_lo, Register rd_hi,
+ Register rn_lo, Register rn_hi,
+ AsmCondition cond = al);
+ void long_shift(Register rd_lo, Register rd_hi,
+ Register rn_lo, Register rn_hi,
+ AsmShift shift, Register count);
+ void long_shift(Register rd_lo, Register rd_hi,
+ Register rn_lo, Register rn_hi,
+ AsmShift shift, int count);
+
+ void atomic_cas(Register tmpreg1, Register tmpreg2, Register oldval, Register newval, Register base, int offset);
+ void atomic_cas_bool(Register oldval, Register newval, Register base, int offset, Register tmpreg);
+ void atomic_cas64(Register temp_lo, Register temp_hi, Register temp_result, Register oldval_lo, Register oldval_hi, Register newval_lo, Register newval_hi, Register base, int offset);
+#endif // !AARCH64
+
+ void cas_for_lock_acquire(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false);
+ void cas_for_lock_release(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false);
+
+#ifndef PRODUCT
+ // Preserves flags and all registers.
+ // On SMP the updated value might not be visible to external observers without a synchronization barrier
+ void cond_atomic_inc32(AsmCondition cond, int* counter_addr);
+#endif // !PRODUCT
+
+ // unconditional non-atomic increment
+ void inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2);
+ void inc_counter(int* counter_addr, Register tmpreg1, Register tmpreg2) {
+ inc_counter((address) counter_addr, tmpreg1, tmpreg2);
+ }
+
+ void pd_patch_instruction(address branch, address target);
+
+ // Loading and storing values by size and signed-ness;
+ // size must not exceed wordSize (i.e. 8-byte values are not supported on 32-bit ARM);
+ // each of these calls generates exactly one load or store instruction,
+ // so src can be pre- or post-indexed address.
+#ifdef AARCH64
+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
+ void store_sized_value(Register src, Address dst, size_t size_in_bytes);
+#else
+ // 32-bit ARM variants also support conditional execution
+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, AsmCondition cond = al);
+ void store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond = al);
+#endif
+
+ void lookup_interface_method(Register recv_klass,
+ Register intf_klass,
+ Register itable_index,
+ Register method_result,
+ Register temp_reg1,
+ Register temp_reg2,
+ Label& L_no_such_interface);
+
+ // Compare char[] arrays aligned to 4 bytes.
+ void char_arrays_equals(Register ary1, Register ary2,
+ Register limit, Register result,
+ Register chr1, Register chr2, Label& Ldone);
+
+
+ void floating_cmp(Register dst);
+
+ // improved x86 portability (minimizing source code changes)
+
+ void ldr_literal(Register rd, AddressLiteral addr) {
+ // Records the literal's relocation, then emits the load into rd.
+ // AArch64 hands the absolute target to ldr; 32-bit ARM addresses the
+ // target relative to PC, with a bias of 8.
+ relocate(addr.rspec());
+ AARCH64_ONLY(ldr(rd, addr.target()));
+ NOT_AARCH64(ldr(rd, Address(PC, addr.target() - pc() - 8)));
+ }
+
+ void lea(Register Rd, AddressLiteral addr) {
+ // Never dereferenced, as on x86 (lval status ignored)
+ mov_address(Rd, addr.target(), addr.rspec());
+ }
+
+ void restore_default_fp_mode();
+
+#ifdef COMPILER2
+#ifdef AARCH64
+ // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
+ void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3);
+ void fast_unlock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3);
+#else
+ void fast_lock(Register obj, Register box, Register scratch, Register scratch2);
+ void fast_unlock(Register obj, Register box, Register scratch, Register scratch2);
+#endif
+#endif
+
+#ifdef AARCH64
+
+#define F(mnemonic) \
+ void mnemonic(Register rt, address target) { \
+ Assembler::mnemonic(rt, target); \
+ } \
+ void mnemonic(Register rt, Label& L) { \
+ Assembler::mnemonic(rt, target(L)); \
+ }
+ // Compare-and-branch emitters: an address overload forwarding to Assembler, and a Label overload resolved through target(L).
+ F(cbz_w);
+ F(cbnz_w);
+ F(cbz);
+ F(cbnz);
+
+#undef F
+
+#define F(mnemonic) \
+ void mnemonic(Register rt, int bit, address target) { \
+ Assembler::mnemonic(rt, bit, target); \
+ } \
+ void mnemonic(Register rt, int bit, Label& L) { \
+ Assembler::mnemonic(rt, bit, target(L)); \
+ }
+ // Test-bit-and-branch emitters: same two overloads, with the extra bit-number argument.
+ F(tbz);
+ F(tbnz);
+#undef F
+
+#endif // AARCH64
+
+};
+
+
+// The purpose of this class is to build several code fragments of the same size
+// in order to allow a fast table branch.
+// NOTE(review): constructor and destructor are defined outside this header; the member notes below are inferred from names only.
+class FixedSizeCodeBlock VALUE_OBJ_CLASS_SPEC {
+public:
+ FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled);
+ ~FixedSizeCodeBlock();
+
+private:
+ MacroAssembler* _masm;  // presumably the assembler the fragment is emitted with - confirm
+ address _start;  // presumably the code position at construction - confirm
+ int _size_in_instrs;  // presumably the fixed fragment size, counted in instructions - confirm
+ bool _enabled;  // presumably turns the size enforcement on or off - confirm
+};
+
+
+#endif // CPU_ARM_VM_MACROASSEMBLER_ARM_HPP
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/macroAssembler_arm.inline.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_MACROASSEMBLER_ARM_INLINE_HPP
+#define CPU_ARM_VM_MACROASSEMBLER_ARM_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+#include "runtime/handles.inline.hpp"
+
+inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
+ int instr = *(int*)branch;  // 32-bit word currently stored at the patch site
+ int new_offset = (int)(target - branch NOT_AARCH64(- 8));  // byte distance to target; 32-bit ARM subtracts a further 8
+ assert((new_offset & 3) == 0, "bad alignment");
+ // Rewrites the word at 'branch' so that it refers to 'target'; the encoding found there selects which field is replaced.
+#ifdef AARCH64
+ if ((instr & (0x1f << 26)) == (0b00101 << 26)) {
+ // Unconditional B or BL
+ assert (is_offset_in_range(new_offset, 26), "offset is too large");
+ *(int*)branch = (instr & ~right_n_bits(26)) | encode_offset(new_offset, 26, 0);  // replaces the low 26 bits
+ } else if ((instr & (0xff << 24)) == (0b01010100 << 24) && (instr & (1 << 4)) == 0) {
+ // Conditional B
+ assert (is_offset_in_range(new_offset, 19), "offset is too large");
+ *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5);  // replaces the 19-bit field starting at bit 5
+ } else if ((instr & (0b111111 << 25)) == (0b011010 << 25)) {
+ // Compare & branch CBZ/CBNZ
+ assert (is_offset_in_range(new_offset, 19), "offset is too large");
+ *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5);  // same field as conditional B
+ } else if ((instr & (0b111111 << 25)) == (0b011011 << 25)) {
+ // Test & branch TBZ/TBNZ
+ assert (is_offset_in_range(new_offset, 14), "offset is too large");
+ *(int*)branch = (instr & ~(right_n_bits(14) << 5)) | encode_offset(new_offset, 14, 5);  // replaces the 14-bit field starting at bit 5
+ } else if ((instr & (0b111011 << 24)) == (0b011000 << 24)) {
+ // LDR (literal)
+ unsigned opc = ((unsigned)instr >> 30);  // top two bits of the instruction
+ assert (opc != 0b01 || ((uintx)target & 7) == 0, "ldr target should be aligned");  // opc 0b01 additionally requires an 8-byte aligned target
+ assert (is_offset_in_range(new_offset, 19), "offset is too large");
+ *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5);
+ } else if (((instr & (1 << 31)) == 0) && ((instr & (0b11111 << 24)) == (0b10000 << 24))) {
+ // ADR
+ assert (is_imm_in_range(new_offset, 21, 0), "offset is too large");
+ instr = (instr & ~(right_n_bits(2) << 29)) | (new_offset & 3) << 29;  // low two offset bits go to bits 30..29
+ *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_imm(new_offset >> 2, 19, 0, 5);  // remaining offset bits go to the 19-bit field at bit 5
+ } else if((unsigned int)instr == address_placeholder_instruction) {
+ // address
+ assert (*(unsigned int *)(branch + InstructionSize) == address_placeholder_instruction, "address placeholder occupies two instructions");
+ *(intx*)branch = (intx)target;  // the placeholder is overwritten with the target address itself, stored as an intx
+ } else {
+ ::tty->print_cr("=============== instruction: 0x%x ================\n", instr);
+ Unimplemented(); // TODO-AARCH64
+ }
+#else
+ if ((instr & 0x0e000000) == 0x0a000000) {
+ // B or BL instruction
+ assert(new_offset < 0x2000000 && new_offset > -0x2000000, "encoding constraint");
+ *(int*)branch = (instr & 0xff000000) | ((unsigned int)new_offset << 6 >> 8);  // keeps the top byte; low 24 bits become bits 25..2 of the offset
+ } else if((unsigned int)instr == address_placeholder_instruction) {
+ // address
+ *(int*)branch = (int)target;  // the placeholder word is overwritten with the target address itself
+ } else if ((instr & 0x0fff0000) == 0x028f0000 || ((instr & 0x0fff0000) == 0x024f0000)) {
+ // ADR
+ int encoding = 0x8 << 20; // ADD
+ if (new_offset < 0) {
+ encoding = 0x4 << 20; // SUB
+ new_offset = -new_offset;  // the operand carries the magnitude; the sign is expressed by ADD vs SUB
+ }
+ AsmOperand o(new_offset);
+ *(int*)branch = (instr & 0xff0ff000) | encoding | o.encoding();  // bits 23..20 and 11..0 are replaced
+ } else {
+ // LDR Rd, [PC, offset] instruction
+ assert((instr & 0x0f7f0000) == 0x051f0000, "Must be ldr_literal");
+ assert(new_offset < 4096 && new_offset > -4096, "encoding constraint");
+ if (new_offset >= 0) {
+ *(int*)branch = (instr & 0xff0ff000) | 9 << 20 | new_offset;  // non-negative offset: bits 23 and 20 set
+ } else {
+ *(int*)branch = (instr & 0xff0ff000) | 1 << 20 | -new_offset;  // negative offset: only bit 20 set, magnitude in the low 12 bits
+ }
+ }
+#endif // AARCH64
+}
+
+#endif // CPU_ARM_VM_MACROASSEMBLER_ARM_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/metaspaceShared_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "assembler_arm.inline.hpp"
+#include "memory/metaspaceShared.hpp"
+
+// Generate the self-patching vtable method:
+//
+// This method will be called (as any other Klass virtual method) with
+// the Klass itself as the first argument. Example:
+//
+// oop obj;
+// int size = obj->klass()->oop_size(this);
+//
+// for which the virtual method call is Klass::oop_size();
+//
+// The dummy method is called with the Klass object as the first
+// operand, and an object as the second argument.
+//
+
+//=====================================================================
+
+// All of the dummy methods in the vtable are essentially identical,
+// differing only by an ordinal constant, and they bear no relationship
+// to the original method which the caller intended. Also, there needs
+// to be 'vtbl_list_size' instances of the vtable in order to
+// differentiate between the 'vtbl_list_size' original Klass objects.
+
+#define __ masm->
+
+void MetaspaceShared::generate_vtable_methods(void** vtbl_list,
+ void** vtable,
+ char** md_top,
+ char* md_end,  // not consulted by this implementation
+ char** mc_top,
+ char* mc_end) {
+ intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*);  // one slot per (vtable, virtual method) pair
+ *(intptr_t *)(*md_top) = vtable_bytes;  // the byte size is stored ahead of the table
+ *md_top += sizeof(intptr_t);
+ void** dummy_vtable = (void**)*md_top;  // the table itself starts right after the size word
+ *vtable = dummy_vtable;  // handed back to the caller through 'vtable'
+ *md_top += vtable_bytes;
+ // The stubs are assembled directly into the region [*mc_top, mc_end).
+ CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top);
+ MacroAssembler* masm = new MacroAssembler(&cb);
+ // For each of the vtbl_list_size vtables: num_virtuals two-instruction entry stubs followed by one shared tail.
+ for (int i = 0; i < vtbl_list_size; ++i) {
+ Label common_code;
+ for (int j = 0; j < num_virtuals; ++j) {
+ dummy_vtable[num_virtuals * i + j] = (void*) __ pc();  // table slot points at the stub emitted next
+ __ mov(Rtemp, j); // Rtemp contains an index of a virtual method in the table
+ __ b(common_code);
+ }
+ // Shared tail: fetch vtbl_list[i], store it through R0, then jump to entry Rtemp of that vtable.
+ InlinedAddress vtable_address((address)&vtbl_list[i]);
+ __ bind(common_code);
+ const Register tmp2 = AARCH64_ONLY(Rtemp2) NOT_AARCH64(R4);
+ assert_different_registers(Rtemp, tmp2);
+#ifndef AARCH64
+ __ push(tmp2);  // 32-bit ARM uses R4 as tmp2, so its value is saved here and restored before the jump
+#endif // !AARCH64
+ // Do not use ldr_global since the code must be portable across all ARM architectures
+ __ ldr_literal(tmp2, vtable_address);
+ __ ldr(tmp2, Address(tmp2)); // get correct vtable address
+ __ ldr(Rtemp, Address::indexed_ptr(tmp2, Rtemp)); // get real method pointer
+ __ str(tmp2, Address(R0)); // update vtable. R0 = "this"
+#ifndef AARCH64
+ __ pop(tmp2);
+#endif // !AARCH64
+ __ jump(Rtemp);
+ __ bind_literal(vtable_address);  // the inlined literal is emitted after the jump
+ }
+ // Report the end of the emitted code back to the caller through mc_top.
+ __ flush();
+ *mc_top = (char*) __ pc();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/methodHandles_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,587 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// This file mirrors methodHandles_x86.cpp as much as possible to ease
+// cross platform development for JSR292.
+// Last synchronization: changeset f8c9417e3571
+
+#include "precompiled.hpp"
+#include "classfile/javaClasses.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+
+#define __ _masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp1, Register temp2) {
+  // On entry klass_reg holds a java.lang.Class oop; the emitted code
+  // overwrites it with the value stored at the Class object's klass offset.
+  // temp1/temp2 are only handed to the optional verification code.
+  if (VerifyMethodHandles) {
+    verify_klass(_masm, klass_reg, temp1, temp2,
+                 SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class),
+                 "MH argument is a Class");
+  }
+  const Address klass_field(klass_reg, java_lang_Class::klass_offset_in_bytes());
+  __ ldr(klass_reg, klass_field);
+}
+
+#ifdef ASSERT
+static int check_nonzero(const char* xname, int x) { // debug-only helper behind NONZERO(): asserts that x is nonzero and passes x through
+ assert(x != 0, "%s should be nonzero", xname); // xname is the stringified expression supplied by the NONZERO macro
+ return x;
+}
+#define NONZERO(x) check_nonzero(#x, x)
+#else //ASSERT
+#define NONZERO(x) (x)
+#endif //ASSERT
+
+#ifdef ASSERT
+void MethodHandles::verify_klass(MacroAssembler* _masm,
+                                 Register obj, Register temp1, Register temp2, SystemDictionary::WKID klass_id,
+                                 const char* error_message) {
+  // Emits a debug check that 'obj' is non-null and that its klass is the
+  // well-known klass 'klass_id', either directly or via the word found at
+  // that klass' super_check_offset inside obj's klass. On failure the
+  // generated code stops with 'error_message'. temp1 and temp2 are clobbered.
+  InstanceKlass** expected_klass_addr = SystemDictionary::well_known_klass_addr(klass_id);
+  KlassHandle expected_klass = SystemDictionary::well_known_klass(klass_id);
+  const intptr_t super_check_offset = expected_klass->super_check_offset();
+  Label L_ok, L_bad;
+
+  BLOCK_COMMENT("verify_klass {");
+  __ verify_oop(obj);
+  __ cbz(obj, L_bad);
+
+  // temp1 = klass of obj, temp2 = expected klass
+  __ load_klass(temp1, obj);
+  __ lea(temp2, ExternalAddress((address) expected_klass_addr));
+  __ ldr(temp2, temp2); // the cmpptr on x86 dereferences the AddressLiteral (not lea)
+
+  // exact match?
+  __ cmp(temp1, temp2);
+  __ b(L_ok, eq);
+
+  // otherwise compare the entry at super_check_offset of obj's klass
+  __ ldr(temp1, Address(temp1, super_check_offset));
+  __ cmp(temp1, temp2);
+  __ b(L_ok, eq);
+
+  __ bind(L_bad);
+  __ stop(error_message);
+
+  __ bind(L_ok);
+  BLOCK_COMMENT("L_ok:");
+  BLOCK_COMMENT("} verify_klass");
+}
+
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {
+  // Emits a debug check that the reference kind encoded in the flags field
+  // of the MemberName in member_reg equals 'ref_kind'; the generated code
+  // stops with a message otherwise. 'temp' is clobbered.
+  Label L_kind_ok;
+  BLOCK_COMMENT("verify_ref_kind {");
+
+  // temp = (flags >> MN_REFERENCE_KIND_SHIFT) & MN_REFERENCE_KIND_MASK
+  const Address member_flags(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()));
+  __ ldr_u32(temp, member_flags);
+  __ logical_shift_right(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT);
+  __ andr(temp, temp, (unsigned)java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
+  __ cmp(temp, ref_kind);
+  __ b(L_kind_ok, eq);
+
+  {
+    // The message buffer is handed to the generated code and is not freed here.
+    const int msg_capacity = 100;
+    char* msg = NEW_C_HEAP_ARRAY(char, msg_capacity, mtInternal);
+    jio_snprintf(msg, msg_capacity, "verify_ref_kind expected %x", ref_kind);
+
+    // could do this for all ref_kinds, but would explode assembly code size
+    const bool trace_failure = (ref_kind == JVM_REF_invokeVirtual) ||
+                               (ref_kind == JVM_REF_invokeSpecial);
+    if (trace_failure) {
+      trace_method_handle(_masm, msg);
+    }
+    __ stop(msg);
+  }
+
+  BLOCK_COMMENT("} verify_ref_kind");
+  __ bind(L_kind_ok);
+}
+
+#endif //ASSERT
+
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, bool for_compiler_entry) {
+  // Emits the final jump into the method held in Rmethod.
+  //  - Rmethod == NULL: jump to the AbstractMethodError throwing stub.
+  //  - interpreter entry with interp_only_mode set: jump to the method's
+  //    interpreter entry.
+  //  - otherwise: jump through from_compiled / from_interpreted, depending
+  //    on for_compiler_entry.
+  // Rtemp is used as scratch.
+  Label L_no_such_method;
+  __ cbz(Rmethod, L_no_such_method);
+
+  // Note: JVMTI overhead seems small enough compared to invocation
+  // cost and is not worth the complexity or code size overhead of
+  // supporting several variants of each adapter.
+  const bool check_interp_only = !for_compiler_entry && JvmtiExport::can_post_interpreter_events();
+  if (check_interp_only) {
+    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+    // compiled code in threads for which the event is enabled.  Check here for
+    // interp_only_mode if these events CAN be enabled.
+    const Address interp_only_mode(Rthread, JavaThread::interp_only_mode_offset());
+    __ ldr_s32(Rtemp, interp_only_mode);
+    const Address interpreter_entry(Rmethod, Method::interpreter_entry_offset());
+#ifdef AARCH64
+    Label L_not_interp_only;
+    __ cbz(Rtemp, L_not_interp_only);
+    __ indirect_jump(interpreter_entry, Rtemp);
+    __ bind(L_not_interp_only);
+#else
+    // conditional load into PC: taken only when interp_only_mode != 0
+    __ cmp(Rtemp, 0);
+    __ ldr(PC, interpreter_entry, ne);
+#endif // AARCH64
+  }
+
+  if (for_compiler_entry) {
+    __ indirect_jump(Address(Rmethod, Method::from_compiled_offset()), Rtemp);
+  } else {
+    __ indirect_jump(Address(Rmethod, Method::from_interpreted_offset()), Rtemp);
+  }
+
+  __ bind(L_no_such_method);
+  // throw exception
+  __ jump(StubRoutines::throw_AbstractMethodError_entry(), relocInfo::runtime_call_type, Rtemp);
+}
+
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+                                        Register recv, Register tmp,
+                                        bool for_compiler_entry) {
+  // This is the initial entry point of a lazy method handle.
+  // After type checking, it picks up the invoker from the LambdaForm:
+  // the emitted code follows recv (MH) -> MH.form -> LF.vmentry, loads the
+  // vmtarget of that MemberName into Rmethod and jumps to it.
+  // 'tmp' is clobbered.
+  BLOCK_COMMENT("jump_to_lambda_form {");
+  assert_different_registers(recv, tmp, Rmethod);
+
+  // tmp = recv.form
+  const Address mh_form(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()));
+  __ load_heap_oop(tmp, mh_form);
+  __ verify_oop(tmp);
+
+  // tmp = tmp.vmentry
+  const Address lf_vmentry(tmp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()));
+  __ load_heap_oop(tmp, lf_vmentry);
+  __ verify_oop(tmp);
+
+  // Rmethod = tmp.vmtarget
+  // the following assumes that a Method* is normally compressed in the vmtarget field:
+  // NOTE(review): the load below is a plain pointer-sized ldr - confirm the remark above.
+  const Address mn_vmtarget(tmp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
+  __ ldr(Rmethod, mn_vmtarget);
+
+  const bool check_receiver_on_stack = VerifyMethodHandles && !for_compiler_entry;
+  if (check_receiver_on_stack) {
+    // make sure recv is already on stack
+    Label L_receiver_ok;
+    __ ldr(tmp, Address(Rmethod, Method::const_offset()));
+    // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
+    __ load_sized_value(tmp,
+                        Address(tmp, ConstMethod::size_of_parameters_offset()),
+                        sizeof(u2), /*is_signed*/ false);
+    const Address receiver_slot = __ receiver_argument_address(Rparams, tmp, tmp);
+    __ ldr(tmp, receiver_slot);
+    __ cmp(tmp, recv);
+    __ b(L_receiver_ok, eq);
+    __ stop("receiver not on stack");
+    __ bind(L_receiver_ok);
+  }
+
+  jump_from_method_handle(_masm, for_compiler_entry);
+  BLOCK_COMMENT("} jump_to_lambda_form");
+}
+
+
+// Code generation
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+                                                                vmIntrinsics::ID iid) {
+  // Emits the interpreter entry stub for the signature-polymorphic
+  // intrinsic 'iid' and returns the address of its first instruction.
+  // For _invokeGeneric and _compiledLambdaForm only a should_not_reach_here
+  // trap is emitted and NULL is returned.
+  //
+  // For _invokeBasic the receiver method handle is loaded from the argument
+  // area and dispatched on. For the linkTo* intrinsics the trailing
+  // MemberName argument is popped and, where the reference kind has a
+  // receiver, the receiver is loaded before dispatching.
+  const bool not_for_compiler_entry = false;  // this is the interpreter entry
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  if (iid == vmIntrinsics::_invokeGeneric ||
+      iid == vmIntrinsics::_compiledLambdaForm) {
+    // Perhaps surprisingly, the user-visible names, and linkToCallSite, are not directly used.
+    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+    // They all require an extra argument.
+    __ should_not_reach_here();           // empty stubs make SG sick
+    return NULL;
+  }
+
+  // Rmethod: Method*
+  // Rparams (SP on 32-bit ARM): pointer to parameters
+  // Rsender_sp (R4/R19): sender SP (must preserve; see prepare_to_jump_from_interpreted)
+  // R5_mh: receiver method handle (must load from sp[MethodTypeForm.vmslots])
+  // R1, R2, Rtemp: garbage temp, blown away
+
+  // Use same name as x86 to ease future merges
+  Register rdx_temp       = R2_tmp;
+  Register rdx_param_size = rdx_temp;  // size of parameters
+  Register rcx_mh         = R5_mh;     // MH receiver; dies quickly and is recycled
+  Register rbx_method     = Rmethod;   // eventual target of this invocation
+  Register rdi_temp       = Rtemp;
+
+  // here's where control starts out:
+  __ align(CodeEntryAlignment);
+  address entry_point = __ pc();
+
+  if (VerifyMethodHandles) {
+    // Check that the incoming Method* really carries intrinsic id 'iid'.
+    Label L;
+    BLOCK_COMMENT("verify_intrinsic_id {");
+    __ ldrh(rdi_temp, Address(rbx_method, Method::intrinsic_id_offset_in_bytes()));
+    __ sub_slow(rdi_temp, rdi_temp, (int) iid);
+    __ cbz(rdi_temp, L);
+    if (iid == vmIntrinsics::_linkToVirtual ||
+        iid == vmIntrinsics::_linkToSpecial) {
+      // could do this for all kinds, but would explode assembly code size
+      trace_method_handle(_masm, "bad Method*::intrinsic_id");
+    }
+    __ stop("bad Method*::intrinsic_id");
+    __ bind(L);
+    BLOCK_COMMENT("} verify_intrinsic_id");
+  }
+
+  // First task:  Find out how big the argument list is.
+  Address rdx_first_arg_addr;
+  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
+  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    __ ldr(rdx_param_size, Address(rbx_method, Method::const_offset()));
+    __ load_sized_value(rdx_param_size,
+                        Address(rdx_param_size, ConstMethod::size_of_parameters_offset()),
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
+    rdx_first_arg_addr = __ receiver_argument_address(Rparams, rdx_param_size, rdi_temp);
+  } else {
+    DEBUG_ONLY(rdx_param_size = noreg);
+  }
+
+  if (!is_signature_polymorphic_static(iid)) {
+    __ ldr(rcx_mh, rdx_first_arg_addr);
+    DEBUG_ONLY(rdx_param_size = noreg);
+  }
+
+  // rdx_first_arg_addr is live!
+
+  trace_method_handle_interpreter_entry(_masm, iid);
+
+  if (iid == vmIntrinsics::_invokeBasic) {
+    generate_method_handle_dispatch(_masm, iid, rcx_mh, noreg, not_for_compiler_entry);
+
+  } else {
+    // Adjust argument list by popping the trailing MemberName argument.
+    Register rcx_recv = noreg;
+    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+      // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
+      __ ldr(rcx_recv = rcx_mh, rdx_first_arg_addr);
+      DEBUG_ONLY(rdx_param_size = noreg);
+    }
+    Register rbx_member = rbx_method;  // MemberName ptr; incoming method ptr is dead now
+#ifdef AARCH64
+    __ ldr(rbx_member, Address(Rparams, Interpreter::stackElementSize, post_indexed));
+#else
+    __ pop(rbx_member);
+#endif
+    generate_method_handle_dispatch(_masm, iid, rcx_recv, rbx_member, not_for_compiler_entry);
+  }
+  return entry_point;
+}
+
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+                                                    vmIntrinsics::ID iid,
+                                                    Register receiver_reg,
+                                                    Register member_reg,
+                                                    bool for_compiler_entry) {
+  // Emits the dispatch sequence for the signature-polymorphic intrinsic 'iid'.
+  //   _invokeBasic:     jump through receiver_reg's LambdaForm.
+  //   _linkToSpecial,
+  //   _linkToStatic:    target is MemberName.vmtarget.
+  //   _linkToVirtual:   target is looked up in the receiver klass' vtable
+  //                     using MemberName.vmindex.
+  //   _linkToInterface: target is looked up via the interface in
+  //                     MemberName.clazz and MemberName.vmindex; on failure
+  //                     control goes to the IncompatibleClassChangeError stub.
+  // receiver_reg holds the receiver (noreg when there is none), member_reg
+  // the trailing MemberName argument (noreg for _invokeBasic).
+  // for_compiler_entry selects the compiled versus interpreted calling
+  // sequence for the temps and for the final jump.
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  // Use same name as x86 to ease future merges
+  Register rbx_method = Rmethod;   // eventual target of this invocation
+  // temps used in this code are not used in *either* compiled or interpreted calling sequences
+  Register temp1 = (for_compiler_entry ? saved_last_sp_register() : R1_tmp);
+  Register temp2 = AARCH64_ONLY(R9) NOT_AARCH64(R8);
+  Register temp3 = Rtemp; // R12/R16
+  Register temp4 = AARCH64_ONLY(Rtemp2) NOT_AARCH64(R5);
+  if (for_compiler_entry) {
+    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment");
+#ifdef AARCH64
+    assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+    assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+    assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+    assert_different_registers(temp4, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+#else
+    assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3);
+    assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3);
+    assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3);
+    assert_different_registers(temp4, j_rarg0, j_rarg1, j_rarg2, j_rarg3);
+#endif // AARCH64
+  }
+  assert_different_registers(temp1, temp2, temp3, receiver_reg);
+  assert_different_registers(temp1, temp2, temp3, temp4, member_reg);
+  if (!for_compiler_entry) {
+    assert_different_registers(temp1, temp2, temp3, temp4, saved_last_sp_register());  // don't trash lastSP
+  }
+
+  if (iid == vmIntrinsics::_invokeBasic) {
+    // indirect through MH.form.exactInvoker.vmtarget
+    jump_to_lambda_form(_masm, receiver_reg, temp3, for_compiler_entry);
+
+  } else {
+    // The method is a member invoker used by direct method handles.
+    if (VerifyMethodHandles) {
+      // make sure the trailing argument really is a MemberName (caller responsibility)
+      verify_klass(_masm, member_reg, temp2, temp3, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName),
+                   "MemberName required for invokeVirtual etc.");
+    }
+
+    Address member_clazz(   member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
+    Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
+    Address member_vmtarget(member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
+
+    Register temp1_recv_klass = temp1;
+    if (iid != vmIntrinsics::_linkToStatic) {
+      if (iid == vmIntrinsics::_linkToSpecial) {
+        // Don't actually load the klass; just null-check the receiver.
+        __ null_check(receiver_reg, temp3);
+      } else {
+        // load receiver klass itself
+        __ null_check(receiver_reg, temp3, oopDesc::klass_offset_in_bytes());
+        __ load_klass(temp1_recv_klass, receiver_reg);
+        __ verify_klass_ptr(temp1_recv_klass);
+      }
+      BLOCK_COMMENT("check_receiver {");
+      // The receiver for the MemberName must be in receiver_reg.
+      if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+        // Did not load it above...
+        __ load_klass(temp1_recv_klass, receiver_reg);
+        __ verify_klass_ptr(temp1_recv_klass);
+      }
+      // Check the receiver against the MemberName.clazz
+      if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+        Label L_ok;
+        Register temp2_defc = temp2;
+        __ load_heap_oop(temp2_defc, member_clazz);
+        load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
+        __ verify_klass_ptr(temp2_defc);
+#ifdef AARCH64
+        // TODO-AARCH64
+        // The subtype check is not emitted on AARCH64: the unconditional
+        // branch below skips the failure path.
+        __ b(L_ok);
+#else
+        __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, noreg, L_ok);
+#endif
+        // If we get here, the type check failed!
+        __ stop("receiver class disagrees with MemberName.clazz");
+        __ bind(L_ok);
+      }
+      BLOCK_COMMENT("} check_receiver");
+    }
+    if (iid == vmIntrinsics::_linkToSpecial ||
+        iid == vmIntrinsics::_linkToStatic) {
+      DEBUG_ONLY(temp1_recv_klass = noreg);  // these guys didn't load the recv_klass
+    }
+
+    // Live registers at this point:
+    //  member_reg - MemberName that was the extra argument
+    //  temp1_recv_klass - klass of stacked receiver, if needed
+
+    // Bound after the final jump, for _linkToInterface only.
+    Label L_incompatible_class_change_error;
+    switch (iid) {
+    case vmIntrinsics::_linkToSpecial:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+      }
+      __ ldr(Rmethod, member_vmtarget);
+      break;
+
+    case vmIntrinsics::_linkToStatic:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+      }
+      __ ldr(Rmethod, member_vmtarget);
+      break;
+
+    case vmIntrinsics::_linkToVirtual:
+    {
+      // same as TemplateTable::invokevirtual,
+      // minus the CP setup and profiling:
+
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+      }
+
+      // pick out the vtable index from the MemberName, and then we can discard it:
+      Register temp2_index = temp2;
+      __ ldr(temp2_index, member_vmindex);
+
+      if (VerifyMethodHandles) {
+        Label L_index_ok;
+        __ cmp(temp2_index, 0);
+        __ b(L_index_ok, ge);
+        __ stop("no virtual index");
+        __ bind(L_index_ok);
+      }
+
+      // Note:  The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+      // at this point.  And VerifyMethodHandles has already checked clazz, if needed.
+
+      // get target Method* & entry point
+      __ lookup_virtual_method(temp1_recv_klass, temp2_index, Rmethod);
+      break;
+    }
+
+    case vmIntrinsics::_linkToInterface:
+    {
+      // same as TemplateTable::invokeinterface
+      // (minus the CP setup and profiling, with different argument motion)
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+      }
+
+      Register temp3_intf = temp3;
+      __ load_heap_oop(temp3_intf, member_clazz);
+      load_klass_from_Class(_masm, temp3_intf, temp2, temp4);
+      __ verify_klass_ptr(temp3_intf);
+
+      Register rbx_index = rbx_method;
+      __ ldr(rbx_index, member_vmindex);
+      if (VerifyMethodHandles) {
+        Label L;
+        __ cmp(rbx_index, 0);
+        __ b(L, ge);
+        __ stop("invalid vtable index for MH.invokeInterface");
+        __ bind(L);
+      }
+
+      // given intf, index, and recv klass, dispatch to the implementation method
+      __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+                                 // note: next two args must be the same:
+                                 rbx_index, rbx_method,
+                                 temp2, temp4,
+                                 L_incompatible_class_change_error);
+      break;
+    }
+
+    default:
+      fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
+      break;
+    }
+
+    // Live at this point:
+    //   Rmethod (target method)
+    //   Rsender_sp, Rparams (if interpreted)
+    //   register arguments (if compiled)
+
+    // After figuring out which concrete method to call, jump into it.
+    __ verify_method_ptr(Rmethod);
+    jump_from_method_handle(_masm, for_compiler_entry);
+
+    if (iid == vmIntrinsics::_linkToInterface) {
+      __ bind(L_incompatible_class_change_error);
+      __ jump(StubRoutines::throw_IncompatibleClassChangeError_entry(), relocInfo::runtime_call_type, Rtemp);
+    }
+  }
+}
+
+
+#ifndef PRODUCT
+enum {
+ ARG_LIMIT = 255, SLOP = 4,
+ // use this parameter for checking for garbage stack movements
+ // (trace_method_handle_stub accepts a saved sender SP at most this many words below entry_sp):
+ UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
+ // the slop defends against false alarms due to fencepost errors
+};
+
+#ifdef AARCH64
+const int trace_mh_nregs = 32; // R0-R30, PC; trace_method_handle asserts that save_all_registers() pushes exactly this many words
+#else
+const int trace_mh_nregs = 15; // one entry per register listed below; trace_method_handle asserts that save_all_registers() pushes exactly this many words
+const Register trace_mh_regs[trace_mh_nregs] =
+ {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; // used to name saved_regs[i] in the register dump
+#endif // AARCH64
+
+void trace_method_handle_stub(const char* adaptername,
+                              intptr_t* saved_regs,
+                              intptr_t* saved_bp,
+                              oop mh) {
+  // Runtime side of trace_method_handle: prints one summary line for the
+  // adapter and, when Verbose is set, a register dump, a description of the
+  // last frame and the method handle itself.
+  //   adaptername - name printed for the adapter; also used to decide
+  //                 whether a method handle is expected
+  //   saved_regs  - register save area (trace_mh_nregs words)
+  //   saved_bp    - frame pointer value at the trace point
+  //   mh          - candidate method handle oop
+  // called as a leaf from native code: do not block the JVM!
+
+  // static linkers don't have MH
+  const bool has_mh = !(strstr(adaptername, "/static") != NULL ||
+                        strstr(adaptername, "linkTo") != NULL);
+
+  intptr_t* const entry_sp = (intptr_t*) &saved_regs[trace_mh_nregs];         // just after the saved regs
+  intptr_t* const saved_sp = (intptr_t*) saved_regs[Rsender_sp->encoding()];  // save of Rsender_sp
+  const int last_sp_slot = AARCH64_ONLY(frame::interpreter_frame_stack_top_offset)
+                           NOT_AARCH64(frame::interpreter_frame_last_sp_offset);
+  intptr_t* const last_sp = (intptr_t*) saved_bp[last_sp_slot];
+  intptr_t* const base_sp = last_sp;
+
+  const intptr_t mh_reg = (intptr_t)saved_regs[R5_mh->encoding()];
+  const char* const mh_reg_name = has_mh ? "R5_mh" : "R5";
+  tty->print_cr("MH %s %s=" PTR_FORMAT " sp=(" PTR_FORMAT "+" INTX_FORMAT ") stack_size=" INTX_FORMAT " bp=" PTR_FORMAT,
+                adaptername, mh_reg_name, mh_reg,
+                (intptr_t)entry_sp, (intptr_t)saved_sp - (intptr_t)entry_sp, (intptr_t)(base_sp - last_sp), (intptr_t)saved_bp);
+
+  if (last_sp != NULL && last_sp != saved_sp) {
+    tty->print_cr("*** last_sp=" INTPTR_FORMAT, p2i(last_sp));
+  }
+
+  // Everything below is printed in Verbose mode only.
+  if (!Verbose) {
+    return;
+  }
+
+  // 1. register dump
+  tty->print(" reg dump: ");
+  const int regs_per_row = AARCH64_ONLY(2) NOT_AARCH64(4);
+  for (int reg_index = 0; reg_index < trace_mh_nregs; reg_index++) {
+    if (reg_index > 0 && (reg_index % regs_per_row) == 0) {
+      tty->print("\n + dump: ");
+    }
+#ifdef AARCH64
+    const char* reg_name = (reg_index == trace_mh_nregs-1) ? "pc" : as_Register(reg_index)->name();
+#else
+    const char* reg_name = trace_mh_regs[reg_index]->name();
+#endif
+    tty->print(" %s: " INTPTR_FORMAT, reg_name, p2i((void *)saved_regs[reg_index]));
+  }
+  tty->cr();
+
+  // 2. dump last frame (from JavaThread::print_frame_layout)
+  {
+    // Note: code is robust but the dumped information may not be
+    // 100% correct, particularly with respect to the dumped
+    // "unextended_sp". Getting it right for all trace_method_handle
+    // call paths is not worth the complexity/risk. The correct slot
+    // will be identified by *Rsender_sp anyway in the dump.
+    JavaThread* p = JavaThread::active();
+
+    ResourceMark rm;
+    PRESERVE_EXCEPTION_MARK;
+    FrameValues values;
+
+    intptr_t* dump_fp = (intptr_t *) saved_bp;
+    address dump_pc = (address) saved_regs[trace_mh_nregs-2]; // LR (with LR,PC last in saved_regs)
+    frame dump_frame((intptr_t *)entry_sp, dump_fp, dump_pc);
+
+    dump_frame.describe(values, 1);
+    // mark Rsender_sp if seems valid
+    if (has_mh &&
+        (saved_sp >= entry_sp - UNREASONABLE_STACK_MOVE) && (saved_sp < dump_fp)) {
+      values.describe(-1, saved_sp, "*Rsender_sp");
+    }
+
+    // Note: the unextended_sp may not be correct
+    tty->print_cr(" stack layout:");
+    values.print(p);
+  }
+
+  // 3. the method handle and, if available, its form
+  if (has_mh && mh->is_oop()) {
+    mh->print();
+    if (java_lang_invoke_MethodHandle::is_instance(mh) &&
+        java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) {
+      java_lang_invoke_MethodHandle::form(mh)->print();
+    }
+  }
+}
+
+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
+  // When TraceMethodHandles is set, emits code that saves all registers,
+  // calls trace_method_handle_stub(adaptername, saved regs, FP, R5_mh) as a
+  // leaf call and restores the registers again. Emits nothing otherwise.
+  if (TraceMethodHandles) {
+    BLOCK_COMMENT("trace_method_handle {");
+
+    // register saving
+    // must correspond to trace_mh_nregs and trace_mh_regs defined above
+    int push_size = __ save_all_registers();
+    assert(trace_mh_nregs*wordSize == push_size,"saved register count mismatch");
+
+    // Set up the four stub arguments in R0..R3.
+    __ mov_slow(R0, adaptername);
+    __ mov(R1, SP); // entry_sp (after pushes)
+    __ mov(R2, FP);
+    if (R5_mh != R3) {
+      assert_different_registers(R0, R1, R2, R5_mh);
+      __ mov(R3, R5_mh);
+    }
+
+    const address stub_entry = CAST_FROM_FN_PTR(address, trace_method_handle_stub);
+    __ call_VM_leaf(stub_entry, R0, R1, R2, R3);
+
+    __ restore_all_registers();
+    BLOCK_COMMENT("} trace_method_handle");
+  }
+}
+#endif //PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/methodHandles_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Platform-specific definitions for method handles.
+// These definitions are inlined into class MethodHandles.
+
+// Adapters
+enum /* platform_dependent_constants */ {
+ adapter_code_size = 18000 NOT_PRODUCT(+ 30000) // the extra 30000 applies only where NOT_PRODUCT expands (non-product builds); presumably a size in bytes - TODO confirm
+};
+
+// Additional helper methods for MethodHandles code generation:
+public:
+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp1, Register temp2);
+
+ static void verify_klass(MacroAssembler* _masm,
+ Register obj, Register temp1, Register temp2, SystemDictionary::WKID klass_id,
+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
+
+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
+ // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+ // Takes care of special dispatch from single stepping too.
+ // Rmethod should contain the target Method*.
+ static void jump_from_method_handle(MacroAssembler* _masm, bool for_compiler_entry);
+
+ static void jump_to_lambda_form(MacroAssembler* _masm,
+ Register recv, Register tmp,
+ bool for_compiler_entry);
+
+ static Register saved_last_sp_register() {
+ // Returns Rsender_sp; the dispatch code uses it as a temp for compiled entries and keeps it untouched for interpreted ones. Should be in sharedRuntime, not here.
+ return Rsender_sp;
+ }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/nativeInst_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_NATIVEINST_ARM_HPP
+#define CPU_ARM_VM_NATIVEINST_ARM_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+
+
+#ifdef AARCH64
+#include "nativeInst_arm_64.hpp"
+#else
+#include "nativeInst_arm_32.hpp"
+#endif
+
+
+#endif // CPU_ARM_VM_NATIVEINST_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/nativeInst_arm_32.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_arm.inline.hpp"
+#include "code/codeCache.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_arm.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/ostream.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#include "code/icBuffer.hpp"
+
+int NativeMovRegMem::offset() const { // decode the immediate offset field; the add/subtract direction bit is not examined here
+ switch (kind()) {
+ case instr_ldr_str:
+ return encoding() & 0xfff; // 12-bit immediate held in bits 11:0
+ case instr_ldrh_strh:
+ return (encoding() & 0x0f) | ((encoding() >> 4) & 0xf0); // low nibble from bits 3:0, high nibble from bits 11:8
+ case instr_fld_fst:
+ return (encoding() & 0xff) << 2; // 8-bit field scaled by 4
+ default:
+ ShouldNotReachHere();
+ return 0; // gives every path a return value
+ }
+}
+
+void NativeMovRegMem::set_offset(int x) { // re-encode the immediate offset as x, spilling into the following nop slot when x does not fit
+ assert(x >= 0 && x < 65536, "encoding constraint");
+ const int Rt = Rtemp->encoding(); // register number of Rtemp, used as the base of the second instruction
+
+ // If offset is too large to be placed into single ldr/str instruction, we replace
+ // ldr Rd, [Rn, #offset]
+ // nop
+ // with
+ // add Rtemp, Rn, #offset_hi
+ // ldr Rd, [Rtemp, #offset_lo]
+ switch (kind()) {
+ case instr_ldr_str:
+ if (x < 4096) {
+ set_encoding((encoding() & 0xfffff000) | x); // fits the 12-bit field: patch in place
+ } else {
+ NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address());
+ assert(next->is_nop(), "must be");
+ next->set_encoding((encoding() & 0xfff0f000) | Rt << 16 | (x & 0xfff)); // written first: built from this instruction's still-unmodified encoding, base register replaced by Rt
+ this->set_encoding((encoding() & 0x000f0000) | Rt << 12 | x >> 12 | 0xe2800a00); // keeps bits 19:16 (Rn), destination Rt, immediate x >> 12
+ }
+ break;
+ case instr_ldrh_strh:
+ if (x < 256) {
+ set_encoding((encoding() & 0xfffff0f0) | (x & 0x0f) | (x & 0xf0) << 4); // split 8-bit immediate: bits 3:0 and bits 11:8
+ } else {
+ NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address());
+ assert(next->is_nop(), "must be");
+ next->set_encoding((encoding() & 0xfff0f0f0) | Rt << 16 | (x & 0x0f) | (x & 0xf0) << 4); // low 8 bits of x stay in the load/store
+ this->set_encoding((encoding() & 0x000f0000) | Rt << 12 | x >> 8 | 0xe2800c00); // immediate x >> 8
+ }
+ break;
+ case instr_fld_fst:
+ if (x < 1024) {
+ set_encoding((encoding() & 0xffffff00) | (x >> 2)); // field holds x / 4
+ } else {
+ NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address());
+ assert(next->is_nop(), "must be");
+ next->set_encoding((encoding() & 0xfff0ff00) | Rt << 16 | ((x >> 2) & 0xff)); // bits 9:2 of x stay in the load/store
+ this->set_encoding((encoding() & 0x000f0000) | Rt << 12 | x >> 10 | 0xe2800b00); // immediate x >> 10
+ }
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+intptr_t NativeMovConstReg::data() const { // read back the constant this instruction (pair) materialises
+ RawNativeInstruction* next = next_raw();
+ if (is_movw()) {
+ // Oop embedded in movw/movt instructions
+ assert(VM_Version::supports_movw(), "must be");
+ return (this->encoding() & 0x00000fff) | (this->encoding() & 0x000f0000) >> 4 | // low half: bits 11:0 plus bits 19:16 moved down to 15:12
+ (next->encoding() & 0x00000fff) << 16 | (next->encoding() & 0x000f0000) << 12; // high half: same two fields of the next instruction
+ } else {
+ // Oop is loaded from oops section or inlined in the code
+ int oop_offset;
+ if (is_ldr_literal()) {
+ // ldr Rd, [PC, #offset]
+ oop_offset = ldr_offset();
+ } else {
+ assert(next->is_ldr(), "must be");
+ oop_offset = (this->encoding() & 0xff) << 12 | (next->encoding() & 0xfff); // magnitude: high part from this instruction, low 12 bits from the ldr
+ if (is_add_pc()) {
+ // add Rd, PC, #offset_hi
+ // ldr Rd, [Rd, #offset_lo]
+ assert(next->encoding() & (1 << 23), "sign mismatch");
+ // offset OK (both positive)
+ } else {
+ assert(is_sub_pc(), "must be");
+ // sub Rd, PC, #offset_hi
+ // ldr Rd, [Rd, -#offset_lo]
+ assert(!(next->encoding() & (1 << 23)), "sign mismatch");
+ // negative offsets
+ oop_offset = -oop_offset;
+ }
+ }
+ return *(int*)(instruction_address() + 8 + oop_offset); // offsets are relative to this instruction's address + 8
+ }
+}
+
+void NativeMovConstReg::set_data(intptr_t x, address pc) { // store x as this instruction's constant; a non-zero pc replaces instruction_address() as the offset base
+ // Find and replace the oop corresponding to this instruction in oops section
+ RawNativeInstruction* next = next_raw();
+ oop* oop_addr = NULL;
+ Metadata** metadata_addr = NULL;
+ CodeBlob* cb = CodeCache::find_blob(instruction_address());
+ if (cb != NULL) {
+ nmethod* nm = cb->as_nmethod_or_null();
+ if (nm != NULL) {
+ RelocIterator iter(nm, instruction_address(), next->instruction_address()); // only relocations in the range of this one instruction
+ while (iter.next()) {
+ if (iter.type() == relocInfo::oop_type) {
+ oop_addr = iter.oop_reloc()->oop_addr();
+ *oop_addr = cast_to_oop(x);
+ break;
+ } else if (iter.type() == relocInfo::metadata_type) {
+ metadata_addr = iter.metadata_reloc()->metadata_addr();
+ *metadata_addr = (Metadata*)x;
+ break;
+ }
+ }
+ }
+ }
+
+ if (is_movw()) {
+ // data embedded in movw/movt instructions
+ assert(VM_Version::supports_movw(), "must be");
+ unsigned int lo = (unsigned int)x;
+ unsigned int hi = (unsigned int)(x >> 16);
+ this->set_encoding((this->encoding() & 0xfff0f000) | (lo & 0xf000) << 4 | (lo & 0xfff)); // 16-bit value split into bits 19:16 and bits 11:0
+ next->set_encoding((next->encoding() & 0xfff0f000) | (hi & 0xf000) << 4 | (hi & 0xfff));
+ } else if (oop_addr == NULL && metadata_addr == NULL) { // logical AND (was bitwise '&'): no oop or metadata relocation was found
+ // A static ldr_literal (without oop or metadata relocation)
+ assert(is_ldr_literal(), "must be");
+ int offset = ldr_offset();
+ oop_addr = (oop*)(instruction_address() + 8 + offset);
+ *oop_addr = cast_to_oop(x);
+ } else {
+ // data is loaded from oop or metadata section
+ int offset;
+
+ address addr = oop_addr != NULL ? (address)oop_addr : (address)metadata_addr;
+
+ if(pc == 0) {
+ offset = addr - instruction_address() - 8;
+ } else {
+ offset = addr - pc - 8;
+ }
+
+ int sign = (offset >= 0) ? (1 << 23) : 0; // bit 23 of the ldr selects add (set) or subtract (clear)
+ int delta = (offset >= 0) ? offset : (-offset);
+ assert(delta < 0x100000, "within accessible range");
+ if (is_ldr_literal()) {
+ // fix the ldr with the real offset to the oop/metadata table
+ assert(next->is_nop(), "must be");
+ if (delta < 4096) {
+ // ldr Rd, [PC, #offset]
+ set_encoding((encoding() & 0xff7ff000) | delta | sign);
+ assert(ldr_offset() == offset, "check encoding");
+ } else {
+ int cc = encoding() & 0xf0000000; // condition field, carried over to the new add/sub
+ int Rd = (encoding() >> 12) & 0xf;
+ int Rt = Rd; // the destination doubles as the temporary base register
+ assert(Rt != 0xf, "Illegal destination register"); // or fix by using Rtemp
+ // move the ldr, fixing delta_lo and the source register
+ next->set_encoding((encoding() & 0xff70f000) | (Rt << 16) | (delta & 0xfff) | sign);
+ assert(next->is_ldr(), "must be");
+ if (offset > 0) {
+ // add Rt, PC, #delta_hi
+ // ldr Rd, [Rt, #delta_lo]
+ this->set_encoding((Rt << 12) | (delta >> 12) | 0x028f0a00 | cc);
+ assert(is_add_pc(), "must be");
+ } else {
+ // sub Rt, PC, #delta_hi
+ // ldr Rd, [Rt, -#delta_lo]
+ this->set_encoding((Rt << 12) | (delta >> 12) | 0x024f0a00 | cc);
+ assert(is_sub_pc(), "must be");
+ }
+ }
+ } else {
+ assert(is_pc_rel(), "must be");
+ assert(next->is_ldr(), "must be");
+ if (offset > 0) {
+ // add Rt, PC, #delta_hi
+ this->set_encoding((this->encoding() & 0xf00ff000) | 0x02800a00 | (delta >> 12));
+ assert(is_add_pc(), "must be");
+ } else {
+ // sub Rt, PC, #delta_hi
+ this->set_encoding((this->encoding() & 0xf00ff000) | 0x02400a00 | (delta >> 12));
+ assert(is_sub_pc(), "must be");
+ }
+ // ldr Rd, Rt, #delta_lo (or -#delta_lo)
+ next->set_encoding((next->encoding() & 0xff7ff000) | (delta & 0xfff) | sign);
+ }
+ }
+}
+
+void NativeMovConstReg::set_pc_relative_offset(address addr, address pc) { // re-encode the pc-relative load so that it reads from addr
+ int offset;
+ if (pc == 0) {
+ offset = addr - instruction_address() - 8; // no explicit pc given: measure from this instruction
+ } else {
+ offset = addr - pc - 8; // measure from the supplied pc instead
+ }
+
+ RawNativeInstruction* next = next_raw();
+
+ int sign = (offset >= 0) ? (1 << 23) : 0; // bit 23 of the ldr selects add (set) or subtract (clear)
+ int delta = (offset >= 0) ? offset : (-offset);
+ assert(delta < 0x100000, "within accessible range");
+ if (is_ldr_literal()) {
+ if (delta < 4096) {
+ // ldr Rd, [PC, #offset]
+ set_encoding((encoding() & 0xff7ff000) | delta | sign);
+ assert(ldr_offset() == offset, "check encoding");
+ } else {
+ assert(next->is_nop(), "must be"); // the two-instruction form needs the following slot
+ int cc = encoding() & 0xf0000000; // condition field, carried over to the new add/sub
+ int Rd = (encoding() >> 12) & 0xf;
+ int Rt = Rd; // the destination doubles as the temporary base register
+ assert(Rt != 0xf, "Illegal destination register"); // or fix by using Rtemp
+ // move the ldr, fixing delta_lo and the source register
+ next->set_encoding((encoding() & 0xff70f000) | (Rt << 16) | (delta & 0xfff) | sign);
+ assert(next->is_ldr(), "must be");
+ if (offset > 0) {
+ // add Rt, PC, #delta_hi
+ // ldr Rd, [Rt, #delta_lo]
+ this->set_encoding((Rt << 12) | (delta >> 12) | 0x028f0a00 | cc);
+ assert(is_add_pc(), "must be");
+ } else {
+ // sub Rt, PC, #delta_hi
+ // ldr Rd, [Rt, -#delta_lo]
+ this->set_encoding((Rt << 12) | (delta >> 12) | 0x024f0a00 | cc);
+ assert(is_sub_pc(), "must be");
+ }
+ }
+ } else {
+ assert(is_pc_rel(), "must be");
+ assert(next->is_ldr(), "must be");
+ if (offset > 0) {
+ // add Rt, PC, #delta_hi
+ this->set_encoding((this->encoding() & 0xf00ff000) | 0x02800a00 | (delta >> 12));
+ assert(is_add_pc(), "must be");
+ } else {
+ // sub Rt, PC, #delta_hi
+ this->set_encoding((this->encoding() & 0xf00ff000) | 0x02400a00 | (delta >> 12));
+ assert(is_sub_pc(), "must be");
+ }
+ // ldr Rd, Rt, #delta_lo (or -#delta_lo)
+ next->set_encoding((next->encoding() & 0xff7ff000) | (delta & 0xfff) | sign);
+ }
+}
+
+void RawNativeJump::check_verified_entry_alignment(address entry, address verified_entry) { // empty body: no alignment check is performed here
+}
+
+void RawNativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { // overwrite the first word at verified_entry with an illegal instruction
+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "should be"); // dest is only checked, never encoded
+ int *a = (int *)verified_entry;
+ a[0] = zombie_illegal_instruction; // always illegal
+ ICache::invalidate_range((address)&a[0], sizeof a[0]); // invalidate the instruction cache for the one patched word
+}
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { // write an unconditional branch at code_pos that targets entry
+ int offset = (int)(entry - code_pos - 8); // branch offsets are measured from the instruction address + 8
+ assert(offset < 0x2000000 && offset > -0x2000000, "encoding constraint");
+ nativeInstruction_at(code_pos)->set_encoding(0xea000000 | ((unsigned int)offset << 6 >> 8)); // << 6 >> 8 leaves bits 25:2 of offset in the low 24 bits
+}
+
+static address raw_call_for(address return_address) { // find the call instruction whose return address is return_address, or NULL
+ CodeBlob* cb = CodeCache::find_blob(return_address);
+ nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL; // find_blob may return NULL; fail via ShouldNotReachHere instead of dereferencing it
+ if (nm == NULL) {
+ ShouldNotReachHere();
+ return NULL;
+ }
+ // Look back 4 instructions, to allow for ic_call
+ address begin = MAX2(return_address - 4*NativeInstruction::instruction_size, nm->code_begin()); // never scan before the start of the nmethod's code
+ RelocIterator iter(nm, begin, return_address);
+ while (iter.next()) {
+ Relocation* reloc = iter.reloc();
+ if (reloc->is_call()) {
+ address call = reloc->addr();
+ if (nativeInstruction_at(call)->is_call()) {
+ if (nativeCall_at(call)->return_address() == return_address) {
+ return call;
+ }
+ } else {
+ // Some "calls" are really jumps
+ assert(nativeInstruction_at(call)->is_jump(), "must be call or jump");
+ }
+ }
+ }
+ return NULL; // no call relocation in the window returns to return_address
+}
+
+bool RawNativeCall::is_call_before(address return_address) { // true when raw_call_for() finds a call returning to return_address
+ return (raw_call_for(return_address) != NULL);
+}
+
+NativeCall* rawNativeCall_before(address return_address) { // the call located by raw_call_for(), viewed as a NativeCall
+ address call = raw_call_for(return_address);
+ assert(call != NULL, "must be"); // callers must only ask when such a call exists
+ return nativeCall_at(call);
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/nativeInst_arm_32.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_NATIVEINST_ARM_32_HPP
+#define CPU_ARM_VM_NATIVEINST_ARM_32_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "code/codeCache.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+#include "runtime/thread.hpp"
+#include "register_arm.hpp"
+
+// -------------------------------------------------------------------
+
+// Some experimental projects extend the ARM back-end by implementing
+// what the front-end usually assumes is a single native instruction
+// with a sequence of instructions.
+//
+// The 'Raw' variants are the low level initial code (usually one
+// instruction wide but some of them were already composed
+// instructions). They should be used only by the back-end.
+//
+// The non-raw classes are the front-end entry point, hiding potential
+// back-end extensions or the actual instructions size.
+class NativeInstruction;
+
+class RawNativeInstruction VALUE_OBJ_CLASS_SPEC { // view of one 32-bit instruction word at the object's own address
+ public:
+
+ enum ARM_specific {
+ instruction_size = Assembler::InstructionSize
+ };
+
+ enum InstructionKind { // values compared against kind(), i.e. (encoding >> 20) & 0xf2
+ instr_ldr_str = 0x50,
+ instr_ldrh_strh = 0x10,
+ instr_fld_fst = 0xd0
+ };
+
+ // illegal instruction used by NativeJump::patch_verified_entry
+ // permanently undefined (UDF): 0xe << 28 | 0b1111111 << 20 | 0b1111 << 4
+ static const int zombie_illegal_instruction = 0xe7f000f0;
+
+ static int decode_rotated_imm12(int encoding) { // expand an 8-bit value rotated right by twice the 4-bit field in bits 11:8
+ unsigned int base = encoding & 0xff; // unsigned so that both shifts below are well defined
+ int right_rotation = (encoding & 0xf00) >> 7; // rotate field times two
+ int left_rotation = (32 - right_rotation) & 31; // masked: a rotation of 0 must not turn into a shift by 32
+ int val = (int)((base >> right_rotation) | (base << left_rotation));
+ return val;
+ }
+
+ address addr_at(int offset) const { return (address)this + offset; }
+ address instruction_address() const { return addr_at(0); }
+ address next_raw_instruction_address() const { return addr_at(instruction_size); }
+
+ static RawNativeInstruction* at(address address) {
+ return (RawNativeInstruction*)address;
+ }
+ RawNativeInstruction* next_raw() const { // the instruction word immediately following this one
+ return at(next_raw_instruction_address());
+ }
+
+ public:
+ int encoding() const { return *(int*)this; }
+
+ void set_encoding(int value) { // rewrite the instruction word; the icache is invalidated only if the word changes
+ int old = *(int*)this;
+ if (old != value) {
+ *(int*)this = value;
+ ICache::invalidate_word((address)this);
+ }
+ }
+
+ InstructionKind kind() const { // keeps encoding bits 27:24 and bit 21 for comparison with InstructionKind
+ return (InstructionKind) ((encoding() >> 20) & 0xf2);
+ }
+
+ bool is_nop() const { return encoding() == (int)0xe1a00000; } // exact match only; other no-op encodings are not recognised
+ bool is_b() const { return (encoding() & 0x0f000000) == 0x0a000000; } // condition bits ignored
+ bool is_bx() const { return (encoding() & 0x0ffffff0) == 0x012fff10; }
+ bool is_bl() const { return (encoding() & 0x0f000000) == 0x0b000000; } // condition bits ignored
+ bool is_blx() const { return (encoding() & 0x0ffffff0) == 0x012fff30; }
+ bool is_fat_call() const { // add lr, pc, #imm followed by a jump
+ return (is_add_lr() && next_raw()->is_jump());
+ }
+ bool is_ldr_call() const { // add lr, pc, #imm followed by a load into pc
+ return (is_add_lr() && next_raw()->is_ldr_pc());
+ }
+ bool is_jump() const { return is_b() || is_ldr_pc(); }
+ bool is_call() const { return is_bl() || is_fat_call(); }
+ bool is_branch() const { return is_b() || is_bl(); }
+ bool is_far_branch() const { return is_movw() || is_ldr_literal(); }
+ bool is_ldr_literal() const {
+ // ldr Rx, [PC, #offset] for positive or negative offsets
+ return (encoding() & 0x0f7f0000) == 0x051f0000;
+ }
+ bool is_ldr() const {
+ // ldr Rd, [Rn, #offset] for positive or negative offsets
+ return (encoding() & 0x0f700000) == 0x05100000;
+ }
+ int ldr_offset() const { // signed immediate offset of an ldr: magnitude in bits 11:0, direction in bit 23
+ assert(is_ldr(), "must be");
+ int offset = encoding() & 0xfff;
+ if (encoding() & (1 << 23)) {
+ // positive offset
+ } else {
+ // negative offset
+ offset = -offset;
+ }
+ return offset;
+ }
+ // is_ldr_pc: ldr PC, PC, #offset
+ bool is_ldr_pc() const { return (encoding() & 0x0f7ff000) == 0x051ff000; }
+ // is_setting_pc(): ldr PC, Rxx, #offset
+ bool is_setting_pc() const { return (encoding() & 0x0f70f000) == 0x0510f000; }
+ bool is_add_lr() const { return (encoding() & 0x0ffff000) == 0x028fe000; } // add lr, pc, #imm
+ bool is_add_pc() const { return (encoding() & 0x0fff0000) == 0x028f0000; } // add Rd, pc, #imm
+ bool is_sub_pc() const { return (encoding() & 0x0fff0000) == 0x024f0000; } // sub Rd, pc, #imm
+ bool is_pc_rel() const { return is_add_pc() || is_sub_pc(); }
+ bool is_movw() const { return (encoding() & 0x0ff00000) == 0x03000000; }
+ bool is_movt() const { return (encoding() & 0x0ff00000) == 0x03400000; }
+ // c2 doesn't use fixed registers for safepoint poll address
+ bool is_safepoint_poll() const { return (encoding() & 0xfff0ffff) == 0xe590c000; } // bits 19:16 (base register) are ignored
+ // For unit tests
+ static void test() {}
+
+};
+
+inline RawNativeInstruction* rawNativeInstruction_at(address address) { // plain reinterpretation of the address; nothing is validated
+ return (RawNativeInstruction*)address;
+}
+
+// Base class exported to the front-end
+class NativeInstruction: public RawNativeInstruction {
+public:
+ static NativeInstruction* at(address address) { // plain reinterpretation of the address; nothing is validated
+ return (NativeInstruction*)address;
+ }
+
+public:
+ // No need to consider indirections while parsing NativeInstruction
+ address next_instruction_address() const { // here simply the address of the next raw instruction word
+ return next_raw_instruction_address();
+ }
+
+ // next() is no longer defined to avoid confusion.
+ //
+ // The front end and most classes except for those defined in nativeInst_arm
+ // or relocInfo_arm should only use next_instruction_address(), skipping
+ // over composed instruction and ignoring back-end extensions.
+ //
+ // The back-end can use next_raw() when it knows the instruction sequence
+ // and only wants to skip a single native instruction.
+};
+
+inline NativeInstruction* nativeInstruction_at(address address) { // plain reinterpretation of the address; nothing is validated
+ return (NativeInstruction*)address;
+}
+
+// -------------------------------------------------------------------
+// Raw b() or bl() instructions, not used by the front-end.
+class RawNativeBranch: public RawNativeInstruction {
+ public:
+
+ address destination(int adj = 0) const { // branch target, plus the caller-supplied adjustment adj
+ return instruction_address() + (encoding() << 8 >> 6) + 8 + adj; // << 8 >> 6 on the signed word: drops the top 8 bits, sign-extends the 24-bit field and multiplies it by 4
+ }
+
+ void set_destination(address dest) { // re-encode the 24-bit offset field, keeping the top 8 bits of the instruction
+ int new_offset = (int)(dest - instruction_address() - 8);
+ assert(new_offset < 0x2000000 && new_offset > -0x2000000, "encoding constraint");
+ set_encoding((encoding() & 0xff000000) | ((unsigned int)new_offset << 6 >> 8)); // << 6 >> 8 leaves bits 25:2 of the offset in the low 24 bits
+ }
+};
+
+inline RawNativeBranch* rawNativeBranch_at(address address) { // cast, asserting that a b or bl is found there
+ assert(rawNativeInstruction_at(address)->is_branch(), "must be");
+ return (RawNativeBranch*)address;
+}
+
+class NativeBranch: public RawNativeBranch { // adds no members of its own
+};
+
+inline NativeBranch* nativeBranch_at(address address) { // goes through rawNativeBranch_at(), so its is_branch() assert applies
+ return (NativeBranch *) rawNativeBranch_at(address);
+}
+
+// -------------------------------------------------------------------
+// NativeGeneralJump is for patchable internal (near) jumps
+// It is used directly by the front-end and must be a single instruction wide
+// (to support patching to other kind of instructions).
+class NativeGeneralJump: public RawNativeInstruction {
+ public:
+
+ address jump_destination() const { // target of the branch at this address
+ return rawNativeBranch_at(instruction_address())->destination();
+ }
+
+ void set_jump_destination(address dest) { // retarget the branch at this address
+ return rawNativeBranch_at(instruction_address())->set_destination(dest);
+ }
+
+ static void insert_unconditional(address code_pos, address entry);
+
+ static void replace_mt_safe(address instr_addr, address code_buffer) { // copy one instruction word from code_buffer to instr_addr
+ assert(((int)instr_addr & 3) == 0 && ((int)code_buffer & 3) == 0, "must be aligned");
+ // Writing a word is atomic on ARM, so no MT-safe tricks are needed
+ rawNativeInstruction_at(instr_addr)->set_encoding(*(int*)code_buffer);
+ }
+};
+
+inline NativeGeneralJump* nativeGeneralJump_at(address address) { // cast, asserting that a jump (b or ldr pc) is found there
+ assert(rawNativeInstruction_at(address)->is_jump(), "must be");
+ return (NativeGeneralJump*)address;
+}
+
+// -------------------------------------------------------------------
+class RawNativeJump: public NativeInstruction {
+ public:
+
+ address jump_destination(int adj = 0) const { // target of a b, or the address stored in the literal slot of an ldr pc
+ address a;
+ if (is_b()) {
+ a = rawNativeBranch_at(instruction_address())->destination(adj);
+ // Jump destination -1 is encoded as a jump to self
+ if (a == instruction_address()) {
+ return (address)-1;
+ }
+ } else {
+ assert(is_ldr_pc(), "must be");
+ int offset = this->ldr_offset();
+ a = *(address*)(instruction_address() + 8 + offset); // literal slot is addressed relative to the instruction address + 8; adj is not applied here
+ }
+ return a;
+ }
+
+ void set_jump_destination(address dest) {
+ // Retargets a b by re-encoding it, or an ldr pc by rewriting its literal slot.
+ if (is_b()) {
+ // Jump destination -1 is encoded as a jump to self
+ if (dest == (address)-1) {
+ dest = instruction_address();
+ }
+ rawNativeBranch_at(instruction_address())->set_destination(dest);
+ } else {
+ assert(is_ldr_pc(), "must be");
+ int offset = this->ldr_offset();
+ *(address*)(instruction_address() + 8 + offset) = dest; // the instruction itself is left unchanged
+ OrderAccess::storeload(); // overkill if caller holds lock?
+ }
+ }
+
+ static void check_verified_entry_alignment(address entry, address verified_entry);
+
+ static void patch_verified_entry(address entry, address verified_entry, address dest);
+
+};
+
+inline RawNativeJump* rawNativeJump_at(address address) { // cast, asserting that a jump (b or ldr pc) is found there
+ assert(rawNativeInstruction_at(address)->is_jump(), "must be");
+ return (RawNativeJump*)address;
+}
+
+// -------------------------------------------------------------------
+class RawNativeCall: public NativeInstruction {
+ // See IC calls in LIR_Assembler::ic_call(): ARM v5/v6 doesn't use a
+ // single bl for IC calls.
+
+ public:
+
+ address return_address() const { // address the callee returns to
+ if (is_bl()) {
+ return addr_at(instruction_size); // bl: the next instruction
+ } else {
+ assert(is_fat_call(), "must be");
+ int offset = encoding() & 0xff; // low 8 bits of the add lr, pc, #imm
+ return addr_at(offset + 8); // same value the add computes, taking pc as this address + 8
+ }
+ }
+
+ address destination(int adj = 0) const { // call target: the bl target, or the target of the jump after the add lr
+ if (is_bl()) {
+ return rawNativeBranch_at(instruction_address())->destination(adj);
+ } else {
+ assert(is_add_lr(), "must be"); // fat_call
+ RawNativeJump *next = rawNativeJump_at(next_raw_instruction_address());
+ return next->jump_destination(adj);
+ }
+ }
+
+ void set_destination(address dest) { // retarget the bl, or the jump that follows the add lr
+ if (is_bl()) {
+ return rawNativeBranch_at(instruction_address())->set_destination(dest);
+ } else {
+ assert(is_add_lr(), "must be"); // fat_call
+ RawNativeJump *next = rawNativeJump_at(next_raw_instruction_address());
+ return next->set_jump_destination(dest);
+ }
+ }
+
+ void set_destination_mt_safe(address dest) { // same as set_destination(), with an added check that dest is in the code cache
+ assert(CodeCache::contains(dest), "external destination might be too far");
+ set_destination(dest);
+ }
+
+ void verify() { // debug-only check: a call, or a jump when movw is unsupported
+ assert(RawNativeInstruction::is_call() || (!VM_Version::supports_movw() && RawNativeInstruction::is_jump()), "must be");
+ }
+
+ void verify_alignment() {
+ // Nothing to do on ARM
+ }
+
+ static bool is_call_before(address return_address);
+};
+
+inline RawNativeCall* rawNativeCall_at(address address) { // cast, asserting that a call (bl or fat call) is found there
+ assert(rawNativeInstruction_at(address)->is_call(), "must be");
+ return (RawNativeCall*)address;
+}
+
+NativeCall* rawNativeCall_before(address return_address);
+
+// -------------------------------------------------------------------
+// NativeMovRegMem need not be extended with indirection support.
+// (field access patching is handled differently in that case)
+class NativeMovRegMem: public NativeInstruction {
+ public:
+
+ int offset() const;
+ void set_offset(int x);
+
+ void add_offset_in_bytes(int add_offset) { // read the current offset, add add_offset, re-encode
+ set_offset(offset() + add_offset);
+ }
+
+};
+
+inline NativeMovRegMem* nativeMovRegMem_at(address address) { // cast, asserting the word is one of the three supported load/store kinds
+ NativeMovRegMem* instr = (NativeMovRegMem*)address;
+ assert(instr->kind() == NativeInstruction::instr_ldr_str ||
+ instr->kind() == NativeInstruction::instr_ldrh_strh ||
+ instr->kind() == NativeInstruction::instr_fld_fst, "must be");
+ return instr;
+}
+
+// -------------------------------------------------------------------
+// NativeMovConstReg is primarily for loading oops and metadata
+class NativeMovConstReg: public NativeInstruction {
+ public:
+
+ intptr_t data() const;
+ void set_data(intptr_t x, address pc = 0);
+ bool is_pc_relative() { // every form other than movw counts as pc-relative
+ return !is_movw();
+ }
+ void set_pc_relative_offset(address addr, address pc);
+ address next_instruction_address() const {
+ // NOTE: CompiledStaticCall::set_to_interpreted() calls this but
+ // is restricted to the single-instruction ldr. No need to jump over
+ // several instructions.
+ assert(is_ldr_literal(), "Should only use single-instructions load");
+ return next_raw_instruction_address();
+ }
+};
+
+inline NativeMovConstReg* nativeMovConstReg_at(address address) { // cast, asserting one of the recognised constant-load forms is found there
+ NativeInstruction* ni = nativeInstruction_at(address);
+ assert(ni->is_ldr_literal() || ni->is_pc_rel() ||
+ (ni->is_movw() && VM_Version::supports_movw()), "must be"); // parentheses make the existing && / || grouping explicit
+ return (NativeMovConstReg*)address;
+}
+
+// -------------------------------------------------------------------
+// Front end classes, hiding experimental back-end extensions.
+
+// Extension to support indirections
+class NativeJump: public RawNativeJump { // adds no members of its own
+ public:
+};
+
+inline NativeJump* nativeJump_at(address address) { // cast, asserting that a jump (b or ldr pc) is found there
+ assert(nativeInstruction_at(address)->is_jump(), "must be");
+ return (NativeJump*)address;
+}
+
+class NativeCall: public RawNativeCall {
+public:
+ // NativeCall::next_instruction_address() is used only to define the
+ // range where to look for the relocation information. We need not
+ // walk over composed instructions (as long as the relocation information
+ // is associated to the first instruction).
+ address next_instruction_address() const { // one raw instruction past this address
+ return next_raw_instruction_address();
+ }
+
+};
+
+inline NativeCall* nativeCall_at(address address) { // cast, asserting a call, or a jump when movw is unsupported
+ assert(nativeInstruction_at(address)->is_call() ||
+ (!VM_Version::supports_movw() && nativeInstruction_at(address)->is_jump()), "must be");
+ return (NativeCall*)address;
+}
+
+inline NativeCall* nativeCall_before(address return_address) { // front-end name for rawNativeCall_before()
+ return (NativeCall *) rawNativeCall_before(return_address);
+}
+
+#endif // CPU_ARM_VM_NATIVEINST_ARM_32_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/nativeInst_arm_64.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_arm.inline.hpp"
+#include "code/codeCache.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_arm.hpp"
+#include "oops/klass.inline.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/ostream.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+void RawNativeInstruction::verify() { // fatal error unless this address is non-NULL and a multiple of instruction_size
+ // make sure code pattern is actually an instruction address
+ address addr = instruction_address();
+ if (addr == NULL || ((intptr_t)addr & (instruction_size - 1)) != 0) {
+ fatal("not an instruction address");
+ }
+}
+
+void NativeMovRegMem::set_offset(int x) { // re-encode the immediate offset as x, spilling into the following nop slot when x does not fit
+ int scale = get_offset_scale();
+ assert((x & right_n_bits(scale)) == 0, "offset should be aligned");
+ guarantee((x >> 24) == 0, "encoding constraint"); // checked in all builds: x must fit in 24 bits
+
+ if (Assembler::is_unsigned_imm_in_range(x, 12, scale)) {
+ set_unsigned_imm(x, 12, get_offset_scale(), 10); // single-instruction case: patch the immediate in place
+ return;
+ }
+
+ // If offset is too large to be placed into single ldr/str instruction, we replace
+ // ldr/str Rt, [Rn, #offset]
+ // nop
+ // with
+ // add LR, Rn, #offset_hi
+ // ldr/str Rt, [LR, #offset_lo]
+
+ // Note: Rtemp cannot be used as a temporary register as it could be used
+ // for value being stored (see LIR_Assembler::reg2mem).
+ // Patchable NativeMovRegMem instructions are generated in LIR_Assembler::mem2reg and LIR_Assembler::reg2mem
+ // which do not use LR, so it is free. Also, it does not conflict with LR usages in c1_LIRGenerator_arm.cpp.
+ const int tmp = LR->encoding();
+ const int rn = (encoding() >> 5) & 0x1f; // original base register, taken from bits 9:5
+
+ NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address());
+ assert(next->is_nop(), "must be");
+
+ next->set_encoding((encoding() & 0xffc0001f) | Assembler::encode_unsigned_imm((x & 0xfff), 12, scale, 10) | tmp << 5); // written first: built from this instruction's still-unmodified encoding, with base tmp and the low 12 bits of x
+ this->set_encoding(0x91400000 | Assembler::encode_unsigned_imm((x >> 12), 12, 0, 10) | rn << 5 | tmp); // the add: source rn, destination tmp, immediate x >> 12
+}
+
+intptr_t NativeMovConstReg::_data() const { // read back the constant: from a movz/movk sequence (COMPILER2 only) or from the ldr literal
+#ifdef COMPILER2
+ if (is_movz()) {
+ // narrow constant or ic call cached value
+ RawNativeInstruction* ni = next_raw();
+ assert(ni->is_movk(), "movz;movk expected");
+ uint lo16 = (encoding() >> 5) & 0xffff; // 16-bit immediate of the movz, bits 20:5
+ intptr_t hi = 0;
+ int i = 0;
+ while (ni->is_movk() && i < 3) { // at most three movk instructions are consumed
+ uint hi16 = (ni->encoding() >> 5) & 0xffff;
+ int shift = ((ni->encoding() >> 21) & 0x3) << 4; // 2-bit field in bits 22:21, times 16
+ hi |= (intptr_t)hi16 << shift;
+ ni = ni->next_raw();
+ ++i;
+ }
+ return lo16 | hi;
+ }
+#endif
+ return (intptr_t)(nativeLdrLiteral_at(instruction_address())->literal_value());
+}
+
+static void raw_set_data(RawNativeInstruction* si, intptr_t x, oop* oop_addr, Metadata** metadata_addr) { // patch the constant at si; oop_addr / metadata_addr are the relocation slots, if any
+#ifdef COMPILER2
+ if (si->is_movz()) {
+ // narrow constant or ic call cached value
+ uintptr_t nx = 0;
+ int val_size = 32; // number of value bits to encode; raised to 64 for a raw value
+ if (oop_addr != NULL) {
+ narrowOop encoded_oop = oopDesc::encode_heap_oop(*oop_addr);
+ nx = encoded_oop;
+ } else if (metadata_addr != NULL) {
+ assert((*metadata_addr)->is_klass(), "expected Klass");
+ narrowKlass encoded_k = Klass::encode_klass((Klass *)*metadata_addr);
+ nx = encoded_k;
+ } else {
+ nx = x;
+ val_size = 64;
+ }
+ RawNativeInstruction* ni = si->next_raw();
+ uint lo16 = nx & 0xffff;
+ int shift = 16;
+ int imm16 = 0xffff << 5; // mask of the 16-bit immediate field, bits 20:5
+ si->set_encoding((si->encoding() & ~imm16) | (lo16 << 5));
+ while (shift < val_size) { // one movk per remaining 16-bit chunk
+ assert(ni->is_movk(), "movk expected");
+ assert((((ni->encoding() >> 21) & 0x3) << 4) == shift, "wrong shift");
+ uint hi16 = (nx >> shift) & 0xffff;
+ ni->set_encoding((ni->encoding() & ~imm16) | (hi16 << 5));
+ shift += 16;
+ ni = ni->next_raw();
+ }
+ return;
+ }
+#endif
+
+ assert(si->is_ldr_literal(), "should be");
+
+ if (oop_addr == NULL && metadata_addr == NULL) {
+ // A static ldr_literal without oop_relocation
+ nativeLdrLiteral_at(si->instruction_address())->set_literal_value((address)x);
+ } else {
+ // Oop is loaded from oops section
+ address addr = oop_addr != NULL ? (address)oop_addr : (address)metadata_addr;
+ intx offset = addr - si->instruction_address(); // full pointer width: an int here could truncate and slip past the range guarantee below
+
+ assert((((intptr_t)addr) & 0x7) == 0, "target address should be aligned");
+ assert((offset & 0x3) == 0, "offset should be aligned");
+
+ guarantee(Assembler::is_offset_in_range(offset, 19), "offset is not in range");
+ nativeLdrLiteral_at(si->instruction_address())->set_literal_address(si->instruction_address() + offset);
+ }
+}
+
+void NativeMovConstReg::set_data(intptr_t x) { // update the relocation slot (if any) for this instruction, then patch the instruction itself
+ // Find and replace the oop corresponding to this instruction in oops section
+ oop* oop_addr = NULL;
+ Metadata** metadata_addr = NULL;
+ CodeBlob* cb = CodeCache::find_blob(instruction_address());
+ {
+ nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL; // find_blob may return NULL; then treat as having no relocations
+ if (nm != NULL) {
+ RelocIterator iter(nm, instruction_address(), next_raw()->instruction_address()); // only relocations in the range of this one instruction
+ while (iter.next()) {
+ if (iter.type() == relocInfo::oop_type) {
+ oop_addr = iter.oop_reloc()->oop_addr();
+ *oop_addr = cast_to_oop(x);
+ break;
+ } else if (iter.type() == relocInfo::metadata_type) {
+ metadata_addr = iter.metadata_reloc()->metadata_addr();
+ *metadata_addr = (Metadata*)x;
+ break;
+ }
+ }
+ }
+ }
+ raw_set_data(adjust(this), x, oop_addr, metadata_addr);
+}
+
+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { // empty body: no alignment check is performed here
+}
+
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { // overwrite the instruction at verified_entry with zombie_illegal_instruction
+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "should be"); // dest is only checked, never encoded
+
+ NativeInstruction* instr = nativeInstruction_at(verified_entry);
+ assert(instr->is_nop() || instr->encoding() == zombie_illegal_instruction, "required for MT-safe patching"); // only a nop or an already-patched entry may be overwritten
+ instr->set_encoding(zombie_illegal_instruction);
+}
+
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { // copy one instruction word from code_buffer over the b at instr_addr
+ assert (nativeInstruction_at(instr_addr)->is_b(), "MT-safe patching of arbitrary instructions is not allowed");
+ assert (nativeInstruction_at(code_buffer)->is_nop(), "MT-safe patching of arbitrary instructions is not allowed");
+ nativeInstruction_at(instr_addr)->set_encoding(*(int*)code_buffer);
+}
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
+ // Insert at code_pos unconditional B instruction jumping to entry
+ intx offset = entry - code_pos; // measured from the instruction itself, with no pipeline bias
+ assert (Assembler::is_offset_in_range(offset, 26), "offset is out of range");
+
+ NativeInstruction* instr = nativeInstruction_at(code_pos);
+ assert (instr->is_b() || instr->is_nop(), "MT-safe patching of arbitrary instructions is not allowed"); // only a b or a nop may be overwritten
+
+ instr->set_encoding(0x5 << 26 | Assembler::encode_offset(offset, 26, 0)); // 0x5 << 26 supplies the top bits of the B encoding
+}
+
+static address call_for(address return_address) { // find the call instruction whose return address is return_address, or NULL
+ CodeBlob* cb = CodeCache::find_blob(return_address);
+ nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL; // find_blob may return NULL; fail via ShouldNotReachHere instead of dereferencing it
+ if (nm == NULL) {
+ ShouldNotReachHere();
+ return NULL;
+ }
+
+ // Look back 8 instructions (for LIR_Assembler::ic_call and MacroAssembler::patchable_call)
+ address begin = return_address - 8*NativeInstruction::instruction_size;
+ if (begin < nm->code_begin()) {
+ begin = nm->code_begin(); // never scan before the start of the nmethod's code
+ }
+ RelocIterator iter(nm, begin, return_address);
+ while (iter.next()) {
+ Relocation* reloc = iter.reloc();
+ if (reloc->is_call()) {
+ address call = reloc->addr();
+ if (nativeInstruction_at(call)->is_call()) {
+ if (nativeCall_at(call)->return_address() == return_address) {
+ return call;
+ }
+ }
+ }
+ }
+
+ return NULL; // no call relocation in the window returns to return_address
+}
+
+bool NativeCall::is_call_before(address return_address) { // true when call_for() finds a call returning to return_address
+ return (call_for(return_address) != NULL);
+}
+
+NativeCall* nativeCall_before(address return_address) { // the call located by call_for(), viewed as a NativeCall
+ assert(NativeCall::is_call_before(return_address), "must be"); // assert-only extra lookup; call_for() runs again below
+ return nativeCall_at(call_for(return_address));
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/nativeInst_arm_64.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,772 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_NATIVEINST_ARM_64_HPP
+#define CPU_ARM_VM_NATIVEINST_ARM_64_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "code/codeCache.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+
+// -------------------------------------------------------------------
+
+// Some experimental projects extend the ARM back-end by implementing
+// what the front-end usually assumes is a single native instruction
+// with a sequence of instructions.
+//
+// The 'Raw' variants are the low level initial code (usually one
+// instruction wide but some of them were already composed
+// instructions). They should be used only by the back-end.
+//
+// The non-raw classes are the front-end entry point, hiding potential
+// back-end extensions or the actual instructions size.
+class NativeInstruction;
+
+class RawNativeInstruction VALUE_OBJ_CLASS_SPEC {
+ public:
+  // Size of a single raw instruction, in bytes and in bits.
+  enum ARM_specific {
+    instruction_size = Assembler::InstructionSize,
+    instruction_size_in_bits = instruction_size * BitsPerByte,
+  };
+
+  // illegal instruction used by NativeJump::patch_verified_entry
+  static const int zombie_illegal_instruction = 0xd4000542; // hvc #42
+
+  address addr_at(int offset) const { return (address)this + offset; }
+  address instruction_address() const { return addr_at(0); }
+  address next_raw_instruction_address() const { return addr_at(instruction_size); }
+
+  static RawNativeInstruction* at(address address) {
+    return (RawNativeInstruction*)address;
+  }
+
+  RawNativeInstruction* next_raw() const {
+    return at(next_raw_instruction_address());
+  }
+
+  int encoding() const {
+    return *(int*)this;
+  }
+  // Stores a new instruction word; the write and the icache invalidation are skipped if it is unchanged.
+  void set_encoding(int value) {
+    int old = encoding();
+    if (old != value) {
+      *(int*)this = value;
+      ICache::invalidate_word((address)this);
+    }
+  }
+  // Single-instruction predicates: each one compares masked bits of encoding() with a fixed pattern.
+  bool is_nop() const { return encoding() == (int)0xd503201f; }
+  bool is_b() const { return (encoding() & 0xfc000000) == 0x14000000; } // unconditional branch
+  bool is_b_cond() const { return (encoding() & 0xff000010) == 0x54000000; } // conditional branch
+  bool is_bl() const { return (encoding() & 0xfc000000) == 0x94000000; }
+  bool is_br() const { return (encoding() & 0xfffffc1f) == 0xd61f0000; }
+  bool is_blr() const { return (encoding() & 0xfffffc1f) == 0xd63f0000; }
+  bool is_ldr_literal() const { return (encoding() & 0xff000000) == 0x58000000; }
+  bool is_adr_aligned() const { return (encoding() & 0xff000000) == 0x10000000; } // adr Xn, <label>, where label is aligned to 4 bytes (address of instruction).
+  bool is_adr_aligned_lr() const { return (encoding() & 0xff00001f) == 0x1000001e; } // adr LR, <label>, where label is aligned to 4 bytes (address of instruction).
+
+  bool is_ldr_str_gp_reg_unsigned_imm() const { return (encoding() & 0x3f000000) == 0x39000000; } // ldr/str{b, sb, h, sh, _w, sw} Rt, [Rn, #imm]
+  bool is_ldr_str_fp_reg_unsigned_imm() const { return (encoding() & 0x3f000000) == 0x3D000000; } // ldr/str Rt(SIMD), [Rn, #imm]
+  bool is_ldr_str_reg_unsigned_imm() const { return is_ldr_str_gp_reg_unsigned_imm() || is_ldr_str_fp_reg_unsigned_imm(); }
+
+  bool is_stp_preindex() const { return (encoding() & 0xffc00000) == 0xa9800000; } // stp Xt1, Xt2, [Xn, #imm]!
+  bool is_ldp_postindex() const { return (encoding() & 0xffc00000) == 0xa8c00000; } // ldp Xt1, Xt2, [Xn] #imm
+  bool is_mov_sp() const { return (encoding() & 0xfffffc00) == 0x91000000; } // mov <Xn|SP>, <Xm|SP>
+  bool is_movn() const { return (encoding() & 0x7f800000) == 0x12800000; }
+  bool is_movz() const { return (encoding() & 0x7f800000) == 0x52800000; }
+  bool is_movk() const { return (encoding() & 0x7f800000) == 0x72800000; }
+  bool is_orr_imm() const { return (encoding() & 0x7f800000) == 0x32000000; }
+  bool is_cmp_rr() const { return (encoding() & 0x7fe00000) == 0x6b000000; }
+  bool is_csel() const { return (encoding() & 0x7fe00000) == 0x1a800000; }
+  bool is_sub_shift() const { return (encoding() & 0x7f200000) == 0x4b000000; } // sub Rd, Rn, shift (Rm, imm)
+  bool is_mov() const { return (encoding() & 0x7fe0ffe0) == 0x2a0003e0; } // mov Rd, Rm (orr Rd, ZR, shift (Rm, 0))
+  bool is_tst() const { return (encoding() & 0x7f20001f) == 0x6a00001f; } // tst Rn, shift (Rm, imm) (ands ZR, Rn, shift(Rm, imm))
+  bool is_lsr_imm() const { return (encoding() & 0x7f807c00) == 0x53007c00; } // lsr Rd, Rn, imm (ubfm Rd, Rn, imm, 31/63)
+  // Multi-instruction sequences, recognized by also inspecting the following raw instruction(s).
+  bool is_far_jump() const { return is_ldr_literal() && next_raw()->is_br(); }
+  bool is_fat_call() const {
+    return
+#ifdef COMPILER2
+      (is_blr() && next_raw()->is_b()) ||
+#endif
+      (is_adr_aligned_lr() && next_raw()->is_br());
+  }
+  bool is_far_call() const {
+    return is_ldr_literal() && next_raw()->is_fat_call();
+  }
+
+  bool is_ic_near_call() const { return is_adr_aligned_lr() && next_raw()->is_b(); }
+  bool is_ic_far_call() const { return is_adr_aligned_lr() && next_raw()->is_ldr_literal() && next_raw()->next_raw()->is_br(); }
+  bool is_ic_call() const { return is_ic_near_call() || is_ic_far_call(); }
+
+  bool is_jump() const { return is_b() || is_far_jump(); }
+  bool is_call() const { return is_bl() || is_far_call() || is_ic_call(); }
+  bool is_branch() const { return is_b() || is_bl(); }
+
+  // c2 doesn't use fixed registers for safepoint poll address, so no encoding check is made here
+  bool is_safepoint_poll() const {
+    return true;
+  }
+  // Matches 16 stp (pre-index), an adr and two gp ldr/str; on success *next is the instruction after them.
+  bool is_save_all_registers(const RawNativeInstruction** next) const {
+    const RawNativeInstruction* current = this;
+
+    if (!current->is_stp_preindex()) return false; current = current->next_raw();
+    for (int i = 28; i >= 0; i -= 2) {
+      if (!current->is_stp_preindex()) return false; current = current->next_raw();
+    }
+
+    if (!current->is_adr_aligned()) return false; current = current->next_raw();
+    if (!current->is_ldr_str_gp_reg_unsigned_imm()) return false; current = current->next_raw();
+    if (!current->is_ldr_str_gp_reg_unsigned_imm()) return false; current = current->next_raw();
+
+    *next = (RawNativeInstruction*) current;
+    return true;
+  }
+  // Matches 16 consecutive ldp (post-index); on success *next is the instruction after them.
+  bool is_restore_all_registers(const RawNativeInstruction** next) const {
+    const RawNativeInstruction* current = this;
+
+    for (int i = 0; i <= 28; i += 2) {
+      if (!current->is_ldp_postindex()) return false; current = current->next_raw();
+    }
+    if (!current->is_ldp_postindex()) return false; current = current->next_raw();
+
+    *next = (RawNativeInstruction*) current;
+    return true;
+  }
+  // Steps over a bound literal: an optional alignment nop, then two raw instruction slots.
+  const RawNativeInstruction* skip_bind_literal() const {
+    const RawNativeInstruction* current = this;
+    if (((uintptr_t)current) % wordSize != 0) {
+      assert(current->is_nop(), "should be");
+      current = current->next_raw();
+    }
+    assert(((uintptr_t)current) % wordSize == 0, "should be"); // bound literal should be aligned
+    current = current->next_raw()->next_raw();
+    return current;
+  }
+  // Matches save_all_registers, ldr literal, mov sp, ldr literal, br, then skips two bound literals.
+  bool is_stop(const RawNativeInstruction** next) const {
+    const RawNativeInstruction* current = this;
+
+    if (!current->is_save_all_registers(&current)) return false;
+    if (!current->is_ldr_literal()) return false; current = current->next_raw();
+    if (!current->is_mov_sp()) return false; current = current->next_raw();
+    if (!current->is_ldr_literal()) return false; current = current->next_raw();
+    if (!current->is_br()) return false; current = current->next_raw();
+
+    current = current->skip_bind_literal();
+    current = current->skip_bind_literal();
+
+    *next = (RawNativeInstruction*) current;
+    return true;
+  }
+  // Matches orr (immediate), or movn/movz followed by at most three movk; *next (if given) gets the end.
+  bool is_mov_slow(const RawNativeInstruction** next = NULL) const {
+    const RawNativeInstruction* current = this;
+
+    if (current->is_orr_imm()) {
+      current = current->next_raw();
+
+    } else if (current->is_movn() || current->is_movz()) {
+      current = current->next_raw();
+      int movkCount = 0;
+      while (current->is_movk()) {
+        movkCount++;
+        if (movkCount > 3) return false;
+        current = current->next_raw();
+      }
+
+    } else {
+      return false;
+    }
+
+    if (next != NULL) {
+      *next = (RawNativeInstruction*)current;
+    }
+    return true;
+  }
+
+#ifdef ASSERT
+  void skip_verify_heapbase(const RawNativeInstruction** next) const { // *next is left untouched if the expected sequence is not matched
+    const RawNativeInstruction* current = this;
+
+    if (CheckCompressedOops) {
+      if (!current->is_ldr_str_gp_reg_unsigned_imm()) return; current = current->next_raw();
+      if (!current->is_stp_preindex()) return; current = current->next_raw();
+      // NOTE: temporary workaround, remove with m6-01?
+      // skip saving condition flags
+      current = current->next_raw();
+      current = current->next_raw();
+
+      if (!current->is_mov_slow(&current)) return;
+      if (!current->is_cmp_rr()) return; current = current->next_raw();
+      if (!current->is_b_cond()) return; current = current->next_raw();
+      if (!current->is_stop(&current)) return;
+
+#ifdef COMPILER2
+      if (current->is_nop()) current = current->next_raw();
+#endif
+      // NOTE: temporary workaround, remove with m6-01?
+      // skip restoring condition flags
+      current = current->next_raw();
+      current = current->next_raw();
+
+      if (!current->is_ldp_postindex()) return; current = current->next_raw();
+      if (!current->is_ldr_str_gp_reg_unsigned_imm()) return; current = current->next_raw();
+    }
+
+    *next = (RawNativeInstruction*) current;
+  }
+#endif // ASSERT
+  // Matches a mov_slow sequence followed by one gp ldr/str (unsigned immediate).
+  bool is_ldr_global_ptr(const RawNativeInstruction** next) const {
+    const RawNativeInstruction* current = this;
+
+    if (!current->is_mov_slow(&current)) return false;
+    if (!current->is_ldr_str_gp_reg_unsigned_imm()) return false; current = current->next_raw();
+
+    *next = (RawNativeInstruction*) current;
+    return true;
+  }
+  // With VerifyOops, steps over the oop verification sequence; *next is only written on a full match.
+  void skip_verify_oop(const RawNativeInstruction** next) const {
+    const RawNativeInstruction* current = this;
+
+    if (VerifyOops) {
+      if (!current->is_save_all_registers(&current)) return;
+
+      if (current->is_mov()) {
+        current = current->next_raw();
+      }
+
+      if (!current->is_mov_sp()) return; current = current->next_raw();
+      if (!current->is_ldr_literal()) return; current = current->next_raw();
+      if (!current->is_ldr_global_ptr(&current)) return;
+      if (!current->is_blr()) return; current = current->next_raw();
+      if (!current->is_restore_all_registers(&current)) return;
+      if (!current->is_b()) return; current = current->next_raw();
+
+      current = current->skip_bind_literal();
+    }
+
+    *next = (RawNativeInstruction*) current;
+  }
+  // Steps over heap-base/oop verification code and the encode sequence selected by narrow_oop_base/shift.
+  void skip_encode_heap_oop(const RawNativeInstruction** next) const {
+    const RawNativeInstruction* current = this;
+
+    assert (Universe::heap() != NULL, "java heap should be initialized");
+#ifdef ASSERT
+    current->skip_verify_heapbase(&current);
+#endif // ASSERT
+    current->skip_verify_oop(&current);
+
+    if (Universe::narrow_oop_base() == NULL) {
+      if (Universe::narrow_oop_shift() != 0) {
+        if (!current->is_lsr_imm()) return; current = current->next_raw();
+      } else {
+        if (current->is_mov()) {
+          current = current->next_raw();
+        }
+      }
+    } else {
+      if (!current->is_tst()) return; current = current->next_raw();
+      if (!current->is_csel()) return; current = current->next_raw();
+      if (!current->is_sub_shift()) return; current = current->next_raw();
+      if (Universe::narrow_oop_shift() != 0) {
+        if (!current->is_lsr_imm()) return; current = current->next_raw();
+      }
+    }
+
+    *next = (RawNativeInstruction*) current;
+  }
+
+  void verify();
+
+  // For unit tests
+  static void test() {}
+
+ private:
+  // Assert-only sanity checks for a bit-field description (bits, scale, low_bit).
+  void check_bits_range(int bits, int scale, int low_bit) const {
+    assert((0 <= low_bit) && (0 < bits) && (low_bit + bits <= instruction_size_in_bits), "invalid bits range");
+    assert((0 <= scale) && (scale <= 4), "scale is out of range");
+  }
+  // Replaces the field selected by (bits, low_bit) with imm_encoding, which must already be in position.
+  void set_imm(int imm_encoding, int bits, int low_bit) {
+    int imm_mask = right_n_bits(bits) << low_bit;
+    assert((imm_encoding & ~imm_mask) == 0, "invalid imm encoding");
+    set_encoding((encoding() & ~imm_mask) | imm_encoding);
+  }
+
+ protected:
+
+  // Returns signed immediate from [low_bit .. low_bit + bits - 1] bits of this instruction, scaled by given scale.
+  int get_signed_imm(int bits, int scale, int low_bit) const {
+    check_bits_range(bits, scale, low_bit);
+    int high_bits_to_clean = (instruction_size_in_bits - (low_bit + bits)); // bits above the field, shifted out first
+    return ((int)((unsigned int)encoding() << high_bits_to_clean) >> (high_bits_to_clean + low_bit)) * (1 << scale); // no left shift of a negative int (undefined before C++20)
+  }
+
+  // Puts given signed immediate into the [low_bit .. low_bit + bits - 1] bits of this instruction.
+  void set_signed_imm(int value, int bits, int scale, int low_bit) {
+    set_imm(Assembler::encode_imm(value, bits, scale, low_bit), bits, low_bit);
+  }
+
+  // Returns unsigned immediate from [low_bit .. low_bit + bits - 1] bits of this instruction, scaled by given scale.
+  int get_unsigned_imm(int bits, int scale, int low_bit) const {
+    check_bits_range(bits, scale, low_bit);
+    return ((encoding() >> low_bit) & right_n_bits(bits)) << scale;
+  }
+
+  // Puts given unsigned immediate into the [low_bit .. low_bit + bits - 1] bits of this instruction.
+  void set_unsigned_imm(int value, int bits, int scale, int low_bit) {
+    set_imm(Assembler::encode_unsigned_imm(value, bits, scale, low_bit), bits, low_bit);
+  }
+  // Offsets are signed immediates with scale 2, i.e. the field value shifted left by two bits.
+  int get_signed_offset(int bits, int low_bit) const {
+    return get_signed_imm(bits, 2, low_bit);
+  }
+
+  void set_signed_offset(int offset, int bits, int low_bit) {
+    set_signed_imm(offset, bits, 2, low_bit);
+  }
+};
+
+inline RawNativeInstruction* rawNativeInstruction_at(address address) { // raw view of the instruction at 'address'
+ RawNativeInstruction* instr = RawNativeInstruction::at(address);
+#ifdef ASSERT
+ instr->verify(); // only compiled in when ASSERT is defined
+#endif // ASSERT
+ return instr;
+}
+
+// -------------------------------------------------------------------
+
+// Load/store register (unsigned scaled immediate)
+class NativeMovRegMem: public RawNativeInstruction {
+ private:
+ int get_offset_scale() const { // 2-bit field at bits 31..30, used as the shift for the offset field
+ return get_unsigned_imm(2, 0, 30);
+ }
+
+ public:
+ int offset() const { // 12-bit unsigned field at bits 21..10, shifted left by get_offset_scale()
+ return get_unsigned_imm(12, get_offset_scale(), 10);
+ }
+
+ void set_offset(int x);
+ // Adds add_offset to the currently encoded offset and re-encodes it through set_offset().
+ void add_offset_in_bytes(int add_offset) {
+ set_offset(offset() + add_offset);
+ }
+};
+
+inline NativeMovRegMem* nativeMovRegMem_at(address address) {
+ const RawNativeInstruction* instr = rawNativeInstruction_at(address);
+ // Locate the actual load/store: it may be preceded by a C1 patching nop and by oop-encoding code.
+#ifdef COMPILER1
+ // NOP required for C1 patching
+ if (instr->is_nop()) {
+ instr = instr->next_raw();
+ }
+#endif
+ // Advances instr past any verification / encode-heap-oop sequence recognized in front of the access.
+ instr->skip_encode_heap_oop(&instr);
+
+ assert(instr->is_ldr_str_reg_unsigned_imm(), "must be");
+ return (NativeMovRegMem*)instr;
+}
+
+// -------------------------------------------------------------------
+
+class NativeInstruction : public RawNativeInstruction {
+ public:
+  // Front-end view of an instruction; see the note on 'Raw' versus
+  // non-raw classes near the top of this file.
+
+  // Reinterprets 'address' as a NativeInstruction; nothing is verified here.
+  static NativeInstruction* at(address address) { return (NativeInstruction*)address; }
+
+  // No need to consider indirections while parsing NativeInstruction:
+  // the next instruction starts right after this raw instruction.
+  address next_instruction_address() const { return next_raw_instruction_address(); }
+
+  // next() is intentionally not defined, to avoid confusion.
+  //
+  // The front end and most classes, except those defined in nativeInst_arm
+  // or relocInfo_arm, should only use next_instruction_address(), which skips
+  // over composed instructions and ignores back-end extensions.
+  //
+  // The back-end can use next_raw() when it knows the instruction sequence
+  // and only wants to skip a single native instruction.
+};
+
+inline NativeInstruction* nativeInstruction_at(address address) { // front-end view of the instruction at 'address'
+ NativeInstruction* instr = NativeInstruction::at(address);
+#ifdef ASSERT
+ instr->verify(); // only compiled in when ASSERT is defined
+#endif // ASSERT
+ return instr;
+}
+
+// -------------------------------------------------------------------
+class NativeInstructionLdrLiteral: public NativeInstruction {
+ public:
+  address literal_address() { // literal slot = own address + signed 19-bit field at bits 23..5, scaled by 4
+    address la = instruction_address() + get_signed_offset(19, 5);
+    assert(la != instruction_address(), "literal points to instruction");
+    return la;
+  }
+  // First address past the literal slot (literal address plus wordSize).
+  address after_literal_address() {
+    return literal_address() + wordSize;
+  }
+  // Points the load at the literal at 'addr'; the offset is encoded relative to 'pc'.
+  void set_literal_address(address addr, address pc) {
+    assert(is_ldr_literal(), "must be");
+    int opc = (encoding() >> 30) & 0x3; // compared with 0x1 below: 0b binary literals are not standard before C++14
+    assert (opc != 0x1 || addr == pc || ((uintx)addr & 7) == 0, "ldr target should be aligned");
+    set_signed_offset(addr - pc, 19, 5);
+  }
+  // Same, with the offset taken relative to this instruction's own address.
+  void set_literal_address(address addr) {
+    set_literal_address(addr, instruction_address());
+  }
+  // Reads the pointer-sized value currently stored in the literal slot.
+  address literal_value() {
+    return *(address*)literal_address();
+  }
+  // Overwrites the literal slot; the instruction word itself is not modified.
+  void set_literal_value(address dest) {
+    *(address*)literal_address() = dest;
+  }
+};
+
+inline NativeInstructionLdrLiteral* nativeLdrLiteral_at(address address) { // checked cast: asserts an ldr (literal) encoding
+ assert(nativeInstruction_at(address)->is_ldr_literal(), "must be");
+ return (NativeInstructionLdrLiteral*)address;
+}
+
+// -------------------------------------------------------------------
+// Common class for branch instructions with 26-bit immediate offset: B (unconditional) and BL
+class NativeInstructionBranchImm26: public NativeInstruction {
+ public:
+  address destination(int adj = 0) const { // own address + signed imm26 displacement, plus adj
+    return addr_at(get_signed_offset(26, 0)) + adj;
+  }
+  // Re-targets the branch; the displacement from this instruction must be a multiple of 4.
+  void set_destination(address dest) {
+    const intptr_t delta = (intptr_t)(dest - instruction_address());
+    assert((delta & 0x3) == 0, "should be aligned");
+    set_signed_offset(delta, 26, 0);
+  }
+};
+
+inline NativeInstructionBranchImm26* nativeB_at(address address) { // checked cast: asserts an unconditional B
+ assert(nativeInstruction_at(address)->is_b(), "must be");
+ return (NativeInstructionBranchImm26*)address;
+}
+
+inline NativeInstructionBranchImm26* nativeBL_at(address address) { // checked cast: asserts a BL
+ assert(nativeInstruction_at(address)->is_bl(), "must be");
+ return (NativeInstructionBranchImm26*)address;
+}
+
+// -------------------------------------------------------------------
+class NativeInstructionAdrLR: public NativeInstruction {
+ public:
+  // Value this adr places in LR: own address plus the signed 19-bit field at bits 23..5, scaled by 4.
+  address target_lr_value() {
+    return addr_at(get_signed_offset(19, 5));
+  }
+};
+
+inline NativeInstructionAdrLR* nativeAdrLR_at(address address) { // checked cast: asserts an "adr LR, <label>" encoding
+ assert(nativeInstruction_at(address)->is_adr_aligned_lr(), "must be");
+ return (NativeInstructionAdrLR*)address;
+}
+
+// -------------------------------------------------------------------
+class RawNativeCall: public NativeInstruction {
+ public:
+ // Address control returns to after the call; how it is found depends on the shape of the call sequence.
+ address return_address() const {
+ if (is_bl()) {
+ return next_raw_instruction_address();
+
+ } else if (is_far_call()) {
+#ifdef COMPILER2
+ if (next_raw()->is_blr()) {
+ // ldr_literal; blr; ret_addr: b skip_literal;
+ return addr_at(2 * instruction_size);
+ }
+#endif
+ assert(next_raw()->is_adr_aligned_lr() && next_raw()->next_raw()->is_br(), "must be");
+ return nativeLdrLiteral_at(instruction_address())->after_literal_address();
+
+ } else if (is_ic_call()) {
+ return nativeAdrLR_at(instruction_address())->target_lr_value();
+
+ } else {
+ ShouldNotReachHere();
+ return NULL;
+ }
+ }
+ // Call target: branch displacement for bl / ic_near_call, literal value for far and ic_far calls.
+ address destination(int adj = 0) const {
+ if (is_bl()) {
+ return nativeBL_at(instruction_address())->destination(adj);
+
+ } else if (is_far_call()) {
+ return nativeLdrLiteral_at(instruction_address())->literal_value();
+
+ } else if (is_adr_aligned_lr()) {
+ RawNativeInstruction *next = next_raw();
+ if (next->is_b()) {
+ // ic_near_call
+ return nativeB_at(next->instruction_address())->destination(adj);
+ } else if (next->is_far_jump()) {
+ // ic_far_call
+ return nativeLdrLiteral_at(next->instruction_address())->literal_value();
+ }
+ }
+ ShouldNotReachHere();
+ return NULL;
+ }
+ // Patches the call target in place; the literal-based forms are followed by OrderAccess::storeload().
+ void set_destination(address dest) {
+ if (is_bl()) {
+ nativeBL_at(instruction_address())->set_destination(dest);
+ return;
+ }
+ if (is_far_call()) {
+ nativeLdrLiteral_at(instruction_address())->set_literal_value(dest);
+ OrderAccess::storeload(); // overkill if caller holds lock?
+ return;
+ }
+ if (is_adr_aligned_lr()) {
+ RawNativeInstruction *next = next_raw();
+ if (next->is_b()) {
+ // ic_near_call
+ nativeB_at(next->instruction_address())->set_destination(dest);
+ return;
+ }
+ if (next->is_far_jump()) {
+ // ic_far_call
+ nativeLdrLiteral_at(next->instruction_address())->set_literal_value(dest);
+ OrderAccess::storeload(); // overkill if caller holds lock?
+ return;
+ }
+ }
+ ShouldNotReachHere();
+ }
+ // Same as set_destination(), after asserting that dest lies in the code cache.
+ void set_destination_mt_safe(address dest) {
+ assert(CodeCache::contains(dest), "call target should be from code cache (required by ic_call and patchable_call)");
+ set_destination(dest);
+ }
+ // Asserts that this is one of the call sequences accepted by RawNativeInstruction::is_call().
+ void verify() {
+ assert(RawNativeInstruction::is_call(), "should be");
+ }
+
+ void verify_alignment() {
+ // Nothing to do on ARM
+ }
+};
+
+inline RawNativeCall* rawNativeCall_at(address address) { // cast, then RawNativeCall::verify() on the result
+ RawNativeCall * call = (RawNativeCall*)address;
+ call->verify();
+ return call;
+}
+
+class NativeCall: public RawNativeCall {
+ public:
+
+ // NativeCall::next_instruction_address() is used only to define the
+ // range where to look for the relocation information. We need not
+ // walk over composed instructions (as long as the relocation information
+ // is associated to the first instruction).
+ address next_instruction_address() const {
+ return next_raw_instruction_address();
+ }
+ // True when a call sequence returning to return_address can be located.
+ static bool is_call_before(address return_address);
+};
+
+inline NativeCall* nativeCall_at(address address) { // cast, then verify() (asserts a recognized call sequence)
+ NativeCall * call = (NativeCall*)address;
+ call->verify();
+ return call;
+}
+
+NativeCall* nativeCall_before(address return_address); // the call that returns to return_address (asserted to exist)
+
+// -------------------------------------------------------------------
+class NativeGeneralJump: public NativeInstruction {
+ public:
+ // Target of this jump, decoded as an unconditional B (nativeB_at() asserts that it is one).
+ address jump_destination() const {
+ return nativeB_at(instruction_address())->destination();
+ }
+
+ static void replace_mt_safe(address instr_addr, address code_buffer);
+ // Overwrites the B or NOP at code_pos with an unconditional B to entry.
+ static void insert_unconditional(address code_pos, address entry);
+
+};
+
+inline NativeGeneralJump* nativeGeneralJump_at(address address) { // checked cast: asserts an unconditional B
+ assert(nativeInstruction_at(address)->is_b(), "must be");
+ return (NativeGeneralJump*)address;
+}
+
+// -------------------------------------------------------------------
+class RawNativeJump: public NativeInstruction {
+ public:
+  // A jump is either a single B (near) or an ldr-literal/br pair (far jump).
+
+  // Target of the jump. A near B that branches to itself stands for destination -1.
+  address jump_destination(int adj = 0) const {
+    if (!is_b()) {
+      // Far jump: the target is the value held in the literal slot.
+      assert(is_far_jump(), "should be");
+      return nativeLdrLiteral_at(instruction_address())->literal_value();
+    }
+    const address self = instruction_address();
+    const address target = nativeB_at(self)->destination(adj);
+    // Jump destination -1 is encoded as a jump to self
+    return (target == self) ? (address)-1 : target;
+  }
+
+  // Re-targets the jump. For a near B, destination -1 is stored as a branch to self.
+  void set_jump_destination(address dest) {
+    if (!is_b()) {
+      // Far jump: only the literal slot is rewritten.
+      assert(is_far_jump(), "should be");
+      nativeLdrLiteral_at(instruction_address())->set_literal_value(dest);
+      return;
+    }
+    // Jump destination -1 is encoded as a jump to self
+    const address self = instruction_address();
+    nativeB_at(self)->set_destination(dest == (address)-1 ? self : dest);
+  }
+};
+
+inline RawNativeJump* rawNativeJump_at(address address) { // checked cast: asserts a B or a far jump
+ assert(rawNativeInstruction_at(address)->is_jump(), "must be");
+ return (RawNativeJump*)address;
+}
+
+// -------------------------------------------------------------------
+class NativeMovConstReg: public NativeInstruction {
+ // Private overload: this object with a leading C1 patching nop (if any) skipped.
+ NativeMovConstReg *adjust() const {
+ return (NativeMovConstReg *)adjust(this);
+ }
+
+ public:
+ // Under COMPILER1, returns the instruction after ni when ni is a nop; otherwise ni itself.
+ static RawNativeInstruction *adjust(const RawNativeInstruction *ni) {
+#ifdef COMPILER1
+ // NOP required for C1 patching
+ if (ni->is_nop()) {
+ return ni->next_raw();
+ }
+#endif
+ return (RawNativeInstruction *)ni;
+ }
+ // The underscore-prefixed members below are invoked on the adjust()ed instruction by their plain counterparts.
+ intptr_t _data() const;
+ void set_data(intptr_t x);
+
+ intptr_t data() const {
+ return adjust()->_data();
+ }
+
+ bool is_pc_relative() {
+ return adjust()->is_ldr_literal();
+ }
+
+ void _set_pc_relative_offset(address addr, address pc) {
+ assert(is_ldr_literal(), "must be");
+ nativeLdrLiteral_at(instruction_address())->set_literal_address(addr, pc);
+ }
+ // 'pc' is advanced by the distance adjust() moved past the nop before being handed on.
+ void set_pc_relative_offset(address addr, address pc) {
+ NativeMovConstReg *ni = adjust();
+ int dest_adj = ni->instruction_address() - instruction_address();
+ ni->_set_pc_relative_offset(addr, pc + dest_adj);
+ }
+
+ address _next_instruction_address() const {
+#ifdef COMPILER2
+ if (is_movz()) {
+ // narrow constant
+ RawNativeInstruction* ni = next_raw();
+ assert(ni->is_movk(), "movz;movk expected");
+ return ni->next_raw_instruction_address();
+ }
+#endif
+ assert(is_ldr_literal(), "must be");
+ return NativeInstruction::next_raw_instruction_address();
+ }
+
+ address next_instruction_address() const {
+ return adjust()->_next_instruction_address();
+ }
+};
+
+inline NativeMovConstReg* nativeMovConstReg_at(address address) {
+ RawNativeInstruction* ni = rawNativeInstruction_at(address);
+ // The check below is made on the adjusted instruction, but the unadjusted address is what gets returned.
+ ni = NativeMovConstReg::adjust(ni);
+
+ assert(ni->is_mov_slow() || ni->is_ldr_literal(), "must be");
+ return (NativeMovConstReg*)address;
+}
+
+// -------------------------------------------------------------------
+class NativeJump: public RawNativeJump {
+ public:
+ // Declarations only; both functions are defined out of line.
+ static void check_verified_entry_alignment(address entry, address verified_entry);
+ // NOTE(review): presumably installs RawNativeInstruction::zombie_illegal_instruction (see its comment) - confirm in the .cpp
+ static void patch_verified_entry(address entry, address verified_entry, address dest);
+};
+
+inline NativeJump* nativeJump_at(address address) { // checked cast: asserts a B or a far jump
+ assert(nativeInstruction_at(address)->is_jump(), "must be");
+ return (NativeJump*)address;
+}
+
+#endif // CPU_ARM_VM_NATIVEINST_ARM_64_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/registerMap_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_REGISTERMAP_ARM_HPP
+#define CPU_ARM_VM_REGISTERMAP_ARM_HPP
+
+// machine-dependent implementation for register maps
+ friend class frame;
+
+ private:
+ // This is the hook for finding a register in a "well-known" location,
+ // such as a register block of a predetermined format.
+ // Since there is none, we just return NULL.
+ // See registerMap_sparc.hpp for an example of grabbing registers
+ // from register save areas of a standard layout.
+ address pd_location(VMReg reg) const {return NULL;}
+
+ // no PD state to clear or copy:
+ void pd_clear() {}
+ void pd_initialize() {}
+ void pd_initialize_from(const RegisterMap* map) {}
+
+#endif // CPU_ARM_VM_REGISTERMAP_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/register_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "register_arm.hpp"
+#include "utilities/debug.hpp"
+
+const int ConcreteRegisterImpl::max_gpr = ConcreteRegisterImpl::num_gpr;
+const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::num_fpr + // cumulative: num_fpr stacked on top of max_gpr
+ ConcreteRegisterImpl::max_gpr;
+
+const char* RegisterImpl::name() const { // printable name of this register
+  static const char* const names[number_of_registers] = { // static const: the table is not rebuilt on every call
+#ifdef AARCH64
+    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+    "x24", "x25", "x26", "x27", "x28", "fp", "lr", "xzr", "sp"
+#else
+    "r0", "r1", "r2", "r3", "r4", "r5", "r6",
+#if (FP_REG_NUM == 7)
+    "fp",
+#else
+    "r7",
+#endif
+    "r8", "r9", "r10",
+#if (FP_REG_NUM == 11)
+    "fp",
+#else
+    "r11",
+#endif
+    "r12", "sp", "lr", "pc"
+#endif // AARCH64
+  };
+  return is_valid() ? names[encoding()] : "noreg"; // indexed by encoding(); invalid registers print as "noreg"
+}
+
+const char* FloatRegisterImpl::name() const { // printable name of this floating-point register
+  static const char* const names[number_of_registers] = { // static const: the table is not rebuilt on every call
+#ifdef AARCH64
+    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+#else
+    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+    "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+    "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+    "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"
+#ifdef COMPILER2
+    ,"s32", "s33?","s34", "s35?","s36", "s37?","s38", "s39?",
+    "s40", "s41?","s42", "s43?","s44", "s45?","s46", "s47?",
+    "s48", "s49?","s50", "s51?","s52", "s53?","s54", "s55?",
+    "s56", "s57?","s58", "s59?","s60", "s61?","s62", "s63?"
+#endif
+#endif // AARCH64
+  };
+  return is_valid() ? names[encoding()] : "fnoreg"; // indexed by encoding(); invalid registers print as "fnoreg"
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/register_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,570 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_REGISTER_ARM_HPP
+#define CPU_ARM_VM_REGISTER_ARM_HPP
+
+#include "asm/register.hpp"
+#include "vm_version_arm.hpp"
+
+class VMRegImpl;
+typedef VMRegImpl* VMReg;
+
+// These are declared in ucontext.h
+#undef R0
+#undef R1
+#undef R2
+#undef R3
+#undef R4
+#undef R5
+#undef R6
+#undef R7
+#undef R8
+#undef R9
+#undef R10
+#undef R11
+#undef R12
+#undef R13
+#undef R14
+#undef R15
+
+#define R(r) ((Register)(r))
+
+/////////////////////////////////
+// Support for different ARM ABIs
+// Note: default ABI is for linux
+
+
+// R9_IS_SCRATCHED
+//
+// The ARM ABI does not guarantee that R9 is callee saved.
+// Set R9_IS_SCRATCHED to 1 to ensure it is properly saved/restored by
+// the caller.
+#ifndef R9_IS_SCRATCHED
+// Default: R9 is callee saved
+#define R9_IS_SCRATCHED 0
+#endif
+
+#ifndef AARCH64
+// FP_REG_NUM
+//
+// The ARM ABI does not state which register is used for the frame pointer.
+// Note: for the ABIs we are currently aware of, FP is
+// either R7 or R11. Code may have to be extended if a third
+// register must be supported (see altFP_7_11).
+#ifndef FP_REG_NUM
+// Default: FP is R11
+#define FP_REG_NUM 11
+#endif
+#endif // AARCH64
+
+// ALIGN_WIDE_ARGUMENTS
+//
+// The ARM ABI requires 64-bit arguments to be aligned on 8 bytes
+// or on even registers. Set ALIGN_WIDE_ARGUMENTS to 1 for that behavior.
+//
+// Unfortunately, some platforms do not endorse that part of the ABI.
+//
+// We are aware of one which expects 64-bit arguments to only be 4
+// bytes aligned and can for instance use R3 + a stack slot for such
+// an argument.
+//
+// This is the behavior implemented if (ALIGN_WIDE_ARGUMENTS == 0)
+#ifndef ALIGN_WIDE_ARGUMENTS
+// Default: align on 8 bytes and avoid using <r3+stack>
+#define ALIGN_WIDE_ARGUMENTS 1
+#endif
+
+#define R0 ((Register)0)
+#define R1 ((Register)1)
+#define R2 ((Register)2)
+#define R3 ((Register)3)
+#define R4 ((Register)4)
+#define R5 ((Register)5)
+#define R6 ((Register)6)
+#define R7 ((Register)7)
+#define R8 ((Register)8)
+#define R9 ((Register)9)
+#define R10 ((Register)10)
+#define R11 ((Register)11)
+#define R12 ((Register)12)
+#define R13 ((Register)13)
+#define R14 ((Register)14)
+#define R15 ((Register)15)
+
+#ifdef AARCH64
+
+#define R16 ((Register)16)
+#define R17 ((Register)17)
+#define R18 ((Register)18)
+#define R19 ((Register)19)
+#define R20 ((Register)20)
+#define R21 ((Register)21)
+#define R22 ((Register)22)
+#define R23 ((Register)23)
+#define R24 ((Register)24)
+#define R25 ((Register)25)
+#define R26 ((Register)26)
+#define R27 ((Register)27)
+#define R28 ((Register)28)
+#define R29 ((Register)29)
+#define R30 ((Register)30)
+#define ZR ((Register)31)
+#define SP ((Register)32)
+
+#define FP R29
+#define LR R30
+
+#define altFP_7_11 R7
+
+#else // !AARCH64
+
+#define FP ((Register)FP_REG_NUM)
+
+// Safe use of registers which may be FP on some platforms.
+//
+// altFP_7_11: R7 if not equal to FP, else R11 (the default FP)
+//
+// Note: add additional altFP_#_11 for each register potentially used
+// as FP on supported ABIs (and replace R# by altFP_#_11). altFP_#_11
+// must be #define to R11 if and only if # is FP_REG_NUM.
+#if (FP_REG_NUM == 7)
+#define altFP_7_11 ((Register)11)
+#else
+#define altFP_7_11 ((Register)7)
+#endif
+#define SP R13
+#define LR R14
+#define PC R15
+
+#endif // !AARCH64
+
+
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+inline Register as_Register(int encoding) {  // Reinterprets an integer encoding as a Register handle; no range check is done here.
+  return reinterpret_cast<Register>(static_cast<intptr_t>(encoding));
+}
+
+class RegisterImpl : public AbstractRegisterImpl {  // A Register is an int encoding cast to RegisterImpl* (see as_Register)
+ public:
+ enum {
+#ifdef AARCH64
+ number_of_gprs = 31,  // is_valid_gpr() accepts encodings 0..30
+ zr_sp_encoding = 31,  // value encoding_with_zr()/encoding_with_sp() return for ZR and SP
+#endif
+ number_of_registers = AARCH64_ONLY(number_of_gprs + 2) NOT_AARCH64(16)  // AArch64: 33 (the GPRs plus ZR and SP); otherwise 16
+ };
+
+ Register successor() const { return as_Register(encoding() + 1); }  // register with the next higher encoding
+
+ inline friend Register as_Register(int encoding);
+
+ VMReg as_VMReg();
+
+ // accessors
+ int encoding() const { assert(is_valid(), "invalid register"); return value(); }  // asserts is_valid() before returning value()
+ const char* name() const;
+
+#ifdef AARCH64
+ int encoding_with_zr() const { assert (is_valid_gpr_or_zr(), "invalid register"); return (this == ZR) ? zr_sp_encoding : value(); }  // ZR maps to zr_sp_encoding; SP is rejected by the assert
+ int encoding_with_sp() const { assert (is_valid_gpr_or_sp(), "invalid register"); return (this == SP) ? zr_sp_encoding : value(); }  // SP maps to zr_sp_encoding; ZR is rejected by the assert
+#endif
+
+ // testers
+ bool is_valid() const { return 0 <= value() && value() < number_of_registers; }  // range check against number_of_registers
+
+#ifdef AARCH64
+ bool is_valid_gpr() const { return (0 <= value() && value() < number_of_gprs); }  // excludes ZR and SP
+ bool is_valid_gpr_or_zr() const { return is_valid_gpr() || (this == ZR); }
+ bool is_valid_gpr_or_sp() const { return is_valid_gpr() || (this == SP); }
+#endif
+};
+
+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+
+
+// Use FloatRegister as shortcut
+class FloatRegisterImpl;
+typedef FloatRegisterImpl* FloatRegister;
+
+inline FloatRegister as_FloatRegister(int encoding) {  // Reinterprets an integer encoding as a FloatRegister handle; no range check is done here.
+  return reinterpret_cast<FloatRegister>(static_cast<intptr_t>(encoding));
+}
+
+class FloatRegisterImpl : public AbstractRegisterImpl {  // A FloatRegister is an int encoding cast to FloatRegisterImpl* (see as_FloatRegister)
+ public:
+ enum {
+#ifdef AARCH64
+ number_of_registers = 32
+#else
+ number_of_registers = NOT_COMPILER2(32) COMPILER2_PRESENT(64)  // 32 encodings without C2, 64 when COMPILER2 is built in
+#endif
+ };
+
+ inline friend FloatRegister as_FloatRegister(int encoding);
+
+ VMReg as_VMReg();
+
+ int encoding() const { assert(is_valid(), "invalid register"); return value(); }  // asserts is_valid() before returning value()
+ bool is_valid() const { return 0 <= (intx)this && (intx)this < number_of_registers; }  // range check on the handle's pointer value
+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }  // register with the next higher encoding
+
+ const char* name() const;
+
+#ifndef AARCH64
+ int hi_bits() const {  // bits 4..1 of the encoding
+ return (encoding() >> 1) & 0xf;
+ }
+
+ int lo_bit() const {  // bit 0 of the encoding
+ return encoding() & 1;
+ }
+
+ int hi_bit() const {  // encoding >> 5: nonzero only for encodings of 32 and above
+ return encoding() >> 5;
+ }
+#endif // !AARCH64
+};
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1));
+
+#ifdef AARCH64
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V0, ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V1, ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V2, ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V3, ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V4, ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V5, ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V6, ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V7, ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V8, ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V9, ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V10, (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V11, (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V12, (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V13, (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V14, (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V15, (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V16, (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V17, (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V18, (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V19, (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V20, (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V21, (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V22, (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V23, (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V24, (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V25, (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V26, (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V27, (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V28, (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V29, (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V30, (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, V31, (31));
+
+#define S0 V0
+#define S1_reg V1
+#define Stemp V31
+
+#define D0 V0
+#define D1 V1
+
+#else // !AARCH64
+
+/*
+ * S1-S6 are named with "_reg" suffix to avoid conflict with
+ * constants defined in sharedRuntimeTrig.cpp
+ */
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S0, ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S1_reg, ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S2_reg, ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S3_reg, ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S4_reg, ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S5_reg, ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S6_reg, ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S7, ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S8, ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S9, ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S10, (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S11, (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S12, (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S13, (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S14, (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S15, (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S16, (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S17, (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S18, (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S19, (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S20, (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S21, (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S22, (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S23, (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S24, (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S25, (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S26, (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S27, (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S28, (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S29, (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S30, (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, S31, (31));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, Stemp, (30));
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D0, ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D1, ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D2, ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D3, ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D4, ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D5, ( 10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D6, ( 12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D7, ( 14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D8, ( 16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D9, ( 18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D10, ( 20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D11, ( 22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D12, ( 24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D13, ( 26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D14, ( 28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D15, (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D16, (32));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D17, (34));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D18, (36));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D19, (38));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D20, (40));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D21, (42));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D22, (44));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D23, (46));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D24, (48));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D25, (50));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D26, (52));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D27, (54));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D28, (56));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D29, (58));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D30, (60));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, D31, (62));
+
+#endif // AARCH64
+
+class ConcreteRegisterImpl : public AbstractRegisterImpl {  // Derives VMReg slot counts per GPR/FPR and the resulting totals
+ public:
+ enum {
+ log_vmregs_per_word = LogBytesPerWord - LogBytesPerInt, // VMRegs are of 4-byte size
+#ifdef COMPILER2
+ log_bytes_per_fpr = AARCH64_ONLY(4) NOT_AARCH64(2), // AArch64: 16 bytes (quad vectors); otherwise 4 bytes
+#else
+ log_bytes_per_fpr = AARCH64_ONLY(3) NOT_AARCH64(2), // AArch64: 8 bytes (double vectors); otherwise 4 bytes
+#endif
+ log_words_per_fpr = log_bytes_per_fpr - LogBytesPerWord,
+ words_per_fpr = 1 << log_words_per_fpr,
+ log_vmregs_per_fpr = log_bytes_per_fpr - LogBytesPerInt,
+ log_vmregs_per_gpr = log_vmregs_per_word,
+ vmregs_per_gpr = 1 << log_vmregs_per_gpr,
+ vmregs_per_fpr = 1 << log_vmregs_per_fpr,
+
+ num_gpr = RegisterImpl::number_of_registers << log_vmregs_per_gpr, // VMReg slots covering all general registers
+ max_gpr0 = num_gpr,
+ num_fpr = FloatRegisterImpl::number_of_registers << log_vmregs_per_fpr, // VMReg slots covering all FP registers
+ max_fpr0 = max_gpr0 + num_fpr,
+ number_of_registers = num_gpr + num_fpr +
+ // TODO-AARCH64 revise
+ 1+1 // APSR and FPSCR so that c2's REG_COUNT <= ConcreteRegisterImpl::number_of_registers
+ };
+
+ static const int max_gpr; // defined out of line as num_gpr
+ static const int max_fpr; // defined out of line as num_fpr + max_gpr
+};
+
+// TODO-AARCH64 revise the following definitions
+
+class VFPSystemRegisterImpl;
+typedef VFPSystemRegisterImpl* VFPSystemRegister;
+class VFPSystemRegisterImpl : public AbstractRegisterImpl {  // Handle type of the FPSID/FPSCR/MVFR0/MVFR1 constants defined below
+ public:
+ int encoding() const { return value(); }  // unlike RegisterImpl::encoding(), no validity assert
+};
+
+#define FPSID ((VFPSystemRegister)0)
+#define FPSCR ((VFPSystemRegister)1)
+#define MVFR0 ((VFPSystemRegister)0x6)
+#define MVFR1 ((VFPSystemRegister)0x7)
+
+/*
+ * Register definitions shared across interpreter and compiler
+ */
+#define Rexception_obj AARCH64_ONLY(R19) NOT_AARCH64(R4)
+#define Rexception_pc AARCH64_ONLY(R20) NOT_AARCH64(R5)
+
+#ifdef AARCH64
+#define Rheap_base R27
+#endif // AARCH64
+
+/*
+ * Interpreter register definitions common to C++ and template interpreters.
+ */
+#ifdef AARCH64
+#define Rlocals R23
+#define Rmethod R26
+#define Rthread R28
+#define Rtemp R16
+#define Rtemp2 R17
+#else
+#define Rlocals R8
+#define Rmethod R9
+#define Rthread R10
+#define Rtemp R12
+#endif // AARCH64
+
+// Interpreter calling conventions
+
+#define Rparams AARCH64_ONLY(R8) NOT_AARCH64(SP)
+#define Rsender_sp AARCH64_ONLY(R19) NOT_AARCH64(R4)
+
+// JSR292
+// Note: R5_mh is needed only during the call setup, including adapters
+// This does not seem to conflict with Rexception_pc
+// In case of issues, R3 might be OK but adapters calling the runtime would have to save it
+#define R5_mh R5 // MethodHandle register, used during the call setup
+#define Rmh_SP_save FP // for C1
+
+/*
+ * C++ Interpreter Register Defines
+ */
+#define Rsave0 R4
+#define Rsave1 R5
+#define Rsave2 R6
+#define Rstate altFP_7_11 // R7 or R11
+#define Ricklass R8
+
+/*
+ * TemplateTable Interpreter Register Usage
+ */
+
+// Temporary registers
+#define R0_tmp R0
+#define R1_tmp R1
+#define R2_tmp R2
+#define R3_tmp R3
+#define R4_tmp R4
+#define R5_tmp R5
+#define R12_tmp R12
+#define LR_tmp LR
+
+#define S0_tmp S0
+#define S1_tmp S1_reg
+
+#define D0_tmp D0
+#define D1_tmp D1
+
+// Temporary registers saved across VM calls (according to C calling conventions)
+#define Rtmp_save0 AARCH64_ONLY(R19) NOT_AARCH64(R4)
+#define Rtmp_save1 AARCH64_ONLY(R20) NOT_AARCH64(R5)
+
+// Cached TOS value
+#define R0_tos R0
+
+#ifndef AARCH64
+#define R0_tos_lo R0
+#define R1_tos_hi R1
+#endif
+
+#define S0_tos S0
+#define D0_tos D0
+
+// Dispatch table
+#define RdispatchTable AARCH64_ONLY(R22) NOT_AARCH64(R6)
+
+// Bytecode pointer
+#define Rbcp AARCH64_ONLY(R24) NOT_AARCH64(altFP_7_11)
+
+// Pre-loaded next bytecode for the dispatch
+#define R3_bytecode R3
+
+// Conventions between bytecode templates and stubs
+#define R2_ClassCastException_obj R2
+#define R4_ArrayIndexOutOfBounds_index R4
+
+// Interpreter expression stack top
+#define Rstack_top AARCH64_ONLY(R25) NOT_AARCH64(SP)
+
+/*
+ * Linux 32-bit ARM C ABI Register calling conventions
+ *
+ * REG use callee/caller saved
+ *
+ * R0 First argument reg caller
+ * result register
+ * R1 Second argument reg caller
+ * result register
+ * R2 Third argument reg caller
+ * R3 Fourth argument reg caller
+ *
+ * R4 - R8 Local variable registers callee
+ * R9
+ * R10, R11 Local variable registers callee
+ *
+ * R12 (IP) Scratch register used in inter-procedural calling
+ * R13 (SP) Stack Pointer callee
+ * R14 (LR) Link register
+ * R15 (PC) Program Counter
+ *
+ * TODO-AARCH64: document AArch64 ABI
+ *
+ */
+#define c_rarg0 R0
+#define c_rarg1 R1
+#define c_rarg2 R2
+#define c_rarg3 R3
+
+#ifdef AARCH64
+#define c_rarg4 R4
+#define c_rarg5 R5
+#define c_rarg6 R6
+#define c_rarg7 R7
+#endif
+
+#ifdef AARCH64
+#define GPR_PARAMS 8
+#define FPR_PARAMS 8
+#else
+#define GPR_PARAMS 4
+#endif
+
+
+// Java ABI
+// XXX Is this correct?
+#define j_rarg0 c_rarg0
+#define j_rarg1 c_rarg1
+#define j_rarg2 c_rarg2
+#define j_rarg3 c_rarg3
+
+#ifdef AARCH64
+#define j_rarg4 c_rarg4
+#define j_rarg5 c_rarg5
+#define j_rarg6 c_rarg6
+#define j_rarg7 c_rarg7
+#endif
+
+#endif // CPU_ARM_VM_REGISTER_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/register_definitions_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/register.hpp"
+#include "interp_masm_arm.hpp"
+#include "register_arm.hpp"
+
+REGISTER_DEFINITION(Register, noreg);
+REGISTER_DEFINITION(FloatRegister, fnoreg);
+
+#ifdef AARCH64
+
+REGISTER_DEFINITION(FloatRegister, V0);
+REGISTER_DEFINITION(FloatRegister, V1);
+REGISTER_DEFINITION(FloatRegister, V2);
+REGISTER_DEFINITION(FloatRegister, V3);
+REGISTER_DEFINITION(FloatRegister, V4);
+REGISTER_DEFINITION(FloatRegister, V5);
+REGISTER_DEFINITION(FloatRegister, V6);
+REGISTER_DEFINITION(FloatRegister, V7);
+REGISTER_DEFINITION(FloatRegister, V8);
+REGISTER_DEFINITION(FloatRegister, V9);
+REGISTER_DEFINITION(FloatRegister, V10);
+REGISTER_DEFINITION(FloatRegister, V11);
+REGISTER_DEFINITION(FloatRegister, V12);
+REGISTER_DEFINITION(FloatRegister, V13);
+REGISTER_DEFINITION(FloatRegister, V14);
+REGISTER_DEFINITION(FloatRegister, V15);
+REGISTER_DEFINITION(FloatRegister, V16);
+REGISTER_DEFINITION(FloatRegister, V17);
+REGISTER_DEFINITION(FloatRegister, V18);
+REGISTER_DEFINITION(FloatRegister, V19);
+REGISTER_DEFINITION(FloatRegister, V20);
+REGISTER_DEFINITION(FloatRegister, V21);
+REGISTER_DEFINITION(FloatRegister, V22);
+REGISTER_DEFINITION(FloatRegister, V23);
+REGISTER_DEFINITION(FloatRegister, V24);
+REGISTER_DEFINITION(FloatRegister, V25);
+REGISTER_DEFINITION(FloatRegister, V26);
+REGISTER_DEFINITION(FloatRegister, V27);
+REGISTER_DEFINITION(FloatRegister, V28);
+REGISTER_DEFINITION(FloatRegister, V29);
+REGISTER_DEFINITION(FloatRegister, V30);
+REGISTER_DEFINITION(FloatRegister, V31);
+
+#else // !AARCH64
+
+REGISTER_DEFINITION(FloatRegister, S0);
+REGISTER_DEFINITION(FloatRegister, S1_reg);
+REGISTER_DEFINITION(FloatRegister, S2_reg);
+REGISTER_DEFINITION(FloatRegister, S3_reg);
+REGISTER_DEFINITION(FloatRegister, S4_reg);
+REGISTER_DEFINITION(FloatRegister, S5_reg);
+REGISTER_DEFINITION(FloatRegister, S6_reg);
+REGISTER_DEFINITION(FloatRegister, S7);
+REGISTER_DEFINITION(FloatRegister, S8);
+REGISTER_DEFINITION(FloatRegister, S9);
+REGISTER_DEFINITION(FloatRegister, S10);
+REGISTER_DEFINITION(FloatRegister, S11);
+REGISTER_DEFINITION(FloatRegister, S12);
+REGISTER_DEFINITION(FloatRegister, S13);
+REGISTER_DEFINITION(FloatRegister, S14);
+REGISTER_DEFINITION(FloatRegister, S15);
+REGISTER_DEFINITION(FloatRegister, S16);
+REGISTER_DEFINITION(FloatRegister, S17);
+REGISTER_DEFINITION(FloatRegister, S18);
+REGISTER_DEFINITION(FloatRegister, S19);
+REGISTER_DEFINITION(FloatRegister, S20);
+REGISTER_DEFINITION(FloatRegister, S21);
+REGISTER_DEFINITION(FloatRegister, S22);
+REGISTER_DEFINITION(FloatRegister, S23);
+REGISTER_DEFINITION(FloatRegister, S24);
+REGISTER_DEFINITION(FloatRegister, S25);
+REGISTER_DEFINITION(FloatRegister, S26);
+REGISTER_DEFINITION(FloatRegister, S27);
+REGISTER_DEFINITION(FloatRegister, S28);
+REGISTER_DEFINITION(FloatRegister, S29);
+REGISTER_DEFINITION(FloatRegister, S30);
+REGISTER_DEFINITION(FloatRegister, S31);
+REGISTER_DEFINITION(FloatRegister, Stemp);
+REGISTER_DEFINITION(FloatRegister, D0);
+REGISTER_DEFINITION(FloatRegister, D1);
+REGISTER_DEFINITION(FloatRegister, D2);
+REGISTER_DEFINITION(FloatRegister, D3);
+REGISTER_DEFINITION(FloatRegister, D4);
+REGISTER_DEFINITION(FloatRegister, D5);
+REGISTER_DEFINITION(FloatRegister, D6);
+REGISTER_DEFINITION(FloatRegister, D7);
+REGISTER_DEFINITION(FloatRegister, D8);
+REGISTER_DEFINITION(FloatRegister, D9);
+REGISTER_DEFINITION(FloatRegister, D10);
+REGISTER_DEFINITION(FloatRegister, D11);
+REGISTER_DEFINITION(FloatRegister, D12);
+REGISTER_DEFINITION(FloatRegister, D13);
+REGISTER_DEFINITION(FloatRegister, D14);
+REGISTER_DEFINITION(FloatRegister, D15);
+REGISTER_DEFINITION(FloatRegister, D16);
+REGISTER_DEFINITION(FloatRegister, D17);
+REGISTER_DEFINITION(FloatRegister, D18);
+REGISTER_DEFINITION(FloatRegister, D19);
+REGISTER_DEFINITION(FloatRegister, D20);
+REGISTER_DEFINITION(FloatRegister, D21);
+REGISTER_DEFINITION(FloatRegister, D22);
+REGISTER_DEFINITION(FloatRegister, D23);
+REGISTER_DEFINITION(FloatRegister, D24);
+REGISTER_DEFINITION(FloatRegister, D25);
+REGISTER_DEFINITION(FloatRegister, D26);
+REGISTER_DEFINITION(FloatRegister, D27);
+REGISTER_DEFINITION(FloatRegister, D28);
+REGISTER_DEFINITION(FloatRegister, D29);
+REGISTER_DEFINITION(FloatRegister, D30);
+REGISTER_DEFINITION(FloatRegister, D31);
+
+#endif //AARCH64
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/relocInfo_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "assembler_arm.inline.hpp"
+#include "code/relocInfo.hpp"
+#include "nativeInst_arm.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/safepoint.hpp"
+
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {  // Writes x + o into the NativeMovConstReg at addr(), or only checks it when verify_only
+
+  NativeMovConstReg* ni = nativeMovConstReg_at(addr());
+#if defined(AARCH64) && defined(COMPILER2)
+  if (ni->is_movz()) {  // movz form: only x is used on this path, the offset o is not applied
+    assert(type() == relocInfo::oop_type, "!");
+    if (verify_only) {
+      uintptr_t d = ni->data();
+      guarantee((d >> 32) == 0, "not narrow oop");
+      narrowOop no = d;
+      oop decoded = oopDesc::decode_heap_oop(no);  // distinct name: 'o' is the offset parameter
+      guarantee(cast_from_oop<intptr_t>(decoded) == (intptr_t)x, "instructions must match");
+    } else {
+      ni->set_data((intptr_t)x);
+    }
+    return;
+  }
+#endif
+  if (verify_only) {
+    guarantee(ni->data() == (intptr_t)(x + o), "instructions must match");
+  } else {
+    ni->set_data((intptr_t)(x + o));
+  }
+}
+
+address Relocation::pd_call_destination(address orig_addr) {  // Returns the target of the call/jump at addr(); orig_addr != NULL means the code was moved from there
+  address pc = addr();
+
+  int adj = 0;
+  if (orig_addr != NULL) {
+    // We just moved this call instruction from orig_addr to addr().
+    // This means that, when relative, its target will appear to have grown by addr() - orig_addr.
+    adj = orig_addr - pc;
+  }
+
+  RawNativeInstruction* ni = rawNativeInstruction_at(pc);
+
+#if (!defined(AARCH64))
+  if (ni->is_add_lr()) {  // arm32-only block, so no AARCH64_ONLY alternative is needed here
+    // On arm32, skip the optional 'add LR, PC, #offset'
+    // (Allowing the jump support code to handle fat_call)
+    pc = ni->next_raw_instruction_address();
+    ni = nativeInstruction_at(pc);
+  }
+#endif
+
+  if (AARCH64_ONLY(ni->is_call()) NOT_AARCH64(ni->is_bl())) {
+    // For arm32, fat_call are handled by is_jump for the new 'ni',
+    // requiring only to support is_bl.
+    //
+    // For AARCH64, skipping a leading adr is not sufficient
+    // to reduce calls to a simple bl.
+    return rawNativeCall_at(pc)->destination(adj);
+  }
+
+  if (ni->is_jump()) {
+    return rawNativeJump_at(pc)->jump_destination(adj);
+  }
+  ShouldNotReachHere();  // neither a call nor a jump at this relocation
+  return NULL;
+}
+
+void Relocation::pd_set_call_destination(address x) {  // Retargets the call/jump at addr() to x
+  address pc = addr();
+  NativeInstruction* ni = nativeInstruction_at(pc);
+
+#if (!defined(AARCH64))
+  if (ni->is_add_lr()) {  // arm32-only block, so no AARCH64_ONLY alternative is needed here
+    // On arm32, skip the optional 'add LR, PC, #offset'
+    // (Allowing the jump support code to handle fat_call)
+    pc = ni->next_raw_instruction_address();
+    ni = nativeInstruction_at(pc);
+  }
+#endif
+
+  if (AARCH64_ONLY(ni->is_call()) NOT_AARCH64(ni->is_bl())) {
+    // For arm32, fat_call are handled by is_jump for the new 'ni',
+    // requiring only to support is_bl.
+    //
+    // For AARCH64, skipping a leading adr is not sufficient
+    // to reduce calls to a simple bl.
+    rawNativeCall_at(pc)->set_destination(x);
+    return;
+  }
+
+  if (ni->is_jump()) { // raw jump
+    rawNativeJump_at(pc)->set_jump_destination(x);
+    return;
+  }
+  ShouldNotReachHere();  // neither a call nor a jump at this relocation
+}
+
+
+address* Relocation::pd_address_in_code() {  // Views the relocated location itself as a slot holding an address.
+  return reinterpret_cast<address*>(addr());
+}
+
+address Relocation::pd_get_address_from_code() {  // Reads the address stored at the slot returned by pd_address_in_code()
+ return *pd_address_in_code();
+}
+
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {  // Empty body: no fix-up is performed here; src and dest are unused
+}
+
+void metadata_Relocation::pd_fix_value(address x) {  // Calls set_value(x), except on the movz (AArch64/C2) and movw-supported (arm32) paths below
+ assert(! addr_in_const(), "Do not use");
+#ifdef AARCH64
+#ifdef COMPILER2
+ NativeMovConstReg* ni = nativeMovConstReg_at(addr());
+ if (ni->is_movz()) {  // movz form is left untouched
+ return;
+ }
+#endif // COMPILER2
+ set_value(x);
+#else
+ if (!VM_Version::supports_movw()) {  // without movw support the value is rewritten
+ set_value(x);
+#ifdef ASSERT
+ } else {
+ // the movw/movt data should be correct
+ NativeMovConstReg* ni = nativeMovConstReg_at(addr());
+ assert(ni->is_movw(), "not a movw");
+ // The following assert should be correct but the shared code
+ // currently 'fixes' the metadata instructions before the
+ // metadata_table is copied in the new method (see
+ // JDK-8042845). This means that 'x' (which comes from the table)
+ // does not match the value inlined in the code (which is
+ // correct). Failure can be temporarily ignored since the code is
+ // correct and the table is copied shortly afterward.
+ //
+ // assert(ni->data() == (int)x, "metadata relocation mismatch");
+#endif // ASSERT
+ }
+#endif // !AARCH64
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/relocInfo_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_RELOCINFO_ARM_HPP
+#define CPU_ARM_VM_RELOCINFO_ARM_HPP
+
+ private:  // NOTE(review): fragment with no enclosing class in this header; presumably textually included inside a class body in shared code, confirm
+
+ enum {
+ offset_unit = 4,  // presumably the byte granularity of relocation offsets; TODO confirm against code/relocInfo.hpp
+ format_width = 0  // presumably the number of format bits kept per relocation; TODO confirm against code/relocInfo.hpp
+ };
+
+#endif // CPU_ARM_VM_RELOCINFO_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/runtime_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#ifdef COMPILER2
+#include "asm/assembler.hpp"
+#include "assembler_arm.inline.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "code/vmreg.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_arm.hpp"
+#include "opto/runtime.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "vmreg_arm.inline.hpp"
+#endif
+
+#define __ masm->
+
+//------------------------------ generate_exception_blob ---------------------------
+// creates exception blob at the end
+// Using exception blob, this code is jumped from a compiled method.
+// (see emit_exception_handler in the .ad file of this port)
+//
+// Given an exception pc at a call we call into the runtime for the
+// handler in this method. This handler might merely restore state
+// (i.e. callee save registers) unwind the frame and jump to the
+// exception handler for the nmethod if there is no Java level handler
+// for the nmethod.
+//
+// This code is entered with a jmp.
+//
+// Arguments:
+// Rexception_obj (R4/R19): exception oop
+// Rexception_pc (R5/R20): exception pc
+//
+// Results:
+// Rexception_obj (R4/R19): exception oop
+// Rexception_pc (R5/R20): exception pc, reloaded from the thread
+// destination: exception handler of caller
+//
+// Note: the exception pc MUST be at a call (precise debug information)
+//
+void OptoRuntime::generate_exception_blob() {
+ // allocate space for code
+ ResourceMark rm;
+ int pad = VerifyThread ? 256 : 0;// Extra slop space for more verify code
+
+ // setup code generation tools
+ // Measured 8/7/03 at 256 in 32bit debug build (no VerifyThread)
+ // Measured 8/7/03 at 528 in 32bit debug build (VerifyThread)
+ CodeBuffer buffer("exception_blob", 600+pad, 512);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+
+ int framesize_in_words = 2; // FP + LR
+ int framesize_in_bytes = framesize_in_words * wordSize;
+ int framesize_in_slots = framesize_in_bytes / sizeof(jint); // same frame counted in jint-sized slots, used to size the OopMap below
+
+ int start = __ offset(); // blob entry; the oop map pc below is recorded relative to this offset
+
+ __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset())); // park the incoming exception oop in the thread
+ __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset())); // ... and the incoming exception pc
+
+ // This call does all the hard work. It checks if an exception catch
+ // exists in the method.
+ // If so, it returns the handler address.
+ // If the nmethod has been deoptimized and it had a handler the handler
+ // address is the deopt blob unpack_with_exception entry.
+ //
+ // If no handler exists it prepares for stack-unwinding, restoring the callee-save
+ // registers of the frame being removed.
+ //
+ __ mov(LR, Rexception_pc); // the exception pc is pushed below as this frame's return address
+ __ raw_push(FP, LR); // the two-word frame counted by framesize_in_words
+ int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
+
+ __ mov(R0, Rthread); // first argument of handle_exception_C: the current thread
+
+ // This call can block at exit and nmethod can be deoptimized at that
+ // point. If the nmethod had a catch point we would jump to the
+ // now deoptimized catch point and fall thru the vanilla deopt
+ // path and lose the exception
+ // Sure would be simpler if this call didn't block!
+ __ call(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C));
+ if (pc_offset == -1) { // presumably -1 means set_last_Java_frame recorded no pc itself - confirm
+ pc_offset = __ offset(); // fall back to the offset just past the call
+ }
+
+ // Set an oopmap for the call site. This oopmap will only be used if we
+ // are unwinding the stack. Hence, all locations will be dead.
+ // Callee-saved registers will be the same as the frame above (i.e.,
+ // handle_exception_stub), since they were restored when we got the
+ // exception.
+
+ OopMapSet *oop_maps = new OopMapSet();
+ oop_maps->add_gc_map(pc_offset - start, new OopMap(framesize_in_slots, 0)); // empty map keyed by the call's offset from the blob start
+
+ __ reset_last_Java_frame(Rtemp);
+
+ __ raw_pop(FP, LR); // undo the frame pushed before the call
+
+ // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site.
+ __ ldr(Rtemp, Address(Rthread, JavaThread::is_method_handle_return_offset()));
+#ifdef AARCH64
+ Label skip;
+ __ cbz(Rtemp, skip); // flag is zero: keep the current SP
+ __ mov(SP, Rmh_SP_save);
+ __ bind(skip);
+#else
+ __ cmp(Rtemp, 0);
+ __ mov(SP, Rmh_SP_save, ne); // conditional move, executed only when the flag is non-zero
+#endif
+
+ // R0 contains handler address
+ // Since this may be the deopt blob we must set R5 to look like we returned
+ // from the original pc that threw the exception
+
+ __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset())); // R5/R20
+
+ __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset())); // R4/R19
+ __ mov(Rtemp, 0); // zero used to clear the thread fields below
+#ifdef ASSERT
+ __ str(Rtemp, Address(Rthread, JavaThread::exception_handler_pc_offset())); // debug builds also clear the handler pc and exception pc
+ __ str(Rtemp, Address(Rthread, JavaThread::exception_pc_offset()));
+#endif
+ // Clear the exception oop so GC no longer processes it as a root.
+ __ str(Rtemp, Address(Rthread, JavaThread::exception_oop_offset()));
+ __ jump(R0); // continue at the handler address returned in R0
+
+ // -------------
+ // make sure all code is generated
+ masm->flush();
+
+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize_in_words); // publish the finished blob
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/sharedRuntime_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,2501 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_arm.inline.hpp"
+#include "code/debugInfoRec.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "logging/log.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_arm.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+#ifdef SHARK
+#include "compiler/compileBroker.hpp"
+#include "shark/sharkCompiler.hpp"
+#endif
+
+#define __ masm->
+
+class RegisterSaver { // layout of the register save area plus the routines that emit the save/restore code
+public:
+
+ // Special registers:
+ //              32-bit ARM     64-bit ARM
+ // Rthread:       R10            R28
+ // LR:            R14            R30
+
+ // Rthread is callee saved in the C ABI and never changed by compiled code:
+ // no need to save it.
+
+ // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
+ // The one at LR_offset is a return address that is needed by stack walking.
+ // A c2 method uses LR as a standard register so it may be live when we
+ // branch to the runtime. The slot at R14/R30_offset is for the value of LR
+ // in case it's live in the method we are coming from.
+
+#ifdef AARCH64
+
+ //
+ // On AArch64 the register save area has the following layout:
+ //
+ // |---------------------|
+ // | return address (LR) |
+ // | FP                  |
+ // |---------------------|
+ // | V31                 |
+ // | ...                 |
+ // | V0                  |
+ // |---------------------|
+ // | padding             |
+ // | R30 (LR live value) |
+ // |---------------------|
+ // | R27                 |
+ // | ...                 |
+ // | R0                  |
+ // |---------------------| <-- SP
+ //
+
+ enum RegisterLayout { // offsets are in words from SP (they are multiplied by wordSize where used)
+ number_of_saved_gprs = 28, // R0..R27, see the layout above
+ number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
+ words_per_fpr = ConcreteRegisterImpl::words_per_fpr,
+
+ R0_offset = 0,
+ R30_offset = R0_offset + number_of_saved_gprs,
+ D0_offset = R30_offset + 2, // R30 slot plus one padding word
+ FP_offset = D0_offset + number_of_saved_fprs * words_per_fpr,
+ LR_offset = FP_offset + 1,
+
+ reg_save_size = LR_offset + 1, // total size of the save area, in words
+ };
+
+ static const int Rmethod_offset; // defined out of class from the registers' encodings
+ static const int Rtemp_offset;
+
+#else
+
+ enum RegisterLayout { // slot indices from SP; the FP register block sits below the core registers
+ fpu_save_size = FloatRegisterImpl::number_of_registers, // slots reserved for FP registers (reserved even without VFP)
+#ifndef __SOFTFP__
+ D0_offset = 0,
+#endif
+ R0_offset = fpu_save_size,
+ R1_offset,
+ R2_offset,
+ R3_offset,
+ R4_offset,
+ R5_offset,
+ R6_offset,
+#if (FP_REG_NUM != 7)
+ // if not saved as FP
+ R7_offset,
+#endif
+ R8_offset,
+ R9_offset,
+#if (FP_REG_NUM != 11)
+ // if not saved as FP
+ R11_offset,
+#endif
+ R12_offset,
+ R14_offset, // live value of LR (see the "2 slots for LR" note above)
+ FP_offset,
+ LR_offset, // return address
+ reg_save_size, // total number of slots in the save area
+
+ Rmethod_offset = R9_offset,
+ Rtemp_offset = R12_offset,
+ };
+
+ // all regs but Rthread (R10), FP (R7 or R11), SP and PC
+ // (altFP_7_11 is the one among R7 and R11 which is not FP)
+#define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
+
+#endif // AARCH64
+
+ // When LR may be live in the nmethod from which we are coming
+ // then lr_saved is true, the return address is saved before the
+ // call to save_live_registers by the caller and LR contains the
+ // live value.
+
+ static OopMap* save_live_registers(MacroAssembler* masm,
+ int* total_frame_words, // out: set to reg_save_size
+ bool lr_saved = false);
+ static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true); // restore_lr == false: LR is not reloaded
+
+};
+
+
+#ifdef AARCH64
+const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding(); // save-area word index of Rmethod: GPRs are stored in encoding order starting at R0_offset
+const int RegisterSaver::Rtemp_offset = RegisterSaver::R0_offset + Rtemp->encoding(); // same scheme for Rtemp
+#endif // AARCH64
+
+
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,      // assembler that receives the spill code
+                                           int* total_frame_words,    // out: size of the save area, in words
+                                           bool lr_saved) {           // true: the caller already pushed the return address and LR holds a live value
+  *total_frame_words = reg_save_size;
+
+  // The map records the save-area slot of every register stored below; it is returned to the caller.
+  OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
+
+#ifdef AARCH64
+  assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
+
+  if (lr_saved) {
+    // LR was stashed here, so that jump could use it as a scratch reg
+    __ ldr(LR, Address(SP, 0));
+    // There are two words on the stack top:
+    //  [SP + 0]: placeholder for FP
+    //  [SP + wordSize]: saved return address
+    __ str(FP, Address(SP, 0));
+  } else {
+    __ raw_push(FP, LR);
+  }
+
+  __ sub(SP, SP, (reg_save_size - 2) * wordSize);  // the FP/LR pair is already on the stack; reserve the rest
+
+  for (int i = 0; i < number_of_saved_gprs; i += 2) {
+    int offset = R0_offset + i;
+    __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
+    map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
+  }
+
+  __ str(R30, Address(SP, R30_offset * wordSize));  // live value of LR, kept apart from the return address slot
+  map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());
+
+  for (int i = 0; i < number_of_saved_fprs; i += 2) {
+    int offset1 = D0_offset + i * words_per_fpr;
+    int offset2 = offset1 + words_per_fpr;
+    Address base(SP, offset1 * wordSize);
+    if (words_per_fpr == 2) {
+      // pair of "wide" quad vector registers
+      __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
+    } else {
+      // pair of double vector registers
+      __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
+    }
+    map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
+  }
+#else
+  if (lr_saved) {
+    __ push(RegisterSet(FP));  // the return address slot was already pushed by the caller
+  } else {
+    __ push(RegisterSet(FP) | RegisterSet(LR));
+  }
+  __ push(SAVED_BASE_REGS);
+  if (HaveVFP) {
+    if (VM_Version::has_vfp3_32()) {
+      __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
+    } else {
+      if (FloatRegisterImpl::number_of_registers > 32) {
+        assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
+        __ sub(SP, SP, 32 * wordSize);  // keep the layout fixed: reserve the D16-D31 area even though it is not stored
+      }
+    }
+    __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
+  } else {
+    __ sub(SP, SP, fpu_save_size * wordSize);  // no VFP: the FP area is reserved but left unwritten
+  }
+
+  int i;
+  int j=0;
+  for (i = R0_offset; i <= R9_offset; i++) {
+    if (j == FP_REG_NUM) {
+      // skip the FP register, managed below.
+      j++;
+    }
+    map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
+    j++;
+  }
+  assert(j == R10->encoding(), "must be");
+#if (FP_REG_NUM != 11)
+  // add R11, if not managed as FP
+  map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
+#endif
+  map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
+  map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
+  if (HaveVFP) {
+    for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
+      map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
+      map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
+    }
+  }
+#endif // AARCH64
+
+  return map;  // describes the save area laid out above
+}
+
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
+  // Reloads the registers stored in the save area and releases the area.
+  // With restore_lr == false LR is not reloaded and its slot stays on the stack
+  // (on AArch64 the FP slot stays too: FP is loaded, not popped).
+#ifdef AARCH64
+  for (int gpr = 0; gpr < number_of_saved_gprs; gpr += 2) {
+    __ ldp(as_Register(gpr), as_Register(gpr + 1), Address(SP, (R0_offset + gpr) * wordSize));
+  }
+
+  __ ldr(R30, Address(SP, R30_offset * wordSize));
+
+  for (int fpr = 0; fpr < number_of_saved_fprs; fpr += 2) {
+    Address pair_addr(SP, (D0_offset + fpr * words_per_fpr) * wordSize);
+    if (words_per_fpr != 2) {
+      // two double vector registers per load
+      __ ldp_d(as_FloatRegister(fpr), as_FloatRegister(fpr + 1), pair_addr);
+    } else {
+      // two "wide" quad vector registers per load
+      __ ldp_q(as_FloatRegister(fpr), as_FloatRegister(fpr + 1), pair_addr);
+    }
+  }
+
+  __ add(SP, SP, (reg_save_size - 2) * wordSize);
+
+  if (!restore_lr) {
+    __ ldr(FP, Address(SP, 0));
+  } else {
+    __ raw_pop(FP, LR);
+  }
+#else
+  if (!HaveVFP) {
+    __ add(SP, SP, fpu_save_size * wordSize);
+  } else {
+    __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
+    if (VM_Version::has_vfp3_32()) {
+      __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
+    } else if (FloatRegisterImpl::number_of_registers > 32) {
+      assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
+      __ add(SP, SP, 32 * wordSize);
+    }
+  }
+  __ pop(SAVED_BASE_REGS);
+  if (!restore_lr) {
+    __ pop(RegisterSet(FP));
+  } else {
+    __ pop(RegisterSet(FP) | RegisterSet(LR));
+  }
+#endif // AARCH64
+}
+
+#ifdef AARCH64
+
+static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
+  if (ret_type != T_DOUBLE && ret_type != T_FLOAT) {
+    __ raw_push(R0, ZR);  // non-FP result: R0, paired with ZR so that two words are pushed
+  } else {
+    __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));  // FP result: D0, SP pre-decremented by two words
+  }
+}
+
+static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
+  if (ret_type != T_DOUBLE && ret_type != T_FLOAT) {
+    __ raw_pop(R0, ZR);  // non-FP result: reload R0, the second popped word goes to ZR
+  } else {
+    __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));  // FP result: reload D0, then SP moves up by two words
+  }
+}
+
+static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
+  __ raw_push(R0, R1);  // R0-R7 are always saved, one pair at a time
+  __ raw_push(R2, R3);
+  __ raw_push(R4, R5);
+  __ raw_push(R6, R7);
+
+  assert(FPR_PARAMS == 8, "adjust this code");
+  assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
+  Address next_pair(SP, -2 * wordSize, pre_indexed);  // every stored FP pair moves SP down by two words
+  if (fp_regs_in_arguments >= 7) __ stp_d(V6, V7, next_pair);
+  if (fp_regs_in_arguments >= 5) __ stp_d(V4, V5, next_pair);
+  if (fp_regs_in_arguments >= 3) __ stp_d(V2, V3, next_pair);
+  if (fp_regs_in_arguments >= 1) __ stp_d(V0, V1, next_pair);
+}
+
+static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
+  assert(FPR_PARAMS == 8, "adjust this code");
+  assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
+  Address next_pair(SP, 2 * wordSize, post_indexed);  // every reloaded FP pair moves SP up by two words
+  if (fp_regs_in_arguments >= 1) __ ldp_d(V0, V1, next_pair);
+  if (fp_regs_in_arguments >= 3) __ ldp_d(V2, V3, next_pair);
+  if (fp_regs_in_arguments >= 5) __ ldp_d(V4, V5, next_pair);
+  if (fp_regs_in_arguments >= 7) __ ldp_d(V6, V7, next_pair);
+
+  __ raw_pop(R6, R7);  // core registers come back in the reverse of the push order
+  __ raw_pop(R4, R5);
+  __ raw_pop(R2, R3);
+  __ raw_pop(R0, R1);
+}
+
+#else // AARCH64
+
+static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
+#ifdef __ABI_HARD__
+  if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
+    __ sub(SP, SP, 8);          // hard-float build: make room for one double
+    __ fstd(D0, Address(SP));   // and store D0 there
+    return;
+  }
+#endif // __ABI_HARD__
+  __ raw_push(R0, R1);          // every other case: save the R0/R1 pair
+}
+
+static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
+#ifdef __ABI_HARD__
+  if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
+    __ fldd(D0, Address(SP));   // hard-float build: reload D0
+    __ add(SP, SP, 8);          // and release the double-sized slot
+    return;
+  }
+#endif // __ABI_HARD__
+  __ raw_pop(R0, R1);           // every other case: reload the R0/R1 pair
+}
+
+static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
+  // Only R1-R3 have to be saved; a fourth register is pushed to keep 8-byte alignment.
+  __ push(RegisterSet(R0, R3));
+
+#ifdef __ABI_HARD__
+  // Preserve the FP argument registers as well.
+  // The locking code is unlikely to modify volatile FP registers,
+  // but nothing guarantees that it does not.
+  if (fp_regs_in_arguments != 0) {
+    // round the count up to a number of double registers
+    int num_dregs = (fp_regs_in_arguments + 1) >> 1;
+    __ fstmdbd(SP, FloatRegisterSet(D0, num_dregs), writeback);
+  }
+#endif // __ABI_HARD__
+}
+
+static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
+#ifdef __ABI_HARD__
+  if (fp_regs_in_arguments != 0) {
+    int num_dregs = (fp_regs_in_arguments + 1) >> 1;  // same rounding as in push_param_registers
+    __ fldmiad(SP, FloatRegisterSet(D0, num_dregs), writeback);
+  }
+#endif // __ABI_HARD__
+
+  __ pop(RegisterSet(R0, R3));  // core registers were pushed first, so they are popped last
+}
+
+#endif // AARCH64
+
+
+// Is vector's size (in bytes) bigger than a size saved by default?
+// All vector registers are saved by default on ARM.
+bool SharedRuntime::is_wide_vector(int size) {
+ return false; // the size argument is ignored: no vector size is reported as wide on ARM
+}
+
+size_t SharedRuntime::trampoline_size() {
+ return 16; // NOTE(review): presumably bytes, sized to hold what generate_trampoline() emits - confirm
+}
+
+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
+ InlinedAddress dest(destination); // literal holding the jump target
+ __ indirect_jump(dest, Rtemp); // jump through the literal; Rtemp is handed over as the temporary register
+ __ bind_literal(dest); // presumably emits the literal word here, right after the jump - confirm
+}
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt, // argument types; T_LONG/T_DOUBLE are followed by a T_VOID half
+ VMRegPair *regs, // out: location assigned to each entry of sig_bt
+ VMRegPair *regs2, // must be NULL on ARM
+ int total_args_passed) { // number of entries in sig_bt and regs
+ assert(regs2 == NULL, "not needed on arm");
+#ifdef AARCH64
+ int slot = 0; // counted in 32-bit VMReg slots
+ int reg = 0; // next free core argument register
+ int fp_reg = 0; // next free FP argument register
+ for (int i = 0; i < total_args_passed; i++) {
+ switch (sig_bt[i]) {
+ case T_SHORT:
+ case T_CHAR:
+ case T_BYTE:
+ case T_BOOLEAN:
+ case T_INT:
+ if (reg < GPR_PARAMS) {
+ Register r = as_Register(reg);
+ regs[i].set1(r->as_VMReg());
+ reg++;
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(slot));
+ slot+=2; // every stack argument advances by two 32-bit slots
+ }
+ break;
+ case T_LONG:
+ assert(sig_bt[i+1] == T_VOID, "missing Half" );
+ // fall through
+ case T_ARRAY:
+ case T_OBJECT:
+ case T_ADDRESS:
+ if (reg < GPR_PARAMS) {
+ Register r = as_Register(reg);
+ regs[i].set2(r->as_VMReg());
+ reg++;
+ } else {
+ regs[i].set2(VMRegImpl::stack2reg(slot));
+ slot+=2;
+ }
+ break;
+ case T_FLOAT:
+ if (fp_reg < FPR_PARAMS) {
+ FloatRegister r = as_FloatRegister(fp_reg);
+ regs[i].set1(r->as_VMReg());
+ fp_reg++;
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(slot));
+ slot+=2;
+ }
+ break;
+ case T_DOUBLE:
+ assert(sig_bt[i+1] == T_VOID, "missing Half" );
+ if (fp_reg < FPR_PARAMS) {
+ FloatRegister r = as_FloatRegister(fp_reg);
+ regs[i].set2(r->as_VMReg());
+ fp_reg++;
+ } else {
+ regs[i].set2(VMRegImpl::stack2reg(slot));
+ slot+=2;
+ }
+ break;
+ case T_VOID:
+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+ regs[i].set_bad(); // second half of a long/double: no location of its own
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+ return slot; // number of 32-bit stack slots consumed by stack-passed arguments
+
+#else // AARCH64
+
+ int slot = 0; // next free stack slot
+ int ireg = 0; // next free core argument register (R0-R3)
+#ifdef __ABI_HARD__
+ int fp_slot = 0; // next free even single-precision register index
+ int single_fpr_slot = 0; // odd while the upper half of a register pair is still free for a float
+#endif // __ABI_HARD__
+ for (int i = 0; i < total_args_passed; i++) {
+ switch (sig_bt[i]) {
+ case T_SHORT:
+ case T_CHAR:
+ case T_BYTE:
+ case T_BOOLEAN:
+ case T_INT:
+ case T_ARRAY:
+ case T_OBJECT:
+ case T_ADDRESS:
+#ifndef __ABI_HARD__
+ case T_FLOAT:
+#endif // !__ABI_HARD__
+ if (ireg < 4) {
+ Register r = as_Register(ireg);
+ regs[i].set1(r->as_VMReg());
+ ireg++;
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(slot));
+ slot++;
+ }
+ break;
+ case T_LONG:
+#ifndef __ABI_HARD__
+ case T_DOUBLE:
+#endif // !__ABI_HARD__
+ assert(sig_bt[i+1] == T_VOID, "missing Half" );
+ if (ireg <= 2) {
+#if (ALIGN_WIDE_ARGUMENTS == 1)
+ if(ireg & 1) ireg++; // Aligned location required
+#endif
+ Register r1 = as_Register(ireg);
+ Register r2 = as_Register(ireg + 1);
+ regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
+ ireg += 2;
+#if (ALIGN_WIDE_ARGUMENTS == 0)
+ } else if (ireg == 3) {
+ // uses R3 + one stack slot
+ Register r = as_Register(ireg);
+ regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
+ ireg += 1;
+ slot += 1;
+#endif
+ } else {
+ if (slot & 1) slot++; // Aligned location required
+ regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
+ slot += 2;
+ ireg = 4; // no core register is handed out after a wide argument went to the stack
+ }
+ break;
+ case T_VOID:
+ regs[i].set_bad(); // second half of a long/double: no location of its own
+ break;
+#ifdef __ABI_HARD__
+ case T_FLOAT:
+ if ((fp_slot < 16)||(single_fpr_slot & 1)) {
+ if ((single_fpr_slot & 1) == 0) {
+ single_fpr_slot = fp_slot; // no half-used pair pending: start a new one
+ fp_slot += 2;
+ }
+ FloatRegister r = as_FloatRegister(single_fpr_slot);
+ single_fpr_slot++;
+ regs[i].set1(r->as_VMReg());
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(slot));
+ slot++;
+ }
+ break;
+ case T_DOUBLE:
+ assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
+ if (fp_slot <= 14) {
+ FloatRegister r1 = as_FloatRegister(fp_slot);
+ FloatRegister r2 = as_FloatRegister(fp_slot+1);
+ regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
+ fp_slot += 2;
+ } else {
+ if(slot & 1) slot++;
+ regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
+ slot += 2;
+ single_fpr_slot = 16; // even value: drops any pending half-used pair so later floats are not back-filled
+ }
+ break;
+#endif // __ABI_HARD__
+ default:
+ ShouldNotReachHere();
+ }
+ }
+ return slot; // number of stack slots consumed by stack-passed arguments
+#endif // AARCH64
+}
+
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt, // argument types; T_LONG/T_DOUBLE are followed by a T_VOID half
+ VMRegPair *regs, // out: location assigned to each entry of sig_bt
+ int total_args_passed, // number of entries in sig_bt and regs
+ int is_outgoing) { // not used by this implementation
+#ifdef AARCH64
+ // C calling convention on AArch64 is good enough.
+ return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
+#else
+#ifdef __SOFTFP__
+ // soft float is the same as the C calling convention.
+ return c_calling_convention(sig_bt, regs, NULL, total_args_passed); // everything below this return is unreachable in soft-float builds
+#endif // __SOFTFP__
+ (void) is_outgoing;
+ int slot = 0; // next free stack slot
+ int ireg = 0; // next free core argument register (R0-R3)
+ int freg = 0; // next free even single-precision register index
+ int single_fpr = 0; // odd while the upper half of a register pair is still free for a float
+
+ for (int i = 0; i < total_args_passed; i++) {
+ switch (sig_bt[i]) {
+ case T_SHORT:
+ case T_CHAR:
+ case T_BYTE:
+ case T_BOOLEAN:
+ case T_INT:
+ case T_ARRAY:
+ case T_OBJECT:
+ case T_ADDRESS:
+ if (ireg < 4) {
+ Register r = as_Register(ireg++);
+ regs[i].set1(r->as_VMReg());
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(slot++));
+ }
+ break;
+ case T_FLOAT:
+ // C2 utilizes S14/S15 for mem-mem moves
+ if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) { // presumably the macro lowers the limit by 2 only when C2 is built - confirm
+ if ((single_fpr & 1) == 0) {
+ single_fpr = freg; // no half-used pair pending: start a new one
+ freg += 2;
+ }
+ FloatRegister r = as_FloatRegister(single_fpr++);
+ regs[i].set1(r->as_VMReg());
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(slot++));
+ }
+ break;
+ case T_DOUBLE:
+ // C2 utilizes S14/S15 for mem-mem moves
+ if (freg <= 14 COMPILER2_PRESENT(-2)) {
+ FloatRegister r1 = as_FloatRegister(freg);
+ FloatRegister r2 = as_FloatRegister(freg + 1);
+ regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
+ freg += 2;
+ } else {
+ // Keep internally the aligned calling convention,
+ // ignoring ALIGN_WIDE_ARGUMENTS
+ if (slot & 1) slot++;
+ regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
+ slot += 2;
+ single_fpr = 16; // even value: drops any pending half-used pair so later floats are not back-filled
+ }
+ break;
+ case T_LONG:
+ // Keep internally the aligned calling convention,
+ // ignoring ALIGN_WIDE_ARGUMENTS
+ if (ireg <= 2) {
+ if (ireg & 1) ireg++; // start the pair on an even register
+ Register r1 = as_Register(ireg);
+ Register r2 = as_Register(ireg + 1);
+ regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
+ ireg += 2;
+ } else {
+ if (slot & 1) slot++; // start the pair on an even stack slot
+ regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
+ slot += 2;
+ ireg = 4; // no core register is handed out after a long went to the stack
+ }
+ break;
+ case T_VOID:
+ regs[i].set_bad(); // second half of a long/double: no location of its own
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ if (slot & 1) slot++; // round the stack-slot count up to an even number
+ return slot;
+#endif // AARCH64
+}
+
+static void patch_callers_callsite(MacroAssembler *masm) { // emits a call to SharedRuntime::fixup_callers_callsite, skipped when Method::code is null
+ Label skip;
+
+ __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
+ __ cbz(Rtemp, skip); // the word at Method::code_offset() is zero: nothing to fix up
+
+#ifdef AARCH64
+ push_param_registers(masm, FPR_PARAMS); // save all argument registers around the runtime call
+ __ raw_push(LR, ZR); // LR paired with ZR so that two words are pushed
+#else
+ // Pushing an even number of registers for stack alignment.
+ // Selecting R9, which had to be saved anyway for some platforms.
+ __ push(RegisterSet(R0, R3) | R9 | LR);
+#endif // AARCH64
+
+ __ mov(R0, Rmethod); // first argument: the callee method
+ __ mov(R1, LR); // second argument: the value of LR at this point
+ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
+
+#ifdef AARCH64
+ __ raw_pop(LR, ZR);
+ pop_param_registers(masm, FPR_PARAMS);
+#else
+ __ pop(RegisterSet(R0, R3) | R9 | LR); // same register set as the push above
+#endif // AARCH64
+
+ __ bind(skip);
+}
+
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
+ int total_args_passed, int comp_args_on_stack, // number of sig_bt entries; number of stack slots the compiled convention needs
+ const BasicType *sig_bt, const VMRegPair *regs) { // argument types and their compiled-convention locations
+ // TODO: ARM - Maybe we can use ldm to load arguments
+ const Register tmp = Rtemp; // avoid erasing R5_mh
+
+ // Next assert may not be needed but safer. Extra analysis required
+ // if there are not enough free registers and we need to use R5 here.
+ assert_different_registers(tmp, R5_mh);
+
+ // 6243940 We might end up in handle_wrong_method if
+ // the callee is deoptimized as we race thru here. If that
+ // happens we don't want to take a safepoint because the
+ // caller frame will look interpreted and arguments are now
+ // "compiled" so it is much better to make this transition
+ // invisible to the stack walking code. Unfortunately if
+ // we try and find the callee by normal means a safepoint
+ // is possible. So we stash the desired callee in the thread
+ // and the vm will find there should this case occur.
+ Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
+ __ str(Rmethod, callee_target_addr);
+
+#ifdef AARCH64
+
+ assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
+ assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);
+
+ if (comp_args_on_stack) {
+ __ sub_slow(SP, SP, round_to(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes)); // reserve the outgoing stack area, rounded up to the stack alignment
+ }
+
+ for (int i = 0; i < total_args_passed; i++) {
+ if (sig_bt[i] == T_VOID) {
+ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+ continue; // the second half of a long/double has no location of its own
+ }
+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
+
+ int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
+ Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i)); // where the interpreter left this argument, addressed from Rparams
+
+ VMReg r = regs[i].first();
+ bool full_word = regs[i].second()->is_valid(); // a valid second half selects the 64-bit copy
+
+ if (r->is_stack()) {
+ if (full_word) {
+ __ ldr(tmp, source_addr);
+ __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
+ } else {
+ __ ldr_w(tmp, source_addr);
+ __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
+ }
+ } else if (r->is_Register()) {
+ if (full_word) {
+ __ ldr(r->as_Register(), source_addr);
+ } else {
+ __ ldr_w(r->as_Register(), source_addr);
+ }
+ } else if (r->is_FloatRegister()) {
+ if (sig_bt[i] == T_DOUBLE) {
+ __ ldr_d(r->as_FloatRegister(), source_addr);
+ } else {
+ __ ldr_s(r->as_FloatRegister(), source_addr);
+ }
+ } else {
+ assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
+ }
+ }
+
+ __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
+ __ br(tmp); // jump to the address loaded from Method::from_compiled_offset()
+
+#else
+
+ assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
+
+ const Register initial_sp = Rmethod; // temporarily scratched
+
+ // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
+ assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
+
+ __ mov(initial_sp, SP); // the arguments are read relative to the SP on entry
+
+ if (comp_args_on_stack) {
+ __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
+ }
+ __ bic(SP, SP, StackAlignmentInBytes - 1); // clear the low bits: align SP downwards
+
+ for (int i = 0; i < total_args_passed; i++) {
+ if (sig_bt[i] == T_VOID) {
+ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+ continue; // the second half of a long/double has no location of its own
+ }
+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
+ int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
+
+ VMReg r_1 = regs[i].first();
+ VMReg r_2 = regs[i].second();
+ if (r_1->is_stack()) {
+ int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
+ if (!r_2->is_valid()) {
+ __ ldr(tmp, Address(initial_sp, arg_offset));
+ __ str(tmp, Address(SP, stack_offset));
+ } else {
+ __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize)); // two-word value: first word comes from one stack element below arg_offset
+ __ str(tmp, Address(SP, stack_offset));
+ __ ldr(tmp, Address(initial_sp, arg_offset));
+ __ str(tmp, Address(SP, stack_offset + wordSize));
+ }
+ } else if (r_1->is_Register()) {
+ if (!r_2->is_valid()) {
+ __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
+ } else {
+ __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
+ __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
+ }
+ } else if (r_1->is_FloatRegister()) {
+#ifdef __SOFTFP__
+ ShouldNotReachHere();
+#endif // __SOFTFP__
+ if (!r_2->is_valid()) {
+ __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
+ } else {
+ __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
+ }
+ } else {
+ assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
+ }
+ }
+
+ // restore Rmethod (scratched for initial_sp)
+ __ ldr(Rmethod, callee_target_addr);
+ __ ldr(PC, Address(Rmethod, Method::from_compiled_offset())); // loading PC performs the jump
+
+#endif // AARCH64
+}
+
+// Emits the compiled-to-interpreter (c2i) adapter.
+//
+// The generated code reserves an argument area on the stack, copies every
+// argument from its compiled-convention location (register, float register or
+// caller stack slot, as described by 'regs') into that area, and then jumps to
+// the method's interpreter entry.
+//
+//   masm               - assembler to emit into
+//   total_args_passed  - number of entries in sig_bt/regs (T_VOID halves included)
+//   comp_args_on_stack - not used by this function
+//   sig_bt, regs       - per-argument basic type and location
+//   skip_fixup         - bound right after the call-site patching code, so a
+//                        branch to it bypasses patch_callers_callsite
+static void gen_c2i_adapter(MacroAssembler *masm,
+                            int total_args_passed, int comp_args_on_stack,
+                            const BasicType *sig_bt, const VMRegPair *regs,
+                            Label& skip_fixup) {
+  // TODO: ARM - May be can use stm to deoptimize arguments
+  const Register scratch = Rtemp;
+
+  patch_callers_callsite(masm);
+  __ bind(skip_fixup);
+
+  __ mov(Rsender_sp, SP); // not yet saved
+
+#ifdef AARCH64
+
+  // Size of the interpreter argument area, rounded up to the stack alignment.
+  int args_area_bytes = round_to(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
+  if (args_area_bytes != 0) {
+    __ sub(SP, SP, args_area_bytes);
+  }
+
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      // Second half of the preceding long/double; nothing to copy.
+      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // Longs and doubles occupy two expression stack slots, everything else one.
+    const bool takes_two_slots = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE);
+    int slot_count = takes_two_slots ? 2 : 1;
+    Address interp_slot(SP, Interpreter::expr_offset_in_bytes(total_args_passed - slot_count - i));
+
+    VMReg r = regs[i].first();
+    // A valid second half means the value is a full 64-bit word.
+    bool is_64bit = regs[i].second()->is_valid();
+
+    if (r->is_stack()) {
+      // Caller stack slots are now 'args_area_bytes' further away from SP.
+      if (!is_64bit) {
+        __ ldr_w(scratch, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + args_area_bytes));
+        __ str_w(scratch, interp_slot);
+      } else {
+        __ ldr(scratch, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + args_area_bytes));
+        __ str(scratch, interp_slot);
+      }
+    } else if (r->is_Register()) {
+      if (!is_64bit) {
+        __ str_w(r->as_Register(), interp_slot);
+      } else {
+        __ str(r->as_Register(), interp_slot);
+      }
+    } else if (r->is_FloatRegister()) {
+      if (sig_bt[i] != T_DOUBLE) {
+        __ str_s(r->as_FloatRegister(), interp_slot);
+      } else {
+        __ str_d(r->as_FloatRegister(), interp_slot);
+      }
+    } else {
+      assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
+    }
+  }
+
+  __ mov(Rparams, SP);
+
+  // Continue in the interpreter.
+  __ ldr(scratch, Address(Rmethod, Method::interpreter_entry_offset()));
+  __ br(scratch);
+
+#else
+
+  // Size of the interpreter argument area: one stack element per entry.
+  int args_area_bytes = total_args_passed * Interpreter::stackElementSize;
+  if (args_area_bytes != 0) {
+    __ sub_slow(SP, SP, args_area_bytes);
+  }
+
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      // Second half of the preceding long/double; nothing to copy.
+      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+      continue;
+    }
+    // Destination of this argument inside the new argument area.
+    int interp_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
+
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    // A valid second half means a two-word value (stored in two adjacent elements).
+    const bool two_words = r_2->is_valid();
+
+    if (r_1->is_stack()) {
+      // Caller stack slots are now 'args_area_bytes' further away from SP.
+      int caller_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + args_area_bytes;
+      if (two_words) {
+        __ ldr(scratch, Address(SP, caller_offset));
+        __ str(scratch, Address(SP, interp_offset - Interpreter::stackElementSize));
+        __ ldr(scratch, Address(SP, caller_offset + wordSize));
+        __ str(scratch, Address(SP, interp_offset));
+      } else {
+        __ ldr(scratch, Address(SP, caller_offset));
+        __ str(scratch, Address(SP, interp_offset));
+      }
+    } else if (r_1->is_Register()) {
+      if (two_words) {
+        __ str(r_1->as_Register(), Address(SP, interp_offset - Interpreter::stackElementSize));
+        __ str(r_2->as_Register(), Address(SP, interp_offset));
+      } else {
+        __ str(r_1->as_Register(), Address(SP, interp_offset));
+      }
+    } else if (r_1->is_FloatRegister()) {
+#ifdef __SOFTFP__
+      ShouldNotReachHere();
+#endif // __SOFTFP__
+      if (two_words) {
+        __ fstd(r_1->as_FloatRegister(), Address(SP, interp_offset - Interpreter::stackElementSize));
+      } else {
+        __ fsts(r_1->as_FloatRegister(), Address(SP, interp_offset));
+      }
+    } else {
+      assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
+    }
+  }
+
+  // Continue in the interpreter.
+  __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
+
+#endif // AARCH64
+}
+
+// Emits the adapters for one argument signature and wraps their entry points
+// in an AdapterHandlerEntry registered under 'fingerprint':
+//   - i2c entry:            code emitted by gen_i2c_adapter
+//   - c2i unverified entry: inline cache check placed in front of the c2i adapter
+//   - c2i entry:            code emitted by gen_c2i_adapter
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int total_args_passed,
+                                                            int comp_args_on_stack,
+                                                            const BasicType *sig_bt,
+                                                            const VMRegPair *regs,
+                                                            AdapterFingerPrint* fingerprint) {
+  // The i2c adapter comes first in the generated code.
+  address i2c_start = __ pc();
+  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+  // Unverified c2i entry: check the receiver's klass against the klass stored
+  // in the CompiledICHolder that Ricklass points to.
+  address c2i_unverified_start = __ pc();
+
+  const Register recv           = R0;
+  const Register expected_klass = Rtemp; // XXX should be OK for C2 but not 100% sure
+  const Register actual_klass   = AARCH64_ONLY(R8) NOT_AARCH64(R4);
+  Label skip_fixup;
+
+  __ load_klass(actual_klass, recv);
+  __ ldr(expected_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
+  __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_method_offset()));
+  __ cmp(actual_klass, expected_klass);
+
+  // Klass match and Method::code == 0: enter the c2i adapter at skip_fixup.
+  // Any other outcome (klass mismatch, or non-zero Method::code) goes to the
+  // IC miss stub.
+#ifdef AARCH64
+  Label ic_miss;
+  __ b(ic_miss, ne);
+  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
+  __ cbz(Rtemp, skip_fixup);
+  __ bind(ic_miss);
+  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
+#else
+  // Same decision expressed with conditionally executed instructions.
+  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
+  __ cmp(Rtemp, 0, eq);
+  __ b(skip_fixup, eq);
+  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
+#endif // AARCH64
+
+  // Verified c2i entry; gen_c2i_adapter binds skip_fixup inside its code.
+  address c2i_start = __ pc();
+  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+  __ flush();
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_start, c2i_start, c2i_unverified_start);
+}
+
+
+// Byte offset of an incoming stack argument, for use with FP as the base.
+static int reg2offset_in(VMReg r) {
+  // Two words are added to step over the saved FP and LR.
+  return 2*wordSize + VMRegImpl::stack_slot_size * r->reg2stack();
+}
+
+// Byte offset of an outgoing stack argument: the slot index of 'r' shifted by
+// the preserved slots, scaled to bytes.
+static int reg2offset_out(VMReg r) {
+  return VMRegImpl::stack_slot_size * (SharedRuntime::out_preserve_stack_slots() + r->reg2stack());
+}
+
+
+// Emits oop verification code for every reference-typed (T_OBJECT / T_ARRAY)
+// argument of 'method'. Nothing is emitted unless VerifyOops is set.
+//
+//   masm   - assembler to emit into
+//   method - method whose parameter count bounds the scan. Taken by const
+//            reference, like generate_native_wrapper does, because it is only
+//            read here and a by-value handle would be copied on every call.
+//   sig_bt - basic type of each argument slot
+//   regs   - location of each argument, parallel to sig_bt
+static void verify_oop_args(MacroAssembler* masm,
+                            const methodHandle& method,
+                            const BasicType* sig_bt,
+                            const VMRegPair* regs) {
+  if (!VerifyOops) {
+    return;
+  }
+
+  Register temp_reg = Rmethod;  // not part of any compiled calling seq
+  // The parameter count cannot change while code is being emitted: read it once.
+  const int param_slots = method->size_of_parameters();
+
+  for (int i = 0; i < param_slots; i++) {
+    if (sig_bt[i] != T_OBJECT && sig_bt[i] != T_ARRAY) {
+      continue;
+    }
+    VMReg r = regs[i].first();
+    assert(r->is_valid(), "bad oop arg");
+    if (r->is_stack()) {
+      // Stack-passed oop: load it into the scratch register before checking it.
+      __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
+      __ verify_oop(temp_reg);
+    } else {
+      __ verify_oop(r->as_Register());
+    }
+  }
+}
+
+// Emits the dispatch code for a method handle intrinsic called from compiled
+// code. Two kinds of intrinsic are accepted:
+//   - one with a non-zero ref kind, whose trailing argument is a MemberName;
+//   - invokeBasic, which always has a receiver.
+// Any other intrinsic id is a fatal error.
+//
+// The function works out which registers hold the receiver and the MemberName
+// (loading the MemberName from the stack into Rmethod when it was passed
+// there) and hands them to MethodHandles::generate_method_handle_dispatch.
+//
+//   masm   - assembler to emit into
+//   method - the intrinsic method. Taken by const reference, like
+//            generate_native_wrapper does, because it is only read here and a
+//            by-value handle would be copied on every call.
+//   sig_bt - basic type of each argument slot
+//   regs   - location of each argument, parallel to sig_bt
+static void gen_special_dispatch(MacroAssembler* masm,
+                                 const methodHandle& method,
+                                 const BasicType* sig_bt,
+                                 const VMRegPair* regs) {
+  verify_oop_args(masm, method, sig_bt, regs);
+  vmIntrinsics::ID iid = method->intrinsic_id();
+
+  // Now write the args into the outgoing interpreter space
+  bool has_receiver = false;
+  Register receiver_reg = noreg;
+  int member_arg_pos = -1;
+  Register member_reg = noreg;
+  int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
+  if (ref_kind != 0) {
+    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
+    member_reg = Rmethod;  // known to be free at this point
+    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+  } else if (iid == vmIntrinsics::_invokeBasic) {
+    has_receiver = true;
+  } else {
+    fatal("unexpected intrinsic id %d", iid);
+  }
+
+  if (member_reg != noreg) {
+    // Load the member_arg into register, if necessary.
+    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
+    VMReg r = regs[member_arg_pos].first();
+    if (r->is_stack()) {
+      __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
+    } else {
+      // no data motion is needed: use the register the argument already lives in
+      member_reg = r->as_Register();
+    }
+  }
+
+  if (has_receiver) {
+    // Make sure the receiver is loaded into a register.
+    assert(method->size_of_parameters() > 0, "oob");
+    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+    VMReg r = regs[0].first();
+    assert(r->is_valid(), "bad receiver arg");
+    if (r->is_stack()) {
+      // Porting note:  This assumes that compiled calling conventions always
+      // pass the receiver oop in a register.  If this is not true on some
+      // platform, pick a temp and load the receiver from stack.
+      assert(false, "receiver always in a register");
+      receiver_reg = j_rarg0;  // known to be free at this point
+      __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
+    } else {
+      // no data motion is needed
+      receiver_reg = r->as_Register();
+    }
+  }
+
+  // Figure out which address we are really jumping to:
+  MethodHandles::generate_method_handle_dispatch(masm, iid,
+                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
+}
+
+// ---------------------------------------------------------------------------
+// Generate a native wrapper for a given method. The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// convention (handlizes oops, etc), transitions to native, makes the call,
+// returns to java state (possibly blocking), unhandlizes any result and
+// returns.
+//
+// Parameters:
+//   masm       - assembler the wrapper code is emitted into
+//   method     - the native method (or method handle intrinsic) being wrapped
+//   compile_id - forwarded unchanged to nmethod::new_native_nmethod
+//   in_sig_bt  - basic types of the incoming (Java convention) arguments
+//   in_regs    - locations of the incoming arguments, parallel to in_sig_bt
+//   ret_type   - return type; selects how the native result is saved and unhandlized
+// Returns the nmethod created by nmethod::new_native_nmethod.
+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+ const methodHandle& method,
+ int compile_id,
+ BasicType* in_sig_bt,
+ VMRegPair* in_regs,
+ BasicType ret_type) {
+ // Method handle intrinsics only get the dispatch code emitted by
+ // gen_special_dispatch; the nmethod is created with a NULL OopMapSet.
+ if (method->is_method_handle_intrinsic()) {
+ vmIntrinsics::ID iid = method->intrinsic_id();
+ intptr_t start = (intptr_t)__ pc();
+ int vep_offset = ((intptr_t)__ pc()) - start;
+ gen_special_dispatch(masm,
+ method,
+ in_sig_bt,
+ in_regs);
+ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
+ __ flush();
+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
+ return nmethod::new_native_nmethod(method,
+ compile_id,
+ masm->code(),
+ vep_offset,
+ frame_complete,
+ stack_slots / VMRegImpl::slots_per_word,
+ in_ByteSize(-1),
+ in_ByteSize(-1),
+ (OopMapSet*)NULL);
+ }
+ // Arguments for JNI method include JNIEnv and Class if static
+
+ // Usage of Rtemp should be OK since scratched by native call
+
+ bool is_static = method->is_static();
+
+ // One extra C argument for JNIEnv*, plus one more for the class mirror
+ // handle when the method is static.
+ const int total_in_args = method->size_of_parameters();
+ int total_c_args = total_in_args + 1;
+ if (is_static) {
+ total_c_args++;
+ }
+
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
+ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+
+ // Build the C signature: JNIEnv* (T_ADDRESS), the class mirror (T_OBJECT)
+ // if static, then the Java arguments in their original order.
+ int argc = 0;
+ out_sig_bt[argc++] = T_ADDRESS;
+ if (is_static) {
+ out_sig_bt[argc++] = T_OBJECT;
+ }
+
+ int i;
+ for (i = 0; i < total_in_args; i++) {
+ out_sig_bt[argc++] = in_sig_bt[i];
+ }
+
+ // Frame size computation, in stack slots, starting from the lowest address:
+ // preserved slots + outgoing C arguments, oop handle area, optional lock
+ // slot, saved FP/LR.
+ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
+ // Since object arguments need to be wrapped, we must preserve space
+ // for those object arguments which come in registers (GPR_PARAMS maximum)
+ // plus one more slot for Klass handle (for static methods)
+ int oop_handle_offset = stack_slots;
+ stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
+
+ // Plus a lock if needed
+ int lock_slot_offset = 0;
+ if (method->is_synchronized()) {
+ lock_slot_offset = stack_slots;
+ assert(sizeof(BasicLock) == wordSize, "adjust this code");
+ stack_slots += VMRegImpl::slots_per_word;
+ }
+
+ // Space to save return address and FP
+ stack_slots += 2 * VMRegImpl::slots_per_word;
+
+ // Calculate the final stack size taking account of alignment
+ stack_slots = round_to(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
+ int stack_size = stack_slots * VMRegImpl::stack_slot_size;
+ // Distance in bytes from FP down to the lock slot: once the frame is set up
+ // (FP = SP after pushing FP/LR, then SP -= stack_size - 2*wordSize),
+ // FP - lock_slot_fp_offset addresses the same slot as SP + lock_slot_offset slots.
+ int lock_slot_fp_offset = stack_size - 2 * wordSize -
+ lock_slot_offset * VMRegImpl::stack_slot_size;
+
+ // Unverified entry point
+ address start = __ pc();
+
+ // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
+ const Register receiver = R0; // see receiverOpr()
+ __ load_klass(Rtemp, receiver);
+ __ cmp(Rtemp, Ricklass);
+ Label verified;
+
+ __ b(verified, eq); // jump over alignment no-ops too
+ __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
+ __ align(CodeEntryAlignment);
+
+ // Verified entry point
+ __ bind(verified);
+ int vep_offset = __ pc() - start;
+
+#ifdef AARCH64
+ // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
+ __ nop();
+#endif // AARCH64
+
+ if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
+ // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
+ // instead of doing a full VM transition once it's been computed.
+ Label slow_case;
+ const Register obj_reg = R0;
+
+ // Unlike for Object.hashCode, System.identityHashCode is static method and
+ // gets object as argument instead of the receiver.
+ if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
+ assert(method->is_static(), "method should be static");
+ // return 0 for null reference input, return val = R0 = obj_reg = 0
+#ifdef AARCH64
+ Label Continue;
+ __ cbnz(obj_reg, Continue);
+ __ ret();
+ __ bind(Continue);
+#else
+ __ cmp(obj_reg, 0);
+ __ bx(LR, eq);
+#endif
+ }
+
+ __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+
+ // Take the slow case when the unlocked bit of the mark word is clear.
+ assert(markOopDesc::unlocked_value == 1, "adjust this code");
+ __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
+
+ // With biased locking, also take the slow case when the biased-lock bit is set.
+ if (UseBiasedLocking) {
+ assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
+ __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
+ }
+
+ // Extract the hash bits; a zero hash field goes to the slow case, otherwise
+ // return the hash directly in R0.
+#ifdef AARCH64
+ __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
+ __ b(slow_case, eq);
+ __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
+ __ ret();
+#else
+ __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
+ __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
+ __ bx(LR, ne);
+#endif // AARCH64
+
+ __ bind(slow_case);
+ }
+
+ // Bang stack pages
+ __ arm_stack_overflow_check(stack_size, Rtemp);
+
+ // Setup frame linkage
+ __ raw_push(FP, LR);
+ __ mov(FP, SP);
+ __ sub_slow(SP, SP, stack_size - 2*wordSize);
+
+ int frame_complete = __ pc() - start;
+
+ OopMapSet* oop_maps = new OopMapSet();
+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+ // Java argument i maps to C argument i + extra_args (JNIEnv*, and the class
+ // mirror for static methods, come first).
+ const int extra_args = is_static ? 2 : 1;
+ int receiver_offset = -1;
+ int fp_regs_in_arguments = 0;
+
+ // Shuffle the arguments from the Java convention (in_regs) to the C
+ // convention (out_regs), walking from the last argument to the first.
+ // NOTE(review): the reverse order presumably avoids overwriting Java
+ // argument registers that have not been moved yet - confirm.
+ for (i = total_in_args; --i >= 0; ) {
+ switch (in_sig_bt[i]) {
+ case T_ARRAY:
+ case T_OBJECT: {
+ // Oops are passed to native code as handles: the address of a stack slot
+ // holding the oop, or NULL when the oop itself is null.
+ VMReg src = in_regs[i].first();
+ VMReg dst = out_regs[i + extra_args].first();
+ if (src->is_stack()) {
+ // The oop already lives in an incoming stack slot: the handle is the
+ // address of that slot (FP-relative), and the slot is recorded in the
+ // oop map.
+ assert(dst->is_stack(), "must be");
+ assert(i != 0, "Incoming receiver is always in a register");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
+ __ cmp(Rtemp, 0);
+#ifdef AARCH64
+ __ add(Rtemp, FP, reg2offset_in(src));
+ __ csel(Rtemp, ZR, Rtemp, eq);
+#else
+ __ add(Rtemp, FP, reg2offset_in(src), ne);
+#endif // AARCH64
+ __ str(Rtemp, Address(SP, reg2offset_out(dst)));
+ int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+ } else {
+ // The oop is in a register: spill it to the next free slot of the oop
+ // handle area, record that slot in the oop map, and pass the slot's
+ // address (or NULL) as the handle.
+ int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
+ __ str(src->as_Register(), Address(SP, offset));
+ map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
+ if ((i == 0) && (!is_static)) {
+ receiver_offset = offset;
+ }
+ oop_handle_offset += VMRegImpl::slots_per_word;
+
+#ifdef AARCH64
+ __ cmp(src->as_Register(), 0);
+ __ add(Rtemp, SP, offset);
+ __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
+ if (dst->is_stack()) {
+ __ str(Rtemp, Address(SP, reg2offset_out(dst)));
+ }
+#else
+ if (dst->is_stack()) {
+ __ movs(Rtemp, src->as_Register());
+ __ add(Rtemp, SP, offset, ne);
+ __ str(Rtemp, Address(SP, reg2offset_out(dst)));
+ } else {
+ __ movs(dst->as_Register(), src->as_Register());
+ __ add(dst->as_Register(), SP, offset, ne);
+ }
+#endif // AARCH64
+ }
+ }
+ // No break above: the oop case falls through into T_VOID, which only breaks.
+
+ case T_VOID:
+ break;
+
+#ifdef AARCH64
+ case T_FLOAT:
+ case T_DOUBLE: {
+ // Stack-passed values are copied; register-passed values must already be
+ // in the same float register in both conventions (asserted below).
+ VMReg src = in_regs[i].first();
+ VMReg dst = out_regs[i + extra_args].first();
+ if (src->is_stack()) {
+ assert(dst->is_stack(), "must be");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst)));
+ } else {
+ assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
+ assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
+ fp_regs_in_arguments++;
+ }
+ break;
+ }
+#else // AARCH64
+
+ // With __SOFTFP__, doubles are handled exactly like longs.
+#ifdef __SOFTFP__
+ case T_DOUBLE:
+#endif
+ case T_LONG: {
+ VMReg src_1 = in_regs[i].first();
+ VMReg src_2 = in_regs[i].second();
+ VMReg dst_1 = out_regs[i + extra_args].first();
+ VMReg dst_2 = out_regs[i + extra_args].second();
+#if (ALIGN_WIDE_ARGUMENTS == 0)
+ // C convention can mix a register and a stack slot for a
+ // 64-bits native argument.
+
+ // Note: following code should work independently of whether
+ // the Java calling convention follows C convention or whether
+ // it aligns 64-bit values.
+ if (dst_2->is_Register()) {
+ if (src_1->as_Register() != dst_1->as_Register()) {
+ assert(src_1->as_Register() != dst_2->as_Register() &&
+ src_2->as_Register() != dst_2->as_Register(), "must be");
+ __ mov(dst_2->as_Register(), src_2->as_Register());
+ __ mov(dst_1->as_Register(), src_1->as_Register());
+ } else {
+ assert(src_2->as_Register() == dst_2->as_Register(), "must be");
+ }
+ } else if (src_2->is_Register()) {
+ if (dst_1->is_Register()) {
+ // dst mixes a register and a stack slot
+ assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
+ assert(src_1->as_Register() != dst_1->as_Register(), "must be");
+ __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
+ __ mov(dst_1->as_Register(), src_1->as_Register());
+ } else {
+ // registers to stack slots
+ assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
+ __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
+ __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
+ }
+ } else if (src_1->is_Register()) {
+ if (dst_1->is_Register()) {
+ // src and dst must be R3 + stack slot
+ assert(dst_1->as_Register() == src_1->as_Register(), "must be");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src_2)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
+ } else {
+ // <R3,stack> -> <stack,stack>
+ assert(dst_2->is_stack() && src_2->is_stack(), "must be");
+ // LR is used as a second scratch register here.
+ __ ldr(LR, Address(FP, reg2offset_in(src_2)));
+ __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
+ __ str(LR, Address(SP, reg2offset_out(dst_2)));
+ }
+ } else {
+ // <stack,stack> -> <stack,stack>, using Rtemp and LR as scratch
+ assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
+ __ ldr(LR, Address(FP, reg2offset_in(src_2)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
+ __ str(LR, Address(SP, reg2offset_out(dst_2)));
+ }
+#else // ALIGN_WIDE_ARGUMENTS
+ // Here a 64-bit value is either entirely on the stack or entirely in
+ // registers on each side (asserted in every branch).
+ if (src_1->is_stack()) {
+ assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
+ __ ldr(LR, Address(FP, reg2offset_in(src_2)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
+ __ str(LR, Address(SP, reg2offset_out(dst_2)));
+ } else if (dst_1->is_stack()) {
+ assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
+ __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
+ __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
+ } else if (src_1->as_Register() == dst_1->as_Register()) {
+ assert(src_2->as_Register() == dst_2->as_Register(), "must be");
+ } else {
+ assert(src_1->as_Register() != dst_2->as_Register() &&
+ src_2->as_Register() != dst_2->as_Register(), "must be");
+ __ mov(dst_2->as_Register(), src_2->as_Register());
+ __ mov(dst_1->as_Register(), src_1->as_Register());
+ }
+#endif // ALIGN_WIDE_ARGUMENTS
+ break;
+ }
+
+ // Neither __SOFTFP__ nor __ABI_HARD__: float values arrive in float
+ // registers or on the stack, and leave in core registers or on the stack.
+#if (!defined __SOFTFP__ && !defined __ABI_HARD__)
+ case T_FLOAT: {
+ VMReg src = in_regs[i].first();
+ VMReg dst = out_regs[i + extra_args].first();
+ if (src->is_stack()) {
+ assert(dst->is_stack(), "must be");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst)));
+ } else if (dst->is_stack()) {
+ __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
+ } else {
+ assert(src->is_FloatRegister() && dst->is_Register(), "must be");
+ __ fmrs(dst->as_Register(), src->as_FloatRegister());
+ }
+ break;
+ }
+
+ case T_DOUBLE: {
+ VMReg src_1 = in_regs[i].first();
+ VMReg src_2 = in_regs[i].second();
+ VMReg dst_1 = out_regs[i + extra_args].first();
+ VMReg dst_2 = out_regs[i + extra_args].second();
+ if (src_1->is_stack()) {
+ assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
+ __ ldr(LR, Address(FP, reg2offset_in(src_2)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
+ __ str(LR, Address(SP, reg2offset_out(dst_2)));
+ } else if (dst_1->is_stack()) {
+ assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
+ __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
+#if (ALIGN_WIDE_ARGUMENTS == 0)
+ } else if (dst_2->is_stack()) {
+ assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
+ // double register must go into R3 + one stack slot
+ __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
+ __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
+#endif
+ } else {
+ assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
+ __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
+ }
+ break;
+ }
+#endif // !__SOFTFP__ && !__ABI_HARD__
+
+ // __ABI_HARD__: float values stay in float registers; only stack-passed
+ // values need to be copied or loaded.
+#ifdef __ABI_HARD__
+ case T_FLOAT: {
+ VMReg src = in_regs[i].first();
+ VMReg dst = out_regs[i + extra_args].first();
+ if (src->is_stack()) {
+ if (dst->is_stack()) {
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst)));
+ } else {
+ // C2 Java calling convention does not populate S14 and S15, therefore
+ // those need to be loaded from stack here
+ __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
+ fp_regs_in_arguments++;
+ }
+ } else {
+ assert(src->is_FloatRegister(), "must be");
+ fp_regs_in_arguments++;
+ }
+ break;
+ }
+ case T_DOUBLE: {
+ VMReg src_1 = in_regs[i].first();
+ VMReg src_2 = in_regs[i].second();
+ VMReg dst_1 = out_regs[i + extra_args].first();
+ VMReg dst_2 = out_regs[i + extra_args].second();
+ if (src_1->is_stack()) {
+ if (dst_1->is_stack()) {
+ assert(dst_2->is_stack(), "must be");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
+ __ ldr(LR, Address(FP, reg2offset_in(src_2)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
+ __ str(LR, Address(SP, reg2offset_out(dst_2)));
+ } else {
+ // C2 Java calling convention does not populate S14 and S15, therefore
+ // those need to be loaded from stack here
+ __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
+ fp_regs_in_arguments += 2;
+ }
+ } else {
+ assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
+ fp_regs_in_arguments += 2;
+ }
+ break;
+ }
+#endif // __ABI_HARD__
+#endif // AARCH64
+
+ default: {
+ // All remaining types: move the value using only the first register/slot
+ // of each pair.
+ assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
+ VMReg src = in_regs[i].first();
+ VMReg dst = out_regs[i + extra_args].first();
+ if (src->is_stack()) {
+ assert(dst->is_stack(), "must be");
+ __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
+ __ str(Rtemp, Address(SP, reg2offset_out(dst)));
+ } else if (dst->is_stack()) {
+ __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
+ } else {
+ assert(src->is_Register() && dst->is_Register(), "must be");
+ __ mov(dst->as_Register(), src->as_Register());
+ }
+ }
+ }
+ }
+
+ // Get Klass mirror
+ // For static methods the mirror is stored in the next free handle slot and
+ // the address of that slot is passed in c_rarg1.
+ int klass_offset = -1;
+ if (is_static) {
+ klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
+ __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
+ __ add(c_rarg1, SP, klass_offset);
+ __ str(Rtemp, Address(SP, klass_offset));
+ map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
+ }
+
+ // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
+ int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
+ assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
+ oop_maps->add_gc_map(pc_offset, map);
+
+#ifndef AARCH64
+ // Order last_Java_pc store with the thread state transition (to _thread_in_native)
+ __ membar(MacroAssembler::StoreStore, Rtemp);
+#endif // !AARCH64
+
+ // RedefineClasses() tracing support for obsolete method entry
+ if (log_is_enabled(Trace, redefine, class, obsolete)) {
+#ifdef AARCH64
+ __ NOT_TESTED();
+#endif
+ __ save_caller_save_registers();
+ __ mov(R0, Rthread);
+ __ mov_metadata(R1, method());
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
+ __ restore_caller_save_registers();
+ }
+
+ // Registers used by the locking/unlocking code. sync_handle and disp_hdr
+ // are read again after the native call.
+ // NOTE(review): presumably all four are callee-saved in the native ABI so
+ // that they survive the call - confirm.
+ const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
+ const Register sync_obj = AARCH64_ONLY(R21) NOT_AARCH64(R6);
+ const Register disp_hdr = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
+ const Register tmp = AARCH64_ONLY(R23) NOT_AARCH64(R8);
+
+ // NOTE(review): 'leave' is declared here but never bound or referenced in
+ // this function.
+ Label slow_lock, slow_lock_biased, lock_done, fast_lock, leave;
+ if (method->is_synchronized()) {
+ // The first argument is a handle to sync object (a class or an instance)
+ __ ldr(sync_obj, Address(R1));
+ // Remember the handle for the unlocking code
+ __ mov(sync_handle, R1);
+
+ if(UseBiasedLocking) {
+ __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
+ }
+
+ const Register mark = tmp;
+#ifdef AARCH64
+ // disp_hdr = address of the lock slot in this frame
+ __ sub(disp_hdr, FP, lock_slot_fp_offset);
+ assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
+
+ __ ldr(mark, sync_obj);
+
+ // Test if object is already locked
+ assert(markOopDesc::unlocked_value == 1, "adjust this code");
+ __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);
+
+ // Check for recursive lock
+ // See comments in InterpreterMacroAssembler::lock_object for
+ // explanations on the fast recursive locking check.
+ __ mov(Rtemp, SP);
+ __ sub(Rtemp, mark, Rtemp);
+ intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
+ Assembler::LogicalImmediate imm(mask, false);
+ __ ands(Rtemp, Rtemp, imm);
+ __ b(slow_lock, ne);
+
+ // Recursive locking: store 0 into a lock record
+ __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
+ __ b(lock_done);
+
+ // Unlocked object: save the mark word as displaced header, then try to
+ // install the lock with a CAS; failure goes to slow_lock.
+ __ bind(fast_lock);
+ __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
+
+ __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
+#else
+ // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
+ // That would be acceptable as either CAS or slow case path is taken in that case
+
+ __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
+ // disp_hdr = address of the lock slot in this frame
+ __ sub(disp_hdr, FP, lock_slot_fp_offset);
+ __ tst(mark, markOopDesc::unlocked_value);
+ __ b(fast_lock, ne);
+
+ // Check for recursive lock
+ // See comments in InterpreterMacroAssembler::lock_object for
+ // explanations on the fast recursive locking check.
+ // Check independently the low bits and the distance to SP
+ // -1- test low 2 bits
+ __ movs(Rtemp, AsmOperand(mark, lsl, 30));
+ // -2- test (hdr - SP) if the low two bits are 0
+ __ sub(Rtemp, mark, SP, eq);
+ __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
+ // If still 'eq' then recursive locking OK: set displaced header to 0
+ __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
+ __ b(lock_done, eq);
+ __ b(slow_lock);
+
+ // Unlocked object: save the mark word as displaced header, then try to
+ // install the lock with a CAS; failure goes to slow_lock.
+ __ bind(fast_lock);
+ __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
+
+ __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
+#endif // AARCH64
+
+ __ bind(lock_done);
+ }
+
+ // Get JNIEnv*
+ __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
+
+ // Perform thread state transition
+ __ mov(Rtemp, _thread_in_native);
+#ifdef AARCH64
+ // stlr instruction is used to force all preceding writes to be observed prior to thread state change
+ __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
+ __ stlr_w(Rtemp, Rtemp2);
+#else
+ __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
+#endif // AARCH64
+
+ // Finally, call the native method
+ __ call(method->native_function());
+
+ // Set FPSCR/FPCR to a known state
+ if (AlwaysRestoreFPU) {
+ __ restore_default_fp_mode();
+ }
+
+ // Do a safepoint check while thread is in transition state
+ InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
+ Label call_safepoint_runtime, return_to_java;
+ __ mov(Rtemp, _thread_in_native_trans);
+ __ ldr_literal(R2, safepoint_state);
+ __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
+
+ // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
+
+ // Take the slow path unless the safepoint state is _not_synchronized and
+ // the thread's suspend flags are all zero.
+ __ ldr_s32(R2, Address(R2));
+ __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
+ __ cmp(R2, SafepointSynchronize::_not_synchronized);
+ __ cond_cmp(R3, 0, eq);
+ __ b(call_safepoint_runtime, ne);
+ __ bind(return_to_java);
+
+ // Perform thread state transition and reguard stack yellow pages if needed
+ Label reguard, reguard_done;
+ __ mov(Rtemp, _thread_in_Java);
+ __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
+ __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
+
+ __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled);
+ __ b(reguard, eq);
+ __ bind(reguard_done);
+
+ // NOTE(review): 'retry' is declared here but never bound or referenced in
+ // this function.
+ Label slow_unlock, unlock_done, retry;
+ if (method->is_synchronized()) {
+ // Reload the object from the handle saved before the call.
+ __ ldr(sync_obj, Address(sync_handle));
+
+ if(UseBiasedLocking) {
+ __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
+ // disp_hdr may not have been saved on entry with biased locking
+ __ sub(disp_hdr, FP, lock_slot_fp_offset);
+ }
+
+ // See C1_MacroAssembler::unlock_object() for more comments
+ // A zero displaced header (stored on the recursive path) means there is
+ // nothing to release.
+ __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
+ __ cbz(R2, unlock_done);
+
+ __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
+
+ __ bind(unlock_done);
+ }
+
+ // Set last java frame and handle block to zero
+ __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
+ __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
+
+#ifdef AARCH64
+ __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
+ if (CheckJNICalls) {
+ __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
+ }
+
+
+ // Normalize sub-word results and unhandle object results.
+ switch (ret_type) {
+ case T_BOOLEAN:
+ __ tst(R0, 0xff);
+ __ cset(R0, ne);
+ break;
+ case T_CHAR : __ zero_extend(R0, R0, 16); break;
+ case T_BYTE : __ sign_extend(R0, R0, 8); break;
+ case T_SHORT : __ sign_extend(R0, R0, 16); break;
+ case T_INT : // fall through
+ case T_LONG : // fall through
+ case T_VOID : // fall through
+ case T_FLOAT : // fall through
+ case T_DOUBLE : /* nothing to do */ break;
+ case T_OBJECT : // fall through
+ case T_ARRAY : {
+ // A non-NULL result is a handle: load the oop it refers to.
+ Label L;
+ __ cbz(R0, L);
+ __ ldr(R0, Address(R0));
+ __ verify_oop(R0);
+ __ bind(L);
+ break;
+ }
+ default:
+ ShouldNotReachHere();
+ }
+#else
+ // Rtemp is zero here (see reset_last_Java_frame above).
+ __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
+ if (CheckJNICalls) {
+ __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
+ }
+
+ // Unhandle the result
+ // (a non-NULL handle in R0 is replaced by the oop it refers to)
+ if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+ __ cmp(R0, 0);
+ __ ldr(R0, Address(R0), ne);
+ }
+#endif // AARCH64
+
+ // Any exception pending?
+ __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
+ __ mov(SP, FP);
+
+#ifdef AARCH64
+ Label except;
+ __ cbnz(Rtemp, except);
+ __ raw_pop(FP, LR);
+ __ ret();
+
+ __ bind(except);
+ // Pop the frame and forward the exception. Rexception_pc contains return address.
+ __ raw_pop(FP, Rexception_pc);
+#else
+ __ cmp(Rtemp, 0);
+ // Pop the frame and return if no exception pending
+ __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
+ // Pop the frame and forward the exception. Rexception_pc contains return address.
+ __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
+ __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
+#endif // AARCH64
+ __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
+
+ // Everything below is out-of-line code, reached only through branches from
+ // the main path above; each piece branches back when done.
+
+ // Safepoint operation and/or pending suspend request is in progress.
+ // Save the return values and call the runtime function by hand.
+ __ bind(call_safepoint_runtime);
+ push_result_registers(masm, ret_type);
+ __ mov(R0, Rthread);
+ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
+ pop_result_registers(masm, ret_type);
+ __ b(return_to_java);
+
+ // Literal used by the ldr_literal of safepoint_state above.
+ __ bind_literal(safepoint_state);
+
+ // Reguard stack pages. Save native results around a call to C runtime.
+ __ bind(reguard);
+ push_result_registers(masm, ret_type);
+ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
+ pop_result_registers(masm, ret_type);
+ __ b(reguard_done);
+
+ if (method->is_synchronized()) {
+ // Locking slow case
+ if(UseBiasedLocking) {
+ // biased_locking_enter scratches disp_hdr (see the call above), so it is
+ // recomputed before falling into the common slow path.
+ __ bind(slow_lock_biased);
+ __ sub(disp_hdr, FP, lock_slot_fp_offset);
+ }
+
+ __ bind(slow_lock);
+
+ // The native call's arguments are already in place; save them around the
+ // runtime call.
+ push_param_registers(masm, fp_regs_in_arguments);
+
+ // last_Java_frame is already set, so do call_VM manually; no exception can occur
+ __ mov(R0, sync_obj);
+ __ mov(R1, disp_hdr);
+ __ mov(R2, Rthread);
+ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
+
+ pop_param_registers(masm, fp_regs_in_arguments);
+
+ __ b(lock_done);
+
+ // Unlocking slow case
+ __ bind(slow_unlock);
+
+ push_result_registers(masm, ret_type);
+
+ // Clear pending exception before reentering VM.
+ // Can store the oop in register since it is a leaf call.
+ assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
+ __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
+ Register zero = __ zero_register(Rtemp);
+ __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
+ __ mov(R0, sync_obj);
+ __ mov(R1, disp_hdr);
+ __ mov(R2, Rthread);
+ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
+ // Restore the pending exception saved before the call.
+ __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
+
+ pop_result_registers(masm, ret_type);
+
+ __ b(unlock_done);
+ }
+
+ __ flush();
+ // The two in_ByteSize arguments are the SP-relative byte offsets of the
+ // class mirror (static) or receiver handle slot, and of the lock slot.
+ return nmethod::new_native_nmethod(method,
+ compile_id,
+ masm->code(),
+ vep_offset,
+ frame_complete,
+ stack_slots / VMRegImpl::slots_per_word,
+ in_ByteSize(is_static ? klass_offset : receiver_offset),
+ in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
+ oop_maps);
+}
+
+// Returns the size adjustment (in words) for a c2i adapter activation, used during deoptimization.
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
+ const int extra_words = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
+#ifdef AARCH64
+ return round_to(extra_words, StackAlignmentInBytes/BytesPerWord);
+#else
+ return extra_words;
+#endif // AARCH64
+}
+
+
+uint SharedRuntime::out_preserve_stack_slots() {
+ return 0; // this port reports no out-preserve stack slots
+}
+
+// Emits the deopt blob: saves registers, fetches the UnrollBlock, pushes interpreter frames, unpacks them.
+//------------------------------generate_deopt_blob----------------------------
+void SharedRuntime::generate_deopt_blob() {
+ ResourceMark rm;
+#ifdef AARCH64
+ CodeBuffer buffer("deopt_blob", 1024+256, 1);
+#else
+ CodeBuffer buffer("deopt_blob", 1024, 1024);
+#endif // AARCH64
+ int frame_size_in_words;
+ OopMapSet* oop_maps;
+ int reexecute_offset;
+ int exception_in_tls_offset;
+ int exception_offset;
+
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ Label cont;
+ const Register Rkind = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit
+ const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
+ const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
+ assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
+ // Entry point at blob start: plain deoptimization (Rkind = Unpack_deopt)
+ address start = __ pc();
+
+ oop_maps = new OopMapSet();
+ // LR saved by caller (can be live in c2 method)
+
+ // A deopt is a case where LR may be live in the c2 nmethod. So it's
+ // not possible to call the deopt blob from the nmethod and pass the
+ // address of the deopt handler of the nmethod in LR. What happens
+ // now is that the caller of the deopt blob pushes the current
+ // address so the deopt blob doesn't have to do it. This way LR can
+ // be preserved, contains the live value from the nmethod and is
+ // saved at R14/R30_offset here.
+ OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
+ __ mov(Rkind, Deoptimization::Unpack_deopt);
+ __ b(cont);
+ // Entry point: exception passed in Rexception_obj/Rexception_pc
+ exception_offset = __ pc() - start;
+
+ // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
+ // exception_in_tls_offset entry point.
+ __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
+ __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
+ // Force return value to NULL to avoid confusing the escape analysis
+ // logic. Everything is dead here anyway.
+ __ mov(R0, 0);
+
+ exception_in_tls_offset = __ pc() - start;
+
+ // Exception data is in JavaThread structure
+ // Patch the return address of the current frame
+ __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
+ (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
+ {
+ const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
+ __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
+ }
+ __ mov(Rkind, Deoptimization::Unpack_exception);
+ __ b(cont);
+ // Entry point: reexecute (Rkind = Unpack_reexecute)
+ reexecute_offset = __ pc() - start;
+
+ (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
+ __ mov(Rkind, Deoptimization::Unpack_reexecute);
+
+ // Calculate UnrollBlock and save the result in Rublock
+ __ bind(cont);
+ __ mov(R0, Rthread);
+ __ mov(R1, Rkind);
+
+ int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
+ assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
+ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
+ if (pc_offset == -1) {
+ pc_offset = __ offset();
+ }
+ oop_maps->add_gc_map(pc_offset, map);
+ __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
+
+ __ mov(Rublock, R0);
+
+ // Reload Rkind from the UnrollBlock (might have changed)
+ __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+ Label noException;
+ __ cmp_32(Rkind, Deoptimization::Unpack_exception); // Was exception pending?
+ __ b(noException, ne);
+ // handle exception case
+#ifdef ASSERT
+ // assert that exception_pc is zero in tls
+ { Label L;
+ __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
+ __ cbz(Rexception_pc, L);
+ __ stop("exception pc should be null");
+ __ bind(L);
+ }
+#endif // ASSERT
+ __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
+ __ verify_oop(Rexception_obj);
+ {
+ const Register Rzero = __ zero_register(Rtemp);
+ __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
+ }
+
+ __ bind(noException);
+
+ // This frame is going away. Fetch return value, so we can move it to
+ // a new frame.
+ __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
+#ifndef AARCH64
+ __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
+#endif // !AARCH64
+#ifndef __SOFTFP__
+ __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
+#endif // !__SOFTFP__
+ // pop frame
+ __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
+
+ // Set initial stack state before pushing interpreter frames
+ __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
+ __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+ __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+
+#ifdef AARCH64
+ // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
+ // They are needed for correct stack walking during stack overflow handling.
+ // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
+ __ sub(Rtemp, Rtemp, 2*wordSize);
+ __ add(SP, SP, Rtemp, ex_uxtx);
+ __ raw_pop(FP, LR);
+
+#ifdef ASSERT
+ { Label L;
+ __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+ __ cmp(FP, Rtemp);
+ __ b(L, eq);
+ __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
+ __ bind(L);
+ }
+ { Label L;
+ __ ldr(Rtemp, Address(R2));
+ __ cmp(LR, Rtemp);
+ __ b(L, eq);
+ __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
+ __ bind(L);
+ }
+#endif // ASSERT
+
+#else
+ __ add(SP, SP, Rtemp);
+#endif // AARCH64
+
+#ifdef ASSERT
+ // Compilers generate code that bang the stack by as much as the
+ // interpreter would need. So this stack banging should never
+ // trigger a fault. Verify that it does not on non product builds.
+ // See if it is enough stack to push deoptimized frames
+ if (UseStackBanging) {
+#ifndef AARCH64
+ // The compiled method that we are deoptimizing was popped from the stack.
+ // If the stack bang results in a stack overflow, we don't return to the
+ // method that is being deoptimized. The stack overflow exception is
+ // propagated to the caller of the deoptimized method. Need to get the pc
+ // from the caller in LR and restore FP.
+ __ ldr(LR, Address(R2, 0));
+ __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+#endif // !AARCH64
+ __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+ __ arm_stack_overflow_check(R8, Rtemp);
+ }
+#endif // ASSERT
+ __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+
+#ifndef AARCH64
+ // Pick up the initial fp we should save
+ // XXX Note: was ldr(FP, Address(FP));
+
+ // The compiler no longer uses FP as a frame pointer for the
+ // compiled code. It can be used by the allocator in C2 or to
+ // memorize the original SP for JSR292 call sites.
+
+ // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
+ // Deoptimization::fetch_unroll_info computes the right FP value and
+ // stores it in Rublock.initial_info. This has been activated for ARM.
+ __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+#endif // !AARCH64
+
+ __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
+ __ mov(Rsender, SP);
+#ifdef AARCH64
+ __ sub(SP, SP, Rtemp, ex_uxtx);
+#else
+ __ sub(SP, SP, Rtemp);
+#endif // AARCH64
+
+ // Push interpreter frames in a loop
+ Label loop;
+ __ bind(loop);
+ __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc
+ __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size
+
+ __ raw_push(FP, LR); // create new frame
+ __ mov(FP, SP);
+ __ sub(Rtemp, Rtemp, 2*wordSize);
+
+#ifdef AARCH64
+ __ sub(SP, SP, Rtemp, ex_uxtx);
+#else
+ __ sub(SP, SP, Rtemp);
+#endif // AARCH64
+
+ __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
+#ifdef AARCH64
+ __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
+#else
+ __ mov(LR, 0);
+ __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // AARCH64
+
+ __ subs(R8, R8, 1); // decrement counter
+ __ mov(Rsender, SP);
+ __ b(loop, ne);
+
+ // Re-push self-frame
+ __ ldr(LR, Address(R2));
+ __ raw_push(FP, LR);
+ __ mov(FP, SP);
+ __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
+
+ // Restore frame locals after moving the frame
+ __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
+#ifndef AARCH64
+ __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
+#endif // !AARCH64
+
+#ifndef __SOFTFP__
+ __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
+#endif // !__SOFTFP__
+
+#ifndef AARCH64
+#ifdef ASSERT
+ // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
+ { Label L;
+ __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+ __ cmp_32(Rkind, Rtemp);
+ __ b(L, eq);
+ __ stop("Rkind was overwritten");
+ __ bind(L);
+ }
+#endif // ASSERT
+#endif // !AARCH64
+
+ // Call unpack_frames with proper arguments
+ __ mov(R0, Rthread);
+ __ mov(R1, Rkind);
+
+ pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
+ assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
+ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
+ if (pc_offset == -1) {
+ pc_offset = __ offset();
+ }
+ oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
+ __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
+
+ // Collect return values, pop self-frame and jump to interpreter
+ __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
+#ifndef AARCH64
+ __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
+#endif // !AARCH64
+ // Interpreter floats controlled by __SOFTFP__, but compiler
+ // float return value registers controlled by __ABI_HARD__
+ // This matters for vfp-sflt builds.
+#ifndef __SOFTFP__
+ // Interpreter hard float
+#ifdef __ABI_HARD__
+ // Compiler float return value in FP registers
+ __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
+#else
+ // Compiler float return value in integer registers,
+ // copy to D0 for interpreter (S0 <-- R0)
+ __ fmdrr(D0_tos, R0, R1);
+#endif // __ABI_HARD__
+#endif // !__SOFTFP__
+ __ mov(SP, FP);
+
+#ifdef AARCH64
+ __ raw_pop(FP, LR);
+ __ ret();
+#else
+ __ pop(RegisterSet(FP) | RegisterSet(PC));
+#endif // AARCH64
+
+ __ flush();
+
+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
+ reexecute_offset, frame_size_in_words);
+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+}
+
+#ifdef COMPILER2
+
+//------------------------------generate_uncommon_trap_blob--------------------
+// Ought to generate an ideal graph & compile, but here's some ARM ASM
+// instead.
+void SharedRuntime::generate_uncommon_trap_blob() {
+ // allocate space for the code
+ ResourceMark rm;
+
+ // setup code generation tools
+ int pad = VerifyThread ? 512 : 0;
+#ifdef _LP64
+ CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
+#else
+ // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
+ // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
+ CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
+#endif // _LP64
+ // bypassed when code generation useless
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
+ const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
+ assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
+
+ //
+ // This is the entry point for all traps the compiler takes when it thinks
+ // it cannot handle further execution of compilation code. The frame is
+ // deoptimized in these cases and converted into interpreter frames for
+ // execution
+ // The steps taken by this frame are as follows:
+ // - push a fake "unpack_frame"
+ // - call the C routine Deoptimization::uncommon_trap (this function
+ // packs the current compiled frame into vframe arrays and returns
+ // information about the number and size of interpreter frames which
+ // are equivalent to the frame which is being deoptimized)
+ // - deallocate the "unpack_frame"
+ // - deallocate the deoptimization frame
+ // - in a loop using the information returned in the previous step
+ // push interpreter frames;
+ // - create a dummy "unpack_frame"
+ // - call the C routine: Deoptimization::unpack_frames (this function
+ // lays out values on the interpreter frame which was just created)
+ // - deallocate the dummy unpack_frame
+ // - return to the interpreter entry point
+ //
+ // Refer to the following methods for more information:
+ // - Deoptimization::uncommon_trap
+ // - Deoptimization::unpack_frames
+
+ // the unloaded class index is in R0 (first parameter to this blob)
+
+ __ raw_push(FP, LR);
+ __ set_last_Java_frame(SP, FP, false, Rtemp);
+ __ mov(R2, Deoptimization::Unpack_uncommon_trap);
+ __ mov(R1, R0);
+ __ mov(R0, Rthread);
+ __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
+ __ mov(Rublock, R0);
+ __ reset_last_Java_frame(Rtemp);
+ __ raw_pop(FP, LR);
+
+#ifdef ASSERT
+ { Label L;
+ __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+ __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
+ __ b(L, eq);
+ __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
+ __ bind(L);
+ }
+#endif // ASSERT
+
+
+ // Set initial stack state before pushing interpreter frames
+ __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
+ __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+ __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+
+#ifdef AARCH64
+ // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
+ // They are needed for correct stack walking during stack overflow handling.
+ // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
+ __ sub(Rtemp, Rtemp, 2*wordSize);
+ __ add(SP, SP, Rtemp, ex_uxtx);
+ __ raw_pop(FP, LR);
+
+#ifdef ASSERT
+ { Label L;
+ __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+ __ cmp(FP, Rtemp);
+ __ b(L, eq);
+ __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
+ __ bind(L);
+ }
+ { Label L;
+ __ ldr(Rtemp, Address(R2));
+ __ cmp(LR, Rtemp);
+ __ b(L, eq);
+ __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
+ __ bind(L);
+ }
+#endif // ASSERT
+
+#else
+ __ add(SP, SP, Rtemp);
+#endif // AARCH64
+
+ // See if it is enough stack to push deoptimized frames
+#ifdef ASSERT
+ // Compilers generate code that bang the stack by as much as the
+ // interpreter would need. So this stack banging should never
+ // trigger a fault. Verify that it does not on non product builds.
+ if (UseStackBanging) {
+#ifndef AARCH64
+ // The compiled method that we are deoptimizing was popped from the stack.
+ // If the stack bang results in a stack overflow, we don't return to the
+ // method that is being deoptimized. The stack overflow exception is
+ // propagated to the caller of the deoptimized method. Need to get the pc
+ // from the caller in LR and restore FP.
+ __ ldr(LR, Address(R2, 0));
+ __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+#endif // !AARCH64
+ __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+ __ arm_stack_overflow_check(R8, Rtemp);
+ }
+#endif // ASSERT
+ __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+ __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
+ __ mov(Rsender, SP);
+#ifdef AARCH64
+ __ sub(SP, SP, Rtemp, ex_uxtx);
+#else
+ __ sub(SP, SP, Rtemp);
+#endif // AARCH64
+#ifndef AARCH64
+ // Pick up the initial FP from the unroll block (was: ldr(FP, Address(FP)))
+ __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+#endif // !AARCH64
+
+ // Push interpreter frames in a loop
+ Label loop;
+ __ bind(loop);
+ __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc
+ __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size
+
+ __ raw_push(FP, LR); // create new frame
+ __ mov(FP, SP);
+ __ sub(Rtemp, Rtemp, 2*wordSize);
+
+#ifdef AARCH64
+ __ sub(SP, SP, Rtemp, ex_uxtx);
+#else
+ __ sub(SP, SP, Rtemp);
+#endif // AARCH64
+
+ __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
+#ifdef AARCH64
+ __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
+#else
+ __ mov(LR, 0);
+ __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // AARCH64
+ __ subs(R8, R8, 1); // decrement counter
+ __ mov(Rsender, SP);
+ __ b(loop, ne);
+
+ // Re-push self-frame
+ __ ldr(LR, Address(R2));
+ __ raw_push(FP, LR);
+ __ mov(FP, SP);
+
+ // Call unpack_frames with proper arguments
+ __ mov(R0, Rthread);
+ __ mov(R1, Deoptimization::Unpack_uncommon_trap);
+ __ set_last_Java_frame(SP, FP, false, Rtemp);
+ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
+ // No oop map is recorded for this call; the blob is created with a NULL OopMapSet below.
+ __ reset_last_Java_frame(Rtemp);
+
+ __ mov(SP, FP);
+#ifdef AARCH64
+ __ raw_pop(FP, LR);
+ __ ret();
+#else
+ __ pop(RegisterSet(FP) | RegisterSet(PC));
+#endif // AARCH64
+
+ masm->flush();
+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
+}
+
+#endif // COMPILER2
+
+//------------------------------generate_handler_blob------
+//
+// Generate a special Compile2Runtime blob that saves all registers,
+// sets up the oop map, and calls the safepoint code at call_ptr to stop
+// the compiled code for a safepoint. A pending exception found after the
+// call is forwarded; otherwise control returns to the compiled code.
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
+ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+ ResourceMark rm;
+ CodeBuffer buffer("handler_blob", 256, 256);
+ int frame_size_words;
+ OopMapSet* oop_maps;
+
+ bool cause_return = (poll_type == POLL_AT_RETURN);
+
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ address start = __ pc();
+ oop_maps = new OopMapSet();
+
+ if (!cause_return) {
+#ifdef AARCH64
+ __ raw_push(LR, LR);
+#else
+ __ sub(SP, SP, 4); // make room for LR which may still be live
+ // here if we are coming from a c2 method
+#endif // AARCH64
+ }
+
+ OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
+ if (!cause_return) {
+ // update saved PC with correct value
+ // need 2 steps because LR can be live in c2 method
+ __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
+ __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
+ }
+
+ __ mov(R0, Rthread);
+ int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
+ assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
+ __ call(call_ptr);
+ if (pc_offset == -1) {
+ pc_offset = __ offset();
+ }
+ oop_maps->add_gc_map(pc_offset, map);
+ __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
+
+ // Check for pending exception
+ __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
+ __ cmp(Rtemp, 0); // flags are consumed only after the register restore below
+
+#ifdef AARCH64
+ RegisterSaver::restore_live_registers(masm, cause_return);
+ Register ret_addr = cause_return ? LR : Rtemp;
+ if (!cause_return) {
+ __ raw_pop(FP, ret_addr);
+ }
+
+ Label throw_exception;
+ __ b(throw_exception, ne);
+ __ br(ret_addr);
+
+ __ bind(throw_exception);
+ __ mov(Rexception_pc, ret_addr);
+#else // AARCH64
+ if (!cause_return) {
+ RegisterSaver::restore_live_registers(masm, false);
+ __ pop(PC, eq);
+ __ pop(Rexception_pc);
+ } else {
+ RegisterSaver::restore_live_registers(masm);
+ __ bx(LR, eq);
+ __ mov(Rexception_pc, LR);
+ }
+#endif // AARCH64
+
+ __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
+
+ __ flush();
+
+ return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
+}
+// Generate a stub that saves registers, calls destination(thread), then jumps to the returned target or forwards a pending exception.
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
+ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+ ResourceMark rm;
+ CodeBuffer buffer(name, 1000, 512);
+ int frame_size_words;
+ OopMapSet *oop_maps;
+ int frame_complete;
+
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ Label pending_exception;
+
+ int start = __ offset();
+
+ oop_maps = new OopMapSet();
+ OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
+
+ frame_complete = __ offset();
+
+ __ mov(R0, Rthread);
+
+ int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
+ assert(start == 0, "warning: start differs from code_begin");
+ __ call(destination);
+ if (pc_offset == -1) {
+ pc_offset = __ offset();
+ }
+ oop_maps->add_gc_map(pc_offset, map);
+ __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
+
+ __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
+ __ cbnz(R1, pending_exception);
+
+ // Overwrite saved register values
+
+ // Place metadata result of VM call into the saved Rmethod slot
+ __ get_vm_result_2(R1, Rtemp);
+ __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
+
+ // Place target address (VM call result) into the saved Rtemp slot; used by jump(Rtemp) after the restore
+ __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
+
+ RegisterSaver::restore_live_registers(masm);
+ __ jump(Rtemp);
+
+ __ bind(pending_exception);
+
+ RegisterSaver::restore_live_registers(masm);
+ const Register Rzero = __ zero_register(Rtemp);
+ __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
+ __ mov(Rexception_pc, LR);
+ __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
+
+ __ flush();
+
+ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/stubGenerator_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,4510 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_arm.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_arm.hpp"
+#include "oops/instanceOop.hpp"
+#include "oops/method.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+// Declaration and definition of StubGenerator (no .hpp file).
+// For a more detailed description of the stub routine structure
+// see the comment in stubRoutines.hpp
+
+#define __ _masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// -------------------------------------------------------------------------------------------------------------------------
+// Stub Code definitions
+
+// Platform dependent parameters for array copy stubs
+
+// Note: we have noticed a huge change in behavior on a microbenchmark
+// from platform to platform depending on the configuration.
+
+// Instead of adding a series of command line options (which
+// unfortunately have to be done in the shared file and cannot appear
+// only in the ARM port), the tested results are hard-coded here in a set
+// of options, selected by specifying 'ArmCopyPlatform'
+
+// Currently, this 'platform' is hardcoded to a value that is a good
+// enough trade-off. However, one can easily modify this file to test
+// the hard-coded configurations or create new ones. If the gain is
+// significant, we could decide to either add command line options or
+// add code to automatically choose a configuration.
+
+// see comments below for the various configurations created
+#define DEFAULT_ARRAYCOPY_CONFIG 0
+#define TEGRA2_ARRAYCOPY_CONFIG 1
+#define IMX515_ARRAYCOPY_CONFIG 2
+
+// Hard coded choices (XXX: could be changed to a command line option)
+#define ArmCopyPlatform DEFAULT_ARRAYCOPY_CONFIG
+
+#ifdef AARCH64
+#define ArmCopyCacheLineSize 64
+#else
+#define ArmCopyCacheLineSize 32 // not worth optimizing to 64 according to measured gains
+#endif // AARCH64
+
+// TODO-AARCH64: tune and revise AArch64 arraycopy optimizations
+
+// configuration for each kind of loop
+typedef struct {
+ int pld_distance; // prefetch distance (0 => no prefetch, <0: prefetch_before, >0: prefetch after);
+#ifndef AARCH64
+ bool split_ldm; // if true, split each LDM into LDMs with fewer registers
+ bool split_stm; // if true, split each STM into STMs with fewer registers
+#endif // !AARCH64
+} arraycopy_loop_config;
+
+// configuration for all loops
+typedef struct {
+ // const char *description;
+ arraycopy_loop_config forward_aligned; // settings for the forward aligned copy loop
+ arraycopy_loop_config backward_aligned; // settings for the backward aligned copy loop
+ arraycopy_loop_config forward_shifted; // settings for the forward shifted copy loop
+ arraycopy_loop_config backward_shifted; // settings for the backward shifted copy loop
+} arraycopy_platform_config;
+
+// configured platforms
+static arraycopy_platform_config arraycopy_configurations[] = {
+ // configuration parameters for arraycopy loops; on 32-bit ARM the entries follow the order of the *_ARRAYCOPY_CONFIG values (presumably indexed by ArmCopyPlatform)
+#ifdef AARCH64
+ {
+ {-256 }, // forward aligned
+ {-128 }, // backward aligned
+ {-256 }, // forward shifted
+ {-128 } // backward shifted
+ }
+#else
+
+ // Configurations were chosen based on manual analysis of benchmark
+ // results, minimizing overhead with respect to best results on the
+ // different test cases.
+
+ // Prefetch before is always favored since it avoids dirtying the
+ // cache uselessly for small copies. Code for prefetch after has
+ // been kept in case the difference is significant for some
+ // platforms but we might consider dropping it.
+
+ // distance, ldm, stm
+ {
+ // default: tradeoff tegra2/imx515/nv-tegra2,
+ // Notes on benchmarking:
+ // - not far from optimal configuration on nv-tegra2
+ // - within 5% of optimal configuration except for backward aligned on IMX
+ // - up to 40% from optimal configuration for backward shifted and backward aligned for tegra2
+ // but still on par with the operating system copy
+ {-256, true, true }, // forward aligned
+ {-256, true, true }, // backward aligned
+ {-256, false, false }, // forward shifted
+ {-256, true, true } // backward shifted
+ },
+ {
+ // configuration tuned on tegra2-4.
+ // Warning: should not be used on nv-tegra2 !
+ // Notes:
+ // - prefetch after gives 40% gain on backward copies on tegra2-4,
+ // resulting in better number than the operating system
+ // copy. However, this can lead to a 300% loss on nv-tegra and has
+ // more impact on the cache (fetches further than what is
+ // copied). Use this configuration with care, in case it improves
+ // reference benchmarks.
+ {-256, true, true }, // forward aligned
+ {96, false, false }, // backward aligned
+ {-256, false, false }, // forward shifted
+ {96, false, false } // backward shifted
+ },
+ {
+ // configuration tuned on imx515
+ // Notes:
+ // - smaller prefetch distance is sufficient to get good result and might be more stable
+ // - refined backward aligned options within 5% of optimal configuration except for
+ // tests where the arrays fit in the cache
+ {-160, false, false }, // forward aligned
+ {-160, false, false }, // backward aligned
+ {-160, false, false }, // forward shifted
+ {-160, true, true } // backward shifted
+ }
+#endif // AARCH64
+};
+
+class StubGenerator: public StubCodeGenerator {
+
+#ifdef PRODUCT
+#define inc_counter_np(a,b,c) ((void)0)
+#else
+#define inc_counter_np(counter, t1, t2) \
+ BLOCK_COMMENT("inc_counter " #counter); \
+ __ inc_counter(&counter, t1, t2);
+#endif
+
+ private:
+
+ address generate_call_stub(address& return_address) { // returns the stub entry; return_address receives the pc that follows the call instruction
+ StubCodeMark mark(this, "StubRoutines", "call_stub");
+ address start = __ pc();
+
+#ifdef AARCH64
+ const int saved_regs_size = 192; // 12 register pairs of 16 bytes each (checked by the asserts below)
+
+ __ stp(FP, LR, Address(SP, -saved_regs_size, pre_indexed));
+ __ mov(FP, SP);
+
+ int sp_offset = 16;
+ assert(frame::entry_frame_call_wrapper_offset * wordSize == sp_offset, "adjust this code");
+ __ stp(R0, ZR, Address(SP, sp_offset)); sp_offset += 16; // R0 goes into the call wrapper slot
+
+ const int saved_result_and_result_type_offset = sp_offset;
+ __ stp(R1, R2, Address(SP, sp_offset)); sp_offset += 16; // R1: result address, R2: result type (reloaded after the call)
+ __ stp(R19, R20, Address(SP, sp_offset)); sp_offset += 16;
+ __ stp(R21, R22, Address(SP, sp_offset)); sp_offset += 16;
+ __ stp(R23, R24, Address(SP, sp_offset)); sp_offset += 16;
+ __ stp(R25, R26, Address(SP, sp_offset)); sp_offset += 16;
+ __ stp(R27, R28, Address(SP, sp_offset)); sp_offset += 16;
+
+ __ stp_d(V8, V9, Address(SP, sp_offset)); sp_offset += 16;
+ __ stp_d(V10, V11, Address(SP, sp_offset)); sp_offset += 16;
+ __ stp_d(V12, V13, Address(SP, sp_offset)); sp_offset += 16;
+ __ stp_d(V14, V15, Address(SP, sp_offset)); sp_offset += 16;
+ assert (sp_offset == saved_regs_size, "adjust this code");
+
+ __ mov(Rmethod, R3); // R3: incoming method
+ __ mov(Rthread, R7); // R7: incoming thread
+ __ reinit_heapbase();
+
+ { // Pass parameters
+ Label done_parameters, pass_parameters;
+
+ __ mov(Rparams, SP);
+ __ cbz_w(R6, done_parameters); // R6: parameter count (32-bit); nothing to copy when zero
+
+ __ sub(Rtemp, SP, R6, ex_uxtw, LogBytesPerWord);
+ __ align_reg(SP, Rtemp, StackAlignmentInBytes);
+ __ add(Rparams, SP, R6, ex_uxtw, LogBytesPerWord);
+
+ __ bind(pass_parameters);
+ __ subs_w(R6, R6, 1);
+ __ ldr(Rtemp, Address(R5, wordSize, post_indexed)); // R5 walks the incoming parameter array upwards
+ __ str(Rtemp, Address(Rparams, -wordSize, pre_indexed)); // Rparams walks down towards SP
+ __ b(pass_parameters, ne);
+
+ __ bind(done_parameters);
+
+#ifdef ASSERT
+ {
+ Label L;
+ __ cmp(SP, Rparams);
+ __ b(L, eq);
+ __ stop("SP does not match Rparams");
+ __ bind(L);
+ }
+#endif
+ }
+
+ __ mov(Rsender_sp, SP);
+ __ blr(R4); // R4: entry point to call
+ return_address = __ pc();
+
+ __ mov(SP, FP); // drop the pushed parameters
+
+ __ ldp(R1, R2, Address(SP, saved_result_and_result_type_offset));
+
+ { // Handle return value
+ Label cont;
+ __ str(R0, Address(R1));
+
+ __ cmp_w(R2, T_DOUBLE);
+ __ ccmp_w(R2, T_FLOAT, Assembler::flags_for_condition(eq), ne); // flags end up eq iff the type is T_DOUBLE or T_FLOAT
+ __ b(cont, ne);
+
+ __ str_d(V0, Address(R1)); // floating point result: overwrite the slot with V0
+ __ bind(cont);
+ }
+
+ sp_offset = saved_result_and_result_type_offset + 16;
+ __ ldp(R19, R20, Address(SP, sp_offset)); sp_offset += 16;
+ __ ldp(R21, R22, Address(SP, sp_offset)); sp_offset += 16;
+ __ ldp(R23, R24, Address(SP, sp_offset)); sp_offset += 16;
+ __ ldp(R25, R26, Address(SP, sp_offset)); sp_offset += 16;
+ __ ldp(R27, R28, Address(SP, sp_offset)); sp_offset += 16;
+
+ __ ldp_d(V8, V9, Address(SP, sp_offset)); sp_offset += 16;
+ __ ldp_d(V10, V11, Address(SP, sp_offset)); sp_offset += 16;
+ __ ldp_d(V12, V13, Address(SP, sp_offset)); sp_offset += 16;
+ __ ldp_d(V14, V15, Address(SP, sp_offset)); sp_offset += 16;
+ assert (sp_offset == saved_regs_size, "adjust this code");
+
+ __ ldp(FP, LR, Address(SP, saved_regs_size, post_indexed));
+ __ ret();
+
+#else // AARCH64
+
+ assert(frame::entry_frame_call_wrapper_offset == 0, "adjust this code");
+
+ __ mov(Rtemp, SP); // remember the incoming SP: the stacked arguments are loaded from it below
+ __ push(RegisterSet(FP) | RegisterSet(LR));
+#ifndef __SOFTFP__
+ __ fstmdbd(SP, FloatRegisterSet(D8, 8), writeback);
+#endif
+ __ stmdb(SP, RegisterSet(R0, R2) | RegisterSet(R4, R6) | RegisterSet(R8, R10) | altFP_7_11, writeback);
+ __ mov(Rmethod, R3);
+ __ ldmia(Rtemp, RegisterSet(R1, R3) | Rthread); // stacked arguments
+
+ // XXX: TODO
+ // Would be better with respect to native tools if the following
+ // setting of FP was changed to conform to the native ABI, with FP
+ // pointing to the saved FP slot (and the corresponding modifications
+ // for entry_frame_call_wrapper_offset and frame::real_fp).
+ __ mov(FP, SP);
+
+ {
+ Label no_parameters, pass_parameters;
+ __ cmp(R3, 0); // R3: parameter count
+ __ b(no_parameters, eq);
+
+ __ bind(pass_parameters);
+ __ ldr(Rtemp, Address(R2, wordSize, post_indexed)); // Rtemp OK, unused and scratchable
+ __ subs(R3, R3, 1);
+ __ push(Rtemp);
+ __ b(pass_parameters, ne);
+ __ bind(no_parameters);
+ }
+
+ __ mov(Rsender_sp, SP);
+ __ blx(R1); // R1: entry point to call
+ return_address = __ pc();
+
+ __ add(SP, FP, wordSize); // Skip link to JavaCallWrapper
+ __ pop(RegisterSet(R2, R3)); // R2 = saved R1 (result address), R3 = saved R2 (result type)
+#ifndef __ABI_HARD__
+ __ cmp(R3, T_LONG);
+ __ cmp(R3, T_DOUBLE, ne); // conditional compare: flags are eq iff the type is T_LONG or T_DOUBLE
+ __ str(R0, Address(R2));
+ __ str(R1, Address(R2, wordSize), eq); // second word is stored only for 64-bit results
+#else
+ Label cont, l_float, l_double;
+
+ __ cmp(R3, T_DOUBLE);
+ __ b(l_double, eq);
+
+ __ cmp(R3, T_FLOAT);
+ __ b(l_float, eq);
+
+ __ cmp(R3, T_LONG);
+ __ str(R0, Address(R2));
+ __ str(R1, Address(R2, wordSize), eq);
+ __ b(cont);
+
+
+ __ bind(l_double);
+ __ fstd(D0, Address(R2));
+ __ b(cont);
+
+ __ bind(l_float);
+ __ fsts(S0, Address(R2));
+
+ __ bind(cont);
+#endif
+
+ __ pop(RegisterSet(R4, R6) | RegisterSet(R8, R10) | altFP_7_11);
+#ifndef __SOFTFP__
+ __ fldmiad(SP, FloatRegisterSet(D8, 8), writeback);
+#endif
+ __ pop(RegisterSet(FP) | RegisterSet(PC)); // popping the saved LR into PC returns to the caller
+
+#endif // AARCH64
+ return start;
+ }
+
+
+ // (in) Rexception_obj: exception oop
+ address generate_catch_exception() { // stub: record Rexception_obj as pending and resume at the call stub return address
+ StubCodeMark mark(this, "StubRoutines", "catch_exception");
+ address start = __ pc();
+
+ __ str(Rexception_obj, Address(Rthread, Thread::pending_exception_offset())); // store the exception oop in the thread's pending_exception field
+ __ b(StubRoutines::_call_stub_return_address); // continue right after the call made by the call stub
+
+ return start;
+ }
+
+
+ // (in) Rexception_pc: return address
+ address generate_forward_exception() { // stub: look up the handler for Rexception_pc and jump to it with the pending exception in Rexception_obj
+ StubCodeMark mark(this, "StubRoutines", "forward exception");
+ address start = __ pc();
+
+ __ mov(c_rarg0, Rthread);
+ __ mov(c_rarg1, Rexception_pc);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address,
+ SharedRuntime::exception_handler_for_return_address),
+ c_rarg0, c_rarg1);
+ __ ldr(Rexception_obj, Address(Rthread, Thread::pending_exception_offset()));
+ const Register Rzero = __ zero_register(Rtemp); // Rtemp OK (cleared by above call)
+ __ str(Rzero, Address(Rthread, Thread::pending_exception_offset())); // clear the pending exception now that it is held in Rexception_obj
+
+#ifdef ASSERT
+ // make sure exception is set
+ { Label L;
+ __ cbnz(Rexception_obj, L);
+ __ stop("StubRoutines::forward exception: no pending exception (2)");
+ __ bind(L);
+ }
+#endif
+
+ // Verify that there is really a valid exception in Rexception_obj.
+ __ verify_oop(Rexception_obj);
+
+ __ jump(R0); // handler is returned in R0 by runtime function
+ return start;
+ }
+
+
+#ifndef AARCH64
+
+ // Integer division shared routine
+ // Input:
+ // R0 - dividend
+ // R2 - divisor
+ // Output:
+ // R0 - remainder
+ // R1 - quotient
+ // Destroys:
+ // R2
+ // LR
+ address generate_idiv_irem() {
+ Label positive_arguments, negative_or_zero, call_slow_path;
+ Register dividend = R0;
+ Register divisor = R2;
+ Register remainder = R0;
+ Register quotient = R1;
+ Register tmp = LR;
+ assert(dividend == remainder, "must be");
+
+ address start = __ pc();
+
+ // Check for special cases: divisor <= 0 or dividend < 0
+ __ cmp(divisor, 0);
+ __ orrs(quotient, dividend, divisor, ne); // only when divisor != 0: flags reflect (dividend | divisor)
+ __ b(negative_or_zero, le);
+
+ __ bind(positive_arguments);
+ // Save return address on stack to free one extra register
+ __ push(LR);
+ // Approximate the maximum order of the quotient
+ __ clz(tmp, dividend);
+ __ clz(quotient, divisor);
+ __ subs(tmp, quotient, tmp);
+ __ mov(quotient, 0);
+ // Jump to the appropriate place in the unrolled loop below
+ __ ldr(PC, Address(PC, tmp, lsl, 2), pl); // PC reads as this instruction + 8, i.e. the start of the offset table emitted after the pop
+ // If divisor is greater than dividend, return immediately
+ __ pop(PC);
+
+ // Offset table
+ Label offset_table[32];
+ int i;
+ for (i = 0; i <= 31; i++) {
+ __ emit_address(offset_table[i]);
+ }
+
+ // Unrolled loop of 32 division steps
+ for (i = 31; i >= 0; i--) { // entry i starts at shift i and falls through down to shift 0
+ __ bind(offset_table[i]);
+ __ cmp(remainder, AsmOperand(divisor, lsl, i));
+ __ sub(remainder, remainder, AsmOperand(divisor, lsl, i), hs);
+ __ add(quotient, quotient, 1 << i, hs);
+ }
+ __ pop(PC);
+
+ __ bind(negative_or_zero);
+ // Find the combination of argument signs and jump to corresponding handler
+ __ andr(quotient, dividend, 0x80000000, ne);
+ __ orr(quotient, quotient, AsmOperand(divisor, lsr, 31), ne);
+ __ add(PC, PC, AsmOperand(quotient, ror, 26), ne); // divisor != 0: the two sign bits become a 32/64/96 byte offset into the fixed-size blocks below
+ __ str(LR, Address(Rthread, JavaThread::saved_exception_pc_offset())); // reached only when divisor == 0 (falls into the zero_divisor block)
+
+ // The leaf runtime function can destroy R0-R3 and R12 registers which are still alive
+ RegisterSet saved_registers = RegisterSet(R3) | RegisterSet(R12);
+#if R9_IS_SCRATCHED
+ // Safer to save R9 here since callers may have been written
+ // assuming R9 survives. This is suboptimal but may not be worth
+ // revisiting for this slow case.
+
+ // save also R10 for alignment
+ saved_registers = saved_registers | RegisterSet(R9, R10);
+#endif
+ {
+ // divisor == 0
+ FixedSizeCodeBlock zero_divisor(_masm, 8, true); // presumably fixes the block at 8 instructions so the computed jump above lands on block starts - confirm
+ __ push(saved_registers);
+ __ mov(R0, Rthread);
+ __ mov(R1, LR);
+ __ mov(R2, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO);
+ __ b(call_slow_path);
+ }
+
+ {
+ // divisor > 0 && dividend < 0
+ FixedSizeCodeBlock positive_divisor_negative_dividend(_masm, 8, true);
+ __ push(LR);
+ __ rsb(dividend, dividend, 0); // negate the dividend, divide as positive numbers, then negate both results
+ __ bl(positive_arguments);
+ __ rsb(remainder, remainder, 0);
+ __ rsb(quotient, quotient, 0);
+ __ pop(PC);
+ }
+
+ {
+ // divisor < 0 && dividend > 0
+ FixedSizeCodeBlock negative_divisor_positive_dividend(_masm, 8, true);
+ __ push(LR);
+ __ rsb(divisor, divisor, 0);
+ __ bl(positive_arguments);
+ __ rsb(quotient, quotient, 0); // only the quotient changes sign in this case
+ __ pop(PC);
+ }
+
+ {
+ // divisor < 0 && dividend < 0
+ FixedSizeCodeBlock negative_divisor_negative_dividend(_masm, 8, true);
+ __ push(LR);
+ __ rsb(dividend, dividend, 0);
+ __ rsb(divisor, divisor, 0);
+ __ bl(positive_arguments);
+ __ rsb(remainder, remainder, 0); // only the remainder changes sign in this case
+ __ pop(PC);
+ }
+
+ __ bind(call_slow_path);
+ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::continuation_for_implicit_exception));
+ __ pop(saved_registers);
+ __ bx(R0); // continue at the address returned by the runtime call
+
+ return start;
+ }
+
+
+ // As per atomic.hpp the Atomic read-modify-write operations must be logically implemented as:
+ // <fence>; <op>; <membar StoreLoad|StoreStore>
+ // But for load-linked/store-conditional based systems a fence here simply means
+ // no load/store can be reordered with respect to the initial load-linked, so we have:
+ // <membar storeload|loadload> ; load-linked; <op>; store-conditional; <membar storeload|storestore>
+ // There are no memory actions in <op> so nothing further is needed.
+ //
+ // So we define the following for convenience:
+#define MEMBAR_ATOMIC_OP_PRE \
+ MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad|MacroAssembler::LoadLoad)
+#define MEMBAR_ATOMIC_OP_POST \
+ MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad|MacroAssembler::StoreStore)
+
+ // Note: JDK 9 only supports ARMv7+ so we always have ldrexd available even though the
+ // code below allows for it to be otherwise. The else clause indicates an ARMv5 system
+ // for which we do not support MP and so membars are not necessary. This ARMv5 code will
+ // be removed in the future.
+
+ // Support for jint Atomic::add(jint add_value, volatile jint *dest)
+ //
+ // Arguments :
+ //
+ // add_value: R0
+ // dest: R1
+ //
+ // Results:
+ //
+ // R0: the new value stored in dest
+ //
+ // Overwrites:
+ //
+ // R1, R2, R3
+ //
+ address generate_atomic_add() {
+ address start;
+
+ StubCodeMark mark(this, "StubRoutines", "atomic_add");
+ Label retry;
+ start = __ pc();
+ Register addval = R0;
+ Register dest = R1;
+ Register prev = R2;
+ Register ok = R2; // same register as prev
+ Register newval = R3;
+
+ if (VM_Version::supports_ldrex()) {
+ __ membar(MEMBAR_ATOMIC_OP_PRE, prev);
+ __ bind(retry);
+ __ ldrex(newval, Address(dest));
+ __ add(newval, addval, newval);
+ __ strex(ok, newval, Address(dest));
+ __ cmp(ok, 0);
+ __ b(retry, ne); // non-zero status: the store-exclusive did not succeed, try again
+ __ mov (R0, newval); // result: the value just stored
+ __ membar(MEMBAR_ATOMIC_OP_POST, prev);
+ } else {
+ __ bind(retry);
+ __ ldr (prev, Address(dest));
+ __ add(newval, addval, prev);
+ __ atomic_cas_bool(prev, newval, dest, 0, noreg/*ignored*/);
+ __ b(retry, ne); // retry until the compare-and-swap succeeds
+ __ mov (R0, newval);
+ }
+ __ bx(LR);
+
+ return start;
+ }
+
+ // Support for jint Atomic::xchg(jint exchange_value, volatile jint *dest)
+ //
+ // Arguments :
+ //
+ // exchange_value: R0
+ // dest: R1
+ //
+ // Results:
+ //
+ // R0: the value previously stored in dest
+ //
+ // Overwrites:
+ //
+ // R1, R2, R3
+ //
+ address generate_atomic_xchg() {
+ address start;
+
+ StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
+ start = __ pc();
+ Register newval = R0;
+ Register dest = R1;
+ Register prev = R2;
+
+ Label retry;
+
+ if (VM_Version::supports_ldrex()) {
+ Register ok=R3; // receives the strex status
+ __ membar(MEMBAR_ATOMIC_OP_PRE, prev);
+ __ bind(retry);
+ __ ldrex(prev, Address(dest));
+ __ strex(ok, newval, Address(dest));
+ __ cmp(ok, 0);
+ __ b(retry, ne); // non-zero status: the store-exclusive did not succeed, try again
+ __ mov (R0, prev); // result: the value that was in dest before the exchange
+ __ membar(MEMBAR_ATOMIC_OP_POST, prev);
+ } else {
+ __ bind(retry);
+ __ ldr (prev, Address(dest));
+ __ atomic_cas_bool(prev, newval, dest, 0, noreg/*ignored*/);
+ __ b(retry, ne); // retry until the compare-and-swap succeeds
+ __ mov (R0, prev);
+ }
+ __ bx(LR);
+
+ return start;
+ }
+
+ // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint *dest, jint compare_value)
+ //
+ // Arguments :
+ //
+ // compare_value: R0
+ // exchange_value: R1
+ // dest: R2
+ //
+ // Results:
+ //
+ // R0: the value previously stored in dest
+ //
+ // Overwrites:
+ //
+ // R0, R1, R2, R3, Rtemp
+ //
+ address generate_atomic_cmpxchg() {
+ address start;
+
+ StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
+ start = __ pc();
+ Register cmp = R0;
+ Register newval = R1;
+ Register dest = R2;
+ Register temp1 = R3;
+ Register temp2 = Rtemp; // Rtemp free (native ABI)
+
+ __ membar(MEMBAR_ATOMIC_OP_PRE, temp1); // barrier before the operation, temp1 used as scratch
+
+ // atomic_cas returns previous value in R0
+ __ atomic_cas(temp1, temp2, cmp, newval, dest, 0);
+
+ __ membar(MEMBAR_ATOMIC_OP_POST, temp1); // barrier after the operation
+
+ __ bx(LR);
+
+ return start;
+ }
+
+ // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
+ // reordered before by a wrapper to (jlong compare_value, jlong exchange_value, volatile jlong *dest)
+ //
+ // Arguments :
+ //
+ // compare_value: R1 (High), R0 (Low)
+ // exchange_value: R3 (High), R2 (Low)
+ // dest: SP+0
+ //
+ // Results:
+ //
+ // R0:R1: the value previously stored in dest
+ //
+ // Overwrites:
+ //
+ address generate_atomic_cmpxchg_long() {
+ address start;
+
+ StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
+ start = __ pc();
+ Register cmp_lo = R0;
+ Register cmp_hi = R1;
+ Register newval_lo = R2;
+ Register newval_hi = R3;
+ Register addr = Rtemp; /* After load from stack */
+ Register temp_lo = R4;
+ Register temp_hi = R5;
+ Register temp_result = R8;
+ assert_different_registers(cmp_lo, newval_lo, temp_lo, addr, temp_result, R7);
+ assert_different_registers(cmp_hi, newval_hi, temp_hi, addr, temp_result, R7);
+
+ __ membar(MEMBAR_ATOMIC_OP_PRE, Rtemp); // Rtemp free (native ABI)
+
+ // Stack is unaligned, maintain double word alignment by pushing
+ // odd number of regs.
+ __ push(RegisterSet(temp_result) | RegisterSet(temp_lo, temp_hi));
+ __ ldr(addr, Address(SP, 12)); // dest was at SP+0 on entry; the three words pushed above moved it to SP+12
+
+ // atomic_cas64 returns previous value in temp_lo, temp_hi
+ __ atomic_cas64(temp_lo, temp_hi, temp_result, cmp_lo, cmp_hi,
+ newval_lo, newval_hi, addr, 0);
+ __ mov(R0, temp_lo); // return the previous value in R0 (low) : R1 (high)
+ __ mov(R1, temp_hi);
+
+ __ pop(RegisterSet(temp_result) | RegisterSet(temp_lo, temp_hi));
+
+ __ membar(MEMBAR_ATOMIC_OP_POST, Rtemp); // Rtemp free (native ABI)
+ __ bx(LR);
+
+ return start;
+ }
+
+ address generate_atomic_load_long() { // stub: load the 64-bit value at the address in R0 into R0 (low) : R1 (high)
+ address start;
+
+ StubCodeMark mark(this, "StubRoutines", "atomic_load_long");
+ start = __ pc();
+ Register result_lo = R0;
+ Register result_hi = R1;
+ Register src = R0; // same register as result_lo: the address is overwritten by the result
+
+ if (!os::is_MP()) {
+ __ ldmia(src, RegisterSet(result_lo, result_hi)); // uniprocessor: plain two-word load
+ __ bx(LR);
+ } else if (VM_Version::supports_ldrexd()) {
+ __ ldrexd(result_lo, Address(src));
+ __ clrex(); // FIXME: safe to remove?
+ __ bx(LR);
+ } else {
+ __ stop("Atomic load(jlong) unsupported on this platform");
+ __ bx(LR);
+ }
+
+ return start;
+ }
+
+ address generate_atomic_store_long() { // stub: store the 64-bit value in R0 (low) : R1 (high) to the address in R2
+ address start;
+
+ StubCodeMark mark(this, "StubRoutines", "atomic_store_long");
+ start = __ pc();
+ Register newval_lo = R0;
+ Register newval_hi = R1;
+ Register dest = R2;
+ Register scratch_lo = R2;
+ Register scratch_hi = R3; /* not referenced by name below; presumably the implicit second register of the ldrexd pair */
+ Register result = R3;
+
+ if (!os::is_MP()) {
+ __ stmia(dest, RegisterSet(newval_lo, newval_hi)); // uniprocessor: plain two-word store
+ __ bx(LR);
+ } else if (VM_Version::supports_ldrexd()) {
+ __ mov(Rtemp, dest); // get dest to Rtemp
+ Label retry;
+ __ bind(retry);
+ __ ldrexd(scratch_lo, Address(Rtemp)); // loaded value is discarded (scratch_lo shares R2 with dest, hence the copy to Rtemp)
+ __ strexd(result, R0, Address(Rtemp));
+ __ rsbs(result, result, 1); // result = 1 - status, so flags are eq when the status was 1
+ __ b(retry, eq);
+ __ bx(LR);
+ } else {
+ __ stop("Atomic store(jlong) unsupported on this platform");
+ __ bx(LR);
+ }
+
+ return start;
+ }
+
+
+#endif // AARCH64
+
+#ifdef COMPILER2
+ // Support for uint StubRoutine::Arm::partial_subtype_check( Klass sub, Klass super );
+ // Arguments :
+ //
+ // ret : R0, returned
+ // icc/xcc: set as R0 (depending on wordSize)
+ // sub : R1, argument, not changed
+ // super: R2, argument, not changed
+ // raddr: LR, blown by call
+ address generate_partial_subtype_check() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
+ address start = __ pc();
+
+ // based on SPARC check_klass_subtype_[fast|slow]_path (without CompressedOops)
+
+ // R0 used as tmp_reg (in addition to return reg)
+ Register sub_klass = R1;
+ Register super_klass = R2;
+ Register tmp_reg2 = R3;
+ Register tmp_reg3 = R4;
+#define saved_set tmp_reg2, tmp_reg3
+
+ Label L_loop, L_fail;
+
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+
+ // fast check should be redundant
+
+ // slow check
+ {
+ __ raw_push(saved_set); // preserve the two temporaries; popped again on both exits
+
+ // a couple of useful fields in sub_klass:
+ int ss_offset = in_bytes(Klass::secondary_supers_offset());
+
+ // Do a linear scan of the secondary super-klass chain.
+ // This code is rarely used, so simplicity is a virtue here.
+
+ inc_counter_np(SharedRuntime::_partial_subtype_ctr, tmp_reg2, tmp_reg3);
+
+ Register scan_temp = tmp_reg2;
+ Register count_temp = tmp_reg3;
+
+ // We will consult the secondary-super array.
+ __ ldr(scan_temp, Address(sub_klass, ss_offset));
+
+ Register search_key = super_klass;
+
+ // Load the array length.
+ __ ldr_s32(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
+ __ add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes()); // point scan_temp at the first element
+
+ __ add(count_temp, count_temp, 1); // bias by one: the loop decrements before testing
+
+ // Top of search loop
+ __ bind(L_loop);
+ // Notes:
+ // scan_temp starts at the array elements
+ // count_temp is 1+size
+ __ subs(count_temp, count_temp, 1);
+ __ b(L_fail, eq); // not found in the array
+
+ // Load next super to check
+ // In the array of super classes elements are pointer sized.
+ int element_size = wordSize;
+ __ ldr(R0, Address(scan_temp, element_size, post_indexed));
+
+ // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+ __ subs(R0, R0, search_key); // set R0 to 0 on success (and flags to eq)
+
+ // A miss means we are NOT a subtype and need to keep looping
+ __ b(L_loop, ne);
+
+ // Falling out the bottom means we found a hit; we ARE a subtype
+
+ // Success. Cache the super we found and proceed in triumph.
+ __ str(super_klass, Address(sub_klass, sc_offset));
+
+ // Return success
+ // R0 is already 0 and flags are already set to eq
+ __ raw_pop(saved_set);
+ __ ret();
+
+ // Return failure
+ __ bind(L_fail);
+#ifdef AARCH64
+ // count_temp is 0, can't use ZR here
+ __ adds(R0, count_temp, 1); // sets the flags
+#else
+ __ movs(R0, 1); // sets the flags
+#endif
+ __ raw_pop(saved_set);
+ __ ret();
+ }
+ return start;
+ }
+#undef saved_set
+#endif // COMPILER2
+
+
+ //----------------------------------------------------------------------------------------------------
+ // Non-destructive plausibility checks for oops
+
+ address generate_verify_oop() {
+ StubCodeMark mark(this, "StubRoutines", "verify_oop");
+ address start = __ pc();
+
+ // Incoming arguments:
+ //
+ // R0: error message (char* )
+ // R1: address of register save area
+ // R2: oop to verify
+ //
+ // All registers are saved before calling this stub. However, condition flags should be saved here.
+
+ const Register oop = R2;
+ const Register klass = R3;
+ const Register tmp1 = R6;
+ const Register tmp2 = R8;
+
+ const Register flags = Rtmp_save0; // R4/R19
+ const Register ret_addr = Rtmp_save1; // R5/R20
+ assert_different_registers(oop, klass, tmp1, tmp2, flags, ret_addr, R7);
+
+ Label exit, error;
+ InlinedAddress verify_oop_count((address) StubRoutines::verify_oop_count_addr());
+
+#ifdef AARCH64
+ __ mrs(flags, Assembler::SysReg_NZCV);
+#else
+ __ mrs(Assembler::CPSR, flags); // save the condition flags; written back at 'exit'
+#endif // AARCH64
+
+ __ ldr_literal(tmp1, verify_oop_count);
+ __ ldr_s32(tmp2, Address(tmp1));
+ __ add(tmp2, tmp2, 1);
+ __ str_32(tmp2, Address(tmp1)); // plain load/add/store increment of the verify_oop counter
+
+ // make sure object is 'reasonable'
+ __ cbz(oop, exit); // if obj is NULL it is ok
+
+ // Check if the oop is in the right area of memory
+ // Note: oop_mask and oop_bits must be updated if the code is saved/reused
+ const address oop_mask = (address) Universe::verify_oop_mask();
+ const address oop_bits = (address) Universe::verify_oop_bits();
+ __ mov_address(tmp1, oop_mask, symbolic_Relocation::oop_mask_reference);
+ __ andr(tmp2, oop, tmp1);
+ __ mov_address(tmp1, oop_bits, symbolic_Relocation::oop_bits_reference);
+ __ cmp(tmp2, tmp1);
+ __ b(error, ne); // (oop & oop_mask) != oop_bits: report an error
+
+ // make sure klass is 'reasonable'
+ __ load_klass(klass, oop); // get klass
+ __ cbz(klass, error); // if klass is NULL it is broken
+
+ // return if everything seems ok
+ __ bind(exit);
+
+#ifdef AARCH64
+ __ msr(Assembler::SysReg_NZCV, flags);
+#else
+ __ msr(Assembler::CPSR_f, flags); // write back the condition flags saved on entry
+#endif // AARCH64
+
+ __ ret();
+
+ // handle errors
+ __ bind(error);
+
+ __ mov(ret_addr, LR); // save return address
+
+ // R0: error message
+ // R1: register save area
+ __ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
+
+ __ mov(LR, ret_addr);
+ __ b(exit); // leave through the normal exit path
+
+ __ bind_literal(verify_oop_count);
+
+ return start;
+ }
+
+ //----------------------------------------------------------------------------------------------------
+ // Array copy stubs
+
+ //
+ // Generate overlap test for array copy stubs
+ //
+ // Input:
+ // R0 - array1
+ // R1 - array2
+ // R2 - element count, 32-bit int
+ //
+ // input registers are preserved
+ //
+ void array_overlap_test(address no_overlap_target, int log2_elem_size, Register tmp1, Register tmp2) { // variant that branches to an already generated address
+ assert(no_overlap_target != NULL, "must be generated");
+ array_overlap_test(no_overlap_target, NULL, log2_elem_size, tmp1, tmp2); // NULL label selects the address target
+ }
+ void array_overlap_test(Label& L_no_overlap, int log2_elem_size, Register tmp1, Register tmp2) { // variant that branches to a label
+ array_overlap_test(NULL, &L_no_overlap, log2_elem_size, tmp1, tmp2); // non-NULL label takes precedence over the address
+ }
+ void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size, Register tmp1, Register tmp2) { // branches to *NOLp if NOLp is non-NULL, otherwise to no_overlap_target
+ const Register from = R0;
+ const Register to = R1;
+ const Register count = R2;
+ const Register to_from = tmp1; // to - from
+#ifndef AARCH64
+ const Register byte_count = (log2_elem_size == 0) ? count : tmp2; // count << log2_elem_size
+#endif // !AARCH64
+ assert_different_registers(from, to, count, tmp1, tmp2);
+
+ // The no_overlap version can be used if 'to' is lower (unsigned) than 'from',
+ // or if 'to' is at least (count*size) bytes above 'from'.
+
+ BLOCK_COMMENT("Array Overlap Test:");
+ __ subs(to_from, to, from);
+#ifndef AARCH64
+ if (log2_elem_size != 0) {
+ __ mov(byte_count, AsmOperand(count, lsl, log2_elem_size));
+ }
+#endif // !AARCH64
+ if (NOLp == NULL)
+ __ b(no_overlap_target,lo);
+ else
+ __ b((*NOLp), lo);
+#ifdef AARCH64
+ __ subs(ZR, to_from, count, ex_sxtw, log2_elem_size);
+#else
+ __ cmp(to_from, byte_count);
+#endif // AARCH64
+ if (NOLp == NULL)
+ __ b(no_overlap_target, ge);
+ else
+ __ b((*NOLp), ge);
+ }
+
+#ifdef AARCH64
+ // TODO-AARCH64: revise usages of bulk_* methods (probably ldp`s and stp`s should interlace)
+
+ // Loads [from, from + count*wordSize) into regs[0], regs[1], ..., regs[count-1]
+ // and increases 'from' by count*wordSize.
+ void bulk_load_forward(Register from, const Register regs[], int count) {
+ assert (count > 0 && count % 2 == 0, "count must be positive even number");
+ int bytes = count * wordSize;
+
+ int offset = 0;
+ __ ldp(regs[0], regs[1], Address(from, bytes, post_indexed)); // first pair; 'from' is advanced by the whole block here
+ offset += 2*wordSize;
+
+ for (int i = 2; i < count; i += 2) {
+ __ ldp(regs[i], regs[i+1], Address(from, -bytes + offset)); // negative offsets: 'from' already points past the block
+ offset += 2*wordSize;
+ }
+
+ assert (offset == bytes, "must be");
+ }
+
+ // Stores regs[0], regs[1], ..., regs[count-1] to [to, to + count*wordSize)
+ // and increases 'to' by count*wordSize.
+ void bulk_store_forward(Register to, const Register regs[], int count) {
+ assert (count > 0 && count % 2 == 0, "count must be positive even number");
+ int bytes = count * wordSize;
+
+ int offset = 0;
+ __ stp(regs[0], regs[1], Address(to, bytes, post_indexed)); // first pair; 'to' is advanced by the whole block here
+ offset += 2*wordSize;
+
+ for (int i = 2; i < count; i += 2) {
+ __ stp(regs[i], regs[i+1], Address(to, -bytes + offset)); // negative offsets: 'to' already points past the block
+ offset += 2*wordSize;
+ }
+
+ assert (offset == bytes, "must be");
+ }
+
+ // Emits LDP instructions that read the 'count' words at [from - count*wordSize, from)
+ // into regs[0..count-1]; 'from' ends up decreased by count*wordSize.
+ // Register order follows address order: regs[0] receives the word with the lowest address.
+ void bulk_load_backward(Register from, const Register regs[], int count) {
+ assert (count > 0 && count % 2 == 0, "count must be positive even number");
+ const int pair_bytes = 2*wordSize;
+ const int total_bytes = count * wordSize;
+
+ // Upper pairs first (highest address first), at negative offsets from the still unchanged 'from'.
+ int done_bytes = 0;
+ for (int idx = count - 2; idx > 0; idx -= 2) {
+ done_bytes += pair_bytes;
+ __ ldp(regs[idx], regs[idx+1], Address(from, -done_bytes));
+ }
+
+ // The lowest pair goes last: its pre-indexed access is the one that updates 'from'.
+ __ ldp(regs[0], regs[1], Address(from, -total_bytes, pre_indexed));
+ assert (done_bytes + pair_bytes == total_bytes, "must be");
+ }
+
+ // Emits STP instructions that write regs[0..count-1] to the 'count' words at
+ // [to - count*wordSize, to); 'to' ends up decreased by count*wordSize.
+ // Register order follows address order: regs[0] is stored at the lowest address.
+ void bulk_store_backward(Register to, const Register regs[], int count) {
+ assert (count > 0 && count % 2 == 0, "count must be positive even number");
+ const int pair_bytes = 2*wordSize;
+ const int total_bytes = count * wordSize;
+
+ // Upper pairs first (highest address first), at negative offsets from the still unchanged 'to'.
+ int done_bytes = 0;
+ for (int idx = count - 2; idx > 0; idx -= 2) {
+ done_bytes += pair_bytes;
+ __ stp(regs[idx], regs[idx+1], Address(to, -done_bytes));
+ }
+
+ // The lowest pair goes last: its pre-indexed access is the one that updates 'to'.
+ __ stp(regs[0], regs[1], Address(to, -total_bytes, pre_indexed));
+ assert (done_bytes + pair_bytes == total_bytes, "must be");
+ }
+#endif // AARCH64
+
+ // Emits a read prefetch for 'from' + 'offset'; 'to' and 'to_delta' are currently unused (see the commented-out prfm below).
+ // TODO-AARCH64: rearrange in-loop prefetches; probably we should choose between "prefetch-store before or after store", not "before or after load".
+ void prefetch(Register from, Register to, int offset, int to_delta = 0) {
+ __ prefetch_read(Address(from, offset));
+#ifdef AARCH64
+ // Destination prefetch ('to' + offset + to_delta) is commented out to avoid significant loss of performance in memory copy - JDK-8078120
+ // __ prfm(pstl1keep, Address(to, offset + to_delta));
+#endif // AARCH64
+ }
+
+ // Generates the main loop and the tail copy for a forward aligned array copy.
+ //
+ // Arguments
+ // from: src address, 64 bits aligned
+ // to: dst address, wordSize aligned
+ // count: number of elements (32-bit int)
+ // bytes_per_count: number of bytes for each unit of 'count'
+ //
+ // Returns the minimum initial value for 'count' (count_per_loop, i.e. the elements copied by one loop iteration).
+ //
+ // Notes:
+ // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64)
+ // - 'to' aligned on wordSize
+ // - 'count' must be greater than or equal to the returned value
+ //
+ // Increases 'from' and 'to' by count*bytes_per_count.
+ //
+ // Scratches 'count', R3.
+ // On AArch64 also scratches R4-R10; on 32-bit ARM R4-R10 are preserved (saved/restored).
+ //
+ int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) {
+ assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
+
+ const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration
+ arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].forward_aligned;
+ int pld_offset = config->pld_distance; // <0: prefetch before the load, >0: prefetch after the load, 0: no prefetch
+ const int count_per_loop = bytes_per_loop / bytes_per_count; // elements copied per loop iteration
+
+#ifndef AARCH64
+ bool split_read= config->split_ldm; // emit the 8-register LDM as two 4-register LDMs
+ bool split_write= config->split_stm; // emit the 8-register STM as two 4-register STMs
+
+ // XXX optim: use VLDM/VSTM when available (Neon) with PLD
+ // NEONCopyPLD
+ // PLD [r1, #0xC0]
+ // VLDM r1!,{d0-d7}
+ // VSTM r0!,{d0-d7}
+ // SUBS r2,r2,#0x40
+ // BGE NEONCopyPLD
+
+ __ push(RegisterSet(R4,R10)); // restored by the pop at the end of this function
+#endif // !AARCH64
+
+ const bool prefetch_before = pld_offset < 0;
+ const bool prefetch_after = pld_offset > 0;
+
+ Label L_skip_pld;
+
+ // pre-decrement 'count' so that the loop exits when fewer than count_per_loop elements remain
+ __ sub_32(count, count, count_per_loop);
+
+ if (pld_offset != 0) {
+ pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; // keep the magnitude; the sign is held by prefetch_before/prefetch_after
+
+ prefetch(from, to, 0);
+
+ if (prefetch_before) {
+ // If prefetch is done ahead, final PLDs that overflow the
+ // copied area can be easily avoided. 'count' is predecreased
+ // by the prefetch distance to optimize the inner loop and the
+ // outer loop skips the PLD.
+ __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count);
+
+ // skip prefetch for small copies
+ __ b(L_skip_pld, lt);
+ }
+ // one extra prefetch per ArmCopyCacheLineSize step, up to the prefetch distance
+ int offset = ArmCopyCacheLineSize;
+ while (offset <= pld_offset) {
+ prefetch(from, to, offset);
+ offset += ArmCopyCacheLineSize;
+ };
+ }
+
+#ifdef AARCH64
+ const Register data_regs[8] = {R3, R4, R5, R6, R7, R8, R9, R10};
+#endif // AARCH64
+ {
+ // LDM (32-bit ARM) / LDP (AArch64) copy of 'bytes_per_loop' bytes
+
+ // 32-bit ARM note: we have tried implementing loop unrolling to skip one
+ // PLD with 64 bytes cache line but the gain was not significant.
+
+ Label L_copy_loop;
+ __ align(OptoLoopAlignment);
+ __ BIND(L_copy_loop);
+
+ if (prefetch_before) {
+ prefetch(from, to, bytes_per_loop + pld_offset);
+ __ BIND(L_skip_pld); // entry point that bypasses the prefetch above
+ }
+
+#ifdef AARCH64
+ bulk_load_forward(from, data_regs, 8);
+#else
+ if (split_read) {
+ // Split the register set in two sets so that there is less
+ // latency between LDM and STM (R3-R6 available while R7-R10
+ // still loading) and less register locking issue when iterating
+ // on the first LDM.
+ __ ldmia(from, RegisterSet(R3, R6), writeback);
+ __ ldmia(from, RegisterSet(R7, R10), writeback);
+ } else {
+ __ ldmia(from, RegisterSet(R3, R10), writeback);
+ }
+#endif // AARCH64
+
+ __ subs_32(count, count, count_per_loop); // sets the flags tested by b(L_copy_loop, ge) below
+
+ if (prefetch_after) {
+ prefetch(from, to, pld_offset, bytes_per_loop);
+ }
+
+#ifdef AARCH64
+ bulk_store_forward(to, data_regs, 8);
+#else
+ if (split_write) {
+ __ stmia(to, RegisterSet(R3, R6), writeback);
+ __ stmia(to, RegisterSet(R7, R10), writeback);
+ } else {
+ __ stmia(to, RegisterSet(R3, R10), writeback);
+ }
+#endif // AARCH64
+
+ __ b(L_copy_loop, ge);
+
+ if (prefetch_before) {
+ // the inner loop may end earlier, allowing to skip PLD for the last iterations
+ __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
+ __ b(L_skip_pld, ge);
+ }
+ }
+ BLOCK_COMMENT("Remaining bytes:");
+ // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
+
+ // __ add(count, count, ...); // addition useless for the bit tests
+ assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
+
+#ifdef AARCH64
+ assert (bytes_per_loop == 64, "adjust the code below");
+ assert (bytes_per_count <= 8, "adjust the code below");
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(32/bytes_per_count), L); // skip the 32-byte copy when this bit of 'count' is clear
+
+ bulk_load_forward(from, data_regs, 4);
+ bulk_store_forward(to, data_regs, 4);
+
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(16/bytes_per_count), L); // skip the 16-byte copy when this bit of 'count' is clear
+
+ bulk_load_forward(from, data_regs, 2);
+ bulk_store_forward(to, data_regs, 2);
+
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(8/bytes_per_count), L); // skip the 8-byte copy when this bit of 'count' is clear
+
+ __ ldr(R3, Address(from, 8, post_indexed));
+ __ str(R3, Address(to, 8, post_indexed));
+
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 4) {
+ Label L;
+ __ tbz(count, exact_log2(4/bytes_per_count), L); // skip the 4-byte copy when this bit of 'count' is clear
+
+ __ ldr_w(R3, Address(from, 4, post_indexed));
+ __ str_w(R3, Address(to, 4, post_indexed));
+
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 2) {
+ Label L;
+ __ tbz(count, exact_log2(2/bytes_per_count), L); // skip the 2-byte copy when this bit of 'count' is clear
+
+ __ ldrh(R3, Address(from, 2, post_indexed));
+ __ strh(R3, Address(to, 2, post_indexed));
+
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 1) {
+ Label L;
+ __ tbz(count, 0, L); // skip the last byte when bit 0 of 'count' is clear
+
+ __ ldrb(R3, Address(from, 1, post_indexed));
+ __ strb(R3, Address(to, 1, post_indexed));
+
+ __ bind(L);
+ }
+#else
+ __ tst(count, 16 / bytes_per_count); // each tst selects one remaining chunk size; the 'ne' instructions copy it
+ __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
+ __ stmia(to, RegisterSet(R3, R6), writeback, ne);
+
+ __ tst(count, 8 / bytes_per_count);
+ __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
+ __ stmia(to, RegisterSet(R3, R4), writeback, ne);
+
+ if (bytes_per_count <= 4) {
+ __ tst(count, 4 / bytes_per_count);
+ __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes
+ __ str(R3, Address(to, 4, post_indexed), ne);
+ }
+
+ if (bytes_per_count <= 2) {
+ __ tst(count, 2 / bytes_per_count);
+ __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes
+ __ strh(R3, Address(to, 2, post_indexed), ne);
+ }
+
+ if (bytes_per_count == 1) {
+ __ tst(count, 1);
+ __ ldrb(R3, Address(from, 1, post_indexed), ne); // copy 1 byte
+ __ strb(R3, Address(to, 1, post_indexed), ne);
+ }
+
+ __ pop(RegisterSet(R4,R10)); // matches the push at the top of this function
+#endif // AARCH64
+
+ return count_per_loop;
+ }
+
+
+ // Generates the main loop and the tail copy for a backward aligned array copy.
+ //
+ // Arguments
+ // end_from: src end address, 64 bits aligned
+ // end_to: dst end address, wordSize aligned
+ // count: number of elements (32-bit int)
+ // bytes_per_count: number of bytes for each unit of 'count'
+ //
+ // Returns the minimum initial value for 'count' (count_per_loop, i.e. the elements copied by one loop iteration).
+ //
+ // Notes:
+ // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64)
+ // - 'end_to' aligned on wordSize
+ // - 'count' must be greater than or equal to the returned value
+ //
+ // Decreases 'end_from' and 'end_to' by count*bytes_per_count.
+ //
+ // Scratches 'count', R3.
+ // On AArch64 also scratches R4-R10; on 32-bit ARM R4-R10 are preserved (saved/restored).
+ //
+ int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count) {
+ assert (end_from == R0 && end_to == R1 && count == R2, "adjust the implementation below");
+
+ const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration
+ const int count_per_loop = bytes_per_loop / bytes_per_count; // elements copied per loop iteration
+
+ arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].backward_aligned;
+ int pld_offset = config->pld_distance; // <0: prefetch before the load, >0: prefetch after the load, 0: no prefetch
+
+#ifndef AARCH64
+ bool split_read= config->split_ldm; // emit the 8-register LDM as two 4-register LDMs
+ bool split_write= config->split_stm; // emit the 8-register STM as two 4-register STMs
+
+ // See the forward copy variant for additional comments.
+
+ __ push(RegisterSet(R4,R10)); // restored by the pop at the end of this function
+#endif // !AARCH64
+ // pre-decrement 'count' by one loop's worth of elements before entering the loop
+ __ sub_32(count, count, count_per_loop);
+
+ const bool prefetch_before = pld_offset < 0;
+ const bool prefetch_after = pld_offset > 0;
+
+ Label L_skip_pld;
+
+ if (pld_offset != 0) {
+ pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; // keep the magnitude; the sign is held by prefetch_before/prefetch_after
+
+ prefetch(end_from, end_to, -wordSize);
+
+ if (prefetch_before) {
+ __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count); // 'count' is additionally decreased by the prefetch distance
+ __ b(L_skip_pld, lt); // skip prefetch for small copies
+ }
+ // one extra prefetch per ArmCopyCacheLineSize step below the end address, up to the prefetch distance
+ int offset = ArmCopyCacheLineSize;
+ while (offset <= pld_offset) {
+ prefetch(end_from, end_to, -(wordSize + offset));
+ offset += ArmCopyCacheLineSize;
+ };
+ }
+
+#ifdef AARCH64
+ const Register data_regs[8] = {R3, R4, R5, R6, R7, R8, R9, R10};
+#endif // AARCH64
+ {
+ // LDM (32-bit ARM) / LDP (AArch64) copy of 'bytes_per_loop' bytes
+
+ // 32-bit ARM note: we have tried implementing loop unrolling to skip one
+ // PLD with 64 bytes cache line but the gain was not significant.
+
+ Label L_copy_loop;
+ __ align(OptoLoopAlignment);
+ __ BIND(L_copy_loop);
+
+ if (prefetch_before) {
+ prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset));
+ __ BIND(L_skip_pld); // entry point that bypasses the prefetch above
+ }
+
+#ifdef AARCH64
+ bulk_load_backward(end_from, data_regs, 8);
+#else
+ if (split_read) {
+ __ ldmdb(end_from, RegisterSet(R7, R10), writeback); // upper half (R7-R10) is loaded first
+ __ ldmdb(end_from, RegisterSet(R3, R6), writeback);
+ } else {
+ __ ldmdb(end_from, RegisterSet(R3, R10), writeback);
+ }
+#endif // AARCH64
+
+ __ subs_32(count, count, count_per_loop); // sets the flags tested by b(L_copy_loop, ge) below
+
+ if (prefetch_after) {
+ prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop);
+ }
+
+#ifdef AARCH64
+ bulk_store_backward(end_to, data_regs, 8);
+#else
+ if (split_write) {
+ __ stmdb(end_to, RegisterSet(R7, R10), writeback); // upper half (R7-R10) is stored first
+ __ stmdb(end_to, RegisterSet(R3, R6), writeback);
+ } else {
+ __ stmdb(end_to, RegisterSet(R3, R10), writeback);
+ }
+#endif // AARCH64
+
+ __ b(L_copy_loop, ge);
+
+ if (prefetch_before) {
+ __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); // the loop may end earlier; remaining iterations re-enter at L_skip_pld
+ __ b(L_skip_pld, ge);
+ }
+ }
+ BLOCK_COMMENT("Remaining bytes:");
+ // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
+
+ // __ add(count, count, ...); // addition useless for the bit tests
+ assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
+
+#ifdef AARCH64
+ assert (bytes_per_loop == 64, "adjust the code below");
+ assert (bytes_per_count <= 8, "adjust the code below");
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(32/bytes_per_count), L); // skip the 32-byte copy when this bit of 'count' is clear
+
+ bulk_load_backward(end_from, data_regs, 4);
+ bulk_store_backward(end_to, data_regs, 4);
+
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(16/bytes_per_count), L); // skip the 16-byte copy when this bit of 'count' is clear
+
+ bulk_load_backward(end_from, data_regs, 2);
+ bulk_store_backward(end_to, data_regs, 2);
+
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(8/bytes_per_count), L); // skip the 8-byte copy when this bit of 'count' is clear
+
+ __ ldr(R3, Address(end_from, -8, pre_indexed));
+ __ str(R3, Address(end_to, -8, pre_indexed));
+
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 4) {
+ Label L;
+ __ tbz(count, exact_log2(4/bytes_per_count), L); // skip the 4-byte copy when this bit of 'count' is clear
+
+ __ ldr_w(R3, Address(end_from, -4, pre_indexed));
+ __ str_w(R3, Address(end_to, -4, pre_indexed));
+
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 2) {
+ Label L;
+ __ tbz(count, exact_log2(2/bytes_per_count), L); // skip the 2-byte copy when this bit of 'count' is clear
+
+ __ ldrh(R3, Address(end_from, -2, pre_indexed));
+ __ strh(R3, Address(end_to, -2, pre_indexed));
+
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 1) {
+ Label L;
+ __ tbz(count, 0, L); // skip the last byte when bit 0 of 'count' is clear
+
+ __ ldrb(R3, Address(end_from, -1, pre_indexed));
+ __ strb(R3, Address(end_to, -1, pre_indexed));
+
+ __ bind(L);
+ }
+#else
+ __ tst(count, 16 / bytes_per_count); // each tst selects one remaining chunk size; the 'ne' instructions copy it
+ __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
+ __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne);
+
+ __ tst(count, 8 / bytes_per_count);
+ __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
+ __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne);
+
+ if (bytes_per_count <= 4) {
+ __ tst(count, 4 / bytes_per_count);
+ __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes
+ __ str(R3, Address(end_to, -4, pre_indexed), ne);
+ }
+
+ if (bytes_per_count <= 2) {
+ __ tst(count, 2 / bytes_per_count);
+ __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes
+ __ strh(R3, Address(end_to, -2, pre_indexed), ne);
+ }
+
+ if (bytes_per_count == 1) {
+ __ tst(count, 1);
+ __ ldrb(R3, Address(end_from, -1, pre_indexed), ne); // copy 1 byte
+ __ strb(R3, Address(end_to, -1, pre_indexed), ne);
+ }
+
+ __ pop(RegisterSet(R4,R10)); // matches the push at the top of this function
+#endif // AARCH64
+
+ return count_per_loop;
+ }
+
+
+ // Generates the main loop and the tail copy for a shifted forward array copy (unaligned copy).
+ // It can be used when bytes_per_count < wordSize, i.e.
+ // byte/short copy on 32-bit ARM, byte/short/int/compressed-oop copy on AArch64.
+ //
+ // Arguments
+ // from: start src address, 64 bits aligned
+ // to: start dst address, (now) wordSize aligned
+ // count: number of elements (32-bit int)
+ // bytes_per_count: number of bytes for each unit of 'count'
+ // lsr_shift: shift applied to 'old' value to skip already written bytes
+ // lsl_shift: shift applied to 'new' value to set the high bytes of the next write
+ //
+ // Returns the minimum initial value for 'count' (always 0 here: no minimum).
+ //
+ // Notes:
+ // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64)
+ // - 'to' aligned on wordSize
+ // - 'count' must be greater than or equal to the returned value
+ // - 'lsr_shift' + 'lsl_shift' = BitsPerWord
+ // - 'bytes_per_count' is 1 or 2 on 32-bit ARM; 1, 2 or 4 on AArch64
+ //
+ // Increases 'to' by count*bytes_per_count.
+ //
+ // Scratches 'from' and 'count', R3-R10, R12
+ //
+ // On entry:
+ // - R12 is preloaded with the first 'BitsPerWord' bits read just before 'from'
+ // - (R12 >> lsr_shift) is the part not yet written (just before 'to')
+ // --> (*to) = (R12 >> lsr_shift) | ((*from) << lsl_shift); ...
+ //
+ // This implementation may read more bytes than required.
+ // In fact, it reads exactly the copied region with its upper bound aligned up to wordSize,
+ // so the excess read does not cross a word boundary and is thus harmless.
+ //
+ int generate_forward_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, int lsr_shift, int lsl_shift) {
+ assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
+
+ const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iter
+ const int count_per_loop = bytes_per_loop / bytes_per_count; // elements copied per loop iteration
+
+ arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].forward_shifted;
+ int pld_offset = config->pld_distance; // <0: prefetch before the load, >0: prefetch after the load, 0: no prefetch
+
+#ifndef AARCH64
+ bool split_read= config->split_ldm; // emit the load as two LDMs
+ bool split_write= config->split_stm; // emit the store as two STMs
+#endif // !AARCH64
+
+ const bool prefetch_before = pld_offset < 0;
+ const bool prefetch_after = pld_offset > 0;
+ Label L_skip_pld, L_last_read, L_done;
+ if (pld_offset != 0) {
+
+ pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; // keep the magnitude; the sign is held by prefetch_before/prefetch_after
+
+ prefetch(from, to, 0);
+
+ if (prefetch_before) {
+ __ cmp_32(count, count_per_loop);
+ __ b(L_last_read, lt); // less than one full iteration: go straight to the tail
+ // skip prefetch for small copies
+ // warning: count is predecreased by the prefetch distance to optimize the inner loop
+ __ subs_32(count, count, ((bytes_per_loop + pld_offset) / bytes_per_count) + count_per_loop);
+ __ b(L_skip_pld, lt);
+ }
+ // one extra prefetch per ArmCopyCacheLineSize step, up to the prefetch distance
+ int offset = ArmCopyCacheLineSize;
+ while (offset <= pld_offset) {
+ prefetch(from, to, offset);
+ offset += ArmCopyCacheLineSize;
+ };
+ }
+
+ Label L_shifted_loop;
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_shifted_loop);
+
+ if (prefetch_before) {
+ // do it early if there might be register locking issues
+ prefetch(from, to, bytes_per_loop + pld_offset);
+ __ BIND(L_skip_pld); // entry point that bypasses the prefetch above
+ } else {
+ __ cmp_32(count, count_per_loop);
+ __ b(L_last_read, lt); // less than one full iteration left: go to the tail
+ }
+
+#ifdef AARCH64
+ const Register data_regs[9] = {R3, R4, R5, R6, R7, R8, R9, R10, R12}; // R3: carried-over part; R4..R10,R12: the 8 words loaded below
+ __ logical_shift_right(R3, R12, lsr_shift); // part of R12 not yet written
+ __ subs_32(count, count, count_per_loop);
+ bulk_load_forward(from, &data_regs[1], 8);
+#else
+ // read 32 bytes
+ if (split_read) {
+ // if write is not split, use less registers in first set to reduce locking
+ RegisterSet set1 = split_write ? RegisterSet(R4, R7) : RegisterSet(R4, R5);
+ RegisterSet set2 = (split_write ? RegisterSet(R8, R10) : RegisterSet(R6, R10)) | R12;
+ __ ldmia(from, set1, writeback);
+ __ mov(R3, AsmOperand(R12, lsr, lsr_shift)); // part of R12 not yet written
+ __ ldmia(from, set2, writeback);
+ __ subs(count, count, count_per_loop); // XXX: should it be before the 2nd LDM ? (latency vs locking)
+ } else {
+ __ mov(R3, AsmOperand(R12, lsr, lsr_shift)); // part of R12 not yet written
+ __ ldmia(from, RegisterSet(R4, R10) | R12, writeback); // Note: small latency on R4
+ __ subs(count, count, count_per_loop);
+ }
+#endif // AARCH64
+
+ if (prefetch_after) {
+ // do it after the 1st ldm/ldp anyway (no locking issues with early STM/STP)
+ prefetch(from, to, pld_offset, bytes_per_loop);
+ }
+
+ // prepare (shift) the values in R3..R10
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift)); // merged below low bytes of next val
+ __ logical_shift_right(R4, R4, lsr_shift); // unused part of next val
+ __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift)); // ...
+ __ logical_shift_right(R5, R5, lsr_shift);
+ __ orr(R5, R5, AsmOperand(R6, lsl, lsl_shift));
+ __ logical_shift_right(R6, R6, lsr_shift);
+ __ orr(R6, R6, AsmOperand(R7, lsl, lsl_shift));
+#ifndef AARCH64
+ if (split_write) {
+ // write the first half as soon as possible to reduce stm locking
+ __ stmia(to, RegisterSet(R3, R6), writeback, prefetch_before ? gt : ge);
+ }
+#endif // !AARCH64
+ __ logical_shift_right(R7, R7, lsr_shift);
+ __ orr(R7, R7, AsmOperand(R8, lsl, lsl_shift));
+ __ logical_shift_right(R8, R8, lsr_shift);
+ __ orr(R8, R8, AsmOperand(R9, lsl, lsl_shift));
+ __ logical_shift_right(R9, R9, lsr_shift);
+ __ orr(R9, R9, AsmOperand(R10, lsl, lsl_shift));
+ __ logical_shift_right(R10, R10, lsr_shift);
+ __ orr(R10, R10, AsmOperand(R12, lsl, lsl_shift)); // R12 itself is left unshifted: it is the carried-over word of the next step
+
+#ifdef AARCH64
+ bulk_store_forward(to, data_regs, 8); // stores R3..R10 (the first 8 entries of data_regs)
+#else
+ if (split_write) {
+ __ stmia(to, RegisterSet(R7, R10), writeback, prefetch_before ? gt : ge);
+ } else {
+ __ stmia(to, RegisterSet(R3, R10), writeback, prefetch_before ? gt : ge);
+ }
+#endif // AARCH64
+ __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop)
+
+ if (prefetch_before) {
+ // the first loop may end earlier, allowing to skip pld at the end
+ __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
+#ifndef AARCH64
+ __ stmia(to, RegisterSet(R3, R10), writeback); // stmia was skipped (the 'gt' stores above did not execute), so store R3-R10 now
+#endif // !AARCH64
+ __ b(L_skip_pld, ge);
+ __ adds_32(count, count, ((bytes_per_loop + pld_offset) / bytes_per_count) + count_per_loop); // undo the pre-decrement done before the loop
+ }
+
+ __ BIND(L_last_read);
+ __ b(L_done, eq); // NOTE(review): tests the flags left by the cmp_32/subs/adds_32 executed before reaching L_last_read
+
+#ifdef AARCH64
+ assert(bytes_per_count < 8, "adjust the code below");
+
+ __ logical_shift_right(R3, R12, lsr_shift); // part of R12 not yet written
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(32/bytes_per_count), L); // skip the 32-byte step when this bit of 'count' is clear
+ bulk_load_forward(from, &data_regs[1], 4);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift));
+ __ logical_shift_right(R4, R4, lsr_shift);
+ __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift));
+ __ logical_shift_right(R5, R5, lsr_shift);
+ __ orr(R5, R5, AsmOperand(R6, lsl, lsl_shift));
+ __ logical_shift_right(R6, R6, lsr_shift);
+ __ orr(R6, R6, AsmOperand(R7, lsl, lsl_shift));
+ bulk_store_forward(to, data_regs, 4);
+ __ logical_shift_right(R3, R7, lsr_shift); // carry the unwritten part of the last loaded word in R3
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(16/bytes_per_count), L); // skip the 16-byte step when this bit of 'count' is clear
+ bulk_load_forward(from, &data_regs[1], 2);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift));
+ __ logical_shift_right(R4, R4, lsr_shift);
+ __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift));
+ bulk_store_forward(to, data_regs, 2);
+ __ logical_shift_right(R3, R5, lsr_shift); // carry the unwritten part of the last loaded word in R3
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(8/bytes_per_count), L); // skip the 8-byte step when this bit of 'count' is clear
+ __ ldr(R4, Address(from, 8, post_indexed));
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift));
+ __ str(R3, Address(to, 8, post_indexed));
+ __ logical_shift_right(R3, R4, lsr_shift); // carry the unwritten part of the loaded word in R3
+ __ bind(L);
+ }
+
+ const int have_bytes = lsl_shift/BitsPerByte; // number of already read bytes in R3
+
+ // It remains less than wordSize to write.
+ // Do not check count if R3 already has maximal number of loaded elements (one less than wordSize).
+ if (have_bytes < wordSize - bytes_per_count) {
+ Label L;
+ __ andr(count, count, (uintx)(8/bytes_per_count-1)); // make count exact
+ __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store?
+ __ b(L, le);
+ __ ldr(R4, Address(from, 8, post_indexed));
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift));
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(4/bytes_per_count), L); // skip the 4-byte store when this bit of 'count' is clear
+ __ str_w(R3, Address(to, 4, post_indexed));
+ if (bytes_per_count < 4) {
+ __ logical_shift_right(R3, R3, 4*BitsPerByte); // drop the 4 bytes just stored
+ }
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 2) {
+ Label L;
+ __ tbz(count, exact_log2(2/bytes_per_count), L); // skip the 2-byte store when this bit of 'count' is clear
+ __ strh(R3, Address(to, 2, post_indexed));
+ if (bytes_per_count < 2) {
+ __ logical_shift_right(R3, R3, 2*BitsPerByte); // drop the 2 bytes just stored
+ }
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 1) {
+ Label L;
+ __ tbz(count, exact_log2(1/bytes_per_count), L); // bit 0: skip the last byte when it is clear
+ __ strb(R3, Address(to, 1, post_indexed));
+ __ bind(L);
+ }
+#else
+ switch (bytes_per_count) {
+ case 2:
+ __ mov(R3, AsmOperand(R12, lsr, lsr_shift)); // part of R12 not yet written
+ __ tst(count, 8); // 8 shorts = 16 bytes; the 'ne' instructions below handle them
+ __ ldmia(from, RegisterSet(R4, R7), writeback, ne);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift), ne); // merged below low bytes of next val
+ __ mov(R4, AsmOperand(R4, lsr, lsr_shift), ne); // unused part of next val
+ __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift), ne); // ...
+ __ mov(R5, AsmOperand(R5, lsr, lsr_shift), ne);
+ __ orr(R5, R5, AsmOperand(R6, lsl, lsl_shift), ne);
+ __ mov(R6, AsmOperand(R6, lsr, lsr_shift), ne);
+ __ orr(R6, R6, AsmOperand(R7, lsl, lsl_shift), ne);
+ __ stmia(to, RegisterSet(R3, R6), writeback, ne);
+ __ mov(R3, AsmOperand(R7, lsr, lsr_shift), ne);
+
+ __ tst(count, 4); // 4 shorts = 8 bytes
+ __ ldmia(from, RegisterSet(R4, R5), writeback, ne);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift), ne); // merged below low bytes of next val
+ __ mov(R4, AsmOperand(R4, lsr, lsr_shift), ne); // unused part of next val
+ __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift), ne); // ...
+ __ stmia(to, RegisterSet(R3, R4), writeback, ne);
+ __ mov(R3, AsmOperand(R5, lsr, lsr_shift), ne);
+
+ __ tst(count, 2); // 2 shorts = 4 bytes
+ __ ldr(R4, Address(from, 4, post_indexed), ne);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift), ne);
+ __ str(R3, Address(to, 4, post_indexed), ne);
+ __ mov(R3, AsmOperand(R4, lsr, lsr_shift), ne);
+
+ __ tst(count, 1);
+ __ strh(R3, Address(to, 2, post_indexed), ne); // one last short
+ break;
+
+ case 1:
+ __ mov(R3, AsmOperand(R12, lsr, lsr_shift)); // part of R12 not yet written
+ __ tst(count, 16); // 16 bytes; the 'ne' instructions below handle them
+ __ ldmia(from, RegisterSet(R4, R7), writeback, ne);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift), ne); // merged below low bytes of next val
+ __ mov(R4, AsmOperand(R4, lsr, lsr_shift), ne); // unused part of next val
+ __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift), ne); // ...
+ __ mov(R5, AsmOperand(R5, lsr, lsr_shift), ne);
+ __ orr(R5, R5, AsmOperand(R6, lsl, lsl_shift), ne);
+ __ mov(R6, AsmOperand(R6, lsr, lsr_shift), ne);
+ __ orr(R6, R6, AsmOperand(R7, lsl, lsl_shift), ne);
+ __ stmia(to, RegisterSet(R3, R6), writeback, ne);
+ __ mov(R3, AsmOperand(R7, lsr, lsr_shift), ne);
+
+ __ tst(count, 8); // 8 bytes
+ __ ldmia(from, RegisterSet(R4, R5), writeback, ne);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift), ne); // merged below low bytes of next val
+ __ mov(R4, AsmOperand(R4, lsr, lsr_shift), ne); // unused part of next val
+ __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift), ne); // ...
+ __ stmia(to, RegisterSet(R3, R4), writeback, ne);
+ __ mov(R3, AsmOperand(R5, lsr, lsr_shift), ne);
+
+ __ tst(count, 4); // 4 bytes
+ __ ldr(R4, Address(from, 4, post_indexed), ne);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift), ne);
+ __ str(R3, Address(to, 4, post_indexed), ne);
+ __ mov(R3, AsmOperand(R4, lsr, lsr_shift), ne);
+
+ __ andr(count, count, 3); // 0..3 bytes remain
+ __ cmp(count, 2); // ge: at least 2 bytes remain, gt: 3 bytes remain
+
+ // Note: R3 might contain enough bytes ready to write (3 needed at most),
+ // thus load on lsl_shift==24 is not needed (in fact forces reading
+ // beyond source buffer end boundary)
+ if (lsl_shift == 8) {
+ __ ldr(R4, Address(from, 4, post_indexed), ge);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift), ge);
+ } else if (lsl_shift == 16) {
+ __ ldr(R4, Address(from, 4, post_indexed), gt);
+ __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift), gt);
+ }
+
+ __ strh(R3, Address(to, 2, post_indexed), ge); // two last bytes
+ __ mov(R3, AsmOperand(R3, lsr, 16), gt); // drop the two bytes just stored when a third one remains
+
+ __ tst(count, 1);
+ __ strb(R3, Address(to, 1, post_indexed), ne); // one last byte
+ break;
+ }
+#endif // AARCH64
+
+ __ BIND(L_done);
+ return 0; // no minimum
+ }
+
+ // Generate the inner loop for shifted backward array copy (unaligned copy).
+ // It can be used when bytes_per_count < wordSize, i.e.
+ // byte/short copy on 32-bit ARM, byte/short/int/compressed-oop copy on AArch64.
+ //
+ // Arguments
+ // end_from: end src address, 64 bits aligned
+ // end_to: end dst address, (now) wordSize aligned
+ // count: number of elements (32-bit int)
+ // bytes_per_count: number of bytes for each unit of 'count'
+ // lsl_shift: shift applied to 'old' value to skip already written bytes
+ // lsr_shift: shift applied to 'new' value to set the low bytes of the next write
+ //
+ // Return the minimum initial value for count
+ //
+ // Notes:
+ // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64)
+ // - 'end_to' aligned on wordSize
+ // - 'count' must be greater than or equal to the returned value
+ // - 'lsr_shift' + 'lsl_shift' = 'BitsPerWord'
+ // - 'bytes_per_count' is 1 or 2 on 32-bit ARM; 1, 2 or 4 on AArch64
+ //
+ // Decreases 'end_to' by count*bytes_per_count.
+ //
+ // Scratches 'end_from', 'count', R3-R10, R12
+ //
+ // On entry:
+ // - R3 is preloaded with the first 'BitsPerWord' bits read just after 'from'
+ // - (R3 << lsl_shift) is the part not yet written
+ // --> (*--to) = (R3 << lsl_shift) | ((*--from) >> lsr_shift); ...
+ //
+ // This implementation may read more bytes than required.
+ // In fact, it reads exactly the copied region with its beginning aligned down to wordSize,
+ // so the excess read does not cross a word boundary and is thus harmless.
+ //
+ int generate_backward_shifted_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count, int lsr_shift, int lsl_shift) {
+ assert (end_from == R0 && end_to == R1 && count == R2, "adjust the implementation below");
+
+ const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iter
+ const int count_per_loop = bytes_per_loop / bytes_per_count;
+
+ arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].backward_shifted;
+ int pld_offset = config->pld_distance;
+
+#ifndef AARCH64
+ bool split_read= config->split_ldm;
+ bool split_write= config->split_stm;
+#endif // !AARCH64
+
+
+ const bool prefetch_before = pld_offset < 0;
+ const bool prefetch_after = pld_offset > 0;
+
+ Label L_skip_pld, L_done, L_last_read;
+ if (pld_offset != 0) {
+
+ pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
+
+ prefetch(end_from, end_to, -wordSize);
+
+ if (prefetch_before) {
+ __ cmp_32(count, count_per_loop);
+ __ b(L_last_read, lt);
+
+ // skip prefetch for small copies
+ // warning: count is predecreased by the prefetch distance to optimize the inner loop
+ __ subs_32(count, count, ((bytes_per_loop + pld_offset)/bytes_per_count) + count_per_loop);
+ __ b(L_skip_pld, lt);
+ }
+
+ int offset = ArmCopyCacheLineSize;
+ while (offset <= pld_offset) {
+ prefetch(end_from, end_to, -(wordSize + offset));
+ offset += ArmCopyCacheLineSize;
+ };
+ }
+
+ Label L_shifted_loop;
+ __ align(OptoLoopAlignment);
+ __ BIND(L_shifted_loop);
+
+ if (prefetch_before) {
+ // do the 1st ldm/ldp first anyway (no locking issues with early STM/STP)
+ prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset));
+ __ BIND(L_skip_pld);
+ } else {
+ __ cmp_32(count, count_per_loop);
+ __ b(L_last_read, lt);
+ }
+
+#ifdef AARCH64
+ __ logical_shift_left(R12, R3, lsl_shift);
+ const Register data_regs[9] = {R3, R4, R5, R6, R7, R8, R9, R10, R12};
+ bulk_load_backward(end_from, data_regs, 8);
+#else
+ if (split_read) {
+ __ ldmdb(end_from, RegisterSet(R7, R10), writeback);
+ __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written
+ __ ldmdb(end_from, RegisterSet(R3, R6), writeback);
+ } else {
+ __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written
+ __ ldmdb(end_from, RegisterSet(R3, R10), writeback);
+ }
+#endif // AARCH64
+
+ __ subs_32(count, count, count_per_loop);
+
+ if (prefetch_after) { // do prefetch during ldm/ldp latency
+ prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop);
+ }
+
+ // prepare the values in R4..R10,R12
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift)); // merged above high bytes of prev val
+ __ logical_shift_left(R10, R10, lsl_shift); // unused part of prev val
+ __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift)); // ...
+ __ logical_shift_left(R9, R9, lsl_shift);
+ __ orr(R9, R9, AsmOperand(R8, lsr, lsr_shift));
+ __ logical_shift_left(R8, R8, lsl_shift);
+ __ orr(R8, R8, AsmOperand(R7, lsr, lsr_shift));
+ __ logical_shift_left(R7, R7, lsl_shift);
+ __ orr(R7, R7, AsmOperand(R6, lsr, lsr_shift));
+ __ logical_shift_left(R6, R6, lsl_shift);
+ __ orr(R6, R6, AsmOperand(R5, lsr, lsr_shift));
+#ifndef AARCH64
+ if (split_write) {
+ // store early to reduce locking issues
+ __ stmdb(end_to, RegisterSet(R6, R10) | R12, writeback, prefetch_before ? gt : ge);
+ }
+#endif // !AARCH64
+ __ logical_shift_left(R5, R5, lsl_shift);
+ __ orr(R5, R5, AsmOperand(R4, lsr, lsr_shift));
+ __ logical_shift_left(R4, R4, lsl_shift);
+ __ orr(R4, R4, AsmOperand(R3, lsr, lsr_shift));
+
+#ifdef AARCH64
+ bulk_store_backward(end_to, &data_regs[1], 8);
+#else
+ if (split_write) {
+ __ stmdb(end_to, RegisterSet(R4, R5), writeback, prefetch_before ? gt : ge);
+ } else {
+ __ stmdb(end_to, RegisterSet(R4, R10) | R12, writeback, prefetch_before ? gt : ge);
+ }
+#endif // AARCH64
+
+ __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop)
+
+ if (prefetch_before) {
+ // the first loop may end earlier, allowing to skip pld at the end
+ __ cmn_32(count, ((bytes_per_loop + pld_offset)/bytes_per_count));
+#ifndef AARCH64
+ __ stmdb(end_to, RegisterSet(R4, R10) | R12, writeback); // stmdb was skipped
+#endif // !AARCH64
+ __ b(L_skip_pld, ge);
+ __ adds_32(count, count, ((bytes_per_loop + pld_offset) / bytes_per_count) + count_per_loop);
+ }
+
+ __ BIND(L_last_read);
+ __ b(L_done, eq);
+
+#ifdef AARCH64
+ assert(bytes_per_count < 8, "adjust the code below");
+
+ __ logical_shift_left(R12, R3, lsl_shift);
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(32/bytes_per_count), L);
+ bulk_load_backward(end_from, &data_regs[4], 4);
+
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift));
+ __ logical_shift_left(R10, R10, lsl_shift);
+ __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift));
+ __ logical_shift_left(R9, R9, lsl_shift);
+ __ orr(R9, R9, AsmOperand(R8, lsr, lsr_shift));
+ __ logical_shift_left(R8, R8, lsl_shift);
+ __ orr(R8, R8, AsmOperand(R7, lsr, lsr_shift));
+
+ bulk_store_backward(end_to, &data_regs[5], 4);
+ __ logical_shift_left(R12, R7, lsl_shift);
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(16/bytes_per_count), L);
+ bulk_load_backward(end_from, &data_regs[6], 2);
+
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift));
+ __ logical_shift_left(R10, R10, lsl_shift);
+ __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift));
+
+ bulk_store_backward(end_to, &data_regs[7], 2);
+ __ logical_shift_left(R12, R9, lsl_shift);
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(8/bytes_per_count), L);
+ __ ldr(R10, Address(end_from, -8, pre_indexed));
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift));
+ __ str(R12, Address(end_to, -8, pre_indexed));
+ __ logical_shift_left(R12, R10, lsl_shift);
+ __ bind(L);
+ }
+
+ const int have_bytes = lsr_shift/BitsPerByte; // number of already read bytes in R12
+
+ // It remains less than wordSize to write.
+ // Do not check count if R12 already has maximal number of loaded elements (one less than wordSize).
+ if (have_bytes < wordSize - bytes_per_count) {
+ Label L;
+ __ andr(count, count, (uintx)(8/bytes_per_count-1)); // make count exact
+ __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store?
+ __ b(L, le);
+ __ ldr(R10, Address(end_from, -8, pre_indexed));
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift));
+ __ bind(L);
+ }
+
+ assert (bytes_per_count <= 4, "must be");
+
+ {
+ Label L;
+ __ tbz(count, exact_log2(4/bytes_per_count), L);
+ __ logical_shift_right(R9, R12, (wordSize-4)*BitsPerByte);
+ __ str_w(R9, Address(end_to, -4, pre_indexed)); // Write 4 MSB
+ if (bytes_per_count < 4) {
+ __ logical_shift_left(R12, R12, 4*BitsPerByte); // Promote remaining bytes to MSB
+ }
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 2) {
+ Label L;
+ __ tbz(count, exact_log2(2/bytes_per_count), L);
+ __ logical_shift_right(R9, R12, (wordSize-2)*BitsPerByte);
+ __ strh(R9, Address(end_to, -2, pre_indexed)); // Write 2 MSB
+ if (bytes_per_count < 2) {
+ __ logical_shift_left(R12, R12, 2*BitsPerByte); // Promote remaining bytes to MSB
+ }
+ __ bind(L);
+ }
+
+ if (bytes_per_count <= 1) {
+ Label L;
+ __ tbz(count, exact_log2(1/bytes_per_count), L);
+ __ logical_shift_right(R9, R12, (wordSize-1)*BitsPerByte);
+ __ strb(R9, Address(end_to, -1, pre_indexed)); // Write 1 MSB
+ __ bind(L);
+ }
+#else
+ switch(bytes_per_count) {
+ case 2:
+ __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written
+ __ tst(count, 8);
+ __ ldmdb(end_from, RegisterSet(R7,R10), writeback, ne);
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift), ne);
+ __ mov(R10, AsmOperand(R10, lsl, lsl_shift),ne); // unused part of prev val
+ __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift),ne); // ...
+ __ mov(R9, AsmOperand(R9, lsl, lsl_shift),ne);
+ __ orr(R9, R9, AsmOperand(R8, lsr, lsr_shift),ne);
+ __ mov(R8, AsmOperand(R8, lsl, lsl_shift),ne);
+ __ orr(R8, R8, AsmOperand(R7, lsr, lsr_shift),ne);
+ __ stmdb(end_to, RegisterSet(R8,R10)|R12, writeback, ne);
+ __ mov(R12, AsmOperand(R7, lsl, lsl_shift), ne);
+
+ __ tst(count, 4);
+ __ ldmdb(end_from, RegisterSet(R9, R10), writeback, ne);
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift), ne);
+ __ mov(R10, AsmOperand(R10, lsl, lsl_shift),ne); // unused part of prev val
+ __ orr(R10, R10, AsmOperand(R9, lsr,lsr_shift),ne); // ...
+ __ stmdb(end_to, RegisterSet(R10)|R12, writeback, ne);
+ __ mov(R12, AsmOperand(R9, lsl, lsl_shift), ne);
+
+ __ tst(count, 2);
+ __ ldr(R10, Address(end_from, -4, pre_indexed), ne);
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift), ne);
+ __ str(R12, Address(end_to, -4, pre_indexed), ne);
+ __ mov(R12, AsmOperand(R10, lsl, lsl_shift), ne);
+
+ __ tst(count, 1);
+ __ mov(R12, AsmOperand(R12, lsr, lsr_shift),ne);
+ __ strh(R12, Address(end_to, -2, pre_indexed), ne); // one last short
+ break;
+
+ case 1:
+ __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written
+ __ tst(count, 16);
+ __ ldmdb(end_from, RegisterSet(R7,R10), writeback, ne);
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift), ne);
+ __ mov(R10, AsmOperand(R10, lsl, lsl_shift),ne); // unused part of prev val
+ __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift),ne); // ...
+ __ mov(R9, AsmOperand(R9, lsl, lsl_shift),ne);
+ __ orr(R9, R9, AsmOperand(R8, lsr, lsr_shift),ne);
+ __ mov(R8, AsmOperand(R8, lsl, lsl_shift),ne);
+ __ orr(R8, R8, AsmOperand(R7, lsr, lsr_shift),ne);
+ __ stmdb(end_to, RegisterSet(R8,R10)|R12, writeback, ne);
+ __ mov(R12, AsmOperand(R7, lsl, lsl_shift), ne);
+
+ __ tst(count, 8);
+ __ ldmdb(end_from, RegisterSet(R9,R10), writeback, ne);
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift), ne);
+ __ mov(R10, AsmOperand(R10, lsl, lsl_shift),ne); // unused part of prev val
+ __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift),ne); // ...
+ __ stmdb(end_to, RegisterSet(R10)|R12, writeback, ne);
+ __ mov(R12, AsmOperand(R9, lsl, lsl_shift), ne);
+
+ __ tst(count, 4);
+ __ ldr(R10, Address(end_from, -4, pre_indexed), ne);
+ __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift), ne);
+ __ str(R12, Address(end_to, -4, pre_indexed), ne);
+ __ mov(R12, AsmOperand(R10, lsl, lsl_shift), ne);
+
+ __ tst(count, 2);
+ if (lsr_shift != 24) {
+ // avoid useless reading R10 when we already have 3 bytes ready in R12
+ __ ldr(R10, Address(end_from, -4, pre_indexed), ne);
+ __ orr(R12, R12, AsmOperand(R10, lsr,lsr_shift), ne);
+ }
+
+ // Note: R12 contains enough bytes ready to write (3 needed at most)
+ // write the 2 MSBs
+ __ mov(R9, AsmOperand(R12, lsr, 16), ne);
+ __ strh(R9, Address(end_to, -2, pre_indexed), ne);
+ // promote remaining to MSB
+ __ mov(R12, AsmOperand(R12, lsl, 16), ne);
+
+ __ tst(count, 1);
+ // write the MSB of R12
+ __ mov(R12, AsmOperand(R12, lsr, 24), ne);
+ __ strb(R12, Address(end_to, -1, pre_indexed), ne);
+
+ break;
+ }
+#endif // AARCH64
+
+ __ BIND(L_done);
+ return 0; // no minimum
+ }
+
+ // Addressing helper that lets the forward and backward copy code share one implementation.
+ Address get_addr_with_indexing(Register base, int delta, bool forward) {
+ if (!forward) {
+ // backward: 'base' is pre-decremented by 'delta'
+ return Address(base, -delta, pre_indexed);
+ }
+ return Address(base, delta, post_indexed); // forward: 'base' is post-incremented by 'delta'
+ }
+
+#ifdef AARCH64
+ // Loads a single value of 'size_in_bytes' bytes through 'from', moving 'from' in the copy direction:
+ // forward: reads the value at 'from', then advances 'from' by size_in_bytes
+ // !forward: reads the value at from-size_in_bytes and moves 'from' down by size_in_bytes
+ void load_one(Register rd, Register from, int size_in_bytes, bool forward) {
+ assert_different_registers(from, rd);
+ // last argument 'false': presumably selects an unsigned (zero-extending) load -- TODO confirm
+ __ load_sized_value(rd, get_addr_with_indexing(from, size_in_bytes, forward), size_in_bytes, false);
+ }
+
+ // Stores a single value of 'size_in_bytes' bytes from 'rd' through 'to'; 'to' moves the way load_one moves 'from'
+ void store_one(Register rd, Register to, int size_in_bytes, bool forward) {
+ assert_different_registers(to, rd);
+ // 'to' is updated by the indexed access, so it must not double as the value register (checked above)
+ __ store_sized_value(rd, get_addr_with_indexing(to, size_in_bytes, forward), size_in_bytes);
+ }
+#else
+ // 32-bit ARM flavours of load_one and store_one. Compared to the AArch64 ones they add:
+ // *) a condition code, so that the access can be conditionally executed
+ // *) a second value register argument, needed for 8-byte values
+
+ void load_one(Register rd, Register from, int size_in_bytes, bool forward, AsmCondition cond = al, Register rd2 = noreg) {
+ assert_different_registers(from, rd, rd2);
+ if (size_in_bytes >= 8) {
+ // 8-byte value: loaded into the register pair {rd, rd2} by one load-multiple with writeback
+ assert (rd2 != noreg, "second value register must be specified");
+ assert (rd->encoding() < rd2->encoding(), "wrong value register set");
+ const RegisterSet value_pair = RegisterSet(rd) | rd2;
+ if (forward) {
+ __ ldmia(from, value_pair, writeback, cond);
+ } else {
+ __ ldmdb(from, value_pair, writeback, cond);
+ }
+ } else {
+ __ load_sized_value(rd, get_addr_with_indexing(from, size_in_bytes, forward), size_in_bytes, false, cond);
+ }
+ }
+
+ void store_one(Register rd, Register to, int size_in_bytes, bool forward, AsmCondition cond = al, Register rd2 = noreg) {
+ assert_different_registers(to, rd, rd2);
+ if (size_in_bytes >= 8) {
+ // 8-byte value: stored from the register pair {rd, rd2} by one store-multiple with writeback
+ assert (rd2 != noreg, "second value register must be specified");
+ assert (rd->encoding() < rd2->encoding(), "wrong value register set");
+ const RegisterSet value_pair = RegisterSet(rd) | rd2;
+ if (forward) {
+ __ stmia(to, value_pair, writeback, cond);
+ } else {
+ __ stmdb(to, value_pair, writeback, cond);
+ }
+ } else {
+ __ store_sized_value(rd, get_addr_with_indexing(to, size_in_bytes, forward), size_in_bytes, cond);
+ }
+ }
+#endif // AARCH64
+
+ // Emits code that copies leading elements from 'from' to 'to' in the specified direction
+ // until 'from' is aligned by 64 bits (on 32-bit ARM 64-bit alignment is better for LDM).
+ //
+ // Arguments:
+ // from: beginning (if forward) or upper bound (if !forward) of the region to be read
+ // to: beginning (if forward) or upper bound (if !forward) of the region to be written
+ // count: 32-bit int, maximum number of elements which can be copied
+ // bytes_per_count: size of an element
+ // forward: specifies copy direction
+ // tmp: scratch register that holds each value between its load and its store
+ // Notes:
+ // 'from' and 'to' must be aligned by 'bytes_per_count'
+ // 'count' must not be less than the returned value
+ // shifts 'from' and 'to' by the number of copied bytes in corresponding direction
+ // decreases 'count' by the number of elements copied
+ //
+ // Returns the maximum number of elements (not bytes) which may be copied, i.e. 7/bytes_per_count.
+ int align_src(Register from, Register to, Register count, Register tmp, int bytes_per_count, bool forward) {
+ assert_different_registers(from, to, count, tmp);
+#ifdef AARCH64
+ // TODO-AARCH64: replace by simple loop?
+ Label Laligned_by_2, Laligned_by_4, Laligned_by_8;
+ // Each step tests one low bit of 'from' and, if it is set, copies one chunk of that size.
+ if (bytes_per_count == 1) {
+ __ tbz(from, 0, Laligned_by_2); // bit 0 clear: skip the 1-byte step
+ __ sub_32(count, count, 1);
+ load_one(tmp, from, 1, forward);
+ store_one(tmp, to, 1, forward);
+ }
+
+ __ BIND(Laligned_by_2);
+
+ if (bytes_per_count <= 2) {
+ __ tbz(from, 1, Laligned_by_4); // bit 1 clear: skip the 2-byte step
+ __ sub_32(count, count, 2/bytes_per_count); // a 2-byte chunk is 2/bytes_per_count elements
+ load_one(tmp, from, 2, forward);
+ store_one(tmp, to, 2, forward);
+ }
+
+ __ BIND(Laligned_by_4);
+
+ if (bytes_per_count <= 4) {
+ __ tbz(from, 2, Laligned_by_8); // bit 2 clear: skip the 4-byte step
+ __ sub_32(count, count, 4/bytes_per_count); // a 4-byte chunk is 4/bytes_per_count elements
+ load_one(tmp, from, 4, forward);
+ store_one(tmp, to, 4, forward);
+ }
+ __ BIND(Laligned_by_8);
+#else // AARCH64
+ if (bytes_per_count < 8) { // for 8-byte elements no code is emitted ('from' is aligned by bytes_per_count, see Notes)
+ Label L_align_src;
+ __ BIND(L_align_src);
+ __ tst(from, 7); // tests the three low address bits of 'from'
+ // ne => not aligned: copy one element and (if bytes_per_count < 4) loop
+ __ sub(count, count, 1, ne);
+ load_one(tmp, from, bytes_per_count, forward, ne);
+ store_one(tmp, to, bytes_per_count, forward, ne);
+ if (bytes_per_count < 4) {
+ __ b(L_align_src, ne); // if bytes_per_count == 4, then 0 or 1 loop iterations are enough
+ }
+ }
+#endif // AARCH64
+ return 7/bytes_per_count; // worst case: 7 bytes are copied before 'from' reaches an 8-byte boundary
+ }
+
+ // Copies 'count' of 'bytes_per_count'-sized elements in the specified direction, one element per iteration.
+ //
+ // Arguments:
+ // from: beginning (if forward) or upper bound (if !forward) of the region to be read
+ // to: beginning (if forward) or upper bound (if !forward) of the region to be written
+ // count: 32-bit int, number of elements to be copied
+ // tmp, tmp2: value registers; tmp2 is used only on 32-bit ARM, where it is handed to load_one/store_one as rd2
+ // entry: copy loop entry point; this label is bound inside the emitted code
+ // bytes_per_count: size of an element
+ // forward: specifies copy direction
+ //
+ // Notes: shifts 'from' and 'to'; 'count' is decremented once per loop iteration
+ void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry) {
+ assert_different_registers(from, to, count, tmp);
+ // NOTE(review): tmp2 is not covered by the distinctness check above
+ __ align(OptoLoopAlignment);
+#ifdef AARCH64
+ Label L_small_array_done, L_small_array_loop;
+ __ BIND(entry);
+ __ cbz_32(count, L_small_array_done); // count == 0: nothing to copy
+
+ __ BIND(L_small_array_loop);
+ __ subs_32(count, count, 1);
+ load_one(tmp, from, bytes_per_count, forward);
+ store_one(tmp, to, bytes_per_count, forward);
+ __ b(L_small_array_loop, gt); // uses the flags of subs_32 above (assumes the load/store in between leave them intact)
+
+ __ BIND(L_small_array_done);
+#else
+ Label L_small_loop;
+ __ BIND(L_small_loop);
+ store_one(tmp, to, bytes_per_count, forward, al, tmp2); // stores the element loaded at the end of the previous pass
+ __ BIND(entry); // entry point: placed after the store, so the first pass starts with the count check and load
+ __ subs(count, count, 1);
+ load_one(tmp, from, bytes_per_count, forward, ge, tmp2); // load only if an element was left (ge after the decrement)
+ __ b(L_small_loop, ge);
+#endif // AARCH64
+ }
+
+ // Aligns 'to' by storing the leading part of an already loaded source word (Rval) to 'to'.
+ //
+ // Arguments:
+ // to: beginning (if forward) or upper bound (if !forward) of the region to be written
+ // count: 32-bit int, number of elements allowed to be copied
+ // Rval: contains an already read but not yet written word;
+ // its LSBs (if forward) or MSBs (if !forward) are to be written to align 'to'
+ // tmp: scratch register receiving the shifted copy of Rval that is stored
+ // to_remainder: remainder of dividing 'to' by wordSize
+ // bytes_per_count: size of an element
+ // forward: specifies copy direction
+ // Notes:
+ // 'count' must not be less than the returned value
+ // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
+ // shifts 'to' by the number of written bytes (so that it becomes the bound of memory to be written)
+ // decreases 'count' by the number of elements written; returns that number
+ // Rval's MSBs or LSBs remain to be written further by generate_{forward,backward}_shifted_copy_loop
+ int align_dst(Register to, Register count, Register Rval, Register tmp,
+ int to_remainder, int bytes_per_count, bool forward) {
+ assert_different_registers(to, count, tmp, Rval);
+
+ assert (0 < to_remainder && to_remainder < wordSize, "to_remainder is not valid");
+ assert (to_remainder % bytes_per_count == 0, "to must be aligned by bytes_per_count");
+
+ // forward: fill up to the next word boundary; backward: go down to the previous one
+ const int bytes_to_write = forward ? (wordSize - to_remainder) : to_remainder;
+ const int elements_to_write = bytes_to_write / bytes_per_count;
+
+ // bytes_to_write is decomposed into chunks of 1, 2, 4... bytes, emitted smallest first
+ int done_bits = 0; // number of bits of Rval already stored
+
+ for (int log_size = 0; log_size < LogBytesPerWord; ++log_size) {
+ const int chunk_bytes = (1 << log_size);
+ if ((bytes_to_write & chunk_bytes) == 0) {
+ continue; // no chunk of this size is needed
+ }
+
+ const int next_bits = done_bits + chunk_bytes*BitsPerByte;
+ if (forward && done_bits == 0) {
+ // first forward chunk: its bytes are already the LSBs of Rval
+ store_one(Rval, to, chunk_bytes, forward);
+ } else {
+ // forward: skip the LSBs already stored; backward: take the topmost bits not stored yet
+ __ logical_shift_right(tmp, Rval, forward ? done_bits : BitsPerWord - next_bits);
+ store_one(tmp, to, chunk_bytes, forward);
+ }
+ done_bits = next_bits;
+ }
+
+ assert (done_bits == bytes_to_write * BitsPerByte, "all bytes must be copied");
+
+ __ sub_32(count, count, elements_to_write);
+ return elements_to_write;
+ }
+
+ // Emits the shifted copy for one fixed misalignment of 'to': first the stores that bring
+ // 'to' to a word boundary (align_dst), then the forward or backward shifted copy loop.
+ //
+ // Arguments:
+ // from: beginning (if forward) or upper bound (if !forward) of the region to be read
+ // to: beginning (if forward) or upper bound (if !forward) of the region to be written
+ // count: 32-bit int, number of elements to be copied
+ // to_remainder: remainder of dividing 'to' by wordSize
+ // bytes_per_count: size of an element
+ // forward: specifies copy direction
+ // Rval: contains an already read but not yet written word
+ //
+ // Notes:
+ // 'count' must not be less than the returned value
+ // 'from' must be aligned by wordSize
+ // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
+ // shifts 'to' by the number of copied bytes
+ //
+ // Scratches R3-R10, R12
+ int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, Register Rval,
+ int to_remainder, int bytes_per_count, bool forward) {
+ assert (0 < to_remainder && to_remainder < wordSize, "to_remainder is invalid");
+
+ // scratch register for align_dst; must differ from Rval (checked below)
+ const Register tmp = forward ? R3 : R12; // TODO-AARCH64: on cojoint_short R4 was used for tmp
+ assert_different_registers(from, to, count, Rval, tmp);
+
+ // bit counts of the two parts into which 'to_remainder' splits a word
+ const int lsl_shift = to_remainder * BitsPerByte;
+ const int lsr_shift = (wordSize - to_remainder) * BitsPerByte;
+
+ // code is emitted in this order: destination alignment first, then the copy loop
+ const int min_for_align = align_dst(to, count, Rval, tmp, to_remainder, bytes_per_count, forward);
+
+ const int min_for_loop = forward
+ ? generate_forward_shifted_copy_loop(from, to, count, bytes_per_count, lsr_shift, lsl_shift)
+ : generate_backward_shifted_copy_loop(from, to, count, bytes_per_count, lsr_shift, lsl_shift);
+
+ // smallest 'count' a caller has to guarantee for this path
+ return min_for_loop + min_for_align;
+ }
+
+ // Copies 'count' elements using a shifted copy loop, dispatching at run time on the remainder
+ // of 'to' divided by wordSize; one specialized alignment + loop sequence is emitted per possible remainder.
+ // Arguments:
+ // from: beginning (if forward) or upper bound (if !forward) of the region to be read
+ // to: beginning (if forward) or upper bound (if !forward) of the region to be written
+ // count: 32-bit int, number of elements to be copied
+ // bytes_per_count: size of an element
+ // forward: specifies copy direction
+ //
+ // Notes:
+ // 'count' must not be less than the returned value (the maximum over all emitted variants)
+ // 'from' must be aligned by wordSize
+ // 'to' must be aligned by bytes_per_count but must not be aligned by wordSize
+ // shifts 'to' by the number of copied bytes
+ //
+ // Scratches 'from', 'count', R3 and R12.
+ // On AArch64 also scratches R4-R10; on 32-bit ARM R4-R10 are pushed before use and popped afterwards.
+ int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) {
+
+ const Register Rval = forward ? R12 : R3; // as generate_{forward,backward}_shifted_copy_loop expect
+
+ int min_copy = 0;
+
+ // Note: if {seq} is a sequence of numbers, L{seq} means that if the execution reaches this point,
+ // then the remainder of 'to' divided by wordSize is one of elements of {seq}.
+
+#ifdef AARCH64
+ // TODO-AARCH64: simplify, tune
+
+ load_one(Rval, from, wordSize, forward); // read the first source word before dispatching
+
+ Label L_loop_finished;
+
+ switch (bytes_per_count) {
+ case 4:
+ min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
+ break;
+ case 2:
+ {
+ Label L2, L4, L6;
+
+ __ tbz(to, 1, L4); // bit 1 clear: remainder is 4 (0 is excluded, 'to' is not word aligned)
+ __ tbz(to, 2, L2); // bit 1 set, bit 2 clear: remainder is 2; otherwise fall through with 6
+
+ __ BIND(L6);
+ int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L2);
+ int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L4);
+ int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
+
+ min_copy = MAX2(MAX2(min_copy2, min_copy4), min_copy6);
+ break;
+ }
+ case 1:
+ {
+ Label L1, L2, L3, L4, L5, L6, L7;
+ Label L15, L26;
+ Label L246;
+
+ __ tbz(to, 0, L246); // even address: remainder is 2, 4 or 6
+ __ tbz(to, 1, L15); // odd, bit 1 clear: remainder is 1 or 5
+ __ tbz(to, 2, L3); // odd, bit 1 set: 3 if bit 2 is clear, otherwise fall through with 7
+
+ __ BIND(L7);
+ int min_copy7 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 7, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L246);
+ __ tbnz(to, 1, L26); // bit 1 set: remainder is 2 or 6, otherwise fall through with 4
+
+ __ BIND(L4);
+ int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L15);
+ __ tbz(to, 2, L1); // bit 2 clear: remainder is 1, otherwise fall through with 5
+
+ __ BIND(L5);
+ int min_copy5 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 5, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L3);
+ int min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L26);
+ __ tbz(to, 2, L2); // bit 2 clear: remainder is 2, otherwise fall through with 6
+
+ __ BIND(L6);
+ int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L1);
+ int min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L2);
+ int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+
+
+ min_copy = MAX2(min_copy1, min_copy2);
+ min_copy = MAX2(min_copy, min_copy3);
+ min_copy = MAX2(min_copy, min_copy4);
+ min_copy = MAX2(min_copy, min_copy5);
+ min_copy = MAX2(min_copy, min_copy6);
+ min_copy = MAX2(min_copy, min_copy7);
+ break;
+ }
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ __ BIND(L_loop_finished); // every variant ends here; the last emitted one falls through
+
+#else
+ __ push(RegisterSet(R4,R10)); // R4-R10 are scratched by the shifted copy loops; restored by the pop below
+ load_one(Rval, from, wordSize, forward); // read the first source word before dispatching
+
+ switch (bytes_per_count) {
+ case 2:
+ min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+ break;
+ case 1:
+ {
+ Label L1, L2, L3;
+ int min_copy1, min_copy2, min_copy3;
+
+ Label L_loop_finished;
+
+ if (forward) {
+ __ tbz(to, 0, L2); // even address (and not word aligned): remainder is 2
+ __ tbz(to, 1, L1); // odd, bit 1 clear: remainder is 1, otherwise fall through with 3
+
+ __ BIND(L3);
+ min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L1);
+ min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L2);
+ min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+ } else {
+ __ tbz(to, 0, L2); // even address (and not word aligned): remainder is 2
+ __ tbnz(to, 1, L3); // odd, bit 1 set: remainder is 3, otherwise fall through with 1
+
+ __ BIND(L1);
+ min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L3);
+ min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
+ __ b(L_loop_finished);
+
+ __ BIND(L2);
+ min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+ }
+
+ min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3);
+
+ __ BIND(L_loop_finished);
+
+ break;
+ }
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+
+ __ pop(RegisterSet(R4,R10));
+#endif // AARCH64
+
+ return min_copy;
+ }
+
+#ifndef PRODUCT
+ // Maps an element size in bytes to the SharedRuntime counter of the matching primitive arraycopy.
+ int * get_arraycopy_counter(int bytes_per_count) {
+ int * counter = NULL;
+ switch (bytes_per_count) {
+ case 1: counter = &SharedRuntime::_jbyte_array_copy_ctr; break;
+ case 2: counter = &SharedRuntime::_jshort_array_copy_ctr; break;
+ case 4: counter = &SharedRuntime::_jint_array_copy_ctr; break;
+ case 8: counter = &SharedRuntime::_jlong_array_copy_ctr; break;
+ default:
+ // no counter exists for any other element size
+ ShouldNotReachHere();
+ break;
+ }
+ return counter;
+ }
+#endif // !PRODUCT
+
+ // Generate stub for primitive array copy of 'bytes_per_count'-sized elements; returns the stub start address.
+ // If "aligned" is true, the "from" and "to" addresses are assumed to be heapword aligned.
+ // "name" labels the StubCodeMark; if "status" is true the stub sets R0 to 0 (OK) before each return.
+ //
+ // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
+ // "nooverlap_target" must be specified as the address to jump if they don't.
+ //
+ // Arguments for generated stub:
+ // from: R0
+ // to: R1
+ // count: R2 treated as signed 32-bit int
+ //
+ address generate_primitive_copy(bool aligned, const char * name, bool status, int bytes_per_count, bool disjoint, address nooverlap_target = NULL) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ const Register from = R0; // source array address
+ const Register to = R1; // destination array address
+ const Register count = R2; // elements count
+ const Register tmp1 = R3;
+ const Register tmp2 = R12;
+
+ if (!aligned) {
+ BLOCK_COMMENT("Entry:");
+ }
+
+ __ zap_high_non_significant_bits(R2); // R2 is 'count'
+
+ if (!disjoint) {
+ assert (nooverlap_target != NULL, "must be specified for conjoint case");
+ array_overlap_test(nooverlap_target, exact_log2(bytes_per_count), tmp1, tmp2);
+ }
+ // NOTE(review): get_arraycopy_counter is defined only #ifndef PRODUCT; presumably inc_counter_np expands to nothing in product builds -- confirm
+ inc_counter_np(*get_arraycopy_counter(bytes_per_count), tmp1, tmp2);
+
+ // Conjoint case: since execution reaches this point, the arrays overlap, so performing backward copy
+ // Disjoint case: perform forward copy
+ bool forward = disjoint;
+
+
+ if (!forward) {
+ // Set 'from' and 'to' to upper bounds
+ int log_bytes_per_count = exact_log2(bytes_per_count);
+ __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
+ __ add_ptr_scaled_int32(from, from, count, log_bytes_per_count);
+ }
+
+ // There are two main copy loop implementations:
+ // *) The huge and complex one applicable only for large enough arrays
+ // *) The small and simple one applicable for any array (but not efficient for large arrays).
+ // Currently "small" implementation is used if and only if the "large" one could not be used.
+ // XXX optim: tune the limit higher ?
+ // Large implementation lower applicability bound is actually determined by
+ // aligned copy loop which require <=7 bytes for src alignment, and 8 words for aligned copy loop.
+ const int small_copy_limit = (8*wordSize + 7) / bytes_per_count; // expressed in elements
+
+ Label L_small_array;
+ __ cmp_32(count, small_copy_limit);
+ __ b(L_small_array, le); // TODO-AARCH64: le vs lt
+
+ // Otherwise proceed with large implementation.
+
+ bool from_is_aligned = (bytes_per_count >= 8);
+ if (aligned && forward && (HeapWordSize % 8 == 0)) {
+ // if 'from' is heapword aligned and HeapWordSize is divisible by 8,
+ // then from is aligned by 8
+ from_is_aligned = true;
+ }
+ // align_src emits code only when the alignment of 'from' is not already known at generation time
+ int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
+ assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count");
+
+ // now 'from' is aligned
+
+ bool to_is_aligned = false;
+
+ if (bytes_per_count >= wordSize) {
+ // 'to' is aligned by bytes_per_count, so it is aligned by wordSize
+ to_is_aligned = true;
+ } else {
+ if (aligned && (8 % HeapWordSize == 0) && (HeapWordSize % wordSize == 0)) {
+ // Originally 'from' and 'to' were heapword aligned;
+ // (from - to) has not been changed, so since now 'from' is 8-byte aligned, then it is also heapword aligned,
+ // so 'to' is also heapword aligned and thus aligned by wordSize.
+ to_is_aligned = true;
+ }
+ }
+
+ Label L_unaligned_dst;
+
+ if (!to_is_aligned) { // alignment of 'to' is not known at generation time: emit a run-time check
+ BLOCK_COMMENT("Check dst alignment:");
+ __ tst(to, wordSize - 1);
+ __ b(L_unaligned_dst, ne); // 'to' is not aligned
+ }
+
+ // 'from' and 'to' are properly aligned
+
+ int min_copy;
+ if (forward) {
+ min_copy = generate_forward_aligned_copy_loop (from, to, count, bytes_per_count);
+ } else {
+ min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
+ }
+ assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
+
+ if (status) {
+ __ mov(R0, 0); // OK
+ }
+
+ __ ret();
+
+ { // small-array path, emitted after the return above; entered through the L_small_array label
+ copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */);
+
+ if (status) {
+ __ mov(R0, 0); // OK
+ }
+
+ __ ret();
+ }
+
+ if (! to_is_aligned) { // unaligned-destination path, emitted only if the run-time check above was emitted
+ __ BIND(L_unaligned_dst);
+ int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
+ assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
+
+ if (status) {
+ __ mov(R0, 0); // OK
+ }
+
+ __ ret();
+ }
+
+ return start;
+ }
+
+#if INCLUDE_ALL_GCS
+ // Generate pre-write barrier for array: if the heap's barrier set has a pre-barrier, emits a call
+ // to BarrierSet::static_write_ref_array_pre with addr in R0 and count in R1; otherwise emits nothing.
+ //
+ // Input:
+ // addr - register containing starting address
+ // count - register containing element count, 32-bit int
+ // callee_saved_regs -
+ // the call must preserve this number of registers: R0, R1, ..., R[callee_saved_regs-1]
+ //
+ // callee_saved_regs must include addr and count
+ // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) except for callee_saved_regs.
+ void gen_write_ref_array_pre_barrier(Register addr, Register count, int callee_saved_regs) {
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ if (bs->has_write_ref_pre_barrier()) {
+ assert(bs->has_write_ref_array_pre_opt(),
+ "Else unsupported barrier set.");
+
+ assert( addr->encoding() < callee_saved_regs, "addr must be saved");
+ assert(count->encoding() < callee_saved_regs, "count must be saved");
+
+ BLOCK_COMMENT("PreBarrier");
+
+#ifdef AARCH64
+ callee_saved_regs = round_to(callee_saved_regs, 2); // registers are pushed in pairs below
+ for (int i = 0; i < callee_saved_regs; i += 2) {
+ __ raw_push(as_Register(i), as_Register(i+1));
+ }
+#else
+ RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));
+ __ push(saved_regs | R9ifScratched); // R9ifScratched: presumably R9 only where R9_IS_SCRATCHED -- confirm
+#endif // AARCH64
+ // set up the call arguments: R0 = addr, R1 = count
+ if (addr != R0) {
+ assert_different_registers(count, R0); // R0 is overwritten before 'count' is read
+ __ mov(R0, addr);
+ }
+#ifdef AARCH64
+ __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_pre takes size_t
+#else
+ if (count != R1) {
+ __ mov(R1, count);
+ }
+#endif // AARCH64
+
+ __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
+ // restore the saved registers, mirroring the pushes above
+#ifdef AARCH64
+ for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {
+ __ raw_pop(as_Register(i), as_Register(i+1));
+ }
+#else
+ __ pop(saved_regs | R9ifScratched);
+#endif // AARCH64
+ }
+ }
+#endif // INCLUDE_ALL_GCS
+
+ // Generate post-write barrier for array, selected by the kind of the heap's barrier set:
+ // G1 calls BarrierSet::static_write_ref_array_post, the card table kinds write cards inline, ModRef emits nothing.
+ //
+ // Input:
+ // addr - register containing starting address (can be scratched)
+ // count - register containing element count, 32-bit int (can be scratched)
+ // tmp - scratch register
+ //
+ // Note: LR can be scratched but might be equal to addr, count or tmp
+ // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+ void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) {
+ assert_different_registers(addr, count, tmp);
+ BarrierSet* bs = Universe::heap()->barrier_set();
+
+ switch (bs->kind()) {
+ case BarrierSet::G1SATBCTLogging:
+ {
+ BLOCK_COMMENT("G1PostBarrier");
+ if (addr != R0) {
+ assert_different_registers(count, R0); // R0 is overwritten before 'count' is read
+ __ mov(R0, addr);
+ }
+#ifdef AARCH64
+ __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_post takes size_t
+#else
+ if (count != R1) {
+ __ mov(R1, count);
+ }
+#if R9_IS_SCRATCHED
+ // Safer to save R9 here since callers may have been written
+ // assuming R9 survives. This is suboptimal but is not in
+ // general worth optimizing for the few platforms where R9
+ // is scratched. Note that the optimization might not be too
+ // difficult for this particular call site.
+ __ push(R9);
+#endif
+#endif // !AARCH64
+ __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+ __ pop(R9);
+#endif
+#endif // !AARCH64
+ }
+ break;
+ case BarrierSet::CardTableForRS:
+ case BarrierSet::CardTableExtension:
+ {
+ BLOCK_COMMENT("CardTablePostBarrier");
+ CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+ Label L_cardtable_loop;
+ // from here on 'count' is reused: first as an address, then as a card index difference
+ __ add_ptr_scaled_int32(count, addr, count, LogBytesPerHeapOop);
+ __ sub(count, count, BytesPerHeapOop); // last addr
+ // convert both addresses to card indices
+ __ logical_shift_right(addr, addr, CardTableModRefBS::card_shift);
+ __ logical_shift_right(count, count, CardTableModRefBS::card_shift);
+ __ sub(count, count, addr); // index of last card minus index of first card; the loop below runs this + 1 times
+
+ // warning: Rthread has not been preserved
+ __ mov_address(tmp, (address) ct->byte_map_base, symbolic_Relocation::card_table_reference);
+ __ add(addr,tmp, addr); // addr now points at the card table entry of the first card
+
+ Register zero = __ zero_register(tmp);
+ // stores a zero byte into each covered card table entry (presumably the dirty value -- confirm)
+ __ BIND(L_cardtable_loop);
+ __ strb(zero, Address(addr, 1, post_indexed));
+ __ subs(count, count, 1);
+ __ b(L_cardtable_loop, ge);
+ }
+ break;
+ case BarrierSet::ModRef:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ // Generates pattern of code to be placed after raw data copying in generate_oop_copy:
+ // post-write barrier over the copied region, optional status in R0, then return from the arraycopy stub.
+ //
+ // Arguments:
+ // to: destination pointer after copying.
+ // if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
+ // count: total number of copied elements, 32-bit int
+ // tmp: scratch register passed on to the post barrier; status: if true, R0 is set to 0 (OK) before returning
+ // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
+ void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward) {
+ assert_different_registers(to, count, tmp);
+
+ if (forward) {
+ // 'to' is upper bound of the modified region
+ // restore initial dst:
+ __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);
+ }
+
+ // 'to' is the beginning of the region
+
+ gen_write_ref_array_post_barrier(to, count, tmp);
+
+ if (status) {
+ __ mov(R0, 0); // OK
+ }
+ // the return address is popped from the stack (presumably pushed by the stub prologue in generate_oop_copy -- confirm)
+#ifdef AARCH64
+ __ raw_pop(LR, ZR);
+ __ ret();
+#else
+ __ pop(PC);
+#endif // AARCH64
+ }
+
+
+ // Generate stub for assign-compatible oop copy. If "aligned" is true, the
+ // "from" and "to" addresses are assumed to be heapword aligned.
+ //
+ // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
+ // "nooverlap_target" must be specified as the address to jump if they don't.
+ // If "status" is true, the generated stub sets R0 to 0 before returning.
+ // Arguments for generated stub:
+ // from: R0
+ // to: R1
+ // count: R2 treated as signed 32-bit int
+ // Returns the entry address of the generated stub.
+ address generate_oop_copy(bool aligned, const char * name, bool status, bool disjoint, address nooverlap_target = NULL) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ Register from = R0;
+ Register to = R1;
+ Register count = R2;
+ Register tmp1 = R3;
+ Register tmp2 = R12;
+
+
+ if (!aligned) {
+ BLOCK_COMMENT("Entry:");
+ }
+
+ __ zap_high_non_significant_bits(R2); // R2 is 'count', a 32-bit int
+
+ if (!disjoint) {
+ assert (nooverlap_target != NULL, "must be specified for conjoint case");
+ array_overlap_test(nooverlap_target, LogBytesPerHeapOop, tmp1, tmp2);
+ }
+
+ inc_counter_np(SharedRuntime::_oop_array_copy_ctr, tmp1, tmp2);
+
+ // Conjoint case: since execution reaches this point, the arrays overlap, so performing backward copy
+ // Disjoint case: perform forward copy
+ bool forward = disjoint;
+
+ const int bytes_per_count = BytesPerHeapOop;
+ const int log_bytes_per_count = LogBytesPerHeapOop;
+
+ const Register saved_count = LR;
+ const int callee_saved_regs = 3; // R0-R2; NOTE(review): passed to the pre barrier, presumably the number of registers it must preserve - confirm
+
+ // LR is used later to save barrier args, so the return address is pushed first
+#ifdef AARCH64
+ __ raw_push(LR, ZR);
+#else
+ __ push(LR);
+#endif // AARCH64
+
+#if INCLUDE_ALL_GCS
+ gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
+#endif // INCLUDE_ALL_GCS
+
+ // save arguments for barrier generation (after the pre barrier)
+ __ mov(saved_count, count);
+
+ if (!forward) {
+ __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
+ __ add_ptr_scaled_int32(from, from, count, log_bytes_per_count);
+ }
+
+ // for short arrays, just do single element copy
+ Label L_small_array;
+ const int small_copy_limit = (8*wordSize + 7)/bytes_per_count; // XXX optim: tune the limit higher ?
+ __ cmp_32(count, small_copy_limit);
+ __ b(L_small_array, le);
+
+ bool from_is_aligned = (bytes_per_count >= 8);
+ if (aligned && forward && (HeapWordSize % 8 == 0)) {
+ // if 'from' is heapword aligned and HeapWordSize is divisible by 8,
+ // then from is aligned by 8
+ from_is_aligned = true;
+ }
+
+ int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
+ assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count");
+
+ // now 'from' is aligned
+
+ bool to_is_aligned = false;
+
+ if (bytes_per_count >= wordSize) {
+ // 'to' is aligned by bytes_per_count, so it is aligned by wordSize
+ to_is_aligned = true;
+ } else {
+ if (aligned && (8 % HeapWordSize == 0) && (HeapWordSize % wordSize == 0)) {
+ // Originally 'from' and 'to' were heapword aligned;
+ // (from - to) has not been changed, so since now 'from' is 8-byte aligned, then it is also heapword aligned,
+ // so 'to' is also heapword aligned and thus aligned by wordSize.
+ to_is_aligned = true;
+ }
+ }
+
+ Label L_unaligned_dst;
+
+ if (!to_is_aligned) {
+ BLOCK_COMMENT("Check dst alignment:");
+ __ tst(to, wordSize - 1);
+ __ b(L_unaligned_dst, ne); // 'to' is not aligned
+ }
+
+ int min_copy;
+ if (forward) {
+ min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);
+ } else {
+ min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
+ }
+ assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
+
+ oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward); // post barrier + return of the aligned path
+
+ { // short-array path; NOTE(review): L_small_array is handed to copy_small_array, presumably bound there - confirm
+ copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);
+
+ oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
+ }
+
+ if (!to_is_aligned) {
+ // !to_is_aligned <=> UseCompressedOops && AArch64
+ __ BIND(L_unaligned_dst);
+#ifdef AARCH64
+ assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops");
+#else
+ ShouldNotReachHere(); // plain C++ call, i.e. checked while generating the stub on 32-bit ARM
+#endif // AARCH64
+ int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
+ assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
+
+ oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
+ }
+
+ return start;
+ }
+
+ // Generate 'unsafe' array copy stub
+ // Though just as safe as the other stubs, it takes an unscaled
+ // size_t argument instead of an element count.
+ // On AArch64 the stub is not implemented and NULL is returned instead of an entry address.
+ // Arguments for generated stub:
+ // from: R0
+ // to: R1
+ // count: R2 byte count, treated as ssize_t, can be zero
+ //
+ // Examines the alignment of the operands and dispatches
+ // to a long, int, short, or byte copy loop.
+ //
+ address generate_unsafe_copy(const char* name) {
+
+ const Register R0_from = R0; // source array address
+ const Register R1_to = R1; // destination array address
+ const Register R2_count = R2; // byte count on entry; scaled to an element count before each jump
+
+ const Register R3_bits = R3; // test copy of low bits
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+#ifdef AARCH64
+ __ NOT_IMPLEMENTED();
+ start = NULL;
+#else
+ const Register tmp = Rtemp;
+
+ // bump this on entry, not on exit:
+ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, R3, tmp);
+
+ __ orr(R3_bits, R0_from, R1_to); // merge from, to and count so that one test covers all three
+ __ orr(R3_bits, R2_count, R3_bits);
+
+ __ tst(R3_bits, BytesPerLong-1); // eq <=> all three are multiples of BytesPerLong
+ __ mov(R2_count,AsmOperand(R2_count,asr,LogBytesPerLong), eq); // conditional (eq): bytes -> jlong count
+ __ jump(StubRoutines::_jlong_arraycopy, relocInfo::runtime_call_type, tmp, eq);
+
+ __ tst(R3_bits, BytesPerInt-1);
+ __ mov(R2_count,AsmOperand(R2_count,asr,LogBytesPerInt), eq); // conditional (eq): bytes -> jint count
+ __ jump(StubRoutines::_jint_arraycopy, relocInfo::runtime_call_type, tmp, eq);
+
+ __ tst(R3_bits, BytesPerShort-1);
+ __ mov(R2_count,AsmOperand(R2_count,asr,LogBytesPerShort), eq); // conditional (eq): bytes -> jshort count
+ __ jump(StubRoutines::_jshort_arraycopy, relocInfo::runtime_call_type, tmp, eq);
+
+ __ jump(StubRoutines::_jbyte_arraycopy, relocInfo::runtime_call_type, tmp); // fallback: byte copy, count left unscaled
+#endif
+ return start;
+ }
+
+ // Helper for generating a dynamic type check: emitted code branches to L_success on a hit and falls through on failure.
+ // Smashes only the given temp registers (a hit in the secondary supers scan also stores super_klass at sc_offset in sub_klass).
+ void generate_type_check(Register sub_klass,
+ Register super_check_offset,
+ Register super_klass,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3,
+ Label& L_success) {
+ assert_different_registers(sub_klass, super_check_offset, super_klass, tmp1, tmp2, tmp3);
+
+ BLOCK_COMMENT("type_check:");
+
+ // If the pointers are equal, we are done (e.g., String[] elements).
+
+ __ cmp(super_klass, sub_klass);
+ __ b(L_success, eq); // fast success
+
+
+ Label L_loop, L_fail;
+
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+
+ // Check the supertype display:
+ __ ldr(tmp1, Address(sub_klass, super_check_offset));
+ __ cmp(tmp1, super_klass);
+ __ b(L_success, eq);
+
+ __ cmp(super_check_offset, sc_offset); // a miss is final unless the checked slot was the secondary super cache
+ __ b(L_fail, ne); // failure
+
+ BLOCK_COMMENT("type_check_slow_path:");
+
+ // a couple of useful fields in sub_klass:
+ int ss_offset = in_bytes(Klass::secondary_supers_offset());
+
+ // Do a linear scan of the secondary super-klass chain.
+
+#ifndef PRODUCT
+ int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+ __ inc_counter((address) pst_counter, tmp1, tmp2);
+#endif
+
+ Register scan_temp = tmp1;
+ Register count_temp = tmp2;
+
+ // We will consult the secondary-super array.
+ __ ldr(scan_temp, Address(sub_klass, ss_offset));
+
+ Register search_key = super_klass;
+
+ // Load the array length.
+ __ ldr_s32(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
+ __ add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
+
+ __ add(count_temp, count_temp, 1); // bias by one: the loop decrements before it tests
+
+ // Top of search loop
+ __ bind(L_loop);
+ // Notes:
+ // scan_temp starts at the array elements
+ // count_temp is 1+size
+
+ __ subs(count_temp, count_temp, 1);
+ __ b(L_fail, eq); // not found
+
+ // Load next super to check
+ // In the array of super classes elements are pointer sized.
+ int element_size = wordSize;
+ __ ldr(tmp3, Address(scan_temp, element_size, post_indexed));
+
+ // Look for super_klass (search_key) in sub_klass's secondary supers array
+ __ cmp(tmp3, search_key);
+
+ // A miss means we are NOT a subtype and need to keep looping
+ __ b(L_loop, ne);
+
+ // Falling out the bottom means we found a hit; we ARE a subtype
+
+ // Success. Cache the super we found and proceed in triumph.
+ __ str(super_klass, Address(sub_klass, sc_offset));
+
+ // Jump to success
+ __ b(L_success);
+
+ // Fall through on failure!
+ __ bind(L_fail);
+ }
+
+ // Generate stub for checked oop copy: each non-NULL element is type checked before it is stored.
+ //
+ // Arguments for generated stub:
+ // from: R0
+ // to: R1
+ // count: R2 treated as signed 32-bit int
+ // ckoff: R3 (super_check_offset)
+ // ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass)
+ // ret: R0 zero for success; (-1^K) where K is partial transfer count (32-bit)
+ // Returns the entry address of the generated stub.
+ address generate_checkcast_copy(const char * name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ const Register from = R0; // source array address
+ const Register to = R1; // destination array address
+ const Register count = R2; // elements count
+
+ const Register R3_ckoff = R3; // super_check_offset
+ const Register R4_ckval = R4; // super_klass
+
+ const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently; NOTE(review): passed to the pre barrier, presumably a register count - confirm
+
+ Label load_element, store_element, do_card_marks, fail;
+
+ BLOCK_COMMENT("Entry:");
+
+ __ zap_high_non_significant_bits(R2);
+
+#ifdef AARCH64
+ __ raw_push(LR, ZR);
+ __ raw_push(R19, R20);
+#else
+ int pushed = 0;
+ __ push(LR);
+ pushed+=1;
+#endif // AARCH64
+
+#if INCLUDE_ALL_GCS
+ gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
+#endif // INCLUDE_ALL_GCS
+
+#ifndef AARCH64
+ const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11; // NOTE(review): despite the name, these are pushed here and popped before every return
+ __ push(caller_saved_regs);
+ assert(caller_saved_regs.size() == 6, "check the count");
+ pushed+=6;
+
+ __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // ckval was at SP[0] on entry; it now sits above the 'pushed' saved words
+#endif // !AARCH64
+
+ // Save arguments for barrier generation (after the pre barrier):
+ // - must be a register this stub has saved above (R19 / altFP_7_11) and not LR
+ // - ARM32: avoid R10 in case RThread is needed
+ const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11);
+#ifdef AARCH64
+ __ mov_w(saved_count, count);
+ __ cbnz_w(count, load_element); // and test count
+#else
+ __ movs(saved_count, count); // and test count
+ __ b(load_element,ne);
+#endif // AARCH64
+
+ // nothing to copy
+ __ mov(R0, 0);
+
+#ifdef AARCH64
+ __ raw_pop(R19, R20);
+ __ raw_pop(LR, ZR);
+ __ ret();
+#else
+ __ pop(caller_saved_regs);
+ __ pop(PC);
+#endif // AARCH64
+
+ // ======== begin loop ========
+ // (Loop is rotated; its entry is load_element.)
+ __ align(OptoLoopAlignment);
+ __ BIND(store_element);
+ if (UseCompressedOops) {
+ __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop, changes flags
+ __ subs_32(count,count,1);
+ } else {
+ __ subs_32(count,count,1);
+ __ str(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop
+ }
+ __ b(do_card_marks, eq); // count exhausted
+
+ // ======== loop entry is here ========
+ __ BIND(load_element);
+ __ load_heap_oop(R5, Address(from, BytesPerHeapOop, post_indexed)); // load the oop
+ __ cbz(R5, store_element); // NULL
+
+ __ load_klass(R6, R5);
+
+ generate_type_check(R6, R3_ckoff, R4_ckval, /*tmps*/ R12, R8, R9,
+ // branch to this on success:
+ store_element);
+ // ======== end loop ========
+
+ // It was a real error; we must depend on the caller to finish the job.
+ // Register count has number of *remaining* oops, saved_count number of *total* oops.
+ // Emit GC store barriers for the oops we have copied
+ // and report their number to the caller (0 or (-1^n))
+ __ BIND(fail);
+
+ // Note: fail marked by the fact that count differs from saved_count
+
+ __ BIND(do_card_marks);
+
+ Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved on entry; on 32-bit ARM this reuses R4 (R4_ckval), which is not read after the loop
+ Label L_not_copied;
+
+ __ subs_32(copied, saved_count, count); // copied count (in saved reg)
+ __ b(L_not_copied, eq); // nothing was copied, skip post barrier
+ __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value
+ __ mov(R12, copied); // count arg scratched by post barrier
+
+ gen_write_ref_array_post_barrier(to, R12, R3);
+
+ assert_different_registers(R3,R12,LR,copied,saved_count);
+ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R3, R12);
+
+ __ BIND(L_not_copied);
+ __ cmp_32(copied, saved_count); // values preserved in saved registers
+
+#ifdef AARCH64
+ __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied)
+ __ raw_pop(R19, R20);
+ __ raw_pop(LR, ZR);
+ __ ret();
+#else
+ __ mov(R0, 0, eq); // 0 if all copied
+ __ mvn(R0, copied, ne); // else NOT(copied)
+ __ pop(caller_saved_regs);
+ __ pop(PC);
+#endif // AARCH64
+
+ return start;
+ }
+
+ // Perform range checks on the proposed arraycopy; emitted code branches to L_failed if either range check fails.
+ // Kills the two temps, but nothing else.
+ void arraycopy_range_checks(Register src, // source array oop
+ Register src_pos, // source position (32-bit int)
+ Register dst, // destination array oop
+ Register dst_pos, // destination position (32-bit int)
+ Register length, // length of copy (32-bit int)
+ Register temp1, Register temp2,
+ Label& L_failed) {
+
+ BLOCK_COMMENT("arraycopy_range_checks:");
+
+ // if (src_pos + length > arrayOop(src)->length() ) FAIL;
+
+ const Register array_length = temp1; // scratch
+ const Register end_pos = temp2; // scratch
+
+ __ add_32(end_pos, length, src_pos); // src_pos + length
+ __ ldr_s32(array_length, Address(src, arrayOopDesc::length_offset_in_bytes()));
+ __ cmp_32(end_pos, array_length);
+ __ b(L_failed, hi); // 'hi' is an unsigned compare; NOTE(review): presumably so that a wrapped-around sum also fails - confirm
+
+ // if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
+ __ add_32(end_pos, length, dst_pos); // dst_pos + length
+ __ ldr_s32(array_length, Address(dst, arrayOopDesc::length_offset_in_bytes()));
+ __ cmp_32(end_pos, array_length);
+ __ b(L_failed, hi); // same unsigned test for the destination
+
+ BLOCK_COMMENT("arraycopy_range_checks done");
+ }
+
+ //
+ // Generate generic array copy stubs
+ // The stub validates its arguments and then tail-jumps to the matching typed arraycopy stub.
+ // Input:
+ // R0 - src oop
+ // R1 - src_pos (32-bit int)
+ // R2 - dst oop
+ // R3 - dst_pos (32-bit int)
+ // R4 (AArch64) / SP[0] (32-bit ARM) - element count (32-bit int)
+ //
+ // Output: (32-bit int)
+ // R0 == 0 - success
+ // R0 < 0 - need to call System.arraycopy
+ //
+ address generate_generic_copy(const char *name) {
+ Label L_failed, L_objArray;
+
+ // Input registers
+ const Register src = R0; // source array oop
+ const Register src_pos = R1; // source position
+ const Register dst = R2; // destination array oop
+ const Register dst_pos = R3; // destination position
+
+ // registers used as temp
+ const Register R5_src_klass = R5; // source array klass
+ const Register R6_dst_klass = R6; // destination array klass
+ const Register R_lh = AARCH64_ONLY(R7) NOT_AARCH64(altFP_7_11); // layout helper
+ const Register R8_temp = R8;
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ __ zap_high_non_significant_bits(R1);
+ __ zap_high_non_significant_bits(R3);
+ __ zap_high_non_significant_bits(R4);
+
+#ifndef AARCH64
+ int pushed = 0;
+ const RegisterSet saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;
+ __ push(saved_regs);
+ assert(saved_regs.size() == 6, "check the count");
+ pushed+=6;
+#endif // !AARCH64
+
+ // bump this on entry, not on exit:
+ inc_counter_np(SharedRuntime::_generic_array_copy_ctr, R5, R12);
+
+ const Register length = R4; // elements count
+#ifndef AARCH64
+ __ ldr(length, Address(SP,4*pushed)); // count was at SP[0] on entry; it now sits above the 'pushed' saved words
+#endif // !AARCH64
+
+
+ //-----------------------------------------------------------------------
+ // Assembler stubs will be used for this call to arraycopy
+ // if the following conditions are met:
+ //
+ // (1) src and dst must not be null.
+ // (2) src_pos must not be negative.
+ // (3) dst_pos must not be negative.
+ // (4) length must not be negative.
+ // (5) src klass and dst klass should be the same and not NULL.
+ // (6) src and dst should be arrays.
+ // (7) src_pos + length must not exceed length of src.
+ // (8) dst_pos + length must not exceed length of dst.
+ BLOCK_COMMENT("arraycopy initial argument checks");
+
+ // if (src == NULL) return -1;
+ __ cbz(src, L_failed);
+
+ // if (src_pos < 0) return -1;
+ __ cmp_32(src_pos, 0);
+ __ b(L_failed, lt);
+
+ // if (dst == NULL) return -1;
+ __ cbz(dst, L_failed);
+
+ // if (dst_pos < 0) return -1;
+ __ cmp_32(dst_pos, 0);
+ __ b(L_failed, lt);
+
+ // if (length < 0) return -1;
+ __ cmp_32(length, 0);
+ __ b(L_failed, lt);
+
+ BLOCK_COMMENT("arraycopy argument klass checks");
+ // get src->klass()
+ __ load_klass(R5_src_klass, src);
+
+ // Load layout helper
+ //
+ // |array_tag| | header_size | element_type | |log2_element_size|
+ // 32 30 24 16 8 2 0
+ //
+ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
+ //
+
+ int lh_offset = in_bytes(Klass::layout_helper_offset());
+ __ ldr_u32(R_lh, Address(R5_src_klass, lh_offset));
+
+ __ load_klass(R6_dst_klass, dst);
+
+ // Handle objArrays completely differently...
+ juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+ __ mov_slow(R8_temp, objArray_lh);
+ __ cmp_32(R_lh, R8_temp);
+ __ b(L_objArray,eq);
+
+ // if (src->klass() != dst->klass()) return -1;
+ __ cmp(R5_src_klass, R6_dst_klass);
+ __ b(L_failed, ne);
+
+ // if (!src->is_Array()) return -1;
+ __ cmp_32(R_lh, Klass::_lh_neutral_value); // < 0
+ __ b(L_failed, ge);
+
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
+ R8_temp, R6_dst_klass, L_failed);
+
+ {
+ // TypeArrayKlass
+ //
+ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
+ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
+ //
+
+ const Register R6_offset = R6_dst_klass; // array offset
+ const Register R12_elsize = R12; // log2 element size
+
+ __ logical_shift_right(R6_offset, R_lh, Klass::_lh_header_size_shift);
+ __ andr(R6_offset, R6_offset, (unsigned int)Klass::_lh_header_size_mask); // array_offset
+ __ add(src, src, R6_offset); // src array offset
+ __ add(dst, dst, R6_offset); // dst array offset
+ __ andr(R12_elsize, R_lh, (unsigned int)Klass::_lh_log2_element_size_mask); // log2 element size
+
+ // next registers should be set before the jump to corresponding stub
+ const Register from = R0; // source array address
+ const Register to = R1; // destination array address
+ const Register count = R2; // elements count
+
+ // 'from', 'to', 'count' registers should be set in this order
+ // since they are the same as 'src', 'src_pos', 'dst'.
+
+#ifdef AARCH64
+
+ BLOCK_COMMENT("choose copy loop based on element size and scale indexes");
+ Label Lbyte, Lshort, Lint, Llong;
+
+ __ cbz(R12_elsize, Lbyte);
+
+ assert (LogBytesPerShort < LogBytesPerInt && LogBytesPerInt < LogBytesPerLong, "must be");
+ __ cmp(R12_elsize, LogBytesPerInt);
+ __ b(Lint, eq);
+ __ b(Llong, gt);
+
+ __ BIND(Lshort); // reached by fall-through only: element size is neither byte, int nor long
+ __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerShort);
+ __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerShort);
+ __ mov(count, length);
+ __ b(StubRoutines::_jshort_arraycopy);
+
+ __ BIND(Lint);
+ __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerInt);
+ __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerInt);
+ __ mov(count, length);
+ __ b(StubRoutines::_jint_arraycopy);
+
+ __ BIND(Lbyte);
+ __ add_ptr_scaled_int32(from, src, src_pos, 0);
+ __ add_ptr_scaled_int32(to, dst, dst_pos, 0);
+ __ mov(count, length);
+ __ b(StubRoutines::_jbyte_arraycopy);
+
+ __ BIND(Llong);
+ __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerLong);
+ __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerLong);
+ __ mov(count, length);
+ __ b(StubRoutines::_jlong_arraycopy);
+
+#else // AARCH64
+
+ BLOCK_COMMENT("scale indexes to element size");
+ __ add(from, src, AsmOperand(src_pos, lsl, R12_elsize)); // src_addr
+ __ add(to, dst, AsmOperand(dst_pos, lsl, R12_elsize)); // dst_addr
+
+ __ mov(count, length); // length
+
+ // XXX optim: avoid later push in arraycopy variants ?
+
+ __ pop(saved_regs); // R12_elsize is not a member of saved_regs, so it is still valid for the compares below
+
+ BLOCK_COMMENT("choose copy loop based on element size");
+ __ cmp(R12_elsize, 0);
+ __ b(StubRoutines::_jbyte_arraycopy,eq);
+
+ __ cmp(R12_elsize, LogBytesPerShort);
+ __ b(StubRoutines::_jshort_arraycopy,eq);
+
+ __ cmp(R12_elsize, LogBytesPerInt);
+ __ b(StubRoutines::_jint_arraycopy,eq);
+
+ __ b(StubRoutines::_jlong_arraycopy);
+
+#endif // AARCH64
+ }
+
+ // ObjArrayKlass
+ __ BIND(L_objArray);
+ // live at this point: R5_src_klass, R6_dst_klass, src[_pos], dst[_pos], length
+
+ Label L_plain_copy, L_checkcast_copy;
+ // test array classes for subtyping
+ __ cmp(R5_src_klass, R6_dst_klass); // usual case is exact equality
+ __ b(L_checkcast_copy, ne);
+
+ BLOCK_COMMENT("Identically typed arrays");
+ {
+ // Identically typed arrays can be copied without element-wise checks.
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
+ R8_temp, R_lh, L_failed);
+
+ // next registers should be set before the jump to corresponding stub
+ const Register from = R0; // source array address
+ const Register to = R1; // destination array address
+ const Register count = R2; // elements count
+
+ __ add(src, src, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //src offset
+ __ add(dst, dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //dst offset
+ __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerHeapOop); // src_addr
+ __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerHeapOop); // dst_addr
+ __ BIND(L_plain_copy); // also the success target of the type check in the checkcast path, where from/to are already set
+ __ mov(count, length);
+
+#ifndef AARCH64
+ __ pop(saved_regs); // XXX optim: avoid later push in oop_arraycopy ?
+#endif // !AARCH64
+ __ b(StubRoutines::_oop_arraycopy);
+ }
+
+ {
+ __ BIND(L_checkcast_copy);
+ // live at this point: R5_src_klass, R6_dst_klass
+
+ // Before looking at dst.length, make sure dst is also an objArray.
+ __ ldr_u32(R8_temp, Address(R6_dst_klass, lh_offset));
+ __ cmp_32(R_lh, R8_temp);
+ __ b(L_failed, ne);
+
+ // It is safe to examine both src.length and dst.length.
+
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
+ R8_temp, R_lh, L_failed);
+
+ // next registers should be set before the jump to corresponding stub
+ const Register from = R0; // source array address
+ const Register to = R1; // destination array address
+ const Register count = R2; // elements count
+
+ // Marshal the base address arguments now, freeing registers.
+ __ add(src, src, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //src offset
+ __ add(dst, dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //dst offset
+ __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerHeapOop); // src_addr
+ __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerHeapOop); // dst_addr
+
+ __ mov(count, length); // length (reloaded)
+
+ Register sco_temp = R3; // this register is free now
+ assert_different_registers(from, to, count, sco_temp,
+ R6_dst_klass, R5_src_klass);
+
+ // Generate the type check.
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
+ __ ldr_u32(sco_temp, Address(R6_dst_klass, sco_offset));
+ generate_type_check(R5_src_klass, sco_temp, R6_dst_klass,
+ R8_temp, R9,
+ AARCH64_ONLY(R10) NOT_AARCH64(R12),
+ L_plain_copy);
+
+ // Fetch destination element klass from the ObjArrayKlass header.
+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
+
+ // the checkcast_copy loop needs two extra arguments:
+ const Register Rdst_elem_klass = AARCH64_ONLY(R4) NOT_AARCH64(R3);
+ __ ldr(Rdst_elem_klass, Address(R6_dst_klass, ek_offset)); // dest elem klass
+#ifndef AARCH64
+ __ pop(saved_regs); // XXX optim: avoid later push in checkcast_arraycopy ?
+ __ str(Rdst_elem_klass, Address(SP,0)); // dest elem klass argument
+#endif // !AARCH64
+ __ ldr_u32(R3, Address(Rdst_elem_klass, sco_offset)); // sco of elem klass
+ __ b(StubRoutines::_checkcast_arraycopy);
+ }
+
+ __ BIND(L_failed);
+
+#ifndef AARCH64
+ __ pop(saved_regs);
+#endif // !AARCH64
+ __ mvn(R0, 0); // failure, with 0 copied
+ __ ret();
+
+ return start;
+ }
+
+ // Safefetch stubs. Emits a load of *adr of the given size (4 or 8 bytes) and reports the stub entry, the pc of the load that may fault, and the continuation pc through the three out parameters.
+ void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
+ // safefetch signatures:
+ // int SafeFetch32(int* adr, int errValue);
+ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
+ //
+ // arguments:
+ // R0 = adr
+ // R1 = errValue
+ //
+ // result:
+ // R0 = *adr or errValue
+
+ StubCodeMark mark(this, "StubRoutines", name);
+
+ // Entry point, pc or function descriptor.
+ *entry = __ pc();
+
+ // Load *adr into R1 (overwriting errValue), may fault.
+ *fault_pc = __ pc();
+
+ switch (size) {
+ case 4: // int32_t
+ __ ldr_s32(R1, Address(R0));
+ break;
+
+ case 8: // int64_t
+#ifdef AARCH64
+ __ ldr(R1, Address(R0));
+#else
+ Unimplemented(); // no 8-byte variant is generated on 32-bit ARM
+#endif // AARCH64
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ // return errValue or *adr; NOTE(review): presumably a fault at fault_pc is resumed here with R1 still holding errValue - confirm
+ *continuation_pc = __ pc();
+ __ mov(R0, R1);
+ __ ret();
+ }
+
+ void generate_arraycopy_stubs() { // generates every arraycopy stub and records its entry point in StubRoutines
+
+ // Note: the disjoint stubs must be generated first, some of
+ // the conjoint stubs use them.
+
+ bool status = false; // non failing C2 stubs need not return a status in R0
+
+#ifdef TEST_C2_GENERIC_ARRAYCOPY /* Internal development flag */
+ // With this flag, the C2 stubs are tested by generating calls to
+ // generic_arraycopy instead of Runtime1::arraycopy
+
+ // Runtime1::arraycopy returns a status in R0 (0 if OK, else ~copied)
+ // and the result is tested to see whether the arraycopy stub should
+ // be called.
+
+ // When we test arraycopy this way, we must generate extra code in the
+ // arraycopy methods callable from C2 generic_arraycopy to set the
+ // status to 0 for those that always succeed (calling the slow path stub might
+ // lead to errors since the copy has already been performed).
+
+ status = true; // generate a status compatible with C1 calls
+#endif
+
+ // these always need a status in case they are called from generic_arraycopy
+ StubRoutines::_jbyte_disjoint_arraycopy = generate_primitive_copy(false, "jbyte_disjoint_arraycopy", true, 1, true);
+ StubRoutines::_jshort_disjoint_arraycopy = generate_primitive_copy(false, "jshort_disjoint_arraycopy", true, 2, true);
+ StubRoutines::_jint_disjoint_arraycopy = generate_primitive_copy(false, "jint_disjoint_arraycopy", true, 4, true);
+ StubRoutines::_jlong_disjoint_arraycopy = generate_primitive_copy(false, "jlong_disjoint_arraycopy", true, 8, true);
+ StubRoutines::_oop_disjoint_arraycopy = generate_oop_copy (false, "oop_disjoint_arraycopy", true, true);
+
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_primitive_copy(true, "arrayof_jbyte_disjoint_arraycopy", status, 1, true);
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_primitive_copy(true, "arrayof_jshort_disjoint_arraycopy",status, 2, true);
+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_primitive_copy(true, "arrayof_jint_disjoint_arraycopy", status, 4, true);
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_primitive_copy(true, "arrayof_jlong_disjoint_arraycopy", status, 8, true);
+ StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_oop_copy (true, "arrayof_oop_disjoint_arraycopy", status, true);
+
+ // these always need a status in case they are called from generic_arraycopy
+ StubRoutines::_jbyte_arraycopy = generate_primitive_copy(false, "jbyte_arraycopy", true, 1, false, StubRoutines::_jbyte_disjoint_arraycopy);
+ StubRoutines::_jshort_arraycopy = generate_primitive_copy(false, "jshort_arraycopy", true, 2, false, StubRoutines::_jshort_disjoint_arraycopy);
+ StubRoutines::_jint_arraycopy = generate_primitive_copy(false, "jint_arraycopy", true, 4, false, StubRoutines::_jint_disjoint_arraycopy);
+ StubRoutines::_jlong_arraycopy = generate_primitive_copy(false, "jlong_arraycopy", true, 8, false, StubRoutines::_jlong_disjoint_arraycopy);
+ StubRoutines::_oop_arraycopy = generate_oop_copy (false, "oop_arraycopy", true, false, StubRoutines::_oop_disjoint_arraycopy);
+
+ StubRoutines::_arrayof_jbyte_arraycopy = generate_primitive_copy(true, "arrayof_jbyte_arraycopy", status, 1, false, StubRoutines::_arrayof_jbyte_disjoint_arraycopy);
+ StubRoutines::_arrayof_jshort_arraycopy = generate_primitive_copy(true, "arrayof_jshort_arraycopy", status, 2, false, StubRoutines::_arrayof_jshort_disjoint_arraycopy);
+#ifdef _LP64
+ // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
+ StubRoutines::_arrayof_jint_arraycopy = generate_primitive_copy(true, "arrayof_jint_arraycopy", status, 4, false, StubRoutines::_arrayof_jint_disjoint_arraycopy);
+#else
+ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
+#endif
+ if (BytesPerHeapOop < HeapWordSize) {
+ StubRoutines::_arrayof_oop_arraycopy = generate_oop_copy (true, "arrayof_oop_arraycopy", status, false, StubRoutines::_arrayof_oop_disjoint_arraycopy);
+ } else {
+ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
+ }
+ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; // no separate arrayof flavor: reuses the jlong conjoint stub
+
+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
+ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
+
+
+ }
+
+#ifndef AARCH64
+#define COMPILE_CRYPTO
+#include "stubRoutinesCrypto_arm.cpp"
+#else
+
+#ifdef COMPILER2
+ // Arguments:
+ // (stub that encrypts one 16-byte block with the AArch64 aese/aesmc instructions)
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // Output: the 16-byte result is stored at c_rarg1 and R0 is set to 0. Returns the stub entry address.
+ address generate_aescrypt_encryptBlock() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+
+ Label L_doLast;
+
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register keylen = R8;
+
+ address start = __ pc();
+ __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed));
+ __ mov(FP, SP);
+
+ __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // key points at the int[] elements; step back to the array's length field
+
+ __ vld1(V0, Address(from), MacroAssembler::VELEM_SIZE_8, 128); // get 16 bytes of input
+
+ __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); // next 64 bytes of key material, key advances
+
+ int quad = 1;
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad);
+ __ aese(V0, V1);
+ __ aesmc(V0, V0);
+ __ aese(V0, V2);
+ __ aesmc(V0, V0);
+ __ aese(V0, V3);
+ __ aesmc(V0, V0);
+ __ aese(V0, V4);
+ __ aesmc(V0, V0);
+
+ __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad);
+ __ aese(V0, V1);
+ __ aesmc(V0, V0);
+ __ aese(V0, V2);
+ __ aesmc(V0, V0);
+ __ aese(V0, V3);
+ __ aesmc(V0, V0);
+ __ aese(V0, V4);
+ __ aesmc(V0, V0);
+
+ __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+
+ __ cmp_w(keylen, 44); // NOTE(review): 44 ints presumably is the expanded AES-128 key - confirm
+ __ b(L_doLast, eq);
+
+ __ aese(V0, V1);
+ __ aesmc(V0, V0);
+ __ aese(V0, V2);
+ __ aesmc(V0, V0);
+
+ __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+
+ __ cmp_w(keylen, 52); // NOTE(review): 52 ints presumably is the expanded AES-192 key - confirm
+ __ b(L_doLast, eq);
+
+ __ aese(V0, V1);
+ __ aesmc(V0, V0);
+ __ aese(V0, V2);
+ __ aesmc(V0, V0);
+
+ __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+
+ __ BIND(L_doLast);
+
+ __ aese(V0, V1);
+ __ aesmc(V0, V0);
+ __ aese(V0, V2); // final aese is not followed by aesmc
+
+ __ vld1(V1, Address(key), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ eor(V0, V0, V1, MacroAssembler::VELEM_SIZE_8, quad); // xor in the last 16 bytes of key material
+
+ __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128); // write the 16-byte result
+
+ __ mov(R0, 0);
+
+ __ mov(SP, FP);
+ __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed));
+ __ ret(LR);
+
+ return start;
+ }
+
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ //
+ address generate_aescrypt_decryptBlock() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+ Label L_doLast;
+ // Emits a stub that decrypts one 16-byte block with the aesd/aesimc instructions.
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register keylen = R8; // scratch: length of the key int array
+ // Build a frame: push FP/LR and point FP at the saved pair.
+ address start = __ pc();
+ __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed));
+ __ mov(FP, SP);
+ // Read the int[] length; presumably key points at element 0, hence the negative offset.
+ __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+ __ vld1(V0, Address(from), MacroAssembler::VELEM_SIZE_8, 128); // get 16 bytes of input
+ // The first 16 bytes of the key are kept in V5 and only applied by the final eor.
+ __ vld1(V5, Address(key, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ // rev32 on byte elements swaps the bytes of every 32-bit key word.
+ int quad = 1;
+ __ rev32(V5, V5, MacroAssembler::VELEM_SIZE_8, quad);
+ // Rounds 1-4: next four round keys (key advances by 64 bytes).
+ __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad);
+ __ aesd(V0, V1);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V2);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V3);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V4);
+ __ aesimc(V0, V0);
+ // Rounds 5-8: next four round keys.
+ __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad);
+ __ aesd(V0, V1);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V2);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V3);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V4);
+ __ aesimc(V0, V0);
+ // Load two more round keys.
+ __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+ // A 44-int key (11 round keys) needs only the two final rounds from here.
+ __ cmp_w(keylen, 44);
+ __ b(L_doLast, eq);
+
+ __ aesd(V0, V1);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V2);
+ __ aesimc(V0, V0);
+ // Load two more round keys.
+ __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+ // A 52-int key (13 round keys) needs only the two final rounds from here.
+ __ cmp_w(keylen, 52);
+ __ b(L_doLast, eq);
+
+ __ aesd(V0, V1);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V2);
+ __ aesimc(V0, V0);
+ // Any longer key: load the key pair used by the two final rounds.
+ __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
+
+ __ BIND(L_doLast);
+ // Final two rounds: the last aesd is not followed by aesimc.
+ __ aesd(V0, V1);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V2);
+ // XOR with the key block that was loaded first (V5).
+ __ eor(V0, V0, V5, MacroAssembler::VELEM_SIZE_8, quad);
+ // Store the 16-byte result to the destination.
+ __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128);
+ // The stub returns 0 in R0.
+ __ mov(R0, 0);
+ // Tear down the frame and return to the caller.
+ __ mov(SP, FP);
+ __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed));
+ __ ret(LR);
+
+
+ return start;
+ }
+
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ //
+ // Output:
+ // x0 - input length
+ //
+ address generate_cipherBlockChaining_encryptAESCrypt() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+ // Emits a CBC-mode AES encryption stub that consumes len_reg bytes, 16 per loop iteration.
+ Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
+
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register rvec = c_rarg3; // r byte array initialized from initvector array address
+ // and left with the results of the last encryption block
+ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
+ const Register keylen = R8;
+ // Build a frame: push FP/LR and point FP at the saved pair.
+ address start = __ pc();
+ __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed));
+ __ mov(FP, SP);
+ // Keep the original length in R9; it is copied to R0 before returning.
+ __ mov(R9, len_reg);
+ __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ // V0 carries the chaining value, initially loaded from rvec.
+ __ vld1(V0, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128);
+ // Pick how many round keys to load; the flags set here are tested again inside the loop.
+ __ cmp_w(keylen, 52);
+ __ b(L_loadkeys_44, cc);
+ __ b(L_loadkeys_52, eq);
+ // keylen above 52: two extra round keys in V17/V18.
+ __ vld1(V17, V18, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+
+ int quad = 1;
+ __ rev32(V17, V17, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V18, V18, MacroAssembler::VELEM_SIZE_8, quad);
+ __ BIND(L_loadkeys_52);
+ __ vld1(V19, V20, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V19, V19, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V20, V20, MacroAssembler::VELEM_SIZE_8, quad);
+ __ BIND(L_loadkeys_44);
+ __ vld1(V21, V22, V23, V24, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V21, V21, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V22, V22, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V23, V23, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V24, V24, MacroAssembler::VELEM_SIZE_8, quad);
+ __ vld1(V25, V26, V27, V28, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V25, V25, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V26, V26, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V27, V27, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V28, V28, MacroAssembler::VELEM_SIZE_8, quad);
+ __ vld1(V29, V30, V31, Address(key), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V29, V29, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V30, V30, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V31, V31, MacroAssembler::VELEM_SIZE_8, quad);
+ // Per block: XOR the next 16 input bytes into the chaining value, then run the rounds.
+ __ BIND(L_aes_loop);
+ __ vld1(V1, Address(from, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ eor(V0, V0, V1, MacroAssembler::VELEM_SIZE_8, quad);
+ // NOTE(review): uses the flags of cmp_w(keylen, 52) above; sub/cbnz below presumably leave them intact.
+ __ b(L_rounds_44, cc);
+ __ b(L_rounds_52, eq);
+
+ __ aese(V0, V17);
+ __ aesmc(V0, V0);
+ __ aese(V0, V18);
+ __ aesmc(V0, V0);
+ __ BIND(L_rounds_52);
+ __ aese(V0, V19);
+ __ aesmc(V0, V0);
+ __ aese(V0, V20);
+ __ aesmc(V0, V0);
+ __ BIND(L_rounds_44);
+ __ aese(V0, V21);
+ __ aesmc(V0, V0);
+ __ aese(V0, V22);
+ __ aesmc(V0, V0);
+ __ aese(V0, V23);
+ __ aesmc(V0, V0);
+ __ aese(V0, V24);
+ __ aesmc(V0, V0);
+ __ aese(V0, V25);
+ __ aesmc(V0, V0);
+ __ aese(V0, V26);
+ __ aesmc(V0, V0);
+ __ aese(V0, V27);
+ __ aesmc(V0, V0);
+ __ aese(V0, V28);
+ __ aesmc(V0, V0);
+ __ aese(V0, V29);
+ __ aesmc(V0, V0);
+ __ aese(V0, V30);
+ __ eor(V0, V0, V31, MacroAssembler::VELEM_SIZE_8, quad);
+ // Store the output block; V0 stays live as the chaining value for the next block.
+ __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ sub(len_reg, len_reg, 16);
+ __ cbnz(len_reg, L_aes_loop);
+ // Write the last output block back to rvec.
+ __ vst1(V0, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128);
+ // Return the original input length in R0.
+ __ mov(R0, R9);
+ // Tear down the frame and return to the caller.
+ __ mov(SP, FP);
+ __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed));
+ __ ret(LR);
+
+ return start;
+ }
+
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ //
+ // Output:
+ // R0 - input length
+ //
+ address generate_cipherBlockChaining_decryptAESCrypt() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+ // Emits a CBC-mode AES decryption stub that consumes len_reg bytes, 16 per loop iteration.
+ Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
+
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register rvec = c_rarg3; // r byte array initialized from initvector array address
+ // and left with the results of the last encryption block
+ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
+ const Register keylen = R8;
+ // Build a frame: push FP/LR and point FP at the saved pair.
+ address start = __ pc();
+ __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed));
+ __ mov(FP, SP);
+ // Keep the original length in R9; it is copied to R0 before returning.
+ __ mov(R9, len_reg);
+ __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ // V2 carries the chaining value, initially loaded from rvec.
+ __ vld1(V2, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128);
+ // The first 16 bytes of the key are kept in V31 and only applied by the final eor.
+ __ vld1(V31, Address(key, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+
+ int quad = 1;
+ __ rev32(V31, V31, MacroAssembler::VELEM_SIZE_8, quad);
+ // Pick how many round keys to load; the flags set here are tested again inside the loop.
+ __ cmp_w(keylen, 52);
+ __ b(L_loadkeys_44, cc);
+ __ b(L_loadkeys_52, eq);
+ // keylen above 52: two extra round keys in V17/V18.
+ __ vld1(V17, V18, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V17, V17, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V18, V18, MacroAssembler::VELEM_SIZE_8, quad);
+ __ BIND(L_loadkeys_52);
+ __ vld1(V19, V20, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V19, V19, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V20, V20, MacroAssembler::VELEM_SIZE_8, quad);
+ __ BIND(L_loadkeys_44);
+ __ vld1(V21, V22, V23, V24, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V21, V21, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V22, V22, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V23, V23, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V24, V24, MacroAssembler::VELEM_SIZE_8, quad);
+ __ vld1(V25, V26, V27, V28, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V25, V25, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V26, V26, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V27, V27, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V28, V28, MacroAssembler::VELEM_SIZE_8, quad);
+ __ vld1(V29, V30, Address(key), MacroAssembler::VELEM_SIZE_8, 128);
+ __ rev32(V29, V29, MacroAssembler::VELEM_SIZE_8, quad);
+ __ rev32(V30, V30, MacroAssembler::VELEM_SIZE_8, quad);
+ // Per block: load 16 input bytes into V0 and keep an untouched copy in V1 (orr with itself is a move).
+ __ BIND(L_aes_loop);
+ __ vld1(V0, Address(from, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ orr(V1, V0, V0, MacroAssembler::VELEM_SIZE_8, quad);
+ // NOTE(review): uses the flags of cmp_w(keylen, 52) above; sub/cbnz below presumably leave them intact.
+ __ b(L_rounds_44, cc);
+ __ b(L_rounds_52, eq);
+ // Extra rounds for the longest key: one round with each of V17 and V18.
+ __ aesd(V0, V17);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V18); // second extra round key; V17 must not be applied twice
+ __ aesimc(V0, V0);
+ __ BIND(L_rounds_52);
+ __ aesd(V0, V19);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V20);
+ __ aesimc(V0, V0);
+ __ BIND(L_rounds_44);
+ __ aesd(V0, V21);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V22);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V23);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V24);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V25);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V26);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V27);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V28);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V29);
+ __ aesimc(V0, V0);
+ __ aesd(V0, V30);
+ __ eor(V0, V0, V31, MacroAssembler::VELEM_SIZE_8, quad);
+ __ eor(V0, V0, V2, MacroAssembler::VELEM_SIZE_8, quad);
+ // Store the output block, then make the saved input block (V1) the next chaining value.
+ __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
+ __ orr(V2, V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
+
+ __ sub(len_reg, len_reg, 16);
+ __ cbnz(len_reg, L_aes_loop);
+ // Write the final chaining value back to rvec.
+ __ vst1(V2, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128);
+ // Return the original input length in R0.
+ __ mov(R0, R9);
+ // Tear down the frame and return to the caller.
+ __ mov(SP, FP);
+ __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed));
+ __ ret(LR);
+
+ return start;
+ }
+
+#endif // COMPILER2
+#endif // AARCH64
+
+ private:
+
+#undef __
+#define __ masm->
+
+ //------------------------------------------------------------------------------------------------------------------------
+ // Continuation point for throwing of implicit exceptions that are not handled in
+ // the current activation. Fabricates an exception oop and initiates normal
+ // exception dispatching in this frame.
+ address generate_throw_exception(const char* name, address runtime_entry) {
+ // Sizes handed to the CodeBuffer constructor for this stub.
+ const int code_bytes = 128;
+ const int reloc_bytes = 32;
+ CodeBuffer code(name, code_bytes, reloc_bytes);
+
+ // Frame size passed to the oop map and the RuntimeStub; FP and LR are pushed below.
+ const int frame_words = 2;
+ OopMapSet* gc_maps = new OopMapSet();
+ MacroAssembler* masm = new MacroAssembler(&code);
+
+ address entry = __ pc();
+
+ // Copy LR into Rexception_pc, then save FP and LR.
+ __ mov(Rexception_pc, LR);
+ __ raw_push(FP, LR);
+
+ const int frame_complete_offset = __ pc() - entry;
+
+ // The thread goes in R0; any extra arguments are already supposed to be in R1 and R2.
+ __ mov(R0, Rthread);
+
+ int call_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
+ assert(((__ pc()) - entry) == __ offset(), "warning: start differs from code_begin");
+ __ call(runtime_entry);
+ if (call_offset == -1) {
+ call_offset = __ offset();
+ }
+
+ // Register an oop map (no entries are added to it) at the recorded pc offset.
+ OopMap* frame_map = new OopMap(frame_words * VMRegImpl::slots_per_word, 0);
+ gc_maps->add_gc_map(call_offset, frame_map);
+ __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
+
+ // Undo the push and continue at the forward-exception stub.
+ __ raw_pop(FP, LR);
+ __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
+
+ return RuntimeStub::new_runtime_stub(name, &code, frame_complete_offset,
+ frame_words, gc_maps, false)->entry_point();
+ }
+
+ //---------------------------------------------------------------------------
+ // Initialization
+
+ void generate_initial() {
+ // Generates the initial set of stubs (constructor called with all == false) and initializes their entry points
+
+ //------------------------------------------------------------------------------------------------------------------------
+ // entry points that exist in all platforms
+ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
+ // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
+ StubRoutines::_forward_exception_entry = generate_forward_exception();
+ // generate_call_stub also receives _call_stub_return_address (passed by name, presumably to be filled in)
+ StubRoutines::_call_stub_entry =
+ generate_call_stub(StubRoutines::_call_stub_return_address);
+ // is referenced by megamorphic call
+ StubRoutines::_catch_exception_entry = generate_catch_exception();
+
+ // stub for throwing stack overflow error used both by interpreter and compiler
+ StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
+
+#ifndef AARCH64
+ // integer division used both by interpreter and compiler
+ StubRoutines::Arm::_idiv_irem_entry = generate_idiv_irem();
+ // atomic operation stubs, generated only when AARCH64 is not defined
+ StubRoutines::_atomic_add_entry = generate_atomic_add();
+ StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
+ StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
+ StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
+ StubRoutines::_atomic_load_long_entry = generate_atomic_load_long();
+ StubRoutines::_atomic_store_long_entry = generate_atomic_store_long();
+#endif // !AARCH64
+ }
+
+ void generate_all() {
+ // Generates the remaining stubs (constructor called with all == true) and initializes their entry points
+
+#ifdef COMPILER2
+ // Generate partial_subtype_check first here since its code depends on
+ // UseZeroBaseCompressedOops which is defined after heap initialization.
+ StubRoutines::Arm::_partial_subtype_check = generate_partial_subtype_check();
+#endif // COMPILER2
+ // These entry points require SharedInfo::stack0 to be set up in non-core builds
+ // and need to be relocatable, so they each fabricate a RuntimeStub internally.
+ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
+ StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
+ StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
+
+ //------------------------------------------------------------------------------------------------------------------------
+ // entry points that are platform specific
+
+ // support for verify_oop (must happen after universe_init)
+ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
+
+ // arraycopy stubs used by compilers
+ generate_arraycopy_stubs();
+
+ // Safefetch stubs.
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
+ &StubRoutines::_safefetch32_fault_pc,
+ &StubRoutines::_safefetch32_continuation_pc);
+#ifdef AARCH64
+ generate_safefetch("SafeFetchN", wordSize, &StubRoutines::_safefetchN_entry,
+ &StubRoutines::_safefetchN_fault_pc,
+ &StubRoutines::_safefetchN_continuation_pc);
+#ifdef COMPILER2
+ if (UseAESIntrinsics) { // AES stubs for AARCH64 builds with COMPILER2
+ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+ }
+#endif // COMPILER2
+#else // !AARCH64: a word is int-sized, so SafeFetchN reuses the SafeFetch32 entry points
+ assert (sizeof(int) == wordSize, "32-bit architecture");
+ StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
+ StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
+ StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
+#endif // AARCH64
+
+#ifdef COMPILE_CRYPTO
+ // generate AES intrinsics code
+ if (UseAESIntrinsics) {
+ aes_init(); // points SBox/SInvBox at their tables before the stubs are generated
+ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+ }
+#endif // COMPILE_CRYPTO
+ }
+
+
+ public:
+ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
+ if (!all) {
+ generate_initial();
+ } else {
+ generate_all();
+ }
+ }
+}; // end class declaration
+
+void StubGenerator_generate(CodeBuffer* code, bool all) {
+ StubGenerator stub_generator(code, all); // the constructor generates the stubs
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/stubRoutinesCrypto_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,1033 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifdef COMPILE_CRYPTO
+
+// The Rijndael S-box and inverted S-box are embedded here for a faster access.
+//
+// Note about lookup tables (T1...T4 and T5..T8):
+// The tables (boxes) combine ahead-of-time precalculated transposition and mixing steps as
+// an alternative to a runtime calculation.
+// The tables are statically generated in com/sun/crypto/provider/AESCrypt class.
+// Only the first table reference is passed to AES methods below. The other 3 tables
+// in encryption and decryption are calculated at runtime by rotating the T1 result accordingly.
+// It is a free operation on ARM with embedded register-shifted-register EOR capability.
+// The table reference is passed as the last argument in the parameter list.
+// The table lookup method proves to perform better than a runtime Galois Field calculation,
+// due to the lack of HW acceleration for the latter.
+
+unsigned char * SBox;
+unsigned char * SInvBox;
+
+void aes_init() {
+ // Inverse Rijndael S-box, published through SInvBox.
+ static const unsigned char inverse_sbox[256] =
+ {
+ 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
+ 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
+ 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+ 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
+ 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
+ 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+ 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
+ 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
+ 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+ 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
+ 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
+ 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+ 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
+ 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
+ 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+ 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
+ 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
+ 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+ 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
+ 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
+ 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+ 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
+ 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
+ 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+ 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
+ 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
+ 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+ 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
+ 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
+ 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
+ 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
+ };
+ // Forward Rijndael S-box, published through SBox.
+ static const unsigned char forward_sbox[256] = {
+ 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
+ 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+ 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
+ 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
+ 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+ 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
+ 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
+ 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+ 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
+ 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
+ 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+ 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
+ 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
+ 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+ 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
+ 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
+ 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
+ };
+ // Publish both tables through the file-level pointers (the cast only drops const).
+ SInvBox = const_cast<unsigned char*>(inverse_sbox);
+ SBox = const_cast<unsigned char*>(forward_sbox);
+}
+
+address generate_aescrypt_encryptBlock() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aesencryptBlock");
+ // Emits a table-lookup based AES block encryption stub; the incoming arguments are listed below.
+ address start = __ pc();
+
+ // Register from = R0; // source byte array
+ // Register to = R1; // destination byte array
+ // Register key = R2; // expanded key array
+ // Register tbox = R3; // transposition box reference
+
+ __ push (RegisterSet(R4, R12) | LR);
+ __ fstmdbd(SP, FloatRegisterSet(D0, 4), writeback);
+ __ sub(SP, SP, 32); // scratch stack space; offsets 0 and 16 are used below
+
+ // advance the TBox reference to its first int element and keep it on the stack
+ __ add(R3, R3, arrayOopDesc::base_offset_in_bytes(T_INT));
+ __ str(R3, Address(SP, 16));
+
+ // retrieve key length. The length is used to determine the number of subsequent rounds (10, 12 or 14)
+ __ ldr(R9, Address(R2, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ // initial round: load the four input words, byte-reverse them and XOR each with a key word
+ __ ldr(R5, Address(R0));
+ __ ldr(R10, Address(R2, 4, post_indexed));
+ __ rev(R5, R5);
+ __ eor(R5, R5, R10);
+ __ ldr(R6, Address(R0, 4));
+ __ ldr(R10, Address(R2, 4, post_indexed));
+ __ rev(R6, R6);
+ __ eor(R6, R6, R10);
+ __ ldr(R7, Address(R0, 8));
+ __ ldr(R10, Address(R2, 4, post_indexed));
+ __ rev(R7, R7);
+ __ eor(R7, R7, R10);
+ __ ldr(R8, Address(R0, 12));
+ __ ldr(R10, Address(R2, 4, post_indexed));
+ __ rev(R8, R8);
+ __ eor(R8, R8, R10);
+
+ // Reduce the key word count by 8 to account for the initial and last rounds; keep the destination pointer (R1) in S7
+ __ sub(R9, R9, 8);
+ __ fmsr(S7, R1);
+
+ // load first transposition box (T1)
+ __ ldr(R0, Address(SP, 16));
+ // LR walks the remaining key words from here on
+ __ mov(LR, R2);
+
+ Label round;
+
+ __ bind(round);
+
+ // Utilize a Transposition Box lookup along with subsequent shift and EOR with a round key.
+ // instructions ordering is rearranged to minimize read-after-write dependency. Not that important on A15 target
+ // with register renaming but performs ~10% better on A9.
+ __ mov(R12, AsmOperand(R5, lsr, 24));
+ __ ubfx(R4, R6, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R7, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R8);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R10, R1, R12);
+ // second output word, held in R11
+ __ mov(R12, AsmOperand(R6, lsr, 24));
+ __ ubfx(R4, R7, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R8, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R5);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R11, R1, R12);
+ // third output word, parked on the stack at SP+0
+ __ mov(R12, AsmOperand(R7, lsr, 24));
+ __ ubfx(R4, R8, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R5, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R6);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R3, R1, R12);
+ __ str(R3, Address(SP, 0));
+ // fourth output word, written straight into R8 (the old R8 is read first, below)
+ __ mov(R12, AsmOperand(R8, lsr, 24));
+ __ ubfx(R4, R5, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R6, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R7);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R8, R1, R12);
+
+ // update round count
+ __ subs(R9, R9, 4);
+ // move the new state words into R5..R7 for the next round
+ __ mov(R5, R10);
+ __ mov(R6, R11);
+ __ ldr(R7, Address(SP, 0));
+ // loop while the count set by subs above is still positive
+ __ b(round, gt);
+
+
+ // last round - a special case, no MixColumn
+ __ mov_slow(R10, (int)SBox);
+
+
+ // output buffer pointer
+ __ fmrs(R9, S7);
+ // each output word: four S-box byte lookups combined, XORed with a key word, byte-reversed and stored
+ __ ldr(R11, Address(LR, 4, post_indexed));
+ __ ldrb(R0, Address(R10, R5, lsr, 24));
+ __ ubfx(R12, R6, 16, 8);
+ __ ldrb(R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R7, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb (R12, R8);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ eor(R0, R0, R11);
+ __ rev(R0, R0);
+ __ str(R0, Address(R9, 4, post_indexed));
+
+ __ ldr(R11, Address(LR, 4, post_indexed));
+ __ ldrb(R0, Address(R10, R6, lsr, 24));
+ __ ubfx(R12, R7, 16, 8);
+ __ ldrb(R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R8, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb (R12, R5);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ eor(R0, R0, R11);
+ __ rev(R0, R0);
+
+ __ str(R0, Address(R9, 4, post_indexed));
+ __ ldr(R11, Address(LR, 4, post_indexed));
+ __ ldrb(R0, Address(R10, R7, lsr, 24));
+ __ ubfx(R12, R8, 16, 8);
+ __ ldrb(R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R5, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb (R12, R6);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ eor(R0, R0, R11);
+ __ rev(R0, R0);
+
+ __ str(R0, Address(R9, 4, post_indexed));
+ __ ldr(R11, Address(LR));
+ __ ldrb(R0, Address(R10, R8, lsr, 24));
+ __ ubfx(R12, R5, 16, 8);
+ __ ldrb(R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R6, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb (R12, R7);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ eor(R0, R0, R11);
+ __ rev(R0, R0);
+
+ __ str(R0, Address(R9));
+ // release the scratch area, restore the saved FP registers, then pop R4-R12 and return via PC
+ __ add(SP, SP, 32);
+ __ fldmiad(SP, FloatRegisterSet(D0, 4), writeback);;
+
+ __ pop(RegisterSet(R4, R12) | PC);
+ return start;
+}
+
+address generate_aescrypt_decryptBlock() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock");
+ // Emits the stub that decrypts a single 16-byte AES block and returns the stub's entry address.
+ address start = __ pc();
+ // Incoming arguments of the generated code:
+ // Register from = R0; // source byte array
+ // Register to = R1; // destination byte array
+ // Register key = R2; // expanded key array
+ // Register tbox = R3; // transposition box reference
+
+ __ push (RegisterSet(R4, R12) | LR); // save R4-R12 and LR; the matching pop at the end loads PC to return
+ __ fstmdbd(SP, FloatRegisterSet(D0, 4), writeback); // also save four D registers starting at D0
+ __ sub(SP, SP, 32); // 32 bytes of local stack: [SP+0] round scratch, [SP+16] table pointer
+
+ // retrieve key length: the offset is relative to the first array element, so R2 must point at key[0]
+ __ ldr(R9, Address(R2, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+ // preserve TBox references
+ __ add(R3, R3, arrayOopDesc::base_offset_in_bytes(T_INT)); // skip the array header so R3 addresses the table data
+ __ str(R3, Address(SP, 16));
+
+
+ // Preserve the expanded key pointer
+ __ fmsr(S8, R2);
+
+ // The first key round is applied to the last round
+ __ add(LR, R2, 16); // LR = key + 16 bytes; LR walks the round keys from here on
+
+ // initial round: load the four input words, byte-swap each (rev) and XOR with successive key words
+ __ ldr(R5, Address(R0));
+ __ ldr(R10, Address(LR, 4, post_indexed));
+ __ rev(R5, R5);
+ __ eor(R5, R5, R10);
+ __ ldr(R6, Address(R0, 4));
+ __ ldr(R10, Address(LR, 4, post_indexed));
+ __ rev(R6, R6);
+ __ eor(R6, R6, R10);
+ __ ldr(R7, Address(R0, 8));
+ __ ldr(R10, Address(LR, 4, post_indexed));
+ __ rev(R7, R7);
+ __ eor(R7, R7, R10);
+ __ ldr(R8, Address(R0, 12));
+ __ ldr(R10, Address(LR, 4, post_indexed));
+ __ rev(R8, R8);
+ __ eor(R8, R8, R10);
+
+
+ // Store the key size; However before doing that adjust the key to compensate for the Initial and Last rounds
+ __ sub(R9, R9, 8); // R9 = remaining key words, excluding 4 for the initial and 4 for the last round
+ __ fmsr(S7, R1); // stash the destination pointer in S7; R1 is reused as scratch below
+
+ // load transposition box (T5)
+ __ ldr(R0, Address(SP, 16)); // R0 = table base saved at [SP+16] above
+
+ Label round;
+
+ __ bind(round);
+ // each sub-block is treated similarly:
+
+ // combine SubBytes|ShiftRows|MixColumn through a precalculated set of tables
+ // Utilize a Transposition Box lookup along with subsequent shift and EOR with a round key.
+ // instruction ordering is rearranged to minimize read-after-write dependency. Not that important on A15 target
+ // with register renaming but performs ~10% better on A9.
+ __ mov(R12, AsmOperand(R5, lsr, 24)); // R12 = byte 3 of R5
+ __ ubfx(R4, R8, 16, 8); // R4 = byte 2 of R8
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R7, 8, 8); // R3 = byte 1 of R7
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R6); // R4 = byte 0 of R6
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed)); // R12 = next round key word
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R10, R1, R12); // R10 = new state word 0
+
+ __ mov(R12, AsmOperand(R6, lsr, 24));
+ __ ubfx(R4, R5, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R8, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R7);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R11, R1, R12); // R11 = new state word 1
+
+ __ mov(R12, AsmOperand(R7, lsr, 24));
+ __ ubfx(R4, R6, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R5, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R8);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R3, R1, R12);
+ __ str(R3, Address(SP, 0)); // new state word 2 is parked at [SP+0]: R7 is still an input of the next sub-block
+
+ __ mov(R12, AsmOperand(R8, lsr, 24));
+ __ ubfx(R4, R7, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R6, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R5);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R8, R1, R12); // R8 = new state word 3
+
+ // update round count
+ __ subs(R9, R9, 4); // four key words consumed; sets the flags tested by the branch below
+ // install the new state words in R5..R7 (R8 was written directly above)
+ __ mov(R5, R10);
+ __ mov(R6, R11);
+ __ ldr(R7, Address(SP, 0));
+
+ __ b(round, gt); // run another table round while R9 > 0
+
+ // last round - a special case, no MixColumn:
+
+ // Retrieve expanded key pointer
+ __ fmrs(LR, S8); // LR = start of the expanded key: its first four words are used in this last round
+
+ __ mov_slow(R10, (int)SInvBox); // R10 = address of the SInvBox byte table
+
+ // output buffer pointer
+ __ fmrs(R9, S7);
+
+ // process each sub-block in a similar manner:
+ // 1. load a corresponding round key
+ __ ldr(R11, Address(LR, 4, post_indexed));
+ // 2. combine SubBytes and ShiftRows stages
+ __ ldrb(R0, Address(R10, R5, lsr, 24));
+ __ ubfx(R12, R8, 16, 8);
+ __ ldrb(R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R7, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb (R12, R6);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R3, R3, AsmOperand(R0, lsl, 8));
+ // 3. AddRoundKey stage
+ __ eor(R0, R3, R11);
+ // 4. convert the result to LE representation
+ __ rev(R0, R0);
+ // 5. store in the output buffer
+ __ str(R0, Address(R9, 4, post_indexed));
+ // output word 1: same five steps
+ __ ldr(R11, Address(LR, 4, post_indexed));
+ __ ldrb(R0, Address(R10, R6, lsr, 24));
+ __ ubfx(R12, R5, 16, 8);
+ __ ldrb(R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R8, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb (R12, R7);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ eor(R0, R0, R11);
+ __ rev(R0, R0);
+ __ str(R0, Address(R9, 4, post_indexed));
+ // output word 2
+ __ ldr(R11, Address(LR, 4, post_indexed));
+ __ ldrb(R0, Address(R10, R7, lsr, 24));
+ __ ubfx(R12, R6, 16, 8);
+ __ ldrb(R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R5, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb (R12, R8);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ eor(R0, R0, R11);
+ __ rev(R0, R0);
+ __ str(R0, Address(R9, 4, post_indexed));
+ // output word 3: the key and output pointers are not advanced any further
+ __ ldr(R11, Address(LR));
+ __ ldrb(R0, Address(R10, R8, lsr, 24));
+ __ ubfx(R12, R7, 16, 8);
+ __ ldrb(R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R6, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb (R12, R5);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ eor(R0, R0, R11);
+ __ rev(R0, R0);
+ __ str(R0, Address(R9));
+
+ __ add(SP, SP, 32); // drop the local frame
+ __ fldmiad(SP, FloatRegisterSet(D0, 4), writeback);; // restore the saved D registers
+ __ pop(RegisterSet(R4, R12) | PC); // restore R4-R12 and return (the saved LR is popped into PC)
+
+ return start;
+}
+
+address generate_cipherBlockChaining_encryptAESCrypt() {
+ // R0 - plain
+ // R1 - cipher
+ // R2 - expanded key
+ // R3 - Initialization Vector (IV)
+ // [sp+0] - cipher len
+ // [sp+4] Transposition Box reference
+ // Emits the CBC encryption stub (one 16-byte block per loop iteration) and returns its entry address.
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+
+ address start = __ pc();
+
+ __ push(RegisterSet(R4, R12) | LR); // 10 registers = 40 bytes, so the caller's stack arguments now start at SP+40
+ // load cipher length (which is first element on the original calling stack)
+ __ ldr(R4, Address(SP, 40));
+
+ __ sub(SP, SP, 32); // local frame: [SP+0..15] staged input block, [SP+16] current source pointer
+
+ // preserve some arguments
+ __ mov(R5, R1); // R5 = current position in the cipher (output) buffer
+ __ mov(R6, R2); // R6 = expanded key
+
+ // load IV
+ __ ldmia(R3, RegisterSet(R9, R12), writeback); // R9..R12 = the 16-byte chaining value
+
+ // preserve original source buffer on stack
+ __ str(R0, Address(SP, 16));
+
+ Label loop;
+ __ bind(loop);
+ __ ldmia(R0, RegisterSet(R0, R1) | RegisterSet(R7, R8)); // load the next 16 bytes of plaintext
+ // XOR the plaintext with the chaining value (the IV, or the previous cipher block)
+ __ eor(R0, R0, R9);
+ __ eor(R1, R1, R10);
+ __ eor(R7, R7, R11);
+ __ eor(R8, R8, R12);
+ __ stmia(SP, RegisterSet(R0, R1) | RegisterSet(R7, R8)); // stage the XORed block at [SP+0..15]
+ // set up the block stub's arguments: from = staged block, to = R5, key = R6, tbox = second stack argument
+ __ mov(R0, SP);
+ __ mov(R1, R5);
+ __ mov(R2, R6);
+ __ ldr(R3, Address(SP, 40+32+4)); // 40 bytes of pushed registers + 32 bytes local frame + 4
+
+ // near call is sufficient since the target is also in the stubs
+ __ bl(StubRoutines::_aescrypt_encryptBlock);
+
+ __ subs(R4, R4, 16); // one block done; the flags drive the conditional instructions below
+ __ ldr(R0, Address(SP, 16), gt);
+ __ ldmia(R5, RegisterSet(R9, R12), writeback); // the block just written becomes the next chaining value; R5 advances by 16
+ __ add(R0, R0, 16, gt);
+ __ str(R0, Address(SP, 16), gt);
+ __ b(loop, gt);
+ // NOTE(review): no store through the IV pointer is visible here (R3 is overwritten at the call setup) — confirm callers do not rely on the stub updating the IV
+ __ add(SP, SP, 32);
+ __ pop(RegisterSet(R4, R12) | LR);
+ // return cipher len (copied from the original argument)
+ __ ldr(R0, Address(SP)); // after the pop, SP addresses the first stack argument again
+ __ bx(LR);
+
+ return start;
+}
+
+
+// The CBC decryption could benefit from parallel processing as the blocks could be
+// decrypted separately from each other.
+// NEON is utilized (if available) to perform parallel execution on 8 blocks at a time.
+// Since Transposition Box (tbox) is used the parallel execution will only apply to an
+// Initial Round and the last round. It's not practical to use NEON for a table lookup
+// larger than 128 bytes. It also appears to be faster performing tbox lookup
+// sequentially than executing Galois Field calculation in parallel.
+
+address generate_cipherBlockChaining_decryptAESCrypt() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+
+ address start = __ pc();
+ // Emits the CBC decryption stub: a block-at-a-time loop plus, when has_simd(), a path handling 128-byte chunks.
+ Label single_block_done, single_block, cbc_done;
+ // R0 - cipher
+ // R1 - plain
+ // R2 - expanded key
+ // R3 - Initialization Vector (iv)
+ // [sp+0] - cipher len
+ // [sp+4] - Transposition Box reference
+
+ __ push(RegisterSet(R4, R12) | LR); // 10 registers = 40 bytes; the stack arguments now start at SP+40
+
+ // load cipher len: must be a multiple of 16
+ __ ldr(R4, Address(SP, 40));
+
+ if (VM_Version::has_simd()) {
+ __ andrs(R4, R4, 0x7f); // R4 = len % 128: bytes to handle one block at a time; Z is set if there are none
+ }
+
+ // preserve registers based arguments
+ __ mov(R7, R2);
+ __ mov(R8, R3);
+
+ if (VM_Version::has_simd()) {
+ __ b(single_block_done, eq); // no leftover single blocks: go straight to the 8-block path
+ }
+
+ __ bind(single_block);
+ // preserve args
+ __ mov(R5, R0);
+ __ mov(R6, R1);
+
+ // reload arguments
+ __ mov(R2, R7);
+ __ ldr(R3, Address(SP, 40+4)); // table reference: second stack argument
+
+ // near call is sufficient as the method is part of the StubGenerator
+ __ bl((address)StubRoutines::_aescrypt_decryptBlock);
+
+ // check remaining cipher size (for individual block processing)
+ __ subs(R4, R4, 16);
+ if (VM_Version::has_simd()) {
+ __ tst(R4, 0x7f);
+ }
+
+ // load IV (changes based on a CBC schedule)
+ __ ldmia(R8, RegisterSet(R9, R12));
+
+ // load plaintext from the previous block processing
+ __ ldmia(R6, RegisterSet(R0, R3));
+
+ // perform IV addition and save the plaintext for good now
+ __ eor(R0, R0, R9);
+ __ eor(R1, R1, R10);
+ __ eor(R2, R2, R11);
+ __ eor(R3, R3, R12);
+ __ stmia(R6, RegisterSet(R0, R3));
+
+ // adjust pointers for next block processing
+ __ mov(R8, R5); // the cipher block just consumed is the chaining value for the next block
+ __ add(R0, R5, 16);
+ __ add(R1, R6, 16);
+ __ b(single_block, ne); // flags still come from the subs/tst above
+
+ __ bind(single_block_done);
+ if (!VM_Version::has_simd()) {
+ __ b(cbc_done);
+ } else {
+ // done with single blocks.
+ // check if any 8 block chunks are available for parallel processing
+ __ ldr(R4, Address(SP, 40));
+ __ bics(R4, R4, 0x7f); // R4 = len rounded down to a multiple of 128
+ __ b(cbc_done, eq);
+
+ Label decrypt_8_blocks;
+ int quad = 1;
+ // Process 8 blocks in parallel
+ __ fstmdbd(SP, FloatRegisterSet(D8, 8), writeback); // save eight D registers starting at D8 (64 bytes)
+ __ sub(SP, SP, 40); // 40-byte local frame, see the frame map below
+
+ // record output buffer end address (used as a block counter)
+ Address output_buffer_end(SP, 16);
+ __ add(R5, R1, R4);
+ __ str(R5, output_buffer_end);
+
+ // preserve key pointer
+ Address rounds_key(SP, 28);
+ __ str(R7, rounds_key);
+ // in decryption the first 16 bytes of expanded key are used in the last round
+ __ add(LR, R7, 16);
+
+
+ // Record the end of the key which is used to indicate a last round
+ __ ldr(R3, Address(R7, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ add(R9, R7, AsmOperand(R3, lsl, 2)); // R9 = key base + 4 * length
+
+ // preserve IV
+ Address iv(SP, 36);
+ __ str(R8, iv);
+
+ __ bind(decrypt_8_blocks);
+ __ mov(R5, R1);
+
+ // preserve original source pointer
+ Address original_src(SP, 32);
+ __ str(R0, original_src);
+
+ // Initial round for 8 blocks at once: byte-swap every word and XOR with the round key loaded from LR;
+ // use output buffer for a temp storage to preload it into cache
+ // (the byte-swapped cipher blocks stay in D0..D14 for the chaining XOR at the end)
+ __ vld1(D18, LR, MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vld1(D0, Address(R0, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D0, D0, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ veor(D20, D0, D18, quad);
+ __ vst1(D20, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D2, Address(R0, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D2, D2, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ veor(D20, D2, D18, quad);
+ __ vst1(D20, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D4, Address(R0, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D4, D4, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ veor(D20, D4, D18, quad);
+ __ vst1(D20, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D6, Address(R0, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D6, D6, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ veor(D20, D6, D18, quad);
+ __ vst1(D20, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D8, Address(R0, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D8, D8, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ veor(D20, D8, D18, quad);
+ __ vst1(D20, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D10, Address(R0, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D10, D10, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ veor(D20, D10, D18, quad);
+ __ vst1(D20, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D12, Address(R0, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D12, D12, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ veor(D20, D12, D18, quad);
+ __ vst1(D20, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D14, Address(R0, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D14, D14, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ veor(D20, D14, D18, quad);
+ __ vst1(D20, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+
+ // Local frame map:
+ // sp+0 - scratch slot for the third state word inside a round
+ // sp+12 - end of the current 128-byte chunk in the output buffer; sp+16 - end of the whole output
+ // sp+20 - current output buffer pointer
+ // sp+28 - key pointer; sp+32 - original source
+ // sp+36 - pointer to the current IV / chaining block
+
+
+ // preserve output buffer pointer
+ Address block_current_output_buffer(SP, 20);
+ __ str(R1, block_current_output_buffer);
+
+ // individual rounds in block processing are executed sequentially.
+ Label block_start;
+
+ // record end of the output buffer
+ __ add(R0, R1, 128); // 8 blocks * 16 bytes
+ __ str(R0, Address(SP, 12));
+
+ __ bind(block_start);
+
+ // load transposition box reference (T5)
+ // location of the reference (6th incoming argument, second slot on the stack):
+ // 10 scalar registers on stack
+ // 8 double-precision FP registers
+ // 40 bytes frame size for local storage
+ // 4 bytes offset to the original arguments list
+ __ ldr(R0, Address(SP, 40+64+40+4));
+ __ add(R0, R0, arrayOopDesc::base_offset_in_bytes(T_INT));
+
+ // load rounds key and compensate for the first and last rounds
+ __ ldr(LR, rounds_key);
+ __ add(LR, LR, 32); // skip the 16 bytes kept for the last round and the 16 bytes already applied above
+
+ // load block data out buffer
+ __ ldr(R2, block_current_output_buffer);
+ __ ldmia(R2, RegisterSet(R5, R8)); // R5..R8 = state words of the block staged in the output buffer
+
+ Label round;
+ __ bind(round);
+
+ // Utilize a Transposition Box lookup along with subsequent shift and EOR with a round key.
+ // instruction ordering is rearranged to minimize read-after-write dependency. Not that important on A15 target
+ // with register renaming but performs ~10% better on A9.
+ __ mov(R12, AsmOperand(R5, lsr, 24));
+ __ ubfx(R4, R8, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R7, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R6);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R10, R1, R12); // R10 = new state word 0
+
+ __ mov(R12, AsmOperand(R6, lsr, 24));
+ __ ubfx(R4, R5, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R8, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R7);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R11, R1, R12); // R11 = new state word 1
+
+ __ mov(R12, AsmOperand(R7, lsr, 24));
+ __ ubfx(R4, R6, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R5, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R8);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R3, R1, R12);
+ __ str(R3, Address(SP, 0)); // new state word 2 is parked at [SP+0]: R7 is still an input of the next sub-block
+
+ __ mov(R12, AsmOperand(R8, lsr, 24));
+ __ ubfx(R4, R7, 16, 8);
+ __ ldr (R1, Address(R0, R12, lsl, 2));
+ __ ldr(R2, Address(R0, R4, lsl, 2));
+ __ ubfx(R3, R6, 8, 8);
+ __ eor(R1, R1, AsmOperand(R2, ror, 8));
+ __ uxtb(R4, R5);
+ __ ldr(R3, Address(R0, R3, lsl, 2));
+ __ ldr(R4, Address(R0, R4, lsl, 2));
+ __ ldr(R12, Address(LR, 4, post_indexed));
+ __ eor(R1, R1, AsmOperand(R3, ror, 16));
+ __ eor(R12, R12, AsmOperand(R4, ror, 24));
+ __ eor(R8, R1, R12); // R8 = new state word 3
+
+ // see if we reached the key array end
+ __ cmp(R9, LR);
+
+ // load processed data
+ __ mov(R5, R10);
+ __ mov(R6, R11);
+ __ ldr(R7, Address(SP, 0));
+
+ __ b(round, gt); // another round while the key end (R9) is still above LR
+
+
+ // last round is special
+ // this round could be implemented through vtbl instruction in NEON. However vtbl is limited to a 32-byte wide table (4 vectors),
+ // thus it requires 8 lookup rounds to cover 256-byte wide Si table. On the other hand scalar lookup is independent of the
+ // lookup table size and thus proves to be faster.
+ __ ldr(LR, block_current_output_buffer);
+
+ // end address of the current 8-block chunk (compared against LR below)
+ __ ldr(R11, Address(SP, 12));
+
+ __ mov_slow(R10, (int)SInvBox);
+ __ ldrb(R0, Address(R10, R5, lsr, 24));
+ __ ubfx(R12, R8, 16, 8);
+ __ ldrb (R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R7, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb(R12, R6);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ str(R0, Address(LR, 4, post_indexed)); // stored without the round key: key and IV are XORed in by the vector code below
+
+ __ ldrb(R0, Address(R10, R6, lsr, 24));
+ __ ubfx(R12, R5, 16, 8);
+ __ ldrb (R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R8, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb(R12, R7);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ str(R0, Address(LR, 4, post_indexed));
+
+
+ __ ldrb(R0, Address(R10, R7, lsr, 24));
+ __ ubfx(R12, R6, 16, 8);
+ __ ldrb (R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R5, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb(R12, R8);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ str(R0, Address(LR, 4, post_indexed));
+
+
+ __ ldrb(R0, Address(R10, R8, lsr, 24));
+ __ ubfx(R12, R7, 16, 8);
+ __ ldrb (R1, Address(R10, R12));
+ __ orr(R0, R1, AsmOperand(R0, lsl, 8));
+ __ ubfx(R12, R6, 8, 8);
+ __ ldrb(R2, Address(R10, R12));
+ __ orr(R0, R2, AsmOperand(R0, lsl, 8));
+ __ uxtb(R12, R5);
+ __ ldrb(R3, Address(R10, R12));
+ __ orr(R0, R3, AsmOperand(R0, lsl, 8));
+ __ str(R0, Address(LR, 4, post_indexed));
+
+
+ // preserve current scratch buffer pointer
+ __ cmp(R11, LR);
+ __ str(LR, block_current_output_buffer);
+
+ // go to the next block processing
+ __ b(block_start, ne);
+
+
+
+ // Perform last round AddRoundKey state on all 8 blocks
+
+ // load key pointer (the rounds_key slot holds the start of the expanded key)
+ // last round is processed with the key[0 ..3]
+ __ ldr(LR, rounds_key);
+
+ // retrieve original output buffer pointer
+ __ ldr(R1, block_current_output_buffer);
+ __ sub(R1, R1, 128);
+ __ mov(R5, R1);
+
+
+ // retrieve original cipher (source) pointer
+ __ ldr(R0, original_src);
+
+ // retrieve the IV pointer saved in the local frame
+ __ ldr(R6, iv);
+
+ __ vld1(D20, R6, MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ vrev(D20, D20, quad, 32, MacroAssembler::VELEM_SIZE_8);
+
+ // perform last AddRoundKey and IV addition
+ __ vld1(D18, Address(LR, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ // block 0 is chained with the IV (D20); blocks 1..7 use the preceding cipher blocks kept in D0..D12
+ __ vld1(D22, Address(R1, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ veor(D22, D22, D18, quad);
+ __ veor(D22, D22, D20, quad);
+ __ vrev(D22, D22, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ vst1(D22, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+
+ __ vld1(D22, Address(R1, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ veor(D22, D22, D18, quad);
+ __ veor(D22, D22, D0, quad);
+ __ vrev(D22, D22, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ vst1(D22, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D22, Address(R1, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ veor(D22, D22, D18, quad);
+ __ veor(D22, D22, D2, quad);
+ __ vrev(D22, D22, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ vst1(D22, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D22, Address(R1, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ veor(D22, D22, D18, quad);
+ __ veor(D22, D22, D4, quad);
+ __ vrev(D22, D22, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ vst1(D22, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D22, Address(R1, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ veor(D22, D22, D18, quad);
+ __ veor(D22, D22, D6, quad);
+ __ vrev(D22, D22, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ vst1(D22, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D22, Address(R1, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ veor(D22, D22, D18, quad);
+ __ veor(D22, D22, D8, quad);
+ __ vrev(D22, D22, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ vst1(D22, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D22, Address(R1, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ veor(D22, D22, D18, quad);
+ __ veor(D22, D22, D10, quad);
+ __ vrev(D22, D22, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ vst1(D22, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+ __ vld1(D22, Address(R1, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+ __ veor(D22, D22, D18, quad);
+ __ veor(D22, D22, D12, quad);
+ __ vrev(D22, D22, quad, 32, MacroAssembler::VELEM_SIZE_8);
+ __ vst1(D22, Address(R5, 0, post_indexed), MacroAssembler::VELEM_SIZE_8, MacroAssembler::VLD1_TYPE_2_REGS);
+
+
+ // check if we're done
+ __ ldr(R4, output_buffer_end);
+ __ cmp(R4, R1);
+ __ add(R0, R0, 128-16); // R0 = address of the last cipher block of this chunk
+ __ str(R0, iv); // it is the chaining value for the next chunk
+ __ add(R0, R0, 16); // R0 = source of the next chunk
+
+ __ b(decrypt_8_blocks, ne); // flags still come from the cmp above
+
+ __ add(SP, SP, 40);
+ __ fldmiad(SP, FloatRegisterSet(D8, 8), writeback);;
+ }
+
+ __ bind(cbc_done);
+ __ pop(RegisterSet(R4, R12) | LR);
+ __ ldr(R0, Address(SP)); // after the pop, SP addresses the first stack argument (cipher len), which is returned in R0
+ __ bx(LR);
+
+ return start;
+}
+#endif // USE_CRYPTO
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/stubRoutines_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+
+#ifndef AARCH64
+address StubRoutines::Arm::_idiv_irem_entry = NULL;
+#endif // !AARCH64
+// All entry points defined in this file start out as NULL; nothing in this file assigns them.
+address StubRoutines::Arm::_partial_subtype_check = NULL;
+
+#ifndef AARCH64
+address StubRoutines::_atomic_load_long_entry = NULL;
+address StubRoutines::_atomic_store_long_entry = NULL;
+#endif // !AARCH64
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/stubRoutines_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_STUBROUTINES_ARM_HPP
+#define CPU_ARM_VM_STUBROUTINES_ARM_HPP
+
+// This file holds the platform specific parts of the StubRoutines
+// definition. See stubRoutines.hpp for a description on how to
+// extend it.
+
+enum platform_dependent_constants {
+ code_size1 = 9000, // presumably the size of the first stub code buffer (see stubRoutines.hpp — TODO confirm); simply increase if too small (assembler will crash if too small)
+ code_size2 = 22000 // presumably the size of the second stub code buffer (see stubRoutines.hpp — TODO confirm); simply increase if too small (assembler will crash if too small)
+};
+
+class Arm {
+ friend class StubGenerator;
+ friend class VMStructs;
+ // Holder for the ARM-specific stub entry addresses; only the friends above can write the private fields.
+ private:
+ // entry addresses, defined in stubRoutines_arm.cpp
+#ifndef AARCH64
+ static address _idiv_irem_entry;
+#endif
+ static address _partial_subtype_check;
+
+ public:
+ // read-only accessors for the entry addresses
+#ifndef AARCH64
+ static address idiv_irem_entry() { return _idiv_irem_entry; }
+#endif
+ static address partial_subtype_check() { return _partial_subtype_check; }
+};
+
+ static bool returns_to_call_stub(address return_pc) {
+ return return_pc == _call_stub_return_address; // true iff return_pc is exactly the recorded call stub return address
+ }
+ // Entry addresses and accessors for the long load/store stubs; compiled out when AARCH64 is defined.
+#ifndef AARCH64
+ static address _atomic_load_long_entry;
+ static address _atomic_store_long_entry;
+
+ static address atomic_load_long_entry() { return _atomic_load_long_entry; }
+ static address atomic_store_long_entry() { return _atomic_store_long_entry; }
+#endif
+
+
+#endif // CPU_ARM_VM_STUBROUTINES_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/templateInterpreterGenerator_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,1976 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "interpreter/templateTable.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/methodData.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/macros.hpp"
+
+// Size of the interpreter code. Increase it if it is too small: in that
+// case the interpreter fails with a guarantee
+// ("not enough space for interpreter generation").
+// Run with +PrintInterpreter to get the VM to print out the size.
+// Max size (with JVMTI).
+int TemplateInterpreter::InterpreterCodeSize = 180 * 1024;
+// Shorthand used by all generators in this file: "__ insn(...)" expands to "_masm->insn(...)".
+#define __ _masm->
+
+//------------------------------------------------------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_slow_signature_handler() {
+ address entry = __ pc();
+ // Emits a stub that saves LR, calls InterpreterRuntime::slow_signature_handler, reloads the argument registers from the stack and returns via the saved LR. Returns the stub's start address.
+ // callee-save register for saving LR, shared with generate_native_entry
+ const Register Rsaved_ret_addr = AARCH64_ONLY(R21) NOT_AARCH64(Rtmp_save0);
+
+ __ mov(Rsaved_ret_addr, LR);
+ // R1..R3 carry the method, the locals pointer and the current SP into the runtime call.
+ __ mov(R1, Rmethod);
+ __ mov(R2, Rlocals);
+ __ mov(R3, SP);
+
+#ifdef AARCH64
+ // expand expr. stack and extended SP to avoid cutting SP in call_VM
+ __ mov(Rstack_top, SP);
+ __ str(Rstack_top, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
+ __ check_stack_top();
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), R1, R2, R3, false);
+ // Pop the integer argument registers, two words per ldp; the very first word is discarded into ZR.
+ __ ldp(ZR, c_rarg1, Address(SP, 2*wordSize, post_indexed));
+ __ ldp(c_rarg2, c_rarg3, Address(SP, 2*wordSize, post_indexed));
+ __ ldp(c_rarg4, c_rarg5, Address(SP, 2*wordSize, post_indexed));
+ __ ldp(c_rarg6, c_rarg7, Address(SP, 2*wordSize, post_indexed));
+ // Then pop the floating point argument registers V0..V7 the same way.
+ __ ldp_d(V0, V1, Address(SP, 2*wordSize, post_indexed));
+ __ ldp_d(V2, V3, Address(SP, 2*wordSize, post_indexed));
+ __ ldp_d(V4, V5, Address(SP, 2*wordSize, post_indexed));
+ __ ldp_d(V6, V7, Address(SP, 2*wordSize, post_indexed));
+#else
+
+ // Safer to save R9 (when scratched) since callers may have been
+ // written assuming R9 survives. This is suboptimal but
+ // probably not important for this slow case call site.
+ // Note for R9 saving: slow_signature_handler may copy register
+ // arguments above the current SP (passed as R3). It is safe for
+ // call_VM to use push and pop to protect additional values on the
+ // stack if needed.
+ __ call_VM(CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), true /* save R9 if needed*/);
+ __ add(SP, SP, wordSize); // Skip R0
+ __ pop(RegisterSet(R1, R3)); // Load arguments passed in registers
+#ifdef __ABI_HARD__
+ // A few alternatives to the always-load-FP-registers approach:
+ // - parse the method signature to detect FP arguments
+ // - keep a counter/flag on the stack indicating the number of FP arguments in the method.
+ // The latter was originally implemented and tested, but a conditional path could
+ // eliminate any gain obtained by avoiding 8 double word loads.
+ __ fldmiad(SP, FloatRegisterSet(D0, 8), writeback);
+#endif // __ABI_HARD__
+#endif // AARCH64
+
+ __ ret(Rsaved_ret_addr); // return through the LR value saved on entry
+
+ return entry;
+}
+
+
+//
+// Various method entries (that c++ and asm interpreter agree upon)
+//------------------------------------------------------------------------------------------------------------------------
+//
+//
+
+// Abstract method entry
+// Attempt to execute abstract method. Throw exception
+address TemplateInterpreterGenerator::generate_abstract_entry(void) {
+  // Entry used for abstract methods: all it does is raise AbstractMethodError.
+  const address stub_start = __ pc();
+  const address thrower = CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError);
+
+#ifdef AARCH64
+  __ restore_sp_after_call(Rtemp);
+  __ restore_stack_top();
+#endif
+  // Enter the VM with an empty expression stack; the runtime call throws.
+  __ empty_expression_stack();
+  __ call_VM(noreg, thrower);
+  DEBUG_ONLY(STOP("generate_abstract_entry");) // Should not reach here
+  return stub_start;
+}
+
+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
+  // Returning NULL asks the caller to generate a vanilla entry instead of a
+  // specialized math entry for 'kind'.
+  if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
+
+  // TODO: ARM - no math intrinsic entries are generated yet, so every kind
+  // currently falls back to the vanilla entry as well. No code is emitted
+  // here; statements placed after this return would be unreachable.
+  return NULL;
+}
+
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
+ address entry = __ pc();
+ // Emits the handler that throws StackOverflowError via InterpreterRuntime; returns the handler's start address.
+ // Note: There should be a minimal interpreter frame set up when stack
+ // overflow occurs since we check explicitly for it now.
+ // In ASSERT builds the generated code stops if SP is above the frame's monitor block top slot.
+#ifdef ASSERT
+ { Label L;
+ __ sub(Rtemp, FP, - frame::interpreter_frame_monitor_block_top_offset * wordSize);
+ __ cmp(SP, Rtemp); // Rtemp = maximal SP for current FP,
+ // (stack grows negative)
+ __ b(L, ls); // check if frame is complete
+ __ stop ("interpreter frame not set up");
+ __ bind(L);
+ }
+#endif // ASSERT
+
+ // Restore bcp under the assumption that the current frame is still
+ // interpreted
+ __ restore_bcp();
+
+ // expression stack must be empty before entering the VM if an exception
+ // happened
+ __ empty_expression_stack();
+
+ // throw exception
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+ // the runtime call above is not expected to return
+ __ should_not_reach_here();
+
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char* name) {
+ address entry = __ pc();
+ // Emits the handler that throws ArrayIndexOutOfBoundsException; 'name' is embedded as an inlined string literal and passed to the runtime.
+ // index is in R4_ArrayIndexOutOfBounds_index
+
+ InlinedString Lname(name);
+
+ // expression stack must be empty before entering the VM if an exception happened
+ __ empty_expression_stack();
+
+ // setup parameters: R1 = name literal, R2 = offending index
+ __ ldr_literal(R1, Lname);
+ __ mov(R2, R4_ArrayIndexOutOfBounds_index);
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), R1, R2);
+
+ __ nop(); // to avoid filling CPU pipeline with invalid instructions
+ __ nop();
+ __ should_not_reach_here();
+ __ bind_literal(Lname); // the literal is bound after the code, behind the should_not_reach_here
+
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
+  // Input: the object that failed the cast is in R2_ClassCastException_obj.
+  const address handler_start = __ pc();
+  const address thrower =
+      CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException);
+
+  // The expression stack must be empty before entering the VM if an
+  // exception happened.
+  __ empty_expression_stack();
+
+  // Hand the offending object to the runtime as its single argument (R1).
+  __ mov(R1, R2_ClassCastException_obj);
+  __ call_VM(noreg, thrower, R1);
+
+  // The runtime call is not expected to return here.
+  __ should_not_reach_here();
+
+  return handler_start;
+}
+
+address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) {
+ assert(!pass_oop || message == NULL, "either oop or message but not both");
+ address entry = __ pc();
+ // Emits a handler that creates an exception named 'name' (with either 'message' or the oop at TOS) and branches to the throw_exception entry.
+ InlinedString Lname(name);
+ InlinedString Lmessage(message);
+
+ if (pass_oop) {
+ // object is at TOS
+ __ pop_ptr(R2);
+ }
+
+ // expression stack must be empty before entering the VM if an exception happened
+ __ empty_expression_stack();
+
+ // setup parameters
+ __ ldr_literal(R1, Lname);
+
+ if (pass_oop) {
+ __ call_VM(Rexception_obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), R1, R2);
+ } else {
+ if (message != NULL) {
+ __ ldr_literal(R2, Lmessage);
+ } else {
+ __ mov(R2, 0); // no message: pass 0 as the second argument
+ }
+ __ call_VM(Rexception_obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), R1, R2);
+ }
+
+ // throw exception
+ __ b(Interpreter::throw_exception_entry());
+
+ __ nop(); // to avoid filling CPU pipeline with invalid instructions
+ __ nop();
+ __ bind_literal(Lname);
+ if (!pass_oop && (message != NULL)) { // Lmessage is bound only on the path that loaded it
+ __ bind_literal(Lmessage);
+ }
+
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_continuation_for(TosState state) {
+ // Not used: only a STOP("generate_continuation_for") is placed and NULL is returned instead of an entry address.
+ STOP("generate_continuation_for");
+ return NULL;
+}
+
+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
+ address entry = __ pc();
+ // Emits the return entry for 'state': restores the interpreter registers, adjusts Rstack_top and dispatches to the bytecode 'step' bytes ahead.
+ __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
+
+#ifdef AARCH64
+ __ restore_sp_after_call(Rtemp); // Restore SP to extended SP
+ __ restore_stack_top();
+#else
+ // Restore stack bottom in case i2c adjusted stack
+ __ ldr(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+ // and NULL it as marker that SP is now tos until next java call
+ __ mov(Rtemp, (int)NULL_WORD);
+ __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // AARCH64
+
+ __ restore_method();
+ __ restore_bcp();
+ __ restore_dispatch();
+ __ restore_locals();
+
+ const Register Rcache = R2_tmp;
+ const Register Rindex = R3_tmp;
+ __ get_cache_and_index_at_bcp(Rcache, Rindex, 1, index_size);
+ // Load one byte from the flags of the cache entry and advance Rstack_top by that many stack elements. NOTE(review): presumably the parameter size of the call just completed — confirm.
+ __ add(Rtemp, Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ ldrb(Rtemp, Address(Rtemp, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
+ __ check_stack_top();
+ __ add(Rstack_top, Rstack_top, AsmOperand(Rtemp, lsl, Interpreter::logStackElementSize));
+
+#ifndef AARCH64
+ __ convert_retval_to_tos(state);
+#endif // !AARCH64
+
+ __ dispatch_next(state, step);
+
+ return entry;
+}
+
+
+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) {
+ address entry = __ pc();
+ // Emits the deoptimization entry for 'state': restores the interpreter registers, throws a pending exception if there is one, otherwise dispatches 'step' bytes ahead.
+ __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
+
+#ifdef AARCH64
+ __ restore_sp_after_call(Rtemp); // Restore SP to extended SP
+ __ restore_stack_top();
+#else
+ // The stack is not extended by deopt but we must NULL last_sp as this
+ // entry is like a "return".
+ __ mov(Rtemp, 0);
+ __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // AARCH64
+
+ __ restore_method();
+ __ restore_bcp();
+ __ restore_dispatch();
+ __ restore_locals();
+
+ // handle exceptions
+ { Label L;
+ __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
+ __ cbz(Rtemp, L); // nothing pending: skip the throw
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
+ __ should_not_reach_here();
+ __ bind(L);
+ }
+
+ __ dispatch_next(state, step);
+
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) {
+#ifdef AARCH64
+ address entry = __ pc(); // AArch64: emit a small routine that normalizes the result in R0 for 'type' and returns
+ switch (type) {
+ case T_BOOLEAN:
+ __ tst(R0, 0xff); // R0 becomes 1 if any of its low 8 bits is set, else 0
+ __ cset(R0, ne);
+ break;
+ case T_CHAR : __ zero_extend(R0, R0, 16); break;
+ case T_BYTE : __ sign_extend(R0, R0, 8); break;
+ case T_SHORT : __ sign_extend(R0, R0, 16); break;
+ case T_INT : // fall through
+ case T_LONG : // fall through
+ case T_VOID : // fall through
+ case T_FLOAT : // fall through
+ case T_DOUBLE : /* nothing to do */ break;
+ case T_OBJECT :
+ // retrieve result from frame
+ __ ldr(R0, Address(FP, frame::interpreter_frame_oop_temp_offset * wordSize));
+ // and verify it
+ __ verify_oop(R0);
+ break;
+ default : ShouldNotReachHere();
+ }
+ __ ret();
+ return entry;
+#else
+ // Result handlers are not used on 32-bit ARM
+ // since the returned value is already in the appropriate format.
+ __ should_not_reach_here(); // to avoid empty code block
+
+ // A non-zero result handler indicates that an object is returned; this is
+ // used in the native entry code. The value is a flag, not a code address.
+ return type == T_OBJECT ? (address)(-1) : NULL;
+#endif // AARCH64
+}
+
+address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) {
+  const address stub_start = __ pc();
+  // Push the value for the incoming tos state, then call runtime_entry in the VM.
+  __ push(state);
+  __ call_VM(noreg, runtime_entry);
+  // Reload the current bytecode from Rbcp and dispatch it with state vtos.
+  __ ldrb(R3_bytecode, Address(Rbcp));
+  __ dispatch_only_normal(vtos);
+  return stub_start;
+}
+
+
+// Helpers for factoring out common cases in the various types of method entries.
+//
+
+// increment invocation count & check for overflow
+//
+// Note: checking for negative value instead of overflow
+// so we have a 'sticky' overflow test
+//
+// In: Rmethod.
+//
+// Uses R0, R1, Rtemp.
+//
+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow,
+ Label* profile_method,
+ Label* profile_method_continue) {
+ Label done;
+ const Register Rcounters = Rtemp;
+ const Address invocation_counter(Rcounters,
+ MethodCounters::invocation_counter_offset() +
+ InvocationCounter::counter_offset());
+ // Emits the invocation counter update; the generated code branches to *overflow when the overflow condition is met.
+ // Note: In tiered we increment either counters in MethodCounters* or
+ // in MDO depending if we're profiling or not.
+ if (TieredCompilation) {
+ int increment = InvocationCounter::count_increment;
+ Label no_mdo;
+ if (ProfileInterpreter) {
+ // Are we profiling?
+ __ ldr(R1_tmp, Address(Rmethod, Method::method_data_offset()));
+ __ cbz(R1_tmp, no_mdo);
+ // Increment counter in the MDO
+ const Address mdo_invocation_counter(R1_tmp,
+ in_bytes(MethodData::invocation_counter_offset()) +
+ in_bytes(InvocationCounter::counter_offset()));
+ const Address mask(R1_tmp, in_bytes(MethodData::invoke_mask_offset()));
+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, R0_tmp, Rtemp, eq, overflow);
+ __ b(done);
+ }
+ __ bind(no_mdo); // no MDO (or not profiling): use the counter in MethodCounters instead
+ __ get_method_counters(Rmethod, Rcounters, done); // NOTE(review): 'done' is presumably taken when no MethodCounters are available — confirm
+ const Address mask(Rcounters, in_bytes(MethodCounters::invoke_mask_offset()));
+ __ increment_mask_and_jump(invocation_counter, increment, mask, R0_tmp, R1_tmp, eq, overflow);
+ __ bind(done);
+ } else { // not TieredCompilation
+ const Address backedge_counter(Rcounters,
+ MethodCounters::backedge_counter_offset() +
+ InvocationCounter::counter_offset());
+
+ const Register Ricnt = R0_tmp; // invocation counter
+ const Register Rbcnt = R1_tmp; // backedge counter
+
+ __ get_method_counters(Rmethod, Rcounters, done);
+
+ if (ProfileInterpreter) {
+ const Register Riic = R1_tmp; // same register as Rbcnt; it is free until the backedge counter is loaded below
+ __ ldr_s32(Riic, Address(Rcounters, MethodCounters::interpreter_invocation_counter_offset()));
+ __ add(Riic, Riic, 1);
+ __ str_32(Riic, Address(Rcounters, MethodCounters::interpreter_invocation_counter_offset()));
+ }
+
+ // Update standard invocation counters
+
+ __ ldr_u32(Ricnt, invocation_counter);
+ __ ldr_u32(Rbcnt, backedge_counter);
+
+ __ add(Ricnt, Ricnt, InvocationCounter::count_increment);
+
+#ifdef AARCH64
+ __ andr(Rbcnt, Rbcnt, (unsigned int)InvocationCounter::count_mask_value); // mask out the status bits
+#else
+ __ bic(Rbcnt, Rbcnt, ~InvocationCounter::count_mask_value); // mask out the status bits
+#endif // AARCH64
+
+ __ str_32(Ricnt, invocation_counter); // save invocation count
+ __ add(Ricnt, Ricnt, Rbcnt); // add both counters
+
+ // profile_method is non-null only for interpreted method so
+ // profile_method != NULL == !native_call
+ // BytecodeInterpreter only calls for native so code is elided.
+
+ if (ProfileInterpreter && profile_method != NULL) {
+ assert(profile_method_continue != NULL, "should be non-null");
+
+ // Test to see if we should create a method data oop
+ // Reuse R1_tmp as we don't need backedge counters anymore.
+ Address profile_limit(Rcounters, in_bytes(MethodCounters::interpreter_profile_limit_offset()));
+ __ ldr_s32(R1_tmp, profile_limit);
+ __ cmp_32(Ricnt, R1_tmp);
+ __ b(*profile_method_continue, lt); // combined count still below the profile limit
+
+ // if no method data exists, go to profile_method
+ __ test_method_data_pointer(R1_tmp, *profile_method);
+ }
+
+ Address invoke_limit(Rcounters, in_bytes(MethodCounters::interpreter_invocation_limit_offset()));
+ __ ldr_s32(R1_tmp, invoke_limit);
+ __ cmp_32(Ricnt, R1_tmp);
+ __ b(*overflow, hs); // combined count >= invocation limit (unsigned compare)
+ __ bind(done);
+ }
+}
+
+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {
+  // InterpreterRuntime::frequency_counter_overflow takes a single argument
+  // telling whether the overflow happened at a backwards branch (non-NULL
+  // bcp); false is passed here. The call returns the address of the verified
+  // entry point for the method, or NULL if the compilation did not complete
+  // (it either went to the background or bailed out).
+  const address overflow_fn = CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow);
+  __ mov(R1, (int)false);
+  __ call_VM(noreg, overflow_fn, R1);
+  __ b(do_continue);  // jump to the interpreted entry
+}
+
+void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
+ // Check if we've got enough room on the stack for
+ // - overhead;
+ // - locals;
+ // - expression stack.
+ // If not, the generated code restores SP from Rsender_sp and branches to the throw_StackOverflowError stub.
+ // Registers on entry:
+ //
+ // R3 = number of additional locals
+ // R11 = max expression stack slots (AArch64 only)
+ // Rthread
+ // Rmethod
+ // Registers used: R0, R1, R2, Rtemp.
+
+ const Register Radditional_locals = R3;
+ const Register RmaxStack = AARCH64_ONLY(R11) NOT_AARCH64(R2);
+
+ // monitor entry size
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+ // total overhead size: entry_size + (saved registers, thru expr stack bottom).
+ // be sure to change this if you add/subtract anything to/from the overhead area
+ const int overhead_size = (frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset)*wordSize + entry_size;
+
+ // Shadow zone reserved for VM runtime calls and subsequent Java calls. Despite the name, it is used below as a size added to overhead_size, not as a page count.
+ const int reserved_pages = JavaThread::stack_shadow_zone_size();
+
+ // Thread::stack_size() includes the guard zone, which should not be touched. Also used as a size, not a page count.
+ const int guard_pages = JavaThread::stack_guard_zone_size();
+
+ __ ldr(R0, Address(Rthread, Thread::stack_base_offset()));
+ __ ldr(R1, Address(Rthread, Thread::stack_size_offset()));
+#ifndef AARCH64
+ __ ldr(Rtemp, Address(Rmethod, Method::const_offset()));
+ __ ldrh(RmaxStack, Address(Rtemp, ConstMethod::max_stack_offset()));
+#endif // !AARCH64
+ __ sub_slow(Rtemp, SP, overhead_size + reserved_pages + guard_pages + Method::extra_stack_words());
+
+ // reserve space for additional locals
+ __ sub(Rtemp, Rtemp, AsmOperand(Radditional_locals, lsl, Interpreter::logStackElementSize));
+
+ // R0 = stack base - stack size
+ __ sub(R0, R0, R1);
+
+ // reserve space for expression stack
+ __ sub(Rtemp, Rtemp, AsmOperand(RmaxStack, lsl, Interpreter::logStackElementSize));
+ // Rtemp is now the lowest address the new frame would need; compare it with R0
+ __ cmp(Rtemp, R0);
+
+#ifdef AARCH64
+ Label L;
+ __ b(L, hi); // enough room: skip the throw
+ __ mov(SP, Rsender_sp); // restore SP
+ __ b(StubRoutines::throw_StackOverflowError_entry());
+ __ bind(L);
+#else
+ __ mov(SP, Rsender_sp, ls); // restore SP
+ __ b(StubRoutines::throw_StackOverflowError_entry(), ls);
+#endif // AARCH64
+}
+
+
+// Allocate monitor and lock method (asm interpreter)
+//
+void TemplateInterpreterGenerator::lock_method() {
+ // synchronize method: pick the object to lock, allocate a monitor entry in the frame and lock it
+
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+ assert ((entry_size % StackAlignmentInBytes) == 0, "should keep stack alignment");
+
+ #ifdef ASSERT
+ { Label L;
+ __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset()));
+ __ tbnz(Rtemp, JVM_ACC_SYNCHRONIZED_BIT, L);
+ __ stop("method doesn't need synchronization");
+ __ bind(L);
+ }
+ #endif // ASSERT
+
+ // get synchronization object: local 0 (the receiver) unless the static flag is set, otherwise the mirror loaded from Rmethod
+ { Label done;
+ __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset()));
+#ifdef AARCH64
+ __ ldr(R0, Address(Rlocals, Interpreter::local_offset_in_bytes(0))); // get receiver (assume this is frequent case)
+ __ tbz(Rtemp, JVM_ACC_STATIC_BIT, done);
+#else
+ __ tst(Rtemp, JVM_ACC_STATIC);
+ __ ldr(R0, Address(Rlocals, Interpreter::local_offset_in_bytes(0)), eq); // get receiver (assume this is frequent case)
+ __ b(done, eq);
+#endif // AARCH64
+ __ load_mirror(R0, Rmethod, Rtemp);
+ __ bind(done);
+ }
+
+ // add space for monitor & lock
+
+#ifdef AARCH64
+ __ check_extended_sp(Rtemp);
+ __ sub(SP, SP, entry_size); // adjust extended SP
+ __ mov(Rtemp, SP);
+ __ str(Rtemp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
+#endif // AARCH64
+
+ __ sub(Rstack_top, Rstack_top, entry_size);
+ __ check_stack_top_on_expansion();
+ // (above) space for one monitor entry has been added below the stack top
+ __ str(Rstack_top, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+ // (above) set new monitor block top
+ __ str(R0, Address(Rstack_top, BasicObjectLock::obj_offset_in_bytes()));
+ // (above) store object into the new monitor entry
+ __ mov(R1, Rstack_top); // monitor entry address
+ __ lock_object(R1);
+}
+
+#ifdef AARCH64
+
+//
+// Generate a fixed interpreter frame. This is identical setup for interpreted methods
+// and for native methods hence the shared code.
+//
+// On entry:
+// R10 = ConstMethod
+// R11 = max expr. stack (in slots), if !native_call
+//
+// On exit:
+// Rbcp, Rstack_top are initialized, SP is extended
+//
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+ // Incoming registers
+ const Register RconstMethod = R10;
+ const Register RmaxStack = R11;
+ // Temporary registers
+ const Register RextendedSP = R0;
+ const Register Rcache = R1;
+ const Register Rmdp = ProfileInterpreter ? R2 : ZR; // without profiling, ZR is stored into the mdx slot
+
+ // Generates the following stack layout (stack grows up in this picture):
+ //
+ // [ expr. stack bottom ]
+ // [ saved Rbcp ]
+ // [ current Rlocals ]
+ // [ cache ]
+ // [ mdx ]
+ // [ mirror ]
+ // [ Method* ]
+ // [ extended SP ]
+ // [ expr. stack top ]
+ // [ sender_sp ]
+ // [ saved FP ] <--- FP
+ // [ saved LR ]
+
+ // initialize fixed part of activation frame
+ __ stp(FP, LR, Address(SP, -2*wordSize, pre_indexed));
+ __ mov(FP, SP); // establish new FP
+
+ // setup Rbcp
+ if (native_call) {
+ __ mov(Rbcp, ZR); // bcp = 0 for native calls
+ } else {
+ __ add(Rbcp, RconstMethod, in_bytes(ConstMethod::codes_offset())); // get codebase
+ }
+
+ // Rstack_top & RextendedSP (SP equals FP here, so Rstack_top = FP - 10 words)
+ __ sub(Rstack_top, SP, 10*wordSize);
+ if (native_call) {
+ __ sub(RextendedSP, Rstack_top, round_to(wordSize, StackAlignmentInBytes)); // reserve 1 slot for exception handling
+ } else {
+ __ sub(RextendedSP, Rstack_top, AsmOperand(RmaxStack, lsl, Interpreter::logStackElementSize));
+ __ align_reg(RextendedSP, RextendedSP, StackAlignmentInBytes);
+ }
+ __ mov(SP, RextendedSP);
+ __ check_stack_top();
+
+ // Load Rmdp: ZR when the method data pointer is NULL, otherwise method data + data_offset
+ if (ProfileInterpreter) {
+ __ ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
+ __ tst(Rtemp, Rtemp);
+ __ add(Rtemp, Rtemp, in_bytes(MethodData::data_offset()));
+ __ csel(Rmdp, ZR, Rtemp, eq);
+ }
+
+ // Load Rcache
+ __ ldr(Rtemp, Address(RconstMethod, ConstMethod::constants_offset()));
+ __ ldr(Rcache, Address(Rtemp, ConstantPool::cache_offset_in_bytes()));
+ // Get mirror and store it in the frame as GC root for this Method*
+ __ load_mirror(Rtemp, Rmethod, Rtemp);
+
+ // Build fixed frame: five pair stores fill the slots FP-10 .. FP-1 (in words)
+ __ stp(Rstack_top, Rbcp, Address(FP, -10*wordSize));
+ __ stp(Rlocals, Rcache, Address(FP, -8*wordSize));
+ __ stp(Rmdp, Rtemp, Address(FP, -6*wordSize));
+ __ stp(Rmethod, RextendedSP, Address(FP, -4*wordSize));
+ __ stp(ZR, Rsender_sp, Address(FP, -2*wordSize));
+ assert(frame::interpreter_frame_initial_sp_offset == -10, "interpreter frame broken");
+ assert(frame::interpreter_frame_stack_top_offset == -2, "stack top broken");
+}
+
+#else // AARCH64
+
+//
+// Generate a fixed interpreter frame. This is identical setup for interpreted methods
+// and for native methods hence the shared code.
+
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+ // Generates the following stack layout (lowest address first):
+ // [ expr. stack bottom ]
+ // [ saved Rbcp ]
+ // [ current Rlocals ]
+ // [ cache ]
+ // [ mdx ]
+ // [ mirror ]
+ // [ Method* ]
+ // [ last_sp ]
+ // [ sender_sp ]
+ // [ saved FP ] <--- FP
+ // [ saved LR ]
+
+ // initialize fixed part of activation frame
+ __ push(LR); // save return address
+ __ push(FP); // save FP
+ __ mov(FP, SP); // establish new FP
+
+ __ push(Rsender_sp);
+
+ __ mov(R0, 0); // R0 stays 0 below and is pushed wherever a zeroed slot is needed
+ __ push(R0); // leave last_sp as null
+
+ // setup Rbcp
+ if (native_call) {
+ __ mov(Rbcp, 0); // bcp = 0 for native calls
+ } else {
+ __ ldr(Rtemp, Address(Rmethod, Method::const_offset())); // get ConstMethod*
+ __ add(Rbcp, Rtemp, ConstMethod::codes_offset()); // get codebase
+ }
+
+ __ push(Rmethod); // save Method*
+ // Get mirror and store it in the frame as GC root for this Method*
+ __ load_mirror(Rtemp, Rmethod, Rtemp);
+ __ push(Rtemp);
+
+ if (ProfileInterpreter) {
+ __ ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
+ __ tst(Rtemp, Rtemp);
+ __ add(Rtemp, Rtemp, in_bytes(MethodData::data_offset()), ne); // only when the method data pointer is non-NULL
+ __ push(Rtemp); // set the mdp (method data pointer)
+ } else {
+ __ push(R0); // no profiling: the mdx slot is 0
+ }
+
+ __ ldr(Rtemp, Address(Rmethod, Method::const_offset()));
+ __ ldr(Rtemp, Address(Rtemp, ConstMethod::constants_offset()));
+ __ ldr(Rtemp, Address(Rtemp, ConstantPool::cache_offset_in_bytes()));
+ __ push(Rtemp); // set constant pool cache
+ __ push(Rlocals); // set locals pointer
+ __ push(Rbcp); // set bcp
+ __ push(R0); // reserve word for pointer to expression stack bottom
+ __ str(SP, Address(SP, 0)); // set expression stack bottom
+}
+
+#endif // AARCH64
+
+// End of helpers
+
+//------------------------------------------------------------------------------------------------------------------------
+// Entry points
+//
+// Here we generate the various kind of entries into the interpreter.
+// The two main entry types are generic bytecode methods and native call methods.
+// These both come in synchronized and non-synchronized versions but the
+// frame layout they create is very similar. The other method entry
+// types are really just special purpose entries that are really entry
+// and interpretation all in one. These are for trivial methods like
+// accessor, empty, or special math methods.
+//
+// When control flow reaches any of the entry types for the interpreter
+// the following holds ->
+//
+// Arguments:
+//
+// Rmethod: Method*
+// Rthread: thread
+// Rsender_sp: sender sp
+// Rparams (SP on 32-bit ARM): pointer to method parameters
+//
+// LR: return address
+//
+// Stack layout immediately at entry
+//
+// [ optional padding(*)] <--- SP (AArch64)
+// [ parameter n ] <--- Rparams (SP on 32-bit ARM)
+// ...
+// [ parameter 1 ]
+// [ expression stack ] (caller's java expression stack)
+
+// Assuming that we don't go to one of the trivial specialized
+// entries the stack will look like below when we are ready to execute
+// the first bytecode (or call the native routine). The register usage
+// will be as the template based interpreter expects.
+//
+// local variables follow incoming parameters immediately; i.e.
+// the return address is saved at the end of the locals.
+//
+// [ reserved stack (*) ] <--- SP (AArch64)
+// [ expr. stack ] <--- Rstack_top (SP on 32-bit ARM)
+// [ monitor entry ]
+// ...
+// [ monitor entry ]
+// [ expr. stack bottom ]
+// [ saved Rbcp ]
+// [ current Rlocals ]
+// [ cache ]
+// [ mdx ]
+// [ mirror ]
+// [ Method* ]
+//
+// 32-bit ARM:
+// [ last_sp ]
+//
+// AArch64:
+// [ extended SP (*) ]
+// [ stack top (*) ]
+//
+// [ sender_sp ]
+// [ saved FP ] <--- FP
+// [ saved LR ]
+// [ optional padding(*)]
+// [ local variable m ]
+// ...
+// [ local variable 1 ]
+// [ parameter n ]
+// ...
+// [ parameter 1 ] <--- Rlocals
+//
+// (*) - AArch64 only
+//
+
+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
+#if INCLUDE_ALL_GCS
+ if (UseG1GC) {
+ // Code: _aload_0, _getfield, _areturn
+ // parameter size = 1
+ //
+ // The code that gets generated by this routine is split into 2 parts:
+ // 1. The "intrinsified" code for G1 (or any SATB based GC),
+ // 2. The slow path - which is an expansion of the regular method entry.
+ //
+ // Notes:-
+ // * In the G1 code we do not check whether we need to block for
+ // a safepoint. If G1 is enabled then we must execute the specialized
+ // code for Reference.get (except when the Reference object is null)
+ // so that we can log the value in the referent field with an SATB
+ // update buffer.
+ // If the code for the getfield template is modified so that the
+ // G1 pre-barrier code is executed when the current method is
+ // Reference.get() then going through the normal method entry
+ // will be fine.
+ // * The G1 code can, however, check the receiver object (the instance
+ // of java.lang.Reference) and jump to the slow path if null. If the
+ // Reference object is null then we obviously cannot fetch the referent
+ // and so we don't need to call the G1 pre-barrier. Thus we can use the
+ // regular method entry code to generate the NPE.
+ //
+ // This code is based on generate_accessor_entry.
+ //
+ // Rmethod: Method*
+ // Rthread: thread
+ // Rsender_sp: sender sp, must be preserved for slow path, set SP to it on fast path
+ // Rparams: parameters
+
+ address entry = __ pc();
+ Label slow_path;
+ const Register Rthis = R0;
+ const Register Rret_addr = Rtmp_save1;
+ assert_different_registers(Rthis, Rret_addr, Rsender_sp);
+
+ const int referent_offset = java_lang_ref_Reference::referent_offset;
+ guarantee(referent_offset > 0, "referent offset not initialized");
+
+ // Check if local 0 != NULL
+ // If the receiver is null then it is OK to jump to the slow path.
+ __ ldr(Rthis, Address(Rparams));
+ __ cbz(Rthis, slow_path);
+
+ // Generate the G1 pre-barrier code to log the value of
+ // the referent field in an SATB buffer.
+
+ // Load the value of the referent field (Rthis is R0, so the receiver is overwritten by the result).
+ __ load_heap_oop(R0, Address(Rthis, referent_offset));
+
+ // Preserve LR
+ __ mov(Rret_addr, LR);
+
+ __ g1_write_barrier_pre(noreg, // store_addr
+ noreg, // new_val
+ R0, // pre_val
+ Rtemp, // tmp1
+ R1_tmp); // tmp2
+
+ // _areturn
+ __ mov(SP, Rsender_sp);
+ __ ret(Rret_addr);
+
+ // generate a vanilla interpreter entry as the slow path
+ __ bind(slow_path);
+ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+ return entry;
+ }
+#endif // INCLUDE_ALL_GCS
+
+ // If G1 is not enabled then attempt to go through the normal entry point
+ return NULL;
+}
+
+// Not supported
+address TemplateInterpreterGenerator::generate_CRC32_update_entry() { return NULL; } // emits nothing; NULL means no specialized entry
+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } // 'kind' is ignored
+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } // 'kind' is ignored
+
+// generate_native_entry(synchronized): emits the interpreter stub for calling a native method (asm interpreter)
+// and returns the address of the stub's first instruction.
+// This sets up a somewhat different looking stack for calling the native method
+// than the typical interpreter frame setup.
+// 'synchronized' selects whether lock_method() / unlock_object() are emitted around the native call.
+
+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
+ // determine code generation flags
+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; // invocation counting is emitted only if one of these is set
+
+ // Incoming registers:
+ //
+ // Rmethod: Method*
+ // Rthread: thread
+ // Rsender_sp: sender sp
+ // Rparams: parameters
+
+ address entry_point = __ pc(); // start of the generated stub; this is the value returned
+
+ // Register allocation
+ const Register Rsize_of_params = AARCH64_ONLY(R20) NOT_AARCH64(R6);
+ const Register Rsig_handler = AARCH64_ONLY(R21) NOT_AARCH64(Rtmp_save0 /* R4 */);
+ const Register Rnative_code = AARCH64_ONLY(R22) NOT_AARCH64(Rtmp_save1 /* R5 */);
+ const Register Rresult_handler = AARCH64_ONLY(Rsig_handler) NOT_AARCH64(R6); // aliases Rsig_handler (AArch64) or Rsize_of_params' R6 (32-bit ARM)
+
+#ifdef AARCH64
+ const Register RconstMethod = R10; // also used in generate_fixed_frame (should match)
+ const Register Rsaved_result = Rnative_code; // aliases Rnative_code; written only after the native call has returned
+ const FloatRegister Dsaved_result = V8;
+#else
+ const Register Rsaved_result_lo = Rtmp_save0; // R4
+ const Register Rsaved_result_hi = Rtmp_save1; // R5
+ FloatRegister saved_result_fp; // assigned below: D8 under __ABI_HARD__, fnoreg otherwise
+#endif // AARCH64
+
+
+#ifdef AARCH64
+ __ ldr(RconstMethod, Address(Rmethod, Method::const_offset()));
+ __ ldrh(Rsize_of_params, Address(RconstMethod, ConstMethod::size_of_parameters_offset()));
+#else
+ __ ldr(Rsize_of_params, Address(Rmethod, Method::const_offset())); // Rsize_of_params temporarily holds the ConstMethod*
+ __ ldrh(Rsize_of_params, Address(Rsize_of_params, ConstMethod::size_of_parameters_offset()));
+#endif // AARCH64
+
+ // native calls don't need the stack size check since they have no expression stack
+ // and the arguments are already on the stack and we only add a handful of words
+ // to the stack
+
+ // compute beginning of parameters (Rlocals)
+ __ sub(Rlocals, Rparams, wordSize);
+ __ add(Rlocals, Rlocals, AsmOperand(Rsize_of_params, lsl, Interpreter::logStackElementSize)); // Rlocals = Rparams - wordSize + (size_of_params << logStackElementSize)
+
+#ifdef AARCH64
+ int extra_stack_reserve = 2*wordSize; // extra space for oop_temp
+ if(__ can_post_interpreter_events()) {
+ // extra space for saved results
+ extra_stack_reserve += 2*wordSize;
+ }
+ // reserve extra stack space and nullify oop_temp slot
+ __ stp(ZR, ZR, Address(SP, -extra_stack_reserve, pre_indexed)); // pre-indexed pair store: SP moves down by extra_stack_reserve, two zero words are written at the new SP
+#else
+ // reserve stack space for oop_temp
+ __ mov(R0, 0);
+ __ push(R0); // the pushed zero word is the initial oop_temp value
+#endif // AARCH64
+
+ generate_fixed_frame(true); // Note: R9 is now saved in the frame
+
+ // make sure method is native & not abstract
+#ifdef ASSERT
+ __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset()));
+ {
+ Label L;
+ __ tbnz(Rtemp, JVM_ACC_NATIVE_BIT, L);
+ __ stop("tried to execute non-native method as native");
+ __ bind(L);
+ }
+ { Label L;
+ __ tbz(Rtemp, JVM_ACC_ABSTRACT_BIT, L);
+ __ stop("tried to execute abstract method in interpreter");
+ __ bind(L);
+ }
+#endif
+
+ // increment invocation count & check for overflow
+ Label invocation_counter_overflow;
+ if (inc_counter) {
+ if (synchronized) {
+ // Avoid unlocking method's monitor in case of exception, as it has not
+ // been locked yet.
+ __ set_do_not_unlock_if_synchronized(true, Rtemp);
+ }
+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); // no profiling labels are passed for the native entry
+ }
+
+ Label continue_after_compile;
+ __ bind(continue_after_compile); // handed to generate_counter_overflow() at the end of this function as its continuation label
+
+ if (inc_counter && synchronized) {
+ __ set_do_not_unlock_if_synchronized(false, Rtemp);
+ }
+
+ // check for synchronized methods
+ // Must happen AFTER invocation_counter check and stack overflow check,
+ // so method is not locked if overflows.
+ //
+ if (synchronized) {
+ lock_method();
+ } else {
+ // no synchronization necessary
+#ifdef ASSERT
+ { Label L;
+ __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset()));
+ __ tbz(Rtemp, JVM_ACC_SYNCHRONIZED_BIT, L);
+ __ stop("method needs synchronization");
+ __ bind(L);
+ }
+#endif
+ }
+
+ // start execution
+#ifdef ASSERT
+ { Label L;
+ __ ldr(Rtemp, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+ __ cmp(Rtemp, Rstack_top);
+ __ b(L, eq);
+ __ stop("broken stack frame setup in interpreter");
+ __ bind(L);
+ }
+#endif
+ __ check_extended_sp(Rtemp);
+
+ // jvmti/dtrace support
+ __ notify_method_entry();
+#if R9_IS_SCRATCHED
+ __ restore_method();
+#endif
+
+ {
+ Label L;
+ __ ldr(Rsig_handler, Address(Rmethod, Method::signature_handler_offset()));
+ __ cbnz(Rsig_handler, L); // handler already installed: skip the runtime call
+ __ mov(R1, Rmethod);
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R1, true); // NOTE(review): extra 'true' argument (absent in the call below) - presumably check_exceptions; confirm
+ __ ldr(Rsig_handler, Address(Rmethod, Method::signature_handler_offset())); // reload after the runtime call
+ __ bind(L);
+ }
+
+ {
+ Label L;
+ __ ldr(Rnative_code, Address(Rmethod, Method::native_function_offset()));
+ __ cbnz(Rnative_code, L); // native function already known: skip the runtime call
+ __ mov(R1, Rmethod);
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R1);
+ __ ldr(Rnative_code, Address(Rmethod, Method::native_function_offset())); // reload after the runtime call
+ __ bind(L);
+ }
+
+ // Allocate stack space for arguments
+
+#ifdef AARCH64
+ __ sub(Rtemp, SP, Rsize_of_params, ex_uxtw, LogBytesPerWord);
+ __ align_reg(SP, Rtemp, StackAlignmentInBytes);
+
+ // Allocate more stack space to accommodate all arguments passed on GP and FP registers:
+ // 8 * wordSize for GPRs
+ // 8 * wordSize for FPRs
+ int reg_arguments = round_to(8*wordSize + 8*wordSize, StackAlignmentInBytes);
+#else
+
+ // C functions need aligned stack
+ __ bic(SP, SP, StackAlignmentInBytes - 1);
+ // Multiply by BytesPerLong instead of BytesPerWord, because calling convention
+ // may require empty slots due to long alignment, e.g. func(int, jlong, int, jlong)
+ __ sub(SP, SP, AsmOperand(Rsize_of_params, lsl, LogBytesPerLong));
+
+#ifdef __ABI_HARD__
+ // Allocate more stack space to accommodate all GP as well as FP registers:
+ // 4 * wordSize
+ // 8 * BytesPerLong
+ int reg_arguments = round_to((4*wordSize) + (8*BytesPerLong), StackAlignmentInBytes);
+#else
+ // Reserve at least 4 words on the stack for loading
+ // of parameters passed on registers (R0-R3).
+ // See generate_slow_signature_handler().
+ // It is also used for JNIEnv & class additional parameters.
+ int reg_arguments = 4 * wordSize;
+#endif // __ABI_HARD__
+#endif // AARCH64
+
+ __ sub(SP, SP, reg_arguments); // lower SP by the register-argument area sized above
+
+
+ // Note: signature handler blows R4 (32-bit ARM) or R21 (AArch64) besides all scratch registers.
+ // See AbstractInterpreterGenerator::generate_slow_signature_handler().
+ __ call(Rsig_handler);
+#if R9_IS_SCRATCHED
+ __ restore_method();
+#endif
+ __ mov(Rresult_handler, R0); // keep the value the signature handler left in R0 as the result handler
+
+ // Pass JNIEnv and mirror for static methods
+ {
+ Label L;
+ __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset()));
+ __ add(R0, Rthread, in_bytes(JavaThread::jni_environment_offset())); // R0 = address of the JNIEnv inside the thread
+ __ tbz(Rtemp, JVM_ACC_STATIC_BIT, L); // non-static method: nothing more to pass
+ __ load_mirror(Rtemp, Rmethod, Rtemp);
+ __ add(R1, FP, frame::interpreter_frame_oop_temp_offset * wordSize);
+ __ str(Rtemp, Address(R1, 0)); // mirror is stored into the oop_temp slot; R1 holds the address of that slot
+ __ bind(L);
+ }
+
+ __ set_last_Java_frame(SP, FP, true, Rtemp);
+
+ // Changing state to _thread_in_native must be the last thing to do
+ // before the jump to native code. At this moment stack must be
+ // safepoint-safe and completely prepared for stack walking.
+#ifdef ASSERT
+ {
+ Label L;
+ __ ldr_u32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
+ __ cmp_32(Rtemp, _thread_in_Java);
+ __ b(L, eq);
+ __ stop("invalid thread state");
+ __ bind(L);
+ }
+#endif
+
+#ifdef AARCH64
+ __ mov(Rtemp, _thread_in_native);
+ __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
+ // STLR is used to force all preceding writes to be observed prior to thread state change
+ __ stlr_w(Rtemp, Rtemp2);
+#else
+ // Force all preceding writes to be observed prior to thread state change
+ __ membar(MacroAssembler::StoreStore, Rtemp);
+
+ __ mov(Rtemp, _thread_in_native);
+ __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
+#endif // AARCH64
+
+ __ call(Rnative_code); // call the native function itself
+#if R9_IS_SCRATCHED
+ __ restore_method();
+#endif
+
+ // Set FPSCR/FPCR to a known state
+ if (AlwaysRestoreFPU) {
+ __ restore_default_fp_mode();
+ }
+
+ // Do safepoint check
+ __ mov(Rtemp, _thread_in_native_trans);
+ __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
+
+ // Force this write out before the read below
+ __ membar(MacroAssembler::StoreLoad, Rtemp);
+
+ __ ldr_global_s32(Rtemp, SafepointSynchronize::address_of_state()); // Rtemp = safepoint state, compared further below
+
+ // Protect the return value in the interleaved code: save it to callee-save registers.
+#ifdef AARCH64
+ __ mov(Rsaved_result, R0);
+ __ fmov_d(Dsaved_result, D0);
+#else
+ __ mov(Rsaved_result_lo, R0);
+ __ mov(Rsaved_result_hi, R1);
+#ifdef __ABI_HARD__
+ // preserve native FP result in a callee-saved register
+ saved_result_fp = D8;
+ __ fcpyd(saved_result_fp, D0);
+#else
+ saved_result_fp = fnoreg;
+#endif // __ABI_HARD__
+#endif // AARCH64
+
+ {
+ __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
+ __ cmp(Rtemp, SafepointSynchronize::_not_synchronized);
+ __ cond_cmp(R3, 0, eq); // second compare is conditional on 'eq'; final 'eq' presumably means "not synchronizing and no suspend flags" - confirm cond_cmp semantics
+
+#ifdef AARCH64
+ Label L;
+ __ b(L, eq);
+ __ mov(R0, Rthread);
+ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::none);
+ __ bind(L);
+#else
+ __ mov(R0, Rthread, ne); // this move and the call below are both conditional on 'ne'
+ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::none, ne);
+#if R9_IS_SCRATCHED
+ __ restore_method();
+#endif
+#endif // AARCH64
+ }
+
+ // Perform Native->Java thread transition
+ __ mov(Rtemp, _thread_in_Java);
+ __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
+
+ // Zero handles and last_java_sp
+ __ reset_last_Java_frame(Rtemp);
+ __ ldr(R3, Address(Rthread, JavaThread::active_handles_offset()));
+ __ str_32(__ zero_register(Rtemp), Address(R3, JNIHandleBlock::top_offset_in_bytes()));
+ if (CheckJNICalls) {
+ __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
+ }
+
+ // Unbox if the result is non-zero object
+#ifdef AARCH64
+ {
+ Label L, Lnull;
+ __ mov_slow(Rtemp, AbstractInterpreter::result_handler(T_OBJECT));
+ __ cmp(Rresult_handler, Rtemp);
+ __ b(L, ne); // result handler is not the T_OBJECT one: nothing to unbox
+ __ cbz(Rsaved_result, Lnull);
+ __ ldr(Rsaved_result, Address(Rsaved_result)); // non-zero handle: load the oop it points to
+ __ bind(Lnull);
+ // Store oop on the stack for GC
+ __ str(Rsaved_result, Address(FP, frame::interpreter_frame_oop_temp_offset * wordSize));
+ __ bind(L);
+ }
+#else
+ __ tst(Rsaved_result_lo, Rresult_handler); // NOTE(review): on 32-bit ARM Rresult_handler is only tested as zero/non-zero; non-zero presumably marks an object result - confirm against the signature handlers
+ __ ldr(Rsaved_result_lo, Address(Rsaved_result_lo), ne);
+
+ // Store oop on the stack for GC
+ __ cmp(Rresult_handler, 0);
+ __ str(Rsaved_result_lo, Address(FP, frame::interpreter_frame_oop_temp_offset * wordSize), ne);
+#endif // AARCH64
+
+#ifdef AARCH64
+ // Restore SP (drop native parameters area), to keep SP in sync with extended_sp in frame
+ __ restore_sp_after_call(Rtemp);
+ __ check_stack_top();
+#endif // AARCH64
+
+ // reguard stack if StackOverflow exception happened while in native.
+ {
+ __ ldr_u32(Rtemp, Address(Rthread, JavaThread::stack_guard_state_offset()));
+ __ cmp_32(Rtemp, JavaThread::stack_guard_yellow_reserved_disabled);
+#ifdef AARCH64
+ Label L;
+ __ b(L, ne);
+ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::none);
+ __ bind(L);
+#else
+ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::none, eq); // conditional call: taken only on 'eq'
+#if R9_IS_SCRATCHED
+ __ restore_method();
+#endif
+#endif // AARCH64
+ }
+
+ // check pending exceptions
+ {
+ __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
+#ifdef AARCH64
+ Label L;
+ __ cbz(Rtemp, L);
+ __ mov_pc_to(Rexception_pc);
+ __ b(StubRoutines::forward_exception_entry());
+ __ bind(L);
+#else
+ __ cmp(Rtemp, 0);
+ __ mov(Rexception_pc, PC, ne); // this move and the branch below are both conditional on 'ne' (pending exception is non-zero)
+ __ b(StubRoutines::forward_exception_entry(), ne);
+#endif // AARCH64
+ }
+
+ if (synchronized) {
+ // address of first monitor
+ __ sub(R1, FP, - (frame::interpreter_frame_monitor_block_bottom_offset - frame::interpreter_frame_monitor_size()) * wordSize); // subtracting a negated value: R1 = FP + (block_bottom_offset - monitor_size) * wordSize
+ __ unlock_object(R1);
+ }
+
+ // jvmti/dtrace support
+ // Note: This must happen _after_ handling/throwing any exceptions since
+ // the exception handler code notifies the runtime of method exits
+ // too. If this happens before, method entry/exit notifications are
+ // not properly paired (was bug - gri 11/22/99).
+#ifdef AARCH64
+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI, true, Rsaved_result, noreg, Dsaved_result);
+#else
+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI, true, Rsaved_result_lo, Rsaved_result_hi, saved_result_fp);
+#endif // AARCH64
+
+ // Restore the result. Oop result is restored from the stack.
+#ifdef AARCH64
+ __ mov(R0, Rsaved_result);
+ __ fmov_d(D0, Dsaved_result);
+
+ __ blr(Rresult_handler); // AArch64 calls the result handler; the 32-bit path below only tests Rresult_handler against zero
+#else
+ __ cmp(Rresult_handler, 0);
+ __ ldr(R0, Address(FP, frame::interpreter_frame_oop_temp_offset * wordSize), ne); // non-zero: R0 = value stored in the oop_temp slot
+ __ mov(R0, Rsaved_result_lo, eq); // zero: R0 = saved low word
+ __ mov(R1, Rsaved_result_hi);
+
+#ifdef __ABI_HARD__
+ // reload native FP result
+ __ fcpyd(D0, D8);
+#endif // __ABI_HARD__
+
+#ifdef ASSERT
+ if (VerifyOops) {
+ Label L;
+ __ cmp(Rresult_handler, 0);
+ __ b(L, eq);
+ __ verify_oop(R0);
+ __ bind(L);
+ }
+#endif // ASSERT
+#endif // AARCH64
+
+ // Restore FP/LR, sender_sp and return
+#ifdef AARCH64
+ __ ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
+ __ ldp(FP, LR, Address(FP));
+ __ mov(SP, Rtemp);
+#else
+ __ mov(Rtemp, FP); // keep the old FP in Rtemp: the ldmia below overwrites FP
+ __ ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
+ __ ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize));
+#endif // AARCH64
+
+ __ ret();
+
+ if (inc_counter) {
+ // Handle overflow of counter and compile method
+ __ bind(invocation_counter_overflow);
+ generate_counter_overflow(continue_after_compile);
+ }
+
+ return entry_point;
+}
+
+// generate_normal_entry(synchronized): generic interpreted (non-native) method entry to the (asm) interpreter.
+// Emits: Rlocals setup, allocation and zeroing of the additional locals, the fixed frame, invocation counting, optional lock_method(), then dispatch_next(vtos).
+// 'synchronized' selects whether lock_method() is emitted; the return value is the address of the first emitted instruction.
+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
+ // determine code generation flags
+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; // invocation counting is emitted only if one of these is set
+
+ // Rmethod: Method*
+ // Rthread: thread
+ // Rsender_sp: sender sp (could differ from SP if we were called via c2i)
+ // Rparams: pointer to the last parameter in the stack
+
+ address entry_point = __ pc(); // start of the generated entry; this is the value returned
+
+ const Register RconstMethod = AARCH64_ONLY(R10) NOT_AARCH64(R3);
+
+#ifdef AARCH64
+ const Register RmaxStack = R11;
+ const Register RlocalsBase = R12;
+#endif // AARCH64
+
+ __ ldr(RconstMethod, Address(Rmethod, Method::const_offset()));
+
+ __ ldrh(R2, Address(RconstMethod, ConstMethod::size_of_parameters_offset())); // R2 = size of parameters
+ __ ldrh(R3, Address(RconstMethod, ConstMethod::size_of_locals_offset())); // R3 = size of locals; on 32-bit ARM RconstMethod is R3, so this load overwrites it
+
+ // setup Rlocals
+ __ sub(Rlocals, Rparams, wordSize);
+ __ add(Rlocals, Rlocals, AsmOperand(R2, lsl, Interpreter::logStackElementSize)); // Rlocals = Rparams - wordSize + (R2 << logStackElementSize)
+
+ __ sub(R3, R3, R2); // number of additional locals
+
+#ifdef AARCH64
+ // setup RmaxStack
+ __ ldrh(RmaxStack, Address(RconstMethod, ConstMethod::max_stack_offset()));
+ __ add(RmaxStack, RmaxStack, MAX2(1, Method::extra_stack_entries())); // reserve slots for exception handler and JSR292 appendix argument
+#endif // AARCH64
+
+ // see if we've got enough room on the stack for locals plus overhead.
+ generate_stack_overflow_check();
+
+#ifdef AARCH64
+
+ // allocate space for locals
+ {
+ __ sub(RlocalsBase, Rparams, AsmOperand(R3, lsl, Interpreter::logStackElementSize));
+ __ align_reg(SP, RlocalsBase, StackAlignmentInBytes);
+ }
+
+ // explicitly initialize locals
+ {
+ Label zero_loop, done;
+ __ cbz(R3, done); // no additional locals: nothing to zero
+
+ __ tbz(R3, 0, zero_loop); // bit 0 clear (even count): go straight to the pair-store loop
+ __ subs(R3, R3, 1); // odd count: zero a single word first
+ __ str(ZR, Address(RlocalsBase, wordSize, post_indexed));
+ __ b(done, eq); // the count was exactly 1
+
+ __ bind(zero_loop);
+ __ subs(R3, R3, 2); // two words are zeroed per iteration
+ __ stp(ZR, ZR, Address(RlocalsBase, 2*wordSize, post_indexed));
+ __ b(zero_loop, ne);
+
+ __ bind(done);
+ }
+
+#else
+ // allocate space for locals
+ // explicitly initialize locals
+
+ // Loop is unrolled 4 times
+ Label loop;
+ __ mov(R0, 0); // R0 is the zero word pushed for every additional local
+ __ bind(loop);
+
+ // #1
+ __ subs(R3, R3, 1);
+ __ push(R0, ge); // conditional push: only while the preceding subs result is >= 0
+
+ // #2
+ __ subs(R3, R3, 1, ge); // from here on the decrements are conditional on 'ge' as well
+ __ push(R0, ge);
+
+ // #3
+ __ subs(R3, R3, 1, ge);
+ __ push(R0, ge);
+
+ // #4
+ __ subs(R3, R3, 1, ge);
+ __ push(R0, ge);
+
+ __ b(loop, gt); // go around again while the condition flags still say 'gt'
+#endif // AARCH64
+
+ // initialize fixed part of activation frame
+ generate_fixed_frame(false);
+
+ __ restore_dispatch();
+
+ // make sure method is not native & not abstract
+#ifdef ASSERT
+ __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset()));
+ {
+ Label L;
+ __ tbz(Rtemp, JVM_ACC_NATIVE_BIT, L);
+ __ stop("tried to execute native method as non-native");
+ __ bind(L);
+ }
+ { Label L;
+ __ tbz(Rtemp, JVM_ACC_ABSTRACT_BIT, L);
+ __ stop("tried to execute abstract method in interpreter");
+ __ bind(L);
+ }
+#endif
+
+ // increment invocation count & check for overflow
+ Label invocation_counter_overflow;
+ Label profile_method;
+ Label profile_method_continue;
+ if (inc_counter) {
+ if (synchronized) {
+ // Avoid unlocking method's monitor in case of exception, as it has not
+ // been locked yet.
+ __ set_do_not_unlock_if_synchronized(true, Rtemp);
+ }
+ generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue);
+ if (ProfileInterpreter) {
+ __ bind(profile_method_continue); // the profile_method code emitted at the end of this function branches back here
+ }
+ }
+ Label continue_after_compile;
+ __ bind(continue_after_compile); // handed to generate_counter_overflow() at the end of this function as its continuation label
+
+ if (inc_counter && synchronized) {
+ __ set_do_not_unlock_if_synchronized(false, Rtemp);
+ }
+#if R9_IS_SCRATCHED
+ __ restore_method();
+#endif
+
+ // check for synchronized methods
+ // Must happen AFTER invocation_counter check and stack overflow check,
+ // so method is not locked if overflows.
+ //
+ if (synchronized) {
+ // Allocate monitor and lock method
+ lock_method();
+ } else {
+ // no synchronization necessary
+#ifdef ASSERT
+ { Label L;
+ __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset()));
+ __ tbz(Rtemp, JVM_ACC_SYNCHRONIZED_BIT, L);
+ __ stop("method needs synchronization");
+ __ bind(L);
+ }
+#endif
+ }
+
+ // start execution
+#ifdef ASSERT
+ { Label L;
+ __ ldr(Rtemp, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+ __ cmp(Rtemp, Rstack_top);
+ __ b(L, eq);
+ __ stop("broken stack frame setup in interpreter");
+ __ bind(L);
+ }
+#endif
+ __ check_extended_sp(Rtemp);
+
+ // jvmti support
+ __ notify_method_entry();
+#if R9_IS_SCRATCHED
+ __ restore_method();
+#endif
+
+ __ dispatch_next(vtos); // last instruction sequence of the straight-line path; the code below is reached only via the labels
+
+ // invocation counter overflow
+ if (inc_counter) {
+ if (ProfileInterpreter) {
+ // We have decided to profile this method in the interpreter
+ __ bind(profile_method);
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+ __ set_method_data_pointer_for_bcp();
+
+ __ b(profile_method_continue);
+ }
+
+ // Handle overflow of counter and compile method
+ __ bind(invocation_counter_overflow);
+ generate_counter_overflow(continue_after_compile);
+ }
+
+ return entry_point;
+}
+
+//------------------------------------------------------------------------------------------------------------------------
+// Exceptions
+
+void TemplateInterpreterGenerator::generate_throw_exception() { // emits and records four entries: _rethrow_exception_entry, _throw_exception_entry, _remove_activation_preserving_args_entry, _remove_activation_entry
+ // Entry point in previous activation (i.e., if the caller was interpreted)
+ Interpreter::_rethrow_exception_entry = __ pc();
+ // Rexception_obj: exception
+
+#ifndef AARCH64
+ // Clear interpreter_frame_last_sp.
+ __ mov(Rtemp, 0);
+ __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // !AARCH64
+
+#if R9_IS_SCRATCHED
+ __ restore_method();
+#endif
+ __ restore_bcp();
+ __ restore_dispatch();
+ __ restore_locals();
+
+#ifdef AARCH64
+ __ restore_sp_after_call(Rtemp);
+#endif // AARCH64
+
+ // Entry point for exceptions thrown within interpreter code
+ Interpreter::_throw_exception_entry = __ pc(); // the rethrow entry above falls through into this point
+
+ // expression stack is undefined here
+ // Rexception_obj: exception
+ // Rbcp: exception bcp
+ __ verify_oop(Rexception_obj);
+
+ // expression stack must be empty before entering the VM in case of an exception
+ __ empty_expression_stack();
+ // find exception handler address and preserve exception oop
+ __ mov(R1, Rexception_obj);
+ __ call_VM(Rexception_obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), R1);
+ // R0: exception handler entry point
+ // Rexception_obj: preserved exception oop
+ // Rbcp: bcp for exception handler
+ __ push_ptr(Rexception_obj); // push exception which is now the only value on the stack
+ __ jump(R0); // jump to exception handler (may be _remove_activation_entry!)
+
+ // If the exception is not handled in the current frame the frame is removed and
+ // the exception is rethrown (i.e. exception continuation is _rethrow_exception).
+ //
+ // Note: At this point the bci is still the bci for the instruction which caused
+ // the exception and the expression stack is empty. Thus, for any VM calls
+ // at this point, GC will find a legal oop map (with empty expression stack).
+
+ // In current activation
+ // tos: exception
+ // Rbcp: exception bcp
+
+ //
+ // JVMTI PopFrame support
+ //
+ Interpreter::_remove_activation_preserving_args_entry = __ pc();
+
+#ifdef AARCH64
+ __ restore_sp_after_call(Rtemp); // restore SP to extended SP
+#endif // AARCH64
+
+ __ empty_expression_stack();
+
+ // Set the popframe_processing bit in _popframe_condition indicating that we are
+ // currently handling popframe, so that call_VMs that may happen later do not trigger new
+ // popframe handling cycles.
+
+ __ ldr_s32(Rtemp, Address(Rthread, JavaThread::popframe_condition_offset()));
+ __ orr(Rtemp, Rtemp, (unsigned)JavaThread::popframe_processing_bit);
+ __ str_32(Rtemp, Address(Rthread, JavaThread::popframe_condition_offset()));
+
+ {
+ // Check to see whether we are returning to a deoptimized frame.
+ // (The PopFrame call ensures that the caller of the popped frame is
+ // either interpreted or compiled and deoptimizes it if compiled.)
+ // In this case, we can't call dispatch_next() after the frame is
+ // popped, but instead must save the incoming arguments and restore
+ // them after deoptimization has occurred.
+ //
+ // Note that we don't compare the return PC against the
+ // deoptimization blob's unpack entry because of the presence of
+ // adapter frames in C2.
+ Label caller_not_deoptimized;
+ __ ldr(R0, Address(FP, frame::return_addr_offset * wordSize)); // R0 = return address stored in this frame
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), R0);
+ __ cbnz_32(R0, caller_not_deoptimized); // non-zero result: skip the deoptimized-caller handling below
+#ifdef AARCH64
+ __ NOT_TESTED();
+#endif
+
+ // Compute size of arguments for saving when returning to deoptimized caller
+ __ restore_method();
+ __ ldr(R0, Address(Rmethod, Method::const_offset()));
+ __ ldrh(R0, Address(R0, ConstMethod::size_of_parameters_offset()));
+
+ __ logical_shift_left(R1, R0, Interpreter::logStackElementSize); // R1 = size_of_parameters << logStackElementSize
+ // Save these arguments
+ __ restore_locals();
+ __ sub(R2, Rlocals, R1);
+ __ add(R2, R2, wordSize); // R2 = Rlocals - R1 + wordSize
+ __ mov(R0, Rthread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), R0, R1, R2);
+
+ __ remove_activation(vtos, LR,
+ /* throw_monitor_exception */ false,
+ /* install_monitor_exception */ false,
+ /* notify_jvmdi */ false);
+
+ // Inform deoptimization that it is responsible for restoring these arguments
+ __ mov(Rtemp, JavaThread::popframe_force_deopt_reexecution_bit);
+ __ str_32(Rtemp, Address(Rthread, JavaThread::popframe_condition_offset())); // plain store: replaces the whole popframe condition word
+
+ // Continue in deoptimization handler
+ __ ret();
+
+ __ bind(caller_not_deoptimized);
+ }
+
+ __ remove_activation(vtos, R4,
+ /* throw_monitor_exception */ false,
+ /* install_monitor_exception */ false,
+ /* notify_jvmdi */ false);
+
+#ifndef AARCH64
+ // Finish with popframe handling
+ // A previous I2C followed by a deoptimization might have moved the
+ // outgoing arguments further up the stack. PopFrame expects the
+ // mutations to those outgoing arguments to be preserved and other
+ // constraints basically require this frame to look exactly as
+ // though it had previously invoked an interpreted activation with
+ // no space between the top of the expression stack (current
+ // last_sp) and the top of stack. Rather than force deopt to
+ // maintain this kind of invariant all the time we call a small
+ // fixup routine to move the mutated arguments onto the top of our
+ // expression stack if necessary.
+ __ mov(R1, SP);
+ __ ldr(R2, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+ // PC must point into interpreter here
+ __ set_last_Java_frame(SP, FP, true, Rtemp);
+ __ mov(R0, Rthread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), R0, R1, R2); // args: thread, current SP, saved last_sp
+ __ reset_last_Java_frame(Rtemp);
+#endif // !AARCH64
+
+#ifdef AARCH64
+ __ restore_sp_after_call(Rtemp);
+ __ restore_stack_top();
+#else
+ // Restore the last_sp and null it out
+ __ ldr(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+ __ mov(Rtemp, (int)NULL_WORD);
+ __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // AARCH64
+
+ __ restore_bcp();
+ __ restore_dispatch();
+ __ restore_locals();
+ __ restore_method();
+
+ // The method data pointer was incremented already during
+ // call profiling. We have to restore the mdp for the current bcp.
+ if (ProfileInterpreter) {
+ __ set_method_data_pointer_for_bcp();
+ }
+
+ // Clear the popframe condition flag
+ assert(JavaThread::popframe_inactive == 0, "adjust this code"); // the zero store below relies on popframe_inactive being 0
+ __ str_32(__ zero_register(Rtemp), Address(Rthread, JavaThread::popframe_condition_offset()));
+
+#if INCLUDE_JVMTI
+ {
+ Label L_done;
+
+ __ ldrb(Rtemp, Address(Rbcp, 0)); // Rtemp = bytecode at the current bcp
+ __ cmp(Rtemp, Bytecodes::_invokestatic);
+ __ b(L_done, ne);
+
+ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
+ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
+
+ // get local0
+ __ ldr(R1, Address(Rlocals, 0));
+ __ mov(R2, Rmethod);
+ __ mov(R3, Rbcp);
+ __ call_VM(R0, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), R1, R2, R3);
+
+ __ cbz(R0, L_done); // NULL result: nothing to restore
+
+ __ str(R0, Address(Rstack_top)); // non-NULL result is written to the slot addressed by Rstack_top
+ __ bind(L_done);
+ }
+#endif // INCLUDE_JVMTI
+
+ __ dispatch_next(vtos);
+ // end of PopFrame support
+
+ Interpreter::_remove_activation_entry = __ pc();
+
+ // preserve exception over this code sequence
+ __ pop_ptr(R0_tos);
+ __ str(R0_tos, Address(Rthread, JavaThread::vm_result_offset()));
+ // remove the activation (without doing throws on illegalMonitorExceptions)
+ __ remove_activation(vtos, Rexception_pc, false, true, false); // flags, in the order labelled at the calls above: throw_monitor_exception=false, install_monitor_exception=true, notify_jvmdi=false
+ // restore exception
+ __ get_vm_result(Rexception_obj, Rtemp);
+
+ // Inbetween activations - previous activation type unknown yet
+ // compute continuation point - the continuation point expects
+ // the following registers set up:
+ //
+ // Rexception_obj: exception
+ // Rexception_pc: return address/pc that threw exception
+ // SP: expression stack of caller
+ // FP: frame pointer of caller
+ __ mov(c_rarg0, Rthread);
+ __ mov(c_rarg1, Rexception_pc);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), c_rarg0, c_rarg1);
+ // Note that an "issuing PC" is actually the next PC after the call
+
+ __ jump(R0); // jump to exception handler of caller
+}
+
+
+// JVMTI ForceEarlyReturn support.
+// generate_earlyret_entry_for(state): emits an entry that empties the expression stack, loads the early-return value for 'state',
+// clears the JVMTI earlyret state, removes the activation and returns; the function returns the address of the entry's first instruction.
+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
+ address entry = __ pc(); // start of the generated entry; this is the value returned
+
+#ifdef AARCH64
+ __ restore_sp_after_call(Rtemp); // restore SP to extended SP
+#endif // AARCH64
+
+ __ restore_bcp();
+ __ restore_dispatch();
+ __ restore_locals();
+
+ __ empty_expression_stack();
+
+ __ load_earlyret_value(state);
+
+ // Clear the earlyret state
+ __ ldr(Rtemp, Address(Rthread, JavaThread::jvmti_thread_state_offset())); // Rtemp = the thread's JvmtiThreadState pointer
+
+ assert(JvmtiThreadState::earlyret_inactive == 0, "adjust this code"); // the zero store below relies on earlyret_inactive being 0
+ __ str_32(__ zero_register(R2), Address(Rtemp, JvmtiThreadState::earlyret_state_offset())); // R2 is passed as the zero source because Rtemp holds the base address
+
+ __ remove_activation(state, LR,
+ false, /* throw_monitor_exception */
+ false, /* install_monitor_exception */
+ true); /* notify_jvmdi */
+
+#ifndef AARCH64
+ // According to interpreter calling conventions, result is returned in R0/R1,
+ // so ftos (S0) and dtos (D0) are moved to R0/R1.
+ // This conversion should be done after remove_activation, as it uses
+ // push(state) & pop(state) to preserve return value.
+ __ convert_tos_to_retval(state);
+#endif // !AARCH64
+ __ ret();
+
+ return entry;
+} // end of ForceEarlyReturn support
+
+
+//------------------------------------------------------------------------------------------------------------------------
+// Helper for vtos entry point generation
+// Fills the nine per-tos-state entry addresses for a vtos template: each entry pushes its cached tos value (or falls through to one that does) before the shared template code.
+void TemplateInterpreterGenerator::set_vtos_entry_points (Template* t, address& bep, address& cep, address& sep, address& aep, address& iep, address& lep, address& fep, address& dep, address& vep) {
+ assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
+ Label L; // bound at the vtos entry; every non-fall-through entry branches here after its push
+
+#ifdef __SOFTFP__
+ dep = __ pc(); // fall through: under __SOFTFP__ the dtos entry shares the ltos push emitted next
+#else
+ fep = __ pc(); __ push(ftos); __ b(L);
+ dep = __ pc(); __ push(dtos); __ b(L);
+#endif // __SOFTFP__
+
+ lep = __ pc(); __ push(ltos); __ b(L);
+
+ if (AARCH64_ONLY(true) NOT_AARCH64(VerifyOops)) { // can't share atos entry with itos on AArch64 or if VerifyOops
+ aep = __ pc(); __ push(atos); __ b(L);
+ } else {
+ aep = __ pc(); // fall through
+ }
+
+#ifdef __SOFTFP__
+ fep = __ pc(); // fall through: under __SOFTFP__ the ftos entry shares the itos push below
+#endif // __SOFTFP__
+
+ bep = cep = sep = // fall through
+ iep = __ pc(); __ push(itos); // fall through
+ vep = __ pc(); __ bind(L); // fall through
+ generate_and_dispatch(t); // template code emitted once, after all entry points
+}
+
+//------------------------------------------------------------------------------------------------------------------------
+
+// Non-product code
+#ifndef PRODUCT
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) { // emits the bytecode-tracing stub for tos state 'state' and returns its start address
+ address entry = __ pc();
+
+ // prepare expression stack
+ __ push(state); // save tosca
+
+ // pass tosca registers as arguments
+ __ mov(R2, R0_tos);
+#ifdef AARCH64
+ __ mov(R3, ZR); // AArch64 passes zero as the second tos argument
+#else
+ __ mov(R3, R1_tos_hi);
+#endif // AARCH64
+ __ mov(R1, LR); // save return address
+
+ // call tracer
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), R1, R2, R3); // args: return address (R1), tos words (R2, R3)
+
+ __ mov(LR, R0); // restore return address (taken from R0 after the call; presumably trace_bytecode returns the R1 value - confirm)
+ __ pop(state); // restore tosca
+
+ // return
+ __ ret();
+
+ return entry;
+}
+
+
+void TemplateInterpreterGenerator::count_bytecode() {
+  address counter_cell = (address) &BytecodeCounter::_counter_value;  // the global bytecode counter
+  __ inc_global_counter(counter_cell, 0, Rtemp, R2_tmp, true);        // emit the counter increment
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) {
+  address histogram_base = (address)&BytecodeHistogram::_counters[0];  // first slot of the per-bytecode counter table
+  __ inc_global_counter(histogram_base, t->bytecode() * sizeof(BytecodeHistogram::_counters[0]), Rtemp, R2_tmp, true);  // byte offset selects this template's bytecode slot
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { // emits code that updates BytecodePairHistogram::_index and bumps the counter selected by the new index
+ const Register Rindex_addr = R2_tmp;
+ Label Lcontinue;
+ InlinedAddress Lcounters((address)BytecodePairHistogram::_counters);
+ InlinedAddress Lindex((address)&BytecodePairHistogram::_index);
+ const Register Rcounters_addr = R2_tmp; // same register as Rindex_addr; loaded only after the new index has been stored
+ const Register Rindex = R4_tmp;
+
+ // calculate new index for counter:
+ // index = (_index >> log2_number_of_codes) | (bytecode << log2_number_of_codes).
+ // (_index >> log2_number_of_codes) is previous bytecode
+
+ __ ldr_literal(Rindex_addr, Lindex);
+ __ ldr_s32(Rindex, Address(Rindex_addr));
+ __ mov_slow(Rtemp, ((int)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); // the shifted bytecode is computed at code-generation time
+ __ orr(Rindex, Rtemp, AsmOperand(Rindex, lsr, BytecodePairHistogram::log2_number_of_codes));
+ __ str_32(Rindex, Address(Rindex_addr)); // write the new index back to _index
+
+ // Rindex (R4) contains index of counter
+
+ __ ldr_literal(Rcounters_addr, Lcounters);
+ __ ldr_s32(Rtemp, Address::indexed_32(Rcounters_addr, Rindex));
+ __ adds_32(Rtemp, Rtemp, 1);
+ __ b(Lcontinue, mi); // avoid overflow
+ __ str_32(Rtemp, Address::indexed_32(Rcounters_addr, Rindex));
+
+ __ b(Lcontinue); // jump over the inlined literals bound below
+
+ __ bind_literal(Lindex);
+ __ bind_literal(Lcounters);
+
+ __ bind(Lcontinue);
+}
+
+
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
+  // Instead of expanding tracing code at every bytecode, emit a call to the small
+  // shared trace stub that matches this template's incoming tos state; that stub
+  // saves the right registers for the state.
+  const TosState incoming_state = t->tos_in();
+  assert(Interpreter::trace_code(t->tos_in()) != NULL,
+         "entry must have been generated");
+  __ call(Interpreter::trace_code(incoming_state), relocInfo::none);
+}
+
+
+void TemplateInterpreterGenerator::stop_interpreter_at() {
+  // Emits a comparison of the global bytecode counter with StopInterpreterAt; on a match the
+  // generated code traces its state and hits a breakpoint, otherwise it branches past both.
+  const Register Rcounter = Rtemp;
+  const Register Rlimit = R2_tmp;
+  Label Lno_stop;
+
+  __ ldr_global_s32(Rcounter, (address) &BytecodeCounter::_counter_value);
+  __ mov_slow(Rlimit, StopInterpreterAt);
+  __ cmp(Rcounter, Rlimit);
+  __ b(Lno_stop, ne);
+
+  __ trace_state("stop_interpreter_at");  // reached only when the counter equals StopInterpreterAt
+  __ breakpoint();
+  __ bind(Lno_stop);
+}
+#endif // !PRODUCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/templateTable_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,5030 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/cpCache.hpp"
+#include "oops/methodData.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+
+#define __ _masm->
+
+//----------------------------------------------------------------------------------------------------
+// Platform-dependent initialization
+
+// Platform-dependent initialization hook; intentionally empty on this port.
+void TemplateTable::pd_initialize() {
+ // No arm specific initialization
+}
+
+//----------------------------------------------------------------------------------------------------
+// Address computation
+
+// local variables
+static inline Address iaddress(int n) {
+  // Local slot n, addressed relative to Rlocals.
+  const int slot_offset = Interpreter::local_offset_in_bytes(n);
+  return Address(Rlocals, slot_offset);
+}
+
+// Slot n + 1 of a two-slot local that starts at slot n.
+static inline Address laddress(int n) {
+  return iaddress(n + 1);
+}
+#ifndef AARCH64
+// 32-bit ARM only: slot n itself, the other half of a two-slot local
+// (laddress(n) addresses slot n + 1).
+static inline Address haddress(int n) {
+  return iaddress(n);
+}
+#endif // !AARCH64
+
+// A float local uses the same single slot as an int local.
+static inline Address faddress(int n) {
+  return iaddress(n);
+}
+// A double local is addressed exactly like a long local.
+static inline Address daddress(int n) {
+  return laddress(n);
+}
+// A reference local uses the same single slot as an int local.
+static inline Address aaddress(int n) {
+  return iaddress(n);
+}
+
+
+// Emits: r = Rlocals - (index << Interpreter::logStackElementSize),
+// i.e. the address that belongs to the local slot number held in 'index'.
+void TemplateTable::get_local_base_addr(Register r, Register index) {
+ __ sub(r, Rlocals, AsmOperand(index, lsl, Interpreter::logStackElementSize));
+}
+
+Address TemplateTable::load_iaddress(Register index, Register scratch) {
+#ifndef AARCH64
+  // 32-bit ARM: one register-offset address, Rlocals minus the scaled
+  // index; nothing is emitted and scratch is not touched.
+  return Address(Rlocals, index, lsl, Interpreter::logStackElementSize, basic_offset, sub_offset);
+#else
+  // AArch64: compute the slot address into scratch and address through it.
+  get_local_base_addr(scratch, index);
+  return Address(scratch);
+#endif // !AARCH64
+}
+
+// Address of a reference local selected by a register index; identical to
+// the int case, so this simply forwards to load_iaddress().
+Address TemplateTable::load_aaddress(Register index, Register scratch) {
+ return load_iaddress(index, scratch);
+}
+
+Address TemplateTable::load_faddress(Register index, Register scratch) {
+#ifndef __SOFTFP__
+  // Without soft-float the slot address is always materialized in scratch
+  // and a plain base-register address is returned.
+  get_local_base_addr(scratch, index);
+  return Address(scratch);
+#else
+  // Soft-float: the value travels in a core register, so the int
+  // addressing form is reused.
+  return load_iaddress(index, scratch);
+#endif // !__SOFTFP__
+}
+
+Address TemplateTable::load_daddress(Register index, Register scratch) {
+  // scratch = address belonging to slot 'index'; the returned address is
+  // displaced by the offset of local 1 relative to that.
+  get_local_base_addr(scratch, index);
+  const int second_slot_offset = Interpreter::local_offset_in_bytes(1);
+  return Address(scratch, second_slot_offset);
+}
+
+// At top of Java expression stack which may be different than SP.
+// It isn't for category 1 objects.
+static inline Address at_tos() {
+  // Expression stack element 0, relative to Rstack_top.
+  const int elem_offset = Interpreter::expr_offset_in_bytes(0);
+  return Address(Rstack_top, elem_offset);
+}
+
+static inline Address at_tos_p1() {
+  // Expression stack element 1, relative to Rstack_top.
+  const int elem_offset = Interpreter::expr_offset_in_bytes(1);
+  return Address(Rstack_top, elem_offset);
+}
+
+static inline Address at_tos_p2() {
+  // Expression stack element 2, relative to Rstack_top.
+  const int elem_offset = Interpreter::expr_offset_in_bytes(2);
+  return Address(Rstack_top, elem_offset);
+}
+
+
+// 32-bit ARM:
+// Loads double/long local into R0_tos_lo/R1_tos_hi with two
+// separate ldr instructions (supports nonadjacent values).
+// Used for longs in all modes, and for doubles in SOFTFP mode.
+//
+// AArch64: loads long local into R0_tos.
+//
+void TemplateTable::load_category2_local(Register Rlocal_index, Register tmp) {
+  assert_different_registers(Rlocal_index, tmp);
+
+  // tmp = address belonging to the local slot held in Rlocal_index
+  get_local_base_addr(tmp, Rlocal_index);
+
+  const Address slot1(tmp, Interpreter::local_offset_in_bytes(1));
+#ifndef AARCH64
+  // Two independent word loads: low word from slot 1, high word from slot 0.
+  const Address slot0(tmp, Interpreter::local_offset_in_bytes(0));
+  __ ldr(R0_tos_lo, slot1);
+  __ ldr(R1_tos_hi, slot0);
+#else
+  __ ldr(R0_tos, slot1);
+#endif // !AARCH64
+}
+
+
+// 32-bit ARM:
+// Stores R0_tos_lo/R1_tos_hi to double/long local with two
+// separate str instructions (supports nonadjacent values).
+// Used for longs in all modes, and for doubles in SOFTFP mode
+//
+// AArch64: stores R0_tos to long local.
+//
+void TemplateTable::store_category2_local(Register Rlocal_index, Register tmp) {
+  assert_different_registers(Rlocal_index, tmp);
+
+  // tmp = address belonging to the local slot held in Rlocal_index
+  get_local_base_addr(tmp, Rlocal_index);
+
+  const Address slot1(tmp, Interpreter::local_offset_in_bytes(1));
+#ifndef AARCH64
+  // Two independent word stores: low word to slot 1, high word to slot 0.
+  const Address slot0(tmp, Interpreter::local_offset_in_bytes(0));
+  __ str(R0_tos_lo, slot1);
+  __ str(R1_tos_hi, slot0);
+#else
+  __ str(R0_tos, slot1);
+#endif // !AARCH64
+}
+
+// Returns address of Java array element using temp register as address base.
+Address TemplateTable::get_array_elem_addr(BasicType elemType, Register array, Register index, Register temp) {
+  // The index is scaled by log2 of the element size when it is combined
+  // with the array pointer into temp.
+  const int elem_shift = exact_log2(type2aelembytes(elemType));
+  __ add_ptr_scaled_int32(temp, array, index, elem_shift);
+
+  // The offset of the first element is carried as the displacement.
+  const int first_elem_offset = arrayOopDesc::base_offset_in_bytes(elemType);
+  return Address(temp, first_elem_offset);
+}
+
+//----------------------------------------------------------------------------------------------------
+// Condition conversion
+// Maps a TemplateTable condition to the assembler condition that holds
+// exactly when the template condition does NOT hold.
+AsmCondition convNegCond(TemplateTable::Condition cc) {
+  AsmCondition negated = nv;
+  switch (cc) {
+    case TemplateTable::equal        : negated = ne; break;
+    case TemplateTable::not_equal    : negated = eq; break;
+    case TemplateTable::less         : negated = ge; break;
+    case TemplateTable::less_equal   : negated = gt; break;
+    case TemplateTable::greater      : negated = le; break;
+    case TemplateTable::greater_equal: negated = lt; break;
+    default:
+      // Not a known condition; nv is returned afterwards.
+      ShouldNotReachHere();
+      break;
+  }
+  return negated;
+}
+
+//----------------------------------------------------------------------------------------------------
+// Miscellaneous helper routines
+
+// Store an oop (or NULL) at the address described by obj.
+// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Also destroys new_val and obj.base().
+//
+// Parameters:
+//   obj              - destination; on the G1 path an index or displacement
+//                      is folded into obj.base() before the store
+//   new_val          - register with the value to store
+//   tmp1, tmp2, tmp3 - scratch registers; asserted distinct from obj.base()
+//                      and new_val
+//   barrier          - barrier set kind; selects the code emitted around the
+//                      store (ModRef and unknown kinds are not supported)
+//   precise          - on the card-table path, asserts that obj is a plain
+//                      base-register address (no index, no displacement)
+//   is_null          - emit a NULL store; no post-store barrier work is
+//                      emitted in that case
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+ Address obj,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3,
+ BarrierSet::Name barrier,
+ bool precise,
+ bool is_null) {
+
+ assert_different_registers(obj.base(), new_val, tmp1, tmp2, tmp3, noreg);
+ switch (barrier) {
+#if INCLUDE_ALL_GCS
+ case BarrierSet::G1SATBCTLogging:
+ {
+ // flatten object address if needed
+ assert (obj.mode() == basic_offset, "pre- or post-indexing is not supported here");
+
+ // The flattened address is written back into the base register of obj.
+ const Register store_addr = obj.base();
+ if (obj.index() != noreg) {
+ assert (obj.disp() == 0, "index or displacement, not both");
+#ifdef AARCH64
+ __ add(store_addr, obj.base(), obj.index(), obj.extend(), obj.shift_imm());
+#else
+ assert(obj.offset_op() == add_offset, "addition is expected");
+ __ add(store_addr, obj.base(), AsmOperand(obj.index(), obj.shift(), obj.shift_imm()));
+#endif // AARCH64
+ } else if (obj.disp() != 0) {
+ __ add(store_addr, obj.base(), obj.disp());
+ }
+
+ // Pre-barrier is emitted before the store in both the NULL and non-NULL case.
+ __ g1_write_barrier_pre(store_addr, new_val, tmp1, tmp2, tmp3);
+ if (is_null) {
+ __ store_heap_oop_null(new_val, Address(store_addr));
+ } else {
+ // G1 barrier needs uncompressed oop for region cross check.
+ // With compressed oops the store goes through a copy in tmp1 so that
+ // new_val is still intact for the post-barrier.
+ Register val_to_store = new_val;
+ if (UseCompressedOops) {
+ val_to_store = tmp1;
+ __ mov(val_to_store, new_val);
+ }
+ __ store_heap_oop(val_to_store, Address(store_addr)); // blows val_to_store:
+ val_to_store = noreg;
+ __ g1_write_barrier_post(store_addr, new_val, tmp1, tmp2, tmp3);
+ }
+ }
+ break;
+#endif // INCLUDE_ALL_GCS
+ case BarrierSet::CardTableForRS:
+ case BarrierSet::CardTableExtension:
+ {
+ if (is_null) {
+ __ store_heap_oop_null(new_val, obj);
+ } else {
+ assert (!precise || (obj.index() == noreg && obj.disp() == 0),
+ "store check address should be calculated beforehand");
+
+ // The store check is split in two parts around the actual store;
+ // part 2 works on obj.base() with tmp1 as set up by part 1.
+ __ store_check_part1(tmp1);
+ __ store_heap_oop(new_val, obj); // blows new_val:
+ new_val = noreg;
+ __ store_check_part2(obj.base(), tmp1, tmp2);
+ }
+ }
+ break;
+ case BarrierSet::ModRef:
+ ShouldNotReachHere();
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+}
+
+// Address of the byte at 'offset' from the bytecode pointer (Rbcp).
+// Asserts that the current template description declares that it uses bcp.
+Address TemplateTable::at_bcp(int offset) {
+ assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
+ return Address(Rbcp, offset);
+}
+
+
+// Emits code that overwrites the bytecode at bcp with the fast variant 'bc'.
+// Nothing is emitted when RewriteBytecodes is off.
+//   bc                  - the bytecode to patch in
+//   bc_reg              - register that holds (or will be loaded with) bc
+//   temp_reg            - scratch register, must differ from bc_reg
+//   load_bc_into_bc_reg - when false, the caller has already placed the
+//                         target bytecode in bc_reg
+//   byte_no             - f1_byte/f2_byte for the fast putfield variants,
+//                         -1 for every other bytecode
+// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
+ Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
+ int byte_no) {
+ assert_different_registers(bc_reg, temp_reg);
+ if (!RewriteBytecodes) return;
+ Label L_patch_done;
+
+ switch (bc) {
+ case Bytecodes::_fast_aputfield:
+ case Bytecodes::_fast_bputfield:
+ case Bytecodes::_fast_zputfield:
+ case Bytecodes::_fast_cputfield:
+ case Bytecodes::_fast_dputfield:
+ case Bytecodes::_fast_fputfield:
+ case Bytecodes::_fast_iputfield:
+ case Bytecodes::_fast_lputfield:
+ case Bytecodes::_fast_sputfield:
+ {
+ // We skip bytecode quickening for putfield instructions when
+ // the put_code written to the constant pool cache is zero.
+ // This is required so that every execution of this instruction
+ // calls out to InterpreterRuntime::resolve_get_put to do
+ // additional, required work.
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+ assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+ // bc_reg serves as a temporary for the lookup and is loaded with bc
+ // only afterwards; temp_reg ends up holding the value that is tested.
+ __ get_cache_and_index_and_bytecode_at_bcp(bc_reg, temp_reg, temp_reg, byte_no, 1, sizeof(u2));
+ __ mov(bc_reg, bc);
+ __ cbz(temp_reg, L_patch_done); // test if bytecode is zero
+ }
+ break;
+ default:
+ assert(byte_no == -1, "sanity");
+ // the pair bytecodes have already done the load.
+ if (load_bc_into_bc_reg) {
+ __ mov(bc_reg, bc);
+ }
+ }
+
+ if (__ can_post_breakpoint()) {
+ Label L_fast_patch;
+ // if a breakpoint is present we can't rewrite the stream directly
+ __ ldrb(temp_reg, at_bcp(0));
+ __ cmp(temp_reg, Bytecodes::_breakpoint);
+ __ b(L_fast_patch, ne);
+ // Runtime call arguments: R1 = method, R2 = bcp, R3 = new bytecode.
+ if (bc_reg != R3) {
+ __ mov(R3, bc_reg);
+ }
+ __ mov(R1, Rmethod);
+ __ mov(R2, Rbcp);
+ // Let breakpoint table handling rewrite to quicker bytecode
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), R1, R2, R3);
+ __ b(L_patch_done);
+ __ bind(L_fast_patch);
+ }
+
+#ifdef ASSERT
+ // Debug check: the byte being replaced must be either the Java bytecode
+ // that bc stands for or already the value in bc_reg.
+ Label L_okay;
+ __ ldrb(temp_reg, at_bcp(0));
+ __ cmp(temp_reg, (int)Bytecodes::java_code(bc));
+ __ b(L_okay, eq);
+ __ cmp(temp_reg, bc_reg);
+ __ b(L_okay, eq);
+ __ stop("patching the wrong bytecode");
+ __ bind(L_okay);
+#endif
+
+ // patch bytecode
+ __ strb(bc_reg, at_bcp(0));
+ __ bind(L_patch_done);
+}
+
+//----------------------------------------------------------------------------------------------------
+// Individual instructions
+
+// nop: declares the vtos -> vtos transition and emits no code.
+void TemplateTable::nop() {
+ transition(vtos, vtos);
+ // nothing to do
+}
+
+// Template for bytecodes that must never be executed: emits a stop with a
+// diagnostic message.
+void TemplateTable::shouldnotreachhere() {
+ transition(vtos, vtos);
+ __ stop("shouldnotreachhere bytecode");
+}
+
+
+
+// aconst_null: leaves 0 (the NULL reference) in R0_tos as the atos value.
+void TemplateTable::aconst_null() {
+ transition(vtos, atos);
+ __ mov(R0_tos, 0);
+}
+
+
+// iconst_<value>: leaves the given int constant in R0_tos as the itos value.
+void TemplateTable::iconst(int value) {
+ transition(vtos, itos);
+ __ mov_slow(R0_tos, value);
+}
+
+
+// lconst_<value>: leaves the long constant 0 or 1 as the ltos value.
+// On 32-bit ARM the high word register is additionally cleared.
+void TemplateTable::lconst(int value) {
+ transition(vtos, ltos);
+ assert((value == 0) || (value == 1), "unexpected long constant");
+ __ mov(R0_tos, value);
+#ifndef AARCH64
+ __ mov(R1_tos_hi, 0);
+#endif // !AARCH64
+}
+
+
+// fconst_<value>: leaves the float constant 0.0f, 1.0f or 2.0f as the ftos
+// value. Any other 'value' is a generator-time error.
+void TemplateTable::fconst(int value) {
+ transition(vtos, ftos);
+#ifdef AARCH64
+ switch(value) {
+ // 0.0f is moved in from the zero register; the other two use fmov_s
+ // with an immediate operand.
+ // NOTE(review): 0x70 / 0x00 are presumably the 8-bit FP immediate
+ // encodings of 1.0 / 2.0 - confirm against the assembler.
+ case 0: __ fmov_sw(S0_tos, ZR); break;
+ case 1: __ fmov_s (S0_tos, 0x70); break;
+ case 2: __ fmov_s (S0_tos, 0x00); break;
+ default: ShouldNotReachHere(); break;
+ }
+#else
+ // Raw bit patterns of the constants, first placed in a core register.
+ const int zero = 0; // 0.0f
+ const int one = 0x3f800000; // 1.0f
+ const int two = 0x40000000; // 2.0f
+
+ switch(value) {
+ case 0: __ mov(R0_tos, zero); break;
+ case 1: __ mov(R0_tos, one); break;
+ case 2: __ mov(R0_tos, two); break;
+ default: ShouldNotReachHere(); break;
+ }
+
+#ifndef __SOFTFP__
+ // Unless built for soft-float, the value is transferred to S0_tos.
+ __ fmsr(S0_tos, R0_tos);
+#endif // !__SOFTFP__
+#endif // AARCH64
+}
+
+
+// dconst_<value>: leaves the double constant 0.0 or 1.0 as the dtos value.
+// Any other 'value' is a generator-time error.
+void TemplateTable::dconst(int value) {
+ transition(vtos, dtos);
+#ifdef AARCH64
+ switch(value) {
+ // NOTE(review): 0x70 is presumably the 8-bit FP immediate encoding of
+ // 1.0 - confirm against the assembler.
+ case 0: __ fmov_dx(D0_tos, ZR); break;
+ case 1: __ fmov_d (D0_tos, 0x70); break;
+ default: ShouldNotReachHere(); break;
+ }
+#else
+ const int one_lo = 0; // low part of 1.0
+ const int one_hi = 0x3ff00000; // high part of 1.0
+
+ if (value == 0) {
+#ifdef __SOFTFP__
+ // Soft-float: the value stays in the core register pair.
+ __ mov(R0_tos_lo, 0);
+ __ mov(R1_tos_hi, 0);
+#else
+ // One zeroed core register supplies both halves of D0_tos.
+ __ mov(R0_tmp, 0);
+ __ fmdrr(D0_tos, R0_tmp, R0_tmp);
+#endif // __SOFTFP__
+ } else if (value == 1) {
+ // Build the two halves in the core register pair first ...
+ __ mov(R0_tos_lo, one_lo);
+ __ mov_slow(R1_tos_hi, one_hi);
+#ifndef __SOFTFP__
+ // ... and transfer them to D0_tos unless built for soft-float.
+ __ fmdrr(D0_tos, R0_tos_lo, R1_tos_hi);
+#endif // !__SOFTFP__
+ } else {
+ ShouldNotReachHere();
+ }
+#endif // AARCH64
+}
+
+
+// bipush: loads the signed byte at bcp + 1 into R0_tos as the itos value.
+void TemplateTable::bipush() {
+ transition(vtos, itos);
+ __ ldrsb(R0_tos, at_bcp(1));
+}
+
+
+void TemplateTable::sipush() {
+  transition(vtos, itos);
+  const Register Rhigh = R0_tmp;  // byte at bcp + 1, loaded sign-extended
+  const Register Rlow  = R1_tmp;  // byte at bcp + 2, loaded zero-extended
+
+  __ ldrsb(Rhigh, at_bcp(1));
+  __ ldrb(Rlow, at_bcp(2));
+
+  // R0_tos = Rlow | (Rhigh << BitsPerByte)
+  __ orr(R0_tos, Rlow, AsmOperand(Rhigh, lsl, BitsPerByte));
+}
+
+
+// ldc / ldc_w: pushes a single-word constant pool entry.
+//   wide - the index at bcp + 1 is an unsigned 2-byte value instead of a
+//          single byte
+// Entries tagged UnresolvedClass, UnresolvedClassInError or Class go
+// through InterpreterRuntime::ldc and are pushed as atos; all other entries
+// are loaded directly from the pool and pushed as itos.
+void TemplateTable::ldc(bool wide) {
+ transition(vtos, vtos);
+ Label fastCase, Done;
+
+ const Register Rindex = R1_tmp;
+ const Register Rcpool = R2_tmp;
+ // Rtags and RtagType deliberately share R3_tmp: the tags pointer is no
+ // longer needed once the tag byte has been loaded.
+ const Register Rtags = R3_tmp;
+ const Register RtagType = R3_tmp;
+
+ if (wide) {
+ __ get_unsigned_2_byte_index_at_bcp(Rindex, 1);
+ } else {
+ __ ldrb(Rindex, at_bcp(1));
+ }
+ __ get_cpool_and_tags(Rcpool, Rtags);
+
+ const int base_offset = ConstantPool::header_size() * wordSize;
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
+
+ // get const type
+ __ add(Rtemp, Rtags, tags_offset);
+#ifdef AARCH64
+ __ add(Rtemp, Rtemp, Rindex);
+ __ ldarb(RtagType, Rtemp); // TODO-AARCH64 figure out if barrier is needed here, or control dependency is enough
+#else
+ __ ldrb(RtagType, Address(Rtemp, Rindex));
+ volatile_barrier(MacroAssembler::LoadLoad, Rtemp);
+#endif // AARCH64
+
+ // The three compares below are chained: each cond_cmp is given the
+ // condition 'ne', and the final branch to fastCase is taken on 'ne', i.e.
+ // when none of the three tags matched.
+
+ // unresolved class - get the resolved class
+ __ cmp(RtagType, JVM_CONSTANT_UnresolvedClass);
+
+ // unresolved class in error (resolution failed) - call into runtime
+ // so that the same error from first resolution attempt is thrown.
+#ifdef AARCH64
+ __ mov(Rtemp, JVM_CONSTANT_UnresolvedClassInError); // this constant does not fit into 5-bit immediate constraint
+ __ cond_cmp(RtagType, Rtemp, ne);
+#else
+ __ cond_cmp(RtagType, JVM_CONSTANT_UnresolvedClassInError, ne);
+#endif // AARCH64
+
+ // resolved class - need to call vm to get java mirror of the class
+ __ cond_cmp(RtagType, JVM_CONSTANT_Class, ne);
+
+ __ b(fastCase, ne);
+
+ // slow case - call runtime
+ __ mov(R1, wide);
+ call_VM(R0_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), R1);
+ __ push(atos);
+ __ b(Done);
+
+ // int, float, String
+ __ bind(fastCase);
+#ifdef ASSERT
+ // Debug check: only Integer and Float tags are expected on this path.
+ { Label L;
+ __ cmp(RtagType, JVM_CONSTANT_Integer);
+ __ cond_cmp(RtagType, JVM_CONSTANT_Float, ne);
+ __ b(L, eq);
+ __ stop("unexpected tag type in ldc");
+ __ bind(L);
+ }
+#endif // ASSERT
+ // itos, ftos
+ // Rtemp = Rcpool + (Rindex << LogBytesPerWord); base_offset is applied
+ // as the load displacement.
+ __ add(Rtemp, Rcpool, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ ldr_u32(R0_tos, Address(Rtemp, base_offset));
+
+ // floats and ints are placed on stack in the same way, so
+ // we can use push(itos) to transfer float value without VFP
+ __ push(itos);
+ __ bind(Done);
+}
+
+// Fast path for caching oop constants.
+//   wide - the index at bcp + 1 is a u2 instead of a u1
+// The oop is looked up via load_resolved_reference_at_index(); only when
+// that yields NULL is InterpreterRuntime::resolve_ldc called. Either way
+// the result is left in R0_tos as the atos value.
+void TemplateTable::fast_aldc(bool wide) {
+ transition(vtos, atos);
+ int index_size = wide ? sizeof(u2) : sizeof(u1);
+ Label resolved;
+
+ // We are resolved if the resolved reference cache entry contains a
+ // non-null object (CallSite, etc.)
+ assert_different_registers(R0_tos, R2_tmp);
+ // R2_tmp receives the index; R0_tos is passed along as the helper's
+ // additional register argument before it is overwritten with the result.
+ __ get_index_at_bcp(R2_tmp, 1, R0_tos, index_size);
+ __ load_resolved_reference_at_index(R0_tos, R2_tmp);
+ __ cbnz(R0_tos, resolved);
+
+ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+
+ // first time invocation - must resolve first
+ // The runtime is told which bytecode is being resolved via R1.
+ __ mov(R1, (int)bytecode());
+ __ call_VM(R0_tos, entry, R1);
+ __ bind(resolved);
+
+ if (VerifyOops) {
+ __ verify_oop(R0_tos);
+ }
+}
+
+// ldc2_w: pushes the two-word constant pool entry selected by the unsigned
+// 2-byte index at bcp + 1.
+// With __ABI_HARD__ the entry's tag is inspected: a Double is loaded into
+// D0_tos and pushed as dtos; anything else is pushed as ltos. Without
+// __ABI_HARD__ the entry is always loaded into the core register(s) and
+// pushed as ltos, and the tag is never read.
+void TemplateTable::ldc2_w() {
+  transition(vtos, vtos);
+  const Register Rtags  = R2_tmp;
+  const Register Rindex = R3_tmp;
+  const Register Rcpool = R4_tmp;
+  const Register Rbase  = R5_tmp;
+
+  __ get_unsigned_2_byte_index_at_bcp(Rindex, 1);
+
+  __ get_cpool_and_tags(Rcpool, Rtags);
+  const int base_offset = ConstantPool::header_size() * wordSize;
+
+  // Rbase = Rcpool + (Rindex << LogBytesPerWord); base_offset is applied as
+  // the displacement of the loads below.
+  __ add(Rbase, Rcpool, AsmOperand(Rindex, lsl, LogBytesPerWord));
+
+#ifdef __ABI_HARD__
+  // tags_offset is needed for the tag lookup only, so it is declared here
+  // instead of being left as an unused local in builds without __ABI_HARD__.
+  const int tags_offset = Array<u1>::base_offset_in_bytes();
+  Label Long, exit;
+  // get type from tags
+  __ add(Rtemp, Rtags, tags_offset);
+  __ ldrb(Rtemp, Address(Rtemp, Rindex));
+  __ cmp(Rtemp, JVM_CONSTANT_Double);
+  __ b(Long, ne);
+  __ ldr_double(D0_tos, Address(Rbase, base_offset));
+
+  __ push(dtos);
+  __ b(exit);
+  __ bind(Long);
+#endif
+
+#ifdef AARCH64
+  __ ldr(R0_tos, Address(Rbase, base_offset));
+#else
+  // 32-bit ARM: first word of the entry is the low half, second the high half.
+  __ ldr(R0_tos_lo, Address(Rbase, base_offset + 0 * wordSize));
+  __ ldr(R1_tos_hi, Address(Rbase, base_offset + 1 * wordSize));
+#endif // AARCH64
+  __ push(ltos);
+
+#ifdef __ABI_HARD__
+  __ bind(exit);
+#endif
+}
+
+
+// Loads the unsigned byte at bcp + offset (a local variable index) into reg.
+void TemplateTable::locals_index(Register reg, int offset) {
+ __ ldrb(reg, at_bcp(offset));
+}
+
+// iload: iload_internal() with its default rewrite control.
+void TemplateTable::iload() {
+ iload_internal();
+}
+
+// nofast_iload: same as iload, but bytecode rewriting is never attempted.
+void TemplateTable::nofast_iload() {
+ iload_internal(may_not_rewrite);
+}
+
+// Shared body of iload and nofast_iload.
+//   rc - may_rewrite allows the rewriting logic below to be emitted
+//        (provided rewrite_frequent_pairs() holds as well)
+// The rewrite logic inspects the bytecode following this iload and patches
+// the current one to _fast_iload2, _fast_icaload or _fast_iload. Afterwards
+// the local selected by the index at bcp + 1 is loaded into R0_tos.
+void TemplateTable::iload_internal(RewriteControl rc) {
+ transition(vtos, itos);
+
+ if ((rc == may_rewrite) && __ rewrite_frequent_pairs()) {
+ Label rewrite, done;
+ const Register next_bytecode = R1_tmp;
+ const Register target_bytecode = R2_tmp;
+
+ // get next byte
+ __ ldrb(next_bytecode, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
+ // If the next bytecode is still a plain _iload, do not rewrite this one
+ // yet: only the last two iloads of a run should become a pair. Once the
+ // next bytecode has itself been rewritten to _fast_iload (which the code
+ // below does only when it is followed by neither an iload nor a caload),
+ // this iload and the next one form the pair and become _fast_iload2.
+ __ cmp(next_bytecode, Bytecodes::_iload);
+ __ b(done, eq);
+
+ // The mov sits between cmp and branch; the branch still acts on the
+ // flags set by the cmp.
+ __ cmp(next_bytecode, Bytecodes::_fast_iload);
+ __ mov(target_bytecode, Bytecodes::_fast_iload2);
+ __ b(rewrite, eq);
+
+ // if _caload, rewrite to fast_icaload
+ __ cmp(next_bytecode, Bytecodes::_caload);
+ __ mov(target_bytecode, Bytecodes::_fast_icaload);
+ __ b(rewrite, eq);
+
+ // rewrite so iload doesn't check again.
+ __ mov(target_bytecode, Bytecodes::_fast_iload);
+
+ // rewrite
+ // R2: fast bytecode
+ __ bind(rewrite);
+ // target_bytecode is already loaded, hence load_bc_into_bc_reg == false.
+ patch_bytecode(Bytecodes::_iload, target_bytecode, Rtemp, false);
+ __ bind(done);
+ }
+
+ // Get the local value into tos
+ const Register Rlocal_index = R1_tmp;
+ locals_index(Rlocal_index);
+ Address local = load_iaddress(Rlocal_index, Rtemp);
+ __ ldr_s32(R0_tos, local);
+}
+
+
+void TemplateTable::fast_iload2() {
+  transition(vtos, itos);
+  const Register slot_index = R1_tmp;
+
+  // First iload of the pair: its index is read at the default offset; the
+  // value is pushed on the expression stack.
+  locals_index(slot_index);
+  __ ldr_s32(R0_tos, load_iaddress(slot_index, Rtemp));
+  __ push(itos);
+
+  // Second iload of the pair: its index is at bcp + 3; the value stays in
+  // R0_tos as the itos result.
+  locals_index(slot_index, 3);
+  __ ldr_s32(R0_tos, load_iaddress(slot_index, Rtemp));
+}
+
+void TemplateTable::fast_iload() {
+  transition(vtos, itos);
+  const Register slot_index = R1_tmp;
+
+  // Plain int load of the indexed local, with no rewriting logic.
+  locals_index(slot_index);
+  __ ldr_s32(R0_tos, load_iaddress(slot_index, Rtemp));
+}
+
+
+void TemplateTable::lload() {
+  transition(vtos, ltos);
+  const Register slot_index = R2_tmp;
+  const Register scratch    = R3_tmp;
+
+  // Fetch the local index from the bytecode stream, then load the
+  // two-slot value it designates.
+  locals_index(slot_index);
+  load_category2_local(slot_index, scratch);
+}
+
+
+void TemplateTable::fload() {
+  transition(vtos, ftos);
+  const Register slot_index = R2_tmp;
+
+  locals_index(slot_index);
+  const Address slot = load_faddress(slot_index, Rtemp);
+
+  // The float goes to S0_tos, or to R0_tos when built for soft-float.
+#ifndef __SOFTFP__
+  __ ldr_float(S0_tos, slot);
+#else
+  __ ldr(R0_tos, slot);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::dload() {
+  transition(vtos, dtos);
+  const Register slot_index = R2_tmp;
+
+  locals_index(slot_index);
+
+  // The double goes to D0_tos, or to the core register pair (same path as
+  // a long) when built for soft-float.
+#ifndef __SOFTFP__
+  __ ldr_double(D0_tos, load_daddress(slot_index, Rtemp));
+#else
+  load_category2_local(slot_index, R3_tmp);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::aload() {
+  transition(vtos, atos);
+  const Register slot_index = R1_tmp;
+
+  // Load the reference held in the indexed local into R0_tos.
+  locals_index(slot_index);
+  __ ldr(R0_tos, load_aaddress(slot_index, Rtemp));
+}
+
+
+// Loads the 16-bit local index of a wide bytecode into reg:
+// reg = (byte at bcp + 2) << 8 | (byte at bcp + 3). Clobbers Rtemp, so reg
+// must not be Rtemp.
+void TemplateTable::locals_index_wide(Register reg) {
+ assert_different_registers(reg, Rtemp);
+ __ ldrb(Rtemp, at_bcp(2));
+ __ ldrb(reg, at_bcp(3));
+ __ orr(reg, reg, AsmOperand(Rtemp, lsl, 8));
+}
+
+
+void TemplateTable::wide_iload() {
+  transition(vtos, itos);
+  const Register slot_index = R2_tmp;
+
+  // Same as the narrow int load, but with the 16-bit local index.
+  locals_index_wide(slot_index);
+  __ ldr_s32(R0_tos, load_iaddress(slot_index, Rtemp));
+}
+
+
+// wide lload: loads the two-slot local selected by the 16-bit index of the
+// wide bytecode as the ltos value. R3_tmp is handed to
+// load_category2_local() as its scratch register, exactly as lload() does;
+// the previously declared but never used Rlocal_base alias is gone.
+void TemplateTable::wide_lload() {
+  transition(vtos, ltos);
+  const Register Rlocal_index = R2_tmp;
+
+  locals_index_wide(Rlocal_index);
+  load_category2_local(Rlocal_index, R3_tmp);
+}
+
+
+void TemplateTable::wide_fload() {
+  transition(vtos, ftos);
+  const Register slot_index = R2_tmp;
+
+  locals_index_wide(slot_index);
+  const Address slot = load_faddress(slot_index, Rtemp);
+
+  // The float goes to S0_tos, or to R0_tos when built for soft-float.
+#ifndef __SOFTFP__
+  __ ldr_float(S0_tos, slot);
+#else
+  __ ldr(R0_tos, slot);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::wide_dload() {
+  transition(vtos, dtos);
+  const Register slot_index = R2_tmp;
+
+  locals_index_wide(slot_index);
+
+  // The double goes to D0_tos, or to the core register pair (same path as
+  // a long) when built for soft-float.
+#ifndef __SOFTFP__
+  __ ldr_double(D0_tos, load_daddress(slot_index, Rtemp));
+#else
+  load_category2_local(slot_index, R3_tmp);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::wide_aload() {
+  transition(vtos, atos);
+  const Register slot_index = R2_tmp;
+
+  // Same as the narrow reference load, but with the 16-bit local index.
+  locals_index_wide(slot_index);
+  __ ldr(R0_tos, load_aaddress(slot_index, Rtemp));
+}
+
+// Pops the array reference from the expression stack into 'array' and then
+// emits the null and bounds checks of index_check_without_pop().
+void TemplateTable::index_check(Register array, Register index) {
+ // Pop ptr into array
+ __ pop_ptr(array);
+ index_check_without_pop(array, index);
+}
+
+// Emits a null check of 'array' followed by a bounds check of 'index'
+// against the array length. On failure control transfers to the
+// ArrayIndexOutOfBoundsException entry with the offending index in
+// R4_ArrayIndexOutOfBounds_index. Clobbers Rtemp (it holds the length).
+void TemplateTable::index_check_without_pop(Register array, Register index) {
+ assert_different_registers(array, index, Rtemp);
+ // check array
+ __ null_check(array, Rtemp, arrayOopDesc::length_offset_in_bytes());
+ // check index
+ __ ldr_s32(Rtemp, Address(array, arrayOopDesc::length_offset_in_bytes()));
+ __ cmp_32(index, Rtemp);
+ // 'hs' is the unsigned higher-or-same condition, so a negative index
+ // fails the check as well.
+ if (index != R4_ArrayIndexOutOfBounds_index) {
+ // convention with generate_ArrayIndexOutOfBounds_handler()
+ // The move is conditional (hs): it only happens on the failing path.
+ __ mov(R4_ArrayIndexOutOfBounds_index, index, hs);
+ }
+ __ b(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry, hs);
+}
+
+
+void TemplateTable::iaload() {
+  transition(itos, itos);
+  const Register array_reg = R1_tmp;
+  const Register index_reg = R0_tos;  // the index is the incoming itos value
+
+  // Pop the array reference, then null- and bounds-check.
+  index_check(array_reg, index_reg);
+
+  const Address element = get_array_elem_addr(T_INT, array_reg, index_reg, Rtemp);
+  __ ldr_s32(R0_tos, element);
+}
+
+
+void TemplateTable::laload() {
+  transition(itos, ltos);
+  const Register array_reg = R1_tmp;
+  const Register index_reg = R0_tos;  // the index is the incoming itos value
+
+  // Pop the array reference, then null- and bounds-check.
+  index_check(array_reg, index_reg);
+
+#ifndef AARCH64
+  // 32-bit ARM: form the full element address in Rtemp and load both
+  // words with a single ldmia.
+  __ add(Rtemp, array_reg, AsmOperand(index_reg, lsl, LogBytesPerLong));
+  __ add(Rtemp, Rtemp, arrayOopDesc::base_offset_in_bytes(T_LONG));
+  __ ldmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
+#else
+  __ ldr(R0_tos, get_array_elem_addr(T_LONG, array_reg, index_reg, Rtemp));
+#endif // !AARCH64
+}
+
+
+void TemplateTable::faload() {
+  transition(itos, ftos);
+  const Register array_reg = R1_tmp;
+  const Register index_reg = R0_tos;  // the index is the incoming itos value
+
+  // Pop the array reference, then null- and bounds-check.
+  index_check(array_reg, index_reg);
+
+  const Address element = get_array_elem_addr(T_FLOAT, array_reg, index_reg, Rtemp);
+  // The float goes to S0_tos, or to R0_tos when built for soft-float.
+#ifndef __SOFTFP__
+  __ ldr_float(S0_tos, element);
+#else
+  __ ldr(R0_tos, element);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::daload() {
+  transition(itos, dtos);
+  const Register array_reg = R1_tmp;
+  const Register index_reg = R0_tos;  // the index is the incoming itos value
+
+  // Pop the array reference, then null- and bounds-check.
+  index_check(array_reg, index_reg);
+
+#ifndef __SOFTFP__
+  __ ldr_double(D0_tos, get_array_elem_addr(T_DOUBLE, array_reg, index_reg, Rtemp));
+#else
+  // Soft-float: form the full element address in Rtemp and load both
+  // words into the core register pair with a single ldmia.
+  __ add(Rtemp, array_reg, AsmOperand(index_reg, lsl, LogBytesPerLong));
+  __ add(Rtemp, Rtemp, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
+  __ ldmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::aaload() {
+  transition(itos, atos);
+  const Register array_reg = R1_tmp;
+  const Register index_reg = R0_tos;  // the index is the incoming itos value
+
+  // Pop the array reference, then null- and bounds-check.
+  index_check(array_reg, index_reg);
+
+  // References are read through load_heap_oop rather than a plain ldr.
+  const Address element = get_array_elem_addr(T_OBJECT, array_reg, index_reg, Rtemp);
+  __ load_heap_oop(R0_tos, element);
+}
+
+
+void TemplateTable::baload() {
+  transition(itos, itos);
+  const Register array_reg = R1_tmp;
+  const Register index_reg = R0_tos;  // the index is the incoming itos value
+
+  // Pop the array reference, then null- and bounds-check.
+  index_check(array_reg, index_reg);
+
+  // Byte elements are loaded with ldrsb (signed).
+  const Address element = get_array_elem_addr(T_BYTE, array_reg, index_reg, Rtemp);
+  __ ldrsb(R0_tos, element);
+}
+
+
+void TemplateTable::caload() {
+  transition(itos, itos);
+  const Register array_reg = R1_tmp;
+  const Register index_reg = R0_tos;  // the index is the incoming itos value
+
+  // Pop the array reference, then null- and bounds-check.
+  index_check(array_reg, index_reg);
+
+  // Char elements are loaded with ldrh (unsigned halfword).
+  const Address element = get_array_elem_addr(T_CHAR, array_reg, index_reg, Rtemp);
+  __ ldrh(R0_tos, element);
+}
+
+
+// iload followed by caload frequent pair
+// The int local selected by the index at bcp + 1 is used as the array
+// index; the array reference is popped from the expression stack and the
+// char element is left in R0_tos as the itos value.
+void TemplateTable::fast_icaload() {
+ transition(vtos, itos);
+ // Rlocal_index and Rarray share R1_tmp: the local index is dead once the
+ // array index has been loaded into Rindex, before the array is popped.
+ const Register Rlocal_index = R1_tmp;
+ const Register Rarray = R1_tmp;
+ const Register Rindex = R4_tmp; // index_check prefers index on R4
+ assert_different_registers(Rlocal_index, Rindex);
+ assert_different_registers(Rarray, Rindex);
+
+ // load index out of locals
+ locals_index(Rlocal_index);
+ Address local = load_iaddress(Rlocal_index, Rtemp);
+ __ ldr_s32(Rindex, local);
+
+ // get array element
+ index_check(Rarray, Rindex);
+ __ ldrh(R0_tos, get_array_elem_addr(T_CHAR, Rarray, Rindex, Rtemp));
+}
+
+
+void TemplateTable::saload() {
+  transition(itos, itos);
+  const Register array_reg = R1_tmp;
+  const Register index_reg = R0_tos;  // the index is the incoming itos value
+
+  // Pop the array reference, then null- and bounds-check.
+  index_check(array_reg, index_reg);
+
+  // Short elements are loaded with ldrsh (signed halfword).
+  const Address element = get_array_elem_addr(T_SHORT, array_reg, index_reg, Rtemp);
+  __ ldrsh(R0_tos, element);
+}
+
+
+// iload_<n>: loads int local n (a generation-time constant) into R0_tos.
+void TemplateTable::iload(int n) {
+ transition(vtos, itos);
+ __ ldr_s32(R0_tos, iaddress(n));
+}
+
+
+void TemplateTable::lload(int n) {
+  transition(vtos, ltos);
+#ifndef AARCH64
+  // 32-bit ARM: low word from laddress(n), high word from haddress(n).
+  __ ldr(R0_tos_lo, laddress(n));
+  __ ldr(R1_tos_hi, haddress(n));
+#else
+  // AArch64: one load from laddress(n).
+  __ ldr(R0_tos, laddress(n));
+#endif // !AARCH64
+}
+
+
+void TemplateTable::fload(int n) {
+  transition(vtos, ftos);
+  // The float goes to S0_tos, or to R0_tos when built for soft-float.
+#ifndef __SOFTFP__
+  __ ldr_float(S0_tos, faddress(n));
+#else
+  __ ldr(R0_tos, faddress(n));
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::dload(int n) {
+  transition(vtos, dtos);
+#ifndef __SOFTFP__
+  __ ldr_double(D0_tos, daddress(n));
+#else
+  // Soft-float: the double is loaded word by word into the core register
+  // pair, low word from laddress(n), high word from haddress(n).
+  __ ldr(R0_tos_lo, laddress(n));
+  __ ldr(R1_tos_hi, haddress(n));
+#endif // !__SOFTFP__
+}
+
+
+// aload_<n>: loads reference local n (a generation-time constant) into R0_tos.
+void TemplateTable::aload(int n) {
+ transition(vtos, atos);
+ __ ldr(R0_tos, aaddress(n));
+}
+
+// aload_0: aload_0_internal() with its default rewrite control.
+void TemplateTable::aload_0() {
+ aload_0_internal();
+}
+
+// nofast_aload_0: same as aload_0, but bytecode rewriting is never attempted.
+void TemplateTable::nofast_aload_0() {
+ aload_0_internal(may_not_rewrite);
+}
+
+// Shared body of aload_0 and nofast_aload_0.
+//   rc - may_rewrite allows the rewriting logic below to be emitted
+//        (provided rewrite_frequent_pairs() holds as well)
+// After the optional rewriting code, the template performs aload(0).
+void TemplateTable::aload_0_internal(RewriteControl rc) {
+ transition(vtos, atos);
+ // According to bytecode histograms, the pairs:
+ //
+ // _aload_0, _fast_igetfield
+ // _aload_0, _fast_agetfield
+ // _aload_0, _fast_fgetfield
+ //
+ // occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0
+ // bytecode checks if the next bytecode is either _fast_igetfield,
+ // _fast_agetfield or _fast_fgetfield and then rewrites the
+ // current bytecode into a pair bytecode; otherwise it rewrites the current
+ // bytecode into _fast_aload_0 that doesn't do the pair check anymore.
+ //
+ // Note: If the next bytecode is _getfield, the rewrite must be delayed,
+ // otherwise we may miss an opportunity for a pair.
+ //
+ // Also rewrite frequent pairs
+ // aload_0, aload_1
+ // aload_0, iload_1
+ // These bytecodes with a small amount of code are most profitable to rewrite
+ if ((rc == may_rewrite) && __ rewrite_frequent_pairs()) {
+ Label rewrite, done;
+ const Register next_bytecode = R1_tmp;
+ const Register target_bytecode = R2_tmp;
+
+ // get next byte
+ __ ldrb(next_bytecode, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
+
+ // if _getfield then wait with rewrite
+ __ cmp(next_bytecode, Bytecodes::_getfield);
+ __ b(done, eq);
+
+ // In each test below the mov sits between cmp and branch; the branch
+ // still acts on the flags set by the cmp.
+
+ // if _igetfield then rewrite to _fast_iaccess_0
+ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+ __ cmp(next_bytecode, Bytecodes::_fast_igetfield);
+ __ mov(target_bytecode, Bytecodes::_fast_iaccess_0);
+ __ b(rewrite, eq);
+
+ // if _agetfield then rewrite to _fast_aaccess_0
+ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+ __ cmp(next_bytecode, Bytecodes::_fast_agetfield);
+ __ mov(target_bytecode, Bytecodes::_fast_aaccess_0);
+ __ b(rewrite, eq);
+
+ // if _fgetfield then rewrite to _fast_faccess_0, else rewrite to _fast_aload0
+ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
+
+ // The last choice is made with conditional moves instead of a branch.
+ __ cmp(next_bytecode, Bytecodes::_fast_fgetfield);
+#ifdef AARCH64
+ __ mov(Rtemp, Bytecodes::_fast_faccess_0);
+ __ mov(target_bytecode, Bytecodes::_fast_aload_0);
+ __ mov(target_bytecode, Rtemp, eq);
+#else
+ __ mov(target_bytecode, Bytecodes::_fast_faccess_0, eq);
+ __ mov(target_bytecode, Bytecodes::_fast_aload_0, ne);
+#endif // AARCH64
+
+ // rewrite
+ __ bind(rewrite);
+ // target_bytecode is already loaded, hence load_bc_into_bc_reg == false.
+ patch_bytecode(Bytecodes::_aload_0, target_bytecode, Rtemp, false);
+
+ __ bind(done);
+ }
+
+ aload(0);
+}
+
+void TemplateTable::istore() {
+  transition(itos, vtos);
+  const Register slot_index = R2_tmp;
+
+  // Store the itos value into the indexed local.
+  locals_index(slot_index);
+  __ str_32(R0_tos, load_iaddress(slot_index, Rtemp));
+}
+
+
+void TemplateTable::lstore() {
+  transition(ltos, vtos);
+  const Register slot_index = R2_tmp;
+  const Register scratch    = R3_tmp;
+
+  // Fetch the local index from the bytecode stream, then store the ltos
+  // value into the two-slot local it designates.
+  locals_index(slot_index);
+  store_category2_local(slot_index, scratch);
+}
+
+
+void TemplateTable::fstore() {
+  transition(ftos, vtos);
+  const Register slot_index = R2_tmp;
+
+  locals_index(slot_index);
+  const Address slot = load_faddress(slot_index, Rtemp);
+
+  // The float comes from S0_tos, or from R0_tos when built for soft-float.
+#ifndef __SOFTFP__
+  __ str_float(S0_tos, slot);
+#else
+  __ str(R0_tos, slot);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::dstore() {
+  transition(dtos, vtos);
+  const Register slot_index = R2_tmp;
+
+  locals_index(slot_index);
+
+  // The double comes from D0_tos, or from the core register pair (same
+  // path as a long) when built for soft-float.
+#ifndef __SOFTFP__
+  __ str_double(D0_tos, load_daddress(slot_index, Rtemp));
+#else
+  store_category2_local(slot_index, R3_tmp);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::astore() {
+  transition(vtos, vtos);
+  const Register slot_index = R1_tmp;
+
+  // The template starts in vtos, so the value is popped explicitly.
+  __ pop_ptr(R0_tos);
+  locals_index(slot_index);
+  __ str(R0_tos, load_aaddress(slot_index, Rtemp));
+}
+
+
+void TemplateTable::wide_istore() {
+  transition(vtos, vtos);
+  const Register slot_index = R2_tmp;
+
+  // The template starts in vtos, so the int is popped explicitly before
+  // the 16-bit local index is fetched.
+  __ pop_i(R0_tos);
+  locals_index_wide(slot_index);
+  __ str_32(R0_tos, load_iaddress(slot_index, Rtemp));
+}
+
+
+// wide lstore: pops a long from the expression stack and stores it into the
+// two-slot local selected by the 16-bit index of the wide bytecode. R3_tmp
+// is handed to store_category2_local() as its scratch register, exactly as
+// lstore() does; the previously declared but never used Rlocal_base alias
+// is gone.
+void TemplateTable::wide_lstore() {
+  transition(vtos, vtos);
+  const Register Rlocal_index = R2_tmp;
+
+  // The template starts in vtos, so the value is popped explicitly.
+#ifdef AARCH64
+  __ pop_l(R0_tos);
+#else
+  __ pop_l(R0_tos_lo, R1_tos_hi);
+#endif // AARCH64
+
+  locals_index_wide(Rlocal_index);
+  store_category2_local(Rlocal_index, R3_tmp);
+}
+
+
+// wide fstore: generated by the wide_istore() template, which pops the value
+// with pop_i and stores it with str_32.
+void TemplateTable::wide_fstore() {
+ wide_istore();
+}
+
+
+// wide dstore: generated by the wide_lstore() template, which pops the value
+// with pop_l and stores it through store_category2_local().
+void TemplateTable::wide_dstore() {
+ wide_lstore();
+}
+
+
+void TemplateTable::wide_astore() {
+  transition(vtos, vtos);
+  const Register slot_index = R2_tmp;
+
+  // The template starts in vtos, so the reference is popped explicitly
+  // before the 16-bit local index is fetched.
+  __ pop_ptr(R0_tos);
+  locals_index_wide(slot_index);
+  __ str(R0_tos, load_aaddress(slot_index, Rtemp));
+}
+
+
+void TemplateTable::iastore() {
+ transition(itos, vtos);
+ // The int value to store is cached in R0_tos; the index is popped below.
+ const Register Rarr = R3_tmp;
+ const Register Ridx = R4_tmp; // R4 is the register index_check prefers for the index
+ __ pop_i(Ridx);
+ index_check(Rarr, Ridx);
+ Address elem = get_array_elem_addr(T_INT, Rarr, Ridx, Rtemp);
+ __ str_32(R0_tos, elem);
+}
+
+
+void TemplateTable::lastore() {
+ transition(ltos, vtos);
+ // Value to store: R0_tos on AArch64, the R0_tos_lo:R1_tos_hi pair on 32-bit ARM.
+ const Register Rarr = R3_tmp;
+ const Register Ridx = R4_tmp; // R4 is the register index_check prefers for the index
+ __ pop_i(Ridx);
+ index_check(Rarr, Ridx);
+
+#ifndef AARCH64
+ // Form the element address by hand so both words go out in a single stmia.
+ __ add(Rtemp, Rarr, AsmOperand(Ridx, lsl, LogBytesPerLong));
+ __ add(Rtemp, Rtemp, arrayOopDesc::base_offset_in_bytes(T_LONG));
+ __ stmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
+#else
+ __ str(R0_tos, get_array_elem_addr(T_LONG, Rarr, Ridx, Rtemp));
+#endif // !AARCH64
+}
+
+
+void TemplateTable::fastore() {
+ transition(ftos, vtos);
+ // Value to store: S0_tos with hardware FP, R0_tos when built with __SOFTFP__.
+ const Register Rarr = R3_tmp;
+ const Register Ridx = R4_tmp; // R4 is the register index_check prefers for the index
+
+ __ pop_i(Ridx);
+ index_check(Rarr, Ridx);
+ Address elem = get_array_elem_addr(T_FLOAT, Rarr, Ridx, Rtemp);
+
+#ifndef __SOFTFP__
+ __ str_float(S0_tos, elem);
+#else
+ __ str(R0_tos, elem);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::dastore() {
+ transition(dtos, vtos);
+ // Value to store: D0_tos with hardware FP, the R0_tos_lo:R1_tos_hi pair under __SOFTFP__.
+ const Register Rarr = R3_tmp;
+ const Register Ridx = R4_tmp; // R4 is the register index_check prefers for the index
+ __ pop_i(Ridx);
+ index_check(Rarr, Ridx);
+
+#ifndef __SOFTFP__
+ __ str_double(D0_tos, get_array_elem_addr(T_DOUBLE, Rarr, Ridx, Rtemp));
+#else
+ // Soft-float: address the element by hand and store both words with one stmia.
+ __ add(Rtemp, Rarr, AsmOperand(Ridx, lsl, LogBytesPerLong));
+ __ add(Rtemp, Rtemp, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
+ __ stmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::aastore() {
+ transition(vtos, vtos);
+ Label is_null, throw_array_store, done;
+ // Register aliases; each numeric suffix records the Rn_tmp register behind the name.
+ const Register Raddr_1 = R1_tmp;
+ const Register Rvalue_2 = R2_tmp;
+ const Register Rarray_3 = R3_tmp;
+ const Register Rindex_4 = R4_tmp; // preferred by index_check_without_pop()
+ const Register Rsub_5 = R5_tmp;
+ const Register Rsuper_LR = LR_tmp;
+
+ // stack: ..., array, index, value
+ __ ldr(Rvalue_2, at_tos()); // Value
+ __ ldr_s32(Rindex_4, at_tos_p1()); // Index
+ __ ldr(Rarray_3, at_tos_p2()); // Array
+ // The operands were only loaded, not popped, so the non-popping index check is used.
+ index_check_without_pop(Rarray_3, Rindex_4);
+
+ // Compute the array base
+ __ add(Raddr_1, Rarray_3, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+ // do array store check - check for NULL value first
+ __ cbz(Rvalue_2, is_null);
+
+ // Load subklass
+ __ load_klass(Rsub_5, Rvalue_2);
+ // Load superklass
+ __ load_klass(Rtemp, Rarray_3);
+ __ ldr(Rsuper_LR, Address(Rtemp, ObjArrayKlass::element_klass_offset()));
+ // R3_tmp (the same register as Rarray_3) is passed as a temp below; the base is already held in Raddr_1.
+ __ gen_subtype_check(Rsub_5, Rsuper_LR, throw_array_store, R0_tmp, R3_tmp);
+ // Come here on success
+
+ // Store value
+ __ add(Raddr_1, Raddr_1, AsmOperand(Rindex_4, lsl, LogBytesPerHeapOop));
+
+ // Now store using the appropriate barrier
+ do_oop_store(_masm, Raddr_1, Rvalue_2, Rtemp, R0_tmp, R3_tmp, _bs->kind(), true, false);
+ __ b(done);
+
+ __ bind(throw_array_store);
+
+ // Come here on failure of subtype check
+ __ profile_typecheck_failed(R0_tmp);
+
+ // object is at TOS
+ __ b(Interpreter::_throw_ArrayStoreException_entry);
+
+ // Have a NULL in Rvalue_2, store NULL at array[index].
+ __ bind(is_null);
+ __ profile_null_seen(R0_tmp);
+
+ // Store a NULL (NOTE(review): trailing flags are (true, true) here vs (true, false) above - presumably the last one marks a known-NULL value; confirm against do_oop_store)
+ do_oop_store(_masm, Address::indexed_oop(Raddr_1, Rindex_4), Rvalue_2, Rtemp, R0_tmp, R3_tmp, _bs->kind(), true, true);
+
+ // Pop stack arguments (array, index and value: three slots)
+ __ bind(done);
+ __ add(Rstack_top, Rstack_top, 3 * Interpreter::stackElementSize);
+}
+
+
+void TemplateTable::bastore() {
+ transition(itos, vtos);
+ // The value to store is cached in R0_tos.
+ const Register Rarr = R3_tmp;
+ const Register Ridx = R4_tmp; // R4 is the register index_check prefers for the index
+
+ __ pop_i(Ridx);
+ index_check(Rarr, Ridx);
+
+ // bastore serves both byte[] and boolean[]; the array klass's layout helper
+ // is inspected to tell the two apart.
+ __ load_klass(Rtemp, Rarr);
+ __ ldr_u32(Rtemp, Address(Rtemp, Klass::layout_helper_offset()));
+ Label not_boolean;
+ __ tst(Rtemp, Klass::layout_helper_boolean_diffbit());
+ __ b(not_boolean, eq);
+ __ and_32(R0_tos, R0_tos, 1); // T_BOOLEAN array only: clamp the stored value to 0/1
+ __ bind(not_boolean);
+ __ strb(R0_tos, get_array_elem_addr(T_BYTE, Rarr, Ridx, Rtemp));
+}
+
+
+void TemplateTable::castore() {
+ transition(itos, vtos);
+ // The value to store is cached in R0_tos; sastore() reuses this template.
+ const Register Rarr = R3_tmp;
+ const Register Ridx = R4_tmp; // R4 is the register index_check prefers for the index
+
+ __ pop_i(Ridx);
+ index_check(Rarr, Ridx);
+ Address elem = get_array_elem_addr(T_CHAR, Rarr, Ridx, Rtemp);
+ __ strh(R0_tos, elem);
+}
+
+
+void TemplateTable::sastore() { // sastore reuses the castore template
+ assert(arrayOopDesc::base_offset_in_bytes(T_CHAR) ==
+ arrayOopDesc::base_offset_in_bytes(T_SHORT),
+ "base offsets for char and short should be equal");
+ castore(); // castore addresses elements as T_CHAR, hence the equal-base-offset assertion above
+}
+
+
+void TemplateTable::istore(int n) { // store the cached int into local n (n is fixed when the template is generated)
+ transition(itos, vtos);
+ __ str_32(R0_tos, iaddress(n)); // 32-bit store to the address iaddress(n) yields for local n
+}
+
+
+void TemplateTable::lstore(int n) { // store the cached long into local n
+ transition(ltos, vtos);
+#ifdef AARCH64
+ __ str(R0_tos, laddress(n)); // the whole long is in one register
+#else
+ __ str(R0_tos_lo, laddress(n)); // low word
+ __ str(R1_tos_hi, haddress(n)); // high word
+#endif // AARCH64
+}
+
+
+void TemplateTable::fstore(int n) { // store the cached float into local n
+ transition(ftos, vtos);
+#ifdef __SOFTFP__
+ __ str(R0_tos, faddress(n)); // soft-float build: the float is in the core register R0_tos
+#else
+ __ str_float(S0_tos, faddress(n)); // hardware FP build: the float is in S0_tos
+#endif // __SOFTFP__
+}
+
+
+void TemplateTable::dstore(int n) { // store the cached double into local n
+ transition(dtos, vtos);
+#ifdef __SOFTFP__
+ __ str(R0_tos_lo, laddress(n)); // soft-float build: low word, same two stores as the 32-bit lstore(n)
+ __ str(R1_tos_hi, haddress(n)); // high word
+#else
+ __ str_double(D0_tos, daddress(n)); // hardware FP build: the double is in D0_tos
+#endif // __SOFTFP__
+}
+
+
+void TemplateTable::astore(int n) { // pop a reference and store it into local n
+ transition(vtos, vtos);
+ __ pop_ptr(R0_tos); // entry state is vtos, so the value is popped explicitly
+ __ str(R0_tos, aaddress(n));
+}
+
+
+void TemplateTable::pop() { // pop: discard the top expression-stack slot
+ transition(vtos, vtos);
+ __ add(Rstack_top, Rstack_top, Interpreter::stackElementSize); // popping advances Rstack_top by one element size
+}
+
+
+void TemplateTable::pop2() { // pop2: discard the top two expression-stack slots
+ transition(vtos, vtos);
+ __ add(Rstack_top, Rstack_top, 2*Interpreter::stackElementSize); // advance Rstack_top by two element sizes
+}
+
+
+void TemplateTable::dup() { // dup: duplicate the top stack slot
+ transition(vtos, vtos);
+ // stack: ..., a
+ __ load_ptr(0, R0_tmp); // read slot 0 (the top slot) without popping it
+ __ push_ptr(R0_tmp); // push the copy
+ // stack: ..., a, a
+}
+
+
+void TemplateTable::dup_x1() { // dup_x1: copy the top slot and insert the copy beneath the second slot
+ transition(vtos, vtos);
+ // stack: ..., a, b (slot 0 is the top)
+ __ load_ptr(0, R0_tmp); // load b
+ __ load_ptr(1, R2_tmp); // load a
+ __ store_ptr(1, R0_tmp); // store b
+ __ store_ptr(0, R2_tmp); // store a
+ __ push_ptr(R0_tmp); // push b
+ // stack: ..., b, a, b
+}
+
+
+void TemplateTable::dup_x2() { // dup_x2: copy the top slot and insert the copy beneath the third slot
+ transition(vtos, vtos);
+ // stack: ..., a, b, c
+ __ load_ptr(0, R0_tmp); // load c
+ __ load_ptr(1, R2_tmp); // load b
+ __ load_ptr(2, R4_tmp); // load a
+ // All three values are in registers before the stack grows.
+ __ push_ptr(R0_tmp); // push c
+
+ // stack: ..., a, b, c, c (slot numbers below count from the new top)
+ __ store_ptr(1, R2_tmp); // store b
+ __ store_ptr(2, R4_tmp); // store a
+ __ store_ptr(3, R0_tmp); // store c
+ // stack: ..., c, a, b, c
+}
+
+
+void TemplateTable::dup2() { // dup2: duplicate the top two stack slots
+ transition(vtos, vtos);
+ // stack: ..., a, b
+ __ load_ptr(1, R0_tmp); // load a
+ __ push_ptr(R0_tmp); // push a
+ __ load_ptr(1, R0_tmp); // load b (after the push, b has moved to slot 1)
+ __ push_ptr(R0_tmp); // push b
+ // stack: ..., a, b, a, b
+}
+
+
+void TemplateTable::dup2_x1() { // dup2_x1: copy the top two slots and insert the copies beneath the third slot
+ transition(vtos, vtos);
+
+ // stack: ..., a, b, c
+ __ load_ptr(0, R4_tmp); // load c
+ __ load_ptr(1, R2_tmp); // load b
+ __ load_ptr(2, R0_tmp); // load a
+ // All three values are in registers before the stack grows.
+ __ push_ptr(R2_tmp); // push b
+ __ push_ptr(R4_tmp); // push c
+
+ // stack: ..., a, b, c, b, c (slot numbers below count from the new top)
+
+ __ store_ptr(2, R0_tmp); // store a
+ __ store_ptr(3, R4_tmp); // store c
+ __ store_ptr(4, R2_tmp); // store b
+
+ // stack: ..., b, c, a, b, c
+}
+
+
+void TemplateTable::dup2_x2() { // dup2_x2: copy the top two slots and insert the copies beneath the fourth slot
+ transition(vtos, vtos);
+ // stack: ..., a, b, c, d
+ __ load_ptr(0, R0_tmp); // load d
+ __ load_ptr(1, R2_tmp); // load c
+ __ push_ptr(R2_tmp); // push c
+ __ push_ptr(R0_tmp); // push d
+ // stack: ..., a, b, c, d, c, d (slot numbers below count from the new top)
+ __ load_ptr(4, R4_tmp); // load b
+ __ store_ptr(4, R0_tmp); // store d in b
+ __ store_ptr(2, R4_tmp); // store b in d
+ // stack: ..., a, d, c, b, c, d
+ __ load_ptr(5, R4_tmp); // load a (R4_tmp is reused; b has already been written back)
+ __ store_ptr(5, R2_tmp); // store c in a
+ __ store_ptr(3, R4_tmp); // store a in c
+ // stack: ..., c, d, a, b, c, d
+}
+
+
+void TemplateTable::swap() { // swap: exchange the top two stack slots in place
+ transition(vtos, vtos);
+ // stack: ..., a, b
+ __ load_ptr(1, R0_tmp); // load a
+ __ load_ptr(0, R2_tmp); // load b
+ __ store_ptr(0, R0_tmp); // store a in b
+ __ store_ptr(1, R2_tmp); // store b in a
+ // stack: ..., b, a
+}
+
+
+void TemplateTable::iop2(Operation op) {
+ transition(itos, itos);
+ const Register rhs = R0_tos; // second operand arrives cached in the tos register
+ const Register lhs = R1_tmp; // first operand is popped from the expression stack
+ // Emits R0_tos = lhs <op> rhs for the binary int operation selected by 'op'.
+ __ pop_i(lhs);
+ switch (op) {
+ case add: __ add_32(R0_tos, lhs, rhs); break;
+ case sub: __ sub_32(R0_tos, lhs, rhs); break;
+ case mul: __ mul_32(R0_tos, lhs, rhs); break;
+ case _and: __ and_32(R0_tos, lhs, rhs); break;
+ case _or: __ orr_32(R0_tos, lhs, rhs); break;
+ case _xor: __ eor_32(R0_tos, lhs, rhs); break;
+#ifdef AARCH64
+ case shl: __ lslv_w(R0_tos, lhs, rhs); break;
+ case shr: __ asrv_w(R0_tos, lhs, rhs); break;
+ case ushr: __ lsrv_w(R0_tos, lhs, rhs); break;
+#else
+ case shl: __ andr(rhs, rhs, 0x1f); __ mov(R0_tos, AsmOperand(lhs, lsl, rhs)); break; // count is first masked to its low 5 bits
+ case shr: __ andr(rhs, rhs, 0x1f); __ mov(R0_tos, AsmOperand(lhs, asr, rhs)); break;
+ case ushr: __ andr(rhs, rhs, 0x1f); __ mov(R0_tos, AsmOperand(lhs, lsr, rhs)); break;
+#endif // AARCH64
+ default: ShouldNotReachHere();
+ }
+}
+
+
+void TemplateTable::lop2(Operation op) {
+ transition(ltos, ltos);
+#ifdef AARCH64
+ const Register rhs = R0_tos; // second operand arrives cached in tos
+ const Register lhs = R1_tmp; // first operand is popped below
+ // Emits R0_tos = lhs <op> rhs on full 64-bit registers.
+ __ pop_l(lhs);
+ switch (op) {
+ case add: __ add(R0_tos, lhs, rhs); break;
+ case sub: __ sub(R0_tos, lhs, rhs); break;
+ case _and: __ andr(R0_tos, lhs, rhs); break;
+ case _or: __ orr(R0_tos, lhs, rhs); break;
+ case _xor: __ eor(R0_tos, lhs, rhs); break;
+ default: ShouldNotReachHere();
+ }
+#else
+ const Register rhs_lo = R0_tos_lo;
+ const Register rhs_hi = R1_tos_hi;
+ const Register lhs_lo = R2_tmp;
+ const Register lhs_hi = R3_tmp;
+ // 32-bit ARM: operate on the low words first, then the high words (add/sub chain through the carry flag).
+ __ pop_l(lhs_lo, lhs_hi);
+ switch (op) {
+ case add: __ adds(R0_tos_lo, lhs_lo, rhs_lo); __ adc(R1_tos_hi, lhs_hi, rhs_hi); break;
+ case sub: __ subs(R0_tos_lo, lhs_lo, rhs_lo); __ sbc(R1_tos_hi, lhs_hi, rhs_hi); break;
+ case _and: __ andr(R0_tos_lo, lhs_lo, rhs_lo); __ andr(R1_tos_hi, lhs_hi, rhs_hi); break;
+ case _or: __ orr(R0_tos_lo, lhs_lo, rhs_lo); __ orr(R1_tos_hi, lhs_hi, rhs_hi); break;
+ case _xor: __ eor(R0_tos_lo, lhs_lo, rhs_lo); __ eor(R1_tos_hi, lhs_hi, rhs_hi); break;
+ default: ShouldNotReachHere();
+ }
+#endif // AARCH64
+}
+
+
+void TemplateTable::idiv() {
+ transition(itos, itos);
+#ifndef AARCH64
+ __ mov(R2, R0_tos);
+ __ pop_i(R0);
+ // Stub inputs: R0 = dividend, R2 = divisor.
+ // NOTE(review): no zero test is emitted on this path; presumably the stub deals with it - confirm.
+ __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::none);
+ // Stub output used here: the result in R1.
+ __ mov(R0_tos, R1);
+#else
+ const Register Rdenom = R0_tos; // divisor arrives cached in tos
+ const Register Rnumer = R1_tmp; // dividend is popped below
+ // A zero divisor diverts to the ArithmeticException entry before the dividend is popped.
+ __ cbz_w(Rdenom, Interpreter::_throw_ArithmeticException_entry);
+ __ pop_i(Rnumer);
+ __ sdiv_w(R0_tos, Rnumer, Rdenom);
+#endif // !AARCH64
+}
+
+
+void TemplateTable::irem() {
+ transition(itos, itos);
+#ifndef AARCH64
+ __ mov(R2, R0_tos);
+ __ pop_i(R0);
+ // Stub inputs: R0 = dividend, R2 = divisor.
+ // Stub output used here: the remainder, left in R0, so no move is emitted after the call.
+ __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::none);
+#else
+ const Register Rdenom = R0_tos; // divisor arrives cached in tos
+ const Register Rnumer = R1_tmp; // dividend is popped below
+ const Register Rquot = R2_tmp;
+ // A zero divisor diverts to the ArithmeticException entry before the dividend is popped.
+ __ cbz_w(Rdenom, Interpreter::_throw_ArithmeticException_entry);
+ __ pop_i(Rnumer);
+ __ sdiv_w(Rquot, Rnumer, Rdenom);
+ // Remainder is formed from the quotient (multiply-subtract of Rdenom, Rquot against Rnumer).
+ __ msub_w(R0_tos, Rdenom, Rquot, Rnumer);
+#endif // !AARCH64
+}
+
+
+void TemplateTable::lmul() {
+ transition(ltos, ltos);
+#ifdef AARCH64
+ const Register cached = R0_tos; // operand already in the tos register
+ const Register popped = R1_tmp; // operand taken from the expression stack
+
+ __ pop_l(popped);
+ __ mul(R0_tos, cached, popped);
+#else
+ const Register cached_lo = R0_tos_lo;
+ const Register cached_hi = R1_tos_hi;
+ const Register popped_lo = R2_tmp;
+ const Register popped_hi = R3_tmp;
+
+ __ pop_l(popped_lo, popped_hi);
+ // 32-bit ARM: the 64-bit multiply is delegated to a runtime leaf routine.
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lmul), cached_lo, cached_hi, popped_lo, popped_hi);
+#endif // AARCH64
+}
+
+
+void TemplateTable::ldiv() {
+ transition(ltos, ltos);
+#ifdef AARCH64
+ const Register Rdenom = R0_tos; // divisor arrives cached in tos
+ const Register Rnumer = R1_tmp; // dividend is popped below
+ // A zero divisor diverts to the ArithmeticException entry before the dividend is popped.
+ __ cbz(Rdenom, Interpreter::_throw_ArithmeticException_entry);
+ __ pop_l(Rnumer);
+ __ sdiv(R0_tos, Rnumer, Rdenom);
+#else
+ const Register denom_lo = R0_tos_lo;
+ const Register denom_hi = R1_tos_hi;
+ const Register numer_lo = R2_tmp;
+ const Register numer_hi = R3_tmp;
+
+ __ pop_l(numer_lo, numer_hi);
+
+ // OR the two halves: the Z flag ends up set exactly when the 64-bit divisor is zero.
+ __ orrs(Rtemp, denom_lo, denom_hi);
+ __ call(Interpreter::_throw_ArithmeticException_entry, relocInfo::none, eq);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), denom_lo, denom_hi, numer_lo, numer_hi);
+#endif // AARCH64
+}
+
+
+void TemplateTable::lrem() {
+ transition(ltos, ltos);
+#ifdef AARCH64
+ const Register Rdenom = R0_tos; // divisor arrives cached in tos
+ const Register Rnumer = R1_tmp; // dividend is popped below
+ const Register Rquot = R2_tmp;
+ // A zero divisor diverts to the ArithmeticException entry before the dividend is popped.
+ __ cbz(Rdenom, Interpreter::_throw_ArithmeticException_entry);
+ __ pop_l(Rnumer);
+ __ sdiv(Rquot, Rnumer, Rdenom);
+ __ msub(R0_tos, Rdenom, Rquot, Rnumer);
+#else
+ const Register denom_lo = R0_tos_lo;
+ const Register denom_hi = R1_tos_hi;
+ const Register numer_lo = R2_tmp;
+ const Register numer_hi = R3_tmp;
+
+ __ pop_l(numer_lo, numer_hi);
+
+ // OR the two halves: the Z flag ends up set exactly when the 64-bit divisor is zero.
+ __ orrs(Rtemp, denom_lo, denom_hi);
+ __ call(Interpreter::_throw_ArithmeticException_entry, relocInfo::none, eq);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), denom_lo, denom_hi, numer_lo, numer_hi);
+#endif // AARCH64
+}
+
+
+void TemplateTable::lshl() {
+ transition(itos, ltos);
+#ifdef AARCH64
+ const Register Rcount = R0_tos; // int shift count arrives cached in tos
+ const Register Rsrc = R1_tmp; // long operand is popped
+ __ pop_l(Rsrc);
+ __ lslv(R0_tos, Rsrc, Rcount);
+#else
+ const Register src_lo = R2_tmp;
+ const Register src_hi = R3_tmp;
+ const Register Rcount = R4_tmp;
+ // 32-bit ARM: keep only the low 6 bits of the count, then shift the register pair left.
+ __ pop_l(src_lo, src_hi);
+ __ andr(Rcount, R0_tos, 63);
+ __ long_shift(R0_tos_lo, R1_tos_hi, src_lo, src_hi, lsl, Rcount);
+#endif // AARCH64
+}
+
+
+void TemplateTable::lshr() {
+ transition(itos, ltos);
+#ifdef AARCH64
+ const Register Rcount = R0_tos; // int shift count arrives cached in tos
+ const Register Rsrc = R1_tmp; // long operand is popped
+ __ pop_l(Rsrc);
+ __ asrv(R0_tos, Rsrc, Rcount);
+#else
+ const Register src_lo = R2_tmp;
+ const Register src_hi = R3_tmp;
+ const Register Rcount = R4_tmp;
+ // 32-bit ARM: keep only the low 6 bits of the count, then arithmetic-shift the pair right.
+ __ pop_l(src_lo, src_hi);
+ __ andr(Rcount, R0_tos, 63);
+ __ long_shift(R0_tos_lo, R1_tos_hi, src_lo, src_hi, asr, Rcount);
+#endif // AARCH64
+}
+
+
+void TemplateTable::lushr() {
+ transition(itos, ltos);
+#ifdef AARCH64
+ const Register Rcount = R0_tos; // int shift count arrives cached in tos
+ const Register Rsrc = R1_tmp; // long operand is popped
+ __ pop_l(Rsrc);
+ __ lsrv(R0_tos, Rsrc, Rcount);
+#else
+ const Register src_lo = R2_tmp;
+ const Register src_hi = R3_tmp;
+ const Register Rcount = R4_tmp;
+ // 32-bit ARM: keep only the low 6 bits of the count, then logical-shift the pair right.
+ __ pop_l(src_lo, src_hi);
+ __ andr(Rcount, R0_tos, 63);
+ __ long_shift(R0_tos_lo, R1_tos_hi, src_lo, src_hi, lsr, Rcount);
+#endif // AARCH64
+}
+
+
+void TemplateTable::fop2(Operation op) { // binary float operation selected by 'op'; second operand is cached, first is popped
+ transition(ftos, ftos);
+#ifdef __SOFTFP__
+ __ mov(R1, R0_tos); // cached (second) operand becomes call argument R1
+ __ pop_i(R0); // popped (first) operand becomes call argument R0
+ switch (op) {
+ case add: __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_fadd_glibc), R0, R1); break;
+ case sub: __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_fsub_glibc), R0, R1); break;
+ case mul: __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_fmul), R0, R1); break;
+ case div: __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_fdiv), R0, R1); break;
+ case rem: __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), R0, R1); break;
+ default : ShouldNotReachHere();
+ }
+#else
+ const FloatRegister arg1 = S1_tmp;
+ const FloatRegister arg2 = S0_tos;
+ // Hardware FP: add/sub/mul/div are single instructions; rem still goes through SharedRuntime::frem.
+ switch (op) {
+ case add: __ pop_f(arg1); __ add_float(S0_tos, arg1, arg2); break;
+ case sub: __ pop_f(arg1); __ sub_float(S0_tos, arg1, arg2); break;
+ case mul: __ pop_f(arg1); __ mul_float(S0_tos, arg1, arg2); break;
+ case div: __ pop_f(arg1); __ div_float(S0_tos, arg1, arg2); break;
+ case rem:
+#ifndef __ABI_HARD__
+ __ pop_f(arg1);
+ __ fmrs(R0, arg1); // arguments are moved into core registers R0/R1 for the call
+ __ fmrs(R1, arg2);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), R0, R1);
+ __ fmsr(S0_tos, R0); // result is moved from R0 back into the FP tos register
+#else
+ __ mov_float(S1_reg, arg2); // arguments are placed in FP registers; no core registers are passed to the call
+ __ pop_f(S0);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
+#endif // !__ABI_HARD__
+ break;
+ default : ShouldNotReachHere();
+ }
+#endif // __SOFTFP__
+}
+
+
+void TemplateTable::dop2(Operation op) { // binary double operation selected by 'op'; second operand is cached, first is popped
+ transition(dtos, dtos);
+#ifdef __SOFTFP__
+ __ mov(R2, R0_tos_lo); // cached (second) operand becomes call arguments R2:R3
+ __ mov(R3, R1_tos_hi);
+ __ pop_l(R0, R1); // popped (first) operand becomes call arguments R0:R1
+ switch (op) {
+ // __aeabi_XXXX_glibc: Imported code from glibc soft-fp bundle for calculation accuracy improvement. See CR 6757269.
+ case add: __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_dadd_glibc), R0, R1, R2, R3); break;
+ case sub: __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_dsub_glibc), R0, R1, R2, R3); break;
+ case mul: __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_dmul), R0, R1, R2, R3); break;
+ case div: __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_ddiv), R0, R1, R2, R3); break;
+ case rem: __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), R0, R1, R2, R3); break;
+ default : ShouldNotReachHere();
+ }
+#else
+ const FloatRegister arg1 = D1_tmp;
+ const FloatRegister arg2 = D0_tos;
+ // Hardware FP: add/sub/mul/div are single instructions; rem still goes through SharedRuntime::drem.
+ switch (op) {
+ case add: __ pop_d(arg1); __ add_double(D0_tos, arg1, arg2); break;
+ case sub: __ pop_d(arg1); __ sub_double(D0_tos, arg1, arg2); break;
+ case mul: __ pop_d(arg1); __ mul_double(D0_tos, arg1, arg2); break;
+ case div: __ pop_d(arg1); __ div_double(D0_tos, arg1, arg2); break;
+ case rem:
+#ifndef __ABI_HARD__
+ __ pop_d(arg1);
+ __ fmrrd(R0, R1, arg1); // arguments are moved into core register pairs for the call
+ __ fmrrd(R2, R3, arg2);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), R0, R1, R2, R3);
+ __ fmdrr(D0_tos, R0, R1); // result is moved from R0:R1 back into the FP tos register
+#else
+ __ mov_double(D1, arg2); // arguments are placed in FP registers; no core registers are passed to the call
+ __ pop_d(D0);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
+#endif // !__ABI_HARD__
+ break;
+ default : ShouldNotReachHere();
+ }
+#endif // __SOFTFP__
+}
+
+
+void TemplateTable::ineg() { // ineg: negate the cached int in place
+ transition(itos, itos);
+ __ neg_32(R0_tos, R0_tos); // source and destination are both the tos register
+}
+
+
+void TemplateTable::lneg() { // lneg: negate the cached long in place
+ transition(ltos, ltos);
+#ifdef AARCH64
+ __ neg(R0_tos, R0_tos);
+#else
+ __ rsbs(R0_tos_lo, R0_tos_lo, 0); // low word: 0 - lo, setting the flags
+ __ rsc (R1_tos_hi, R1_tos_hi, 0); // high word: 0 - hi, consuming the carry/borrow left by rsbs
+#endif // AARCH64
+}
+
+
+void TemplateTable::fneg() {
+ transition(ftos, ftos);
+#ifndef __SOFTFP__
+ __ neg_float(S0_tos, S0_tos);
+#else
+ // Soft-float: negate by toggling the top (sign) bit of the 32-bit pattern in R0_tos.
+ const int sign_bit = 0x80000000;
+ __ eor(R0_tos, R0_tos, sign_bit);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::dneg() {
+ transition(dtos, dtos);
+#ifndef __SOFTFP__
+ __ neg_double(D0_tos, D0_tos);
+#else
+ // Soft-float: the sign lives in the high word, so only R1_tos_hi has its top bit toggled.
+ const int sign_bit_hi = 0x80000000;
+ __ eor(R1_tos_hi, R1_tos_hi, sign_bit_hi);
+#endif // !__SOFTFP__
+}
+
+
+void TemplateTable::iinc() {
+ transition(vtos, vtos);
+ const Register Rcur = R0_tmp; // current value of the local
+ const Register Rslot = R1_tmp; // local index
+ const Register Rdelta = R2_tmp; // increment
+ // The increment is the sign-extended byte at bcp+2.
+ __ ldrsb(Rdelta, at_bcp(2));
+ locals_index(Rslot);
+ Address slot_addr = load_iaddress(Rslot, Rtemp);
+ __ ldr_s32(Rcur, slot_addr);
+ __ add(Rcur, Rcur, Rdelta);
+ __ str_32(Rcur, slot_addr); // read-modify-write of the local
+}
+
+
+void TemplateTable::wide_iinc() {
+ transition(vtos, vtos);
+ const Register Rcur = R0_tmp; // current value of the local
+ const Register Rslot = R1_tmp; // local index
+ const Register Rdelta = R2_tmp; // increment
+
+ // Build the increment from bcp+4 (signed high byte) and bcp+5 (unsigned low byte).
+ __ ldrsb(Rdelta, at_bcp(4));
+ __ ldrb(R3_tmp, at_bcp(5));
+ __ orr(Rdelta, R3_tmp, AsmOperand(Rdelta, lsl, 8));
+
+ locals_index_wide(Rslot);
+ Address slot_addr = load_iaddress(Rslot, Rtemp);
+ __ ldr_s32(Rcur, slot_addr);
+ __ add(Rcur, Rcur, Rdelta);
+ __ str_32(Rcur, slot_addr);
+}
+
+
+void TemplateTable::convert() { // shared template for the primitive conversion bytecodes; dispatches on bytecode()
+ // Checking: in ASSERT builds, derive the expected tos_in/tos_out for this bytecode and declare the transition
+#ifdef ASSERT
+ { TosState tos_in = ilgl;
+ TosState tos_out = ilgl;
+ switch (bytecode()) {
+ case Bytecodes::_i2l: // fall through
+ case Bytecodes::_i2f: // fall through
+ case Bytecodes::_i2d: // fall through
+ case Bytecodes::_i2b: // fall through
+ case Bytecodes::_i2c: // fall through
+ case Bytecodes::_i2s: tos_in = itos; break;
+ case Bytecodes::_l2i: // fall through
+ case Bytecodes::_l2f: // fall through
+ case Bytecodes::_l2d: tos_in = ltos; break;
+ case Bytecodes::_f2i: // fall through
+ case Bytecodes::_f2l: // fall through
+ case Bytecodes::_f2d: tos_in = ftos; break;
+ case Bytecodes::_d2i: // fall through
+ case Bytecodes::_d2l: // fall through
+ case Bytecodes::_d2f: tos_in = dtos; break;
+ default : ShouldNotReachHere();
+ }
+ switch (bytecode()) {
+ case Bytecodes::_l2i: // fall through
+ case Bytecodes::_f2i: // fall through
+ case Bytecodes::_d2i: // fall through
+ case Bytecodes::_i2b: // fall through
+ case Bytecodes::_i2c: // fall through
+ case Bytecodes::_i2s: tos_out = itos; break;
+ case Bytecodes::_i2l: // fall through
+ case Bytecodes::_f2l: // fall through
+ case Bytecodes::_d2l: tos_out = ltos; break;
+ case Bytecodes::_i2f: // fall through
+ case Bytecodes::_l2f: // fall through
+ case Bytecodes::_d2f: tos_out = ftos; break;
+ case Bytecodes::_i2d: // fall through
+ case Bytecodes::_l2d: // fall through
+ case Bytecodes::_f2d: tos_out = dtos; break;
+ default : ShouldNotReachHere();
+ }
+ transition(tos_in, tos_out);
+ }
+#endif // ASSERT
+
+ // Conversion: emit the code for the selected bytecode (AArch64, 32-bit soft-float and 32-bit hardware-FP variants)
+ switch (bytecode()) {
+ case Bytecodes::_i2l:
+#ifdef AARCH64
+ __ sign_extend(R0_tos, R0_tos, 32);
+#else
+ __ mov(R1_tos_hi, AsmOperand(R0_tos, asr, BitsPerWord-1)); // high word = low word arithmetically shifted right by BitsPerWord-1
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_i2f:
+#ifdef AARCH64
+ __ scvtf_sw(S0_tos, R0_tos);
+#else
+#ifdef __SOFTFP__
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_i2f), R0_tos);
+#else
+ __ fmsr(S0_tmp, R0_tos); // move the int into an FP register first, then convert
+ __ fsitos(S0_tos, S0_tmp);
+#endif // __SOFTFP__
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_i2d:
+#ifdef AARCH64
+ __ scvtf_dw(D0_tos, R0_tos);
+#else
+#ifdef __SOFTFP__
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_i2d), R0_tos);
+#else
+ __ fmsr(S0_tmp, R0_tos); // move the int into an FP register first, then convert
+ __ fsitod(D0_tos, S0_tmp);
+#endif // __SOFTFP__
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_i2b:
+ __ sign_extend(R0_tos, R0_tos, 8);
+ break;
+
+ case Bytecodes::_i2c:
+ __ zero_extend(R0_tos, R0_tos, 16);
+ break;
+
+ case Bytecodes::_i2s:
+ __ sign_extend(R0_tos, R0_tos, 16);
+ break;
+
+ case Bytecodes::_l2i:
+ /* nothing to do */
+ break;
+
+ case Bytecodes::_l2f:
+#ifdef AARCH64
+ __ scvtf_sx(S0_tos, R0_tos);
+#else
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2f), R0_tos_lo, R1_tos_hi);
+#if !defined(__SOFTFP__) && !defined(__ABI_HARD__)
+ __ fmsr(S0_tos, R0); // hardware FP without the hard-float ABI: copy the value in R0 into the FP tos register
+#endif // !__SOFTFP__ && !__ABI_HARD__
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_l2d:
+#ifdef AARCH64
+ __ scvtf_dx(D0_tos, R0_tos);
+#else
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2d), R0_tos_lo, R1_tos_hi);
+#if !defined(__SOFTFP__) && !defined(__ABI_HARD__)
+ __ fmdrr(D0_tos, R0, R1); // hardware FP without the hard-float ABI: copy the value in R0:R1 into the FP tos register
+#endif // !__SOFTFP__ && !__ABI_HARD__
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_f2i:
+#ifdef AARCH64
+ __ fcvtzs_ws(R0_tos, S0_tos);
+#else
+#ifndef __SOFTFP__
+ __ ftosizs(S0_tos, S0_tos); // convert inside the FP register, then move the result to the core tos register
+ __ fmrs(R0_tos, S0_tos);
+#else
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), R0_tos);
+#endif // !__SOFTFP__
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_f2l:
+#ifdef AARCH64
+ __ fcvtzs_xs(R0_tos, S0_tos);
+#else
+#ifndef __SOFTFP__
+ __ fmrs(R0_tos, S0_tos); // the runtime call below takes its argument in a core register
+#endif // !__SOFTFP__
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), R0_tos);
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_f2d:
+#ifdef __SOFTFP__
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_f2d), R0_tos);
+#else
+ __ convert_f2d(D0_tos, S0_tos);
+#endif // __SOFTFP__
+ break;
+
+ case Bytecodes::_d2i:
+#ifdef AARCH64
+ __ fcvtzs_wd(R0_tos, D0_tos);
+#else
+#ifndef __SOFTFP__
+ __ ftosizd(Stemp, D0); // NOTE(review): uses D0/R0 where sibling cases use D0_tos/R0_tos - presumably the same registers; confirm
+ __ fmrs(R0, Stemp);
+#else
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), R0_tos_lo, R1_tos_hi);
+#endif // !__SOFTFP__
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_d2l:
+#ifdef AARCH64
+ __ fcvtzs_xd(R0_tos, D0_tos);
+#else
+#ifndef __SOFTFP__
+ __ fmrrd(R0_tos_lo, R1_tos_hi, D0_tos); // the runtime call below takes its argument in a core register pair
+#endif // !__SOFTFP__
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), R0_tos_lo, R1_tos_hi);
+#endif // AARCH64
+ break;
+
+ case Bytecodes::_d2f:
+#ifdef __SOFTFP__
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_d2f), R0_tos_lo, R1_tos_hi);
+#else
+ __ convert_d2f(S0_tos, D0_tos);
+#endif // __SOFTFP__
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+
+void TemplateTable::lcmp() {
+ transition(ltos, itos);
+#ifdef AARCH64
+ const Register rhs = R0_tos; // second operand arrives cached in tos
+ const Register lhs = R1_tmp; // first operand is popped
+
+ __ pop_l(lhs);
+ // Compare once, then derive -1/0/+1 from the flags.
+ __ cmp(lhs, rhs);
+ __ cset(R0_tos, gt); // 1 when lhs > rhs, otherwise 0
+ __ csinv(R0_tos, R0_tos, ZR, ge); // keep that value when lhs >= rhs, otherwise -1
+#else
+ const Register rhs_lo = R0_tos_lo;
+ const Register rhs_hi = R1_tos_hi;
+ const Register lhs_lo = R2_tmp;
+ const Register lhs_hi = R3_tmp;
+ const Register Rresult = R4_tmp;
+
+ __ pop_l(lhs_lo, lhs_hi);
+
+ // Compare lhs with rhs as longs, producing -1/0/+1 for '<'/'='/'>'.
+ // High words are compared signed; only if they are equal are the low words compared unsigned.
+ Label L_done;
+
+ __ mov (Rresult, 0);
+ __ cmp (lhs_hi, rhs_hi);
+ __ mvn (Rresult, 0, lt);
+ __ mov (Rresult, 1, gt);
+ __ b(L_done, ne);
+ __ cmp (lhs_lo, rhs_lo);
+ __ mvn (Rresult, 0, lo);
+ __ mov (Rresult, 1, hi);
+ __ bind(L_done);
+ __ mov (R0_tos, Rresult);
+#endif // AARCH64
+}
+
+
+void TemplateTable::float_cmp(bool is_float, int unordered_result) {
+ assert((unordered_result == 1) || (unordered_result == -1), "invalid unordered result");
+ // is_float selects the float (ftos) or double (dtos) form; unordered_result (+1 or -1) is the int result for an unordered comparison.
+#ifdef AARCH64
+ if (is_float) {
+ transition(ftos, itos);
+ __ pop_f(S1_tmp);
+ __ fcmp_s(S1_tmp, S0_tos);
+ } else {
+ transition(dtos, itos);
+ __ pop_d(D1_tmp);
+ __ fcmp_d(D1_tmp, D0_tos);
+ }
+ // Popped operand is compared against the cached one; the conditions below turn the flags into -1/0/+1.
+ if (unordered_result < 0) {
+ __ cset(R0_tos, gt); // 1 if '>', else 0
+ __ csinv(R0_tos, R0_tos, ZR, ge); // previous value if '>=', else -1
+ } else {
+ __ cset(R0_tos, hi); // 1 if '>' or unordered, else 0
+ __ csinv(R0_tos, R0_tos, ZR, pl); // previous value if '>=' or unordered, else -1
+ }
+
+#else
+
+#ifdef __SOFTFP__
+ // Soft-float: operands are marshalled into core registers and compared by a SharedRuntime leaf routine.
+ if (is_float) {
+ transition(ftos, itos);
+ const Register Rx = R0;
+ const Register Ry = R1;
+
+ __ mov(Ry, R0_tos);
+ __ pop_i(Rx);
+ // unordered_result == 1 selects the 'g' routine, otherwise the 'l' routine.
+ if (unordered_result == 1) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fcmpg), Rx, Ry);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fcmpl), Rx, Ry);
+ }
+
+ } else {
+
+ transition(dtos, itos);
+ const Register Rx_lo = R0;
+ const Register Rx_hi = R1;
+ const Register Ry_lo = R2;
+ const Register Ry_hi = R3;
+
+ __ mov(Ry_lo, R0_tos_lo);
+ __ mov(Ry_hi, R1_tos_hi);
+ __ pop_l(Rx_lo, Rx_hi);
+ // unordered_result == 1 selects the 'g' routine, otherwise the 'l' routine.
+ if (unordered_result == 1) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcmpg), Rx_lo, Rx_hi, Ry_lo, Ry_hi);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcmpl), Rx_lo, Rx_hi, Ry_lo, Ry_hi);
+ }
+ }
+
+#else
+ // 32-bit hardware FP: compare in the FP unit, then read the outcome through the condition flags.
+ if (is_float) {
+ transition(ftos, itos);
+ __ pop_f(S1_tmp);
+ __ fcmps(S1_tmp, S0_tos);
+ } else {
+ transition(dtos, itos);
+ __ pop_d(D1_tmp);
+ __ fcmpd(D1_tmp, D0_tos);
+ }
+
+ __ fmstat(); // flags after this point follow the table below
+
+ // comparison result | flag N | flag Z | flag C | flag V
+ // "<" | 1 | 0 | 0 | 0
+ // "==" | 0 | 1 | 1 | 0
+ // ">" | 0 | 0 | 1 | 0
+ // unordered | 0 | 0 | 1 | 1
+
+ if (unordered_result < 0) {
+ __ mov(R0_tos, 1); // result == 1 if greater
+ __ mvn(R0_tos, 0, lt); // result == -1 if less or unordered (N!=V)
+ } else {
+ __ mov(R0_tos, 1); // result == 1 if greater or unordered
+ __ mvn(R0_tos, 0, mi); // result == -1 if less (N=1)
+ }
+ __ mov(R0_tos, 0, eq); // result == 0 if equ (Z=1)
+#endif // __SOFTFP__
+#endif // AARCH64
+}
+
+
+void TemplateTable::branch(bool is_jsr, bool is_wide) {
+
+ const Register Rdisp = R0_tmp;
+ const Register Rbumped_taken_count = R5_tmp;
+
+ __ profile_taken_branch(R0_tmp, Rbumped_taken_count); // R0 holds updated MDP, Rbumped_taken_count holds bumped taken count
+
+ const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
+ InvocationCounter::counter_offset();
+ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
+ InvocationCounter::counter_offset();
+ const int method_offset = frame::interpreter_frame_method_offset * wordSize;
+
+ // Load up R0 with the branch displacement
+ if (is_wide) {
+ __ ldrsb(R0_tmp, at_bcp(1));
+ __ ldrb(R1_tmp, at_bcp(2));
+ __ ldrb(R2_tmp, at_bcp(3));
+ __ ldrb(R3_tmp, at_bcp(4));
+ __ orr(R0_tmp, R1_tmp, AsmOperand(R0_tmp, lsl, BitsPerByte));
+ __ orr(R0_tmp, R2_tmp, AsmOperand(R0_tmp, lsl, BitsPerByte));
+ __ orr(Rdisp, R3_tmp, AsmOperand(R0_tmp, lsl, BitsPerByte));
+ } else {
+ __ ldrsb(R0_tmp, at_bcp(1));
+ __ ldrb(R1_tmp, at_bcp(2));
+ __ orr(Rdisp, R1_tmp, AsmOperand(R0_tmp, lsl, BitsPerByte));
+ }
+
+ // Handle all the JSR stuff here, then exit.
+ // It's much shorter and cleaner than intermingling with the
+ // non-JSR normal-branch stuff occurring below.
+ if (is_jsr) {
+ // compute return address as bci in R1
+ const Register Rret_addr = R1_tmp;
+ assert_different_registers(Rdisp, Rret_addr, Rtemp);
+
+ __ ldr(Rtemp, Address(Rmethod, Method::const_offset()));
+ __ sub(Rret_addr, Rbcp, - (is_wide ? 5 : 3) + in_bytes(ConstMethod::codes_offset()));
+ __ sub(Rret_addr, Rret_addr, Rtemp);
+
+ // Load the next target bytecode into R3_bytecode and advance Rbcp
+#ifdef AARCH64
+ __ add(Rbcp, Rbcp, Rdisp);
+ __ ldrb(R3_bytecode, Address(Rbcp));
+#else
+ __ ldrb(R3_bytecode, Address(Rbcp, Rdisp, lsl, 0, pre_indexed));
+#endif // AARCH64
+
+ // Push return address
+ __ push_i(Rret_addr);
+ // jsr returns vtos
+ __ dispatch_only_noverify(vtos);
+ return;
+ }
+
+ // Normal (non-jsr) branch handling
+
+ // Adjust the bcp by the displacement in Rdisp and load next bytecode.
+#ifdef AARCH64
+ __ add(Rbcp, Rbcp, Rdisp);
+ __ ldrb(R3_bytecode, Address(Rbcp));
+#else
+ __ ldrb(R3_bytecode, Address(Rbcp, Rdisp, lsl, 0, pre_indexed));
+#endif // AARCH64
+
+ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
+ Label backedge_counter_overflow;
+ Label profile_method;
+ Label dispatch;
+
+ if (UseLoopCounter) {
+ // increment backedge counter for backward branches
+ // Rdisp (R0): target offset
+
+ const Register Rcnt = R2_tmp;
+ const Register Rcounters = R1_tmp;
+
+ // count only if backward branch
+#ifdef AARCH64
+ __ tbz(Rdisp, (BitsPerWord - 1), dispatch); // TODO-AARCH64: check performance of this variant on 32-bit ARM
+#else
+ __ tst(Rdisp, Rdisp);
+ __ b(dispatch, pl);
+#endif // AARCH64
+
+ if (TieredCompilation) {
+ Label no_mdo;
+ int increment = InvocationCounter::count_increment;
+ if (ProfileInterpreter) {
+ // Are we profiling?
+ __ ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
+ __ cbz(Rtemp, no_mdo);
+ // Increment the MDO backedge counter
+ const Address mdo_backedge_counter(Rtemp, in_bytes(MethodData::backedge_counter_offset()) +
+ in_bytes(InvocationCounter::counter_offset()));
+ const Address mask(Rtemp, in_bytes(MethodData::backedge_mask_offset()));
+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
+ Rcnt, R4_tmp, eq, &backedge_counter_overflow);
+ __ b(dispatch);
+ }
+ __ bind(no_mdo);
+ // Increment backedge counter in MethodCounters*
+ __ get_method_counters(Rmethod, Rcounters, dispatch);
+ const Address mask(Rcounters, in_bytes(MethodCounters::backedge_mask_offset()));
+ __ increment_mask_and_jump(Address(Rcounters, be_offset), increment, mask,
+ Rcnt, R4_tmp, eq, &backedge_counter_overflow);
+ } else {
+ // increment counter
+ __ get_method_counters(Rmethod, Rcounters, dispatch);
+ __ ldr_u32(Rtemp, Address(Rcounters, be_offset)); // load backedge counter
+ __ add(Rtemp, Rtemp, InvocationCounter::count_increment); // increment counter
+ __ str_32(Rtemp, Address(Rcounters, be_offset)); // store counter
+
+ __ ldr_u32(Rcnt, Address(Rcounters, inv_offset)); // load invocation counter
+#ifdef AARCH64
+ __ andr(Rcnt, Rcnt, (unsigned int)InvocationCounter::count_mask_value); // and the status bits
+#else
+ __ bic(Rcnt, Rcnt, ~InvocationCounter::count_mask_value); // and the status bits
+#endif // AARCH64
+ __ add(Rcnt, Rcnt, Rtemp); // add both counters
+
+ if (ProfileInterpreter) {
+ // Test to see if we should create a method data oop
+ const Address profile_limit(Rcounters, in_bytes(MethodCounters::interpreter_profile_limit_offset()));
+ __ ldr_s32(Rtemp, profile_limit);
+ __ cmp_32(Rcnt, Rtemp);
+ __ b(dispatch, lt);
+
+ // if no method data exists, go to profile method
+ __ test_method_data_pointer(R4_tmp, profile_method);
+
+ if (UseOnStackReplacement) {
+ // check for overflow against Rbumped_taken_count, which is the MDO taken count
+ const Address backward_branch_limit(Rcounters, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()));
+ __ ldr_s32(Rtemp, backward_branch_limit);
+ __ cmp(Rbumped_taken_count, Rtemp);
+ __ b(dispatch, lo);
+
+ // When ProfileInterpreter is on, the backedge_count comes from the
+ // MethodData*, which value does not get reset on the call to
+ // frequency_counter_overflow(). To avoid excessive calls to the overflow
+ // routine while the method is being compiled, add a second test to make
+ // sure the overflow function is called only once every overflow_frequency.
+ const int overflow_frequency = 1024;
+
+#ifdef AARCH64
+ __ tst(Rbumped_taken_count, (unsigned)(overflow_frequency-1));
+#else
+ // was '__ andrs(...,overflow_frequency-1)', testing if lowest 10 bits are 0
+ assert(overflow_frequency == (1 << 10),"shift by 22 not correct for expected frequency");
+ __ movs(Rbumped_taken_count, AsmOperand(Rbumped_taken_count, lsl, 22));
+#endif // AARCH64
+
+ __ b(backedge_counter_overflow, eq);
+ }
+ } else {
+ if (UseOnStackReplacement) {
+ // check for overflow against Rcnt, which is the sum of the counters
+ const Address backward_branch_limit(Rcounters, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()));
+ __ ldr_s32(Rtemp, backward_branch_limit);
+ __ cmp_32(Rcnt, Rtemp);
+ __ b(backedge_counter_overflow, hs);
+
+ }
+ }
+ }
+ __ bind(dispatch);
+ }
+
+ if (!UseOnStackReplacement) {
+ __ bind(backedge_counter_overflow);
+ }
+
+ // continue with the bytecode @ target
+ __ dispatch_only(vtos);
+
+ if (UseLoopCounter) {
+ if (ProfileInterpreter) {
+ // Out-of-line code to allocate method data oop.
+ __ bind(profile_method);
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+ __ set_method_data_pointer_for_bcp();
+ // reload next bytecode
+ __ ldrb(R3_bytecode, Address(Rbcp));
+ __ b(dispatch);
+ }
+
+ if (UseOnStackReplacement) {
+ // invocation counter overflow
+ __ bind(backedge_counter_overflow);
+
+ __ sub(R1, Rbcp, Rdisp); // branch bcp
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R1);
+
+ // R0: osr nmethod (osr ok) or NULL (osr not possible)
+ const Register Rnmethod = R0;
+
+ __ ldrb(R3_bytecode, Address(Rbcp)); // reload next bytecode
+
+ __ cbz(Rnmethod, dispatch); // test result, no osr if null
+
+ // nmethod may have been invalidated (VM may block upon call_VM return)
+ __ ldrb(R1_tmp, Address(Rnmethod, nmethod::state_offset()));
+ __ cmp(R1_tmp, nmethod::in_use);
+ __ b(dispatch, ne);
+
+ // We have the address of an on stack replacement routine in Rnmethod,
+ // We need to prepare to execute the OSR method. First we must
+ // migrate the locals and monitors off of the stack.
+
+ __ mov(Rtmp_save0, Rnmethod); // save the nmethod
+
+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
+
+ // R0 is OSR buffer
+
+ __ ldr(R1_tmp, Address(Rtmp_save0, nmethod::osr_entry_point_offset()));
+ __ ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
+
+#ifdef AARCH64
+ __ ldp(FP, LR, Address(FP));
+ __ mov(SP, Rtemp);
+#else
+ __ ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
+ __ bic(SP, Rtemp, StackAlignmentInBytes - 1); // Remove frame and align stack
+#endif // AARCH64
+
+ __ jump(R1_tmp);
+ }
+ }
+}
+
+// Conditional branch on the int in R0_tos compared against zero: emits the taken-branch code when 'cc' holds, otherwise control continues at not_taken.
+void TemplateTable::if_0cmp(Condition cc) {
+ transition(itos, vtos);
+ // assume branch is more often taken than not (loops use backward branches)
+ Label not_taken;
+#ifdef AARCH64
+ if (cc == equal) {
+ __ cbnz_w(R0_tos, not_taken); // value != 0: 'equal' does not hold, skip the branch
+ } else if (cc == not_equal) {
+ __ cbz_w(R0_tos, not_taken); // value == 0: 'not_equal' does not hold, skip the branch
+ } else {
+ __ cmp_32(R0_tos, 0);
+ __ b(not_taken, convNegCond(cc)); // convNegCond presumably yields the negation of cc -- defined outside this chunk
+ }
+#else
+ __ cmp_32(R0_tos, 0);
+ __ b(not_taken, convNegCond(cc)); // same compare-and-skip sequence as the generic AArch64 case above
+#endif // AARCH64
+ branch(false, false); // taken path; NOTE(review): arguments presumably mean is_jsr=false, is_wide=false -- signature not visible here
+ __ bind(not_taken);
+ __ profile_not_taken_branch(R0_tmp); // fall-through path: record the not-taken outcome
+}
+
+// Conditional branch comparing two ints: the operand popped into R1_tmp against the top-of-stack value in R0_tos.
+void TemplateTable::if_icmp(Condition cc) {
+ transition(itos, vtos);
+ // assume branch is more often taken than not (loops use backward branches)
+ Label not_taken;
+ __ pop_i(R1_tmp); // left-hand operand; the right-hand operand is already cached in R0_tos
+ __ cmp_32(R1_tmp, R0_tos);
+ __ b(not_taken, convNegCond(cc)); // skip the branch when 'cc' does not hold (convNegCond presumably negates cc)
+ branch(false, false); // taken path
+ __ bind(not_taken);
+ __ profile_not_taken_branch(R0_tmp); // fall-through path: record the not-taken outcome
+}
+
+// Conditional branch on whether the reference in R0_tos is null; only 'equal' and 'not_equal' are accepted.
+void TemplateTable::if_nullcmp(Condition cc) {
+ transition(atos, vtos);
+ assert(cc == equal || cc == not_equal, "invalid condition");
+
+ // assume branch is more often taken than not (loops use backward branches)
+ Label not_taken;
+ if (cc == equal) {
+ __ cbnz(R0_tos, not_taken); // branch-if-null requested, reference is non-null: skip the branch
+ } else {
+ __ cbz(R0_tos, not_taken); // branch-if-non-null requested, reference is null: skip the branch
+ }
+ branch(false, false); // taken path
+ __ bind(not_taken);
+ __ profile_not_taken_branch(R0_tmp); // fall-through path: record the not-taken outcome
+}
+
+// Conditional branch comparing two references: the operand popped into R1_tmp against the top-of-stack value in R0_tos.
+void TemplateTable::if_acmp(Condition cc) {
+ transition(atos, vtos);
+ // assume branch is more often taken than not (loops use backward branches)
+ Label not_taken;
+ __ pop_ptr(R1_tmp); // left-hand operand; the right-hand operand is already cached in R0_tos
+ __ cmp(R1_tmp, R0_tos); // full-width compare (if_icmp uses cmp_32 instead)
+ __ b(not_taken, convNegCond(cc)); // skip the branch when 'cc' does not hold (convNegCond presumably negates cc)
+ branch(false, false); // taken path
+ __ bind(not_taken);
+ __ profile_not_taken_branch(R0_tmp); // fall-through path: record the not-taken outcome
+}
+
+// ret: reads a return bci from a local variable, rebuilds Rbcp from it and dispatches the bytecode found there.
+void TemplateTable::ret() {
+ transition(vtos, vtos);
+ const Register Rlocal_index = R1_tmp;
+ const Register Rret_bci = Rtmp_save0; // R4/R19
+
+ locals_index(Rlocal_index); // local-variable index operand of this bytecode (helper defined outside this chunk)
+ Address local = load_iaddress(Rlocal_index, Rtemp);
+ __ ldr_s32(Rret_bci, local); // get return bci, compute return bcp
+ __ profile_ret(Rtmp_save1, Rret_bci);
+ __ ldr(Rtemp, Address(Rmethod, Method::const_offset())); // Rtemp = value at Method::const_offset() of the current method
+ __ add(Rtemp, Rtemp, in_bytes(ConstMethod::codes_offset())); // Rtemp = start of the method's bytecodes
+ __ add(Rbcp, Rtemp, Rret_bci); // Rbcp = code base + return bci
+ __ dispatch_next(vtos);
+}
+
+// wide_ret: same sequence as ret(), except that the local index is fetched with locals_index_wide().
+void TemplateTable::wide_ret() {
+ transition(vtos, vtos);
+ const Register Rlocal_index = R1_tmp;
+ const Register Rret_bci = Rtmp_save0; // R4/R19
+
+ locals_index_wide(Rlocal_index); // wide form of the local-variable index operand (helper defined outside this chunk)
+ Address local = load_iaddress(Rlocal_index, Rtemp);
+ __ ldr_s32(Rret_bci, local); // get return bci, compute return bcp
+ __ profile_ret(Rtmp_save1, Rret_bci);
+ __ ldr(Rtemp, Address(Rmethod, Method::const_offset())); // Rtemp = value at Method::const_offset() of the current method
+ __ add(Rtemp, Rtemp, in_bytes(ConstMethod::codes_offset())); // Rtemp = start of the method's bytecodes
+ __ add(Rbcp, Rtemp, Rret_bci); // Rbcp = code base + return bci
+ __ dispatch_next(vtos);
+}
+
+// tableswitch: the index in R0_tos selects a 32-bit branch offset from the table after the aligned low/high words; an out-of-range index uses the default offset.
+void TemplateTable::tableswitch() {
+ transition(itos, vtos);
+
+ const Register Rindex = R0_tos;
+#ifndef AARCH64
+ const Register Rtemp2 = R1_tmp; // 32-bit ARM only; the AArch64 paths below use an Rtemp2 defined outside this function
+#endif // !AARCH64
+ const Register Rabcp = R2_tmp; // aligned bcp
+ const Register Rlow = R3_tmp;
+ const Register Rhigh = R4_tmp;
+ const Register Roffset = R5_tmp;
+
+ // align bcp: Rabcp ends up at the 'low' word, one int past the default offset (assumes align_reg rounds down -- TODO confirm)
+ __ add(Rtemp, Rbcp, 1 + (2*BytesPerInt-1));
+ __ align_reg(Rabcp, Rtemp, BytesPerInt);
+
+ // load lo & hi; Rabcp is advanced past both words, so it then addresses the first table entry
+#ifdef AARCH64
+ __ ldp_w(Rlow, Rhigh, Address(Rabcp, 2*BytesPerInt, post_indexed));
+#else
+ __ ldmia(Rabcp, RegisterSet(Rlow) | RegisterSet(Rhigh), writeback);
+#endif // AARCH64
+ __ byteswap_u32(Rlow, Rtemp, Rtemp2); // swap the bytes of the loaded bounds before comparing them with Rindex
+ __ byteswap_u32(Rhigh, Rtemp, Rtemp2);
+
+ // compare index with high bound
+ __ cmp_32(Rhigh, Rindex);
+
+#ifdef AARCH64
+ Label default_case, do_dispatch;
+ __ ccmp_w(Rindex, Rlow, Assembler::flags_for_condition(lt), ge); // if high >= index compare index with low, else force the 'lt' flags
+ __ b(default_case, lt);
+
+ __ sub_w(Rindex, Rindex, Rlow); // zero-based table index
+ __ ldr_s32(Roffset, Address(Rabcp, Rindex, ex_sxtw, LogBytesPerInt));
+ if(ProfileInterpreter) {
+ __ sxtw(Rindex, Rindex);
+ __ profile_switch_case(Rabcp, Rindex, Rtemp2, R0_tmp);
+ }
+ __ b(do_dispatch);
+
+ __ bind(default_case);
+ __ ldr_s32(Roffset, Address(Rabcp, -3 * BytesPerInt)); // default offset sits three ints before the table (default, low, high)
+ if(ProfileInterpreter) {
+ __ profile_switch_default(R0_tmp);
+ }
+
+ __ bind(do_dispatch);
+#else
+
+ // if Rindex <= Rhigh then calculate index in table (Rindex - Rlow)
+ __ subs(Rindex, Rindex, Rlow, ge);
+
+ // if Rindex <= Rhigh and (Rindex - Rlow) >= 0
+ // ("ge" status accumulated from cmp and subs instructions) then load
+ // offset from table, otherwise load offset for default case
+
+ if(ProfileInterpreter) {
+ Label default_case, continue_execution;
+
+ __ b(default_case, lt);
+ __ ldr(Roffset, Address(Rabcp, Rindex, lsl, LogBytesPerInt));
+ __ profile_switch_case(Rabcp, Rindex, Rtemp2, R0_tmp);
+ __ b(continue_execution);
+
+ __ bind(default_case);
+ __ profile_switch_default(R0_tmp);
+ __ ldr(Roffset, Address(Rabcp, -3 * BytesPerInt)); // default offset sits three ints before the table (default, low, high)
+
+ __ bind(continue_execution);
+ } else {
+ __ ldr(Roffset, Address(Rabcp, -3 * BytesPerInt), lt); // conditional loads: exactly one of the two executes
+ __ ldr(Roffset, Address(Rabcp, Rindex, lsl, LogBytesPerInt), ge);
+ }
+#endif // AARCH64
+
+ __ byteswap_u32(Roffset, Rtemp, Rtemp2); // the selected offset was loaded unswapped on every path above
+
+ // load the next bytecode to R3_bytecode and advance Rbcp
+#ifdef AARCH64
+ __ add(Rbcp, Rbcp, Roffset, ex_sxtw);
+ __ ldrb(R3_bytecode, Address(Rbcp));
+#else
+ __ ldrb(R3_bytecode, Address(Rbcp, Roffset, lsl, 0, pre_indexed)); // pre_indexed: Rbcp += Roffset, then load from the new Rbcp
+#endif // AARCH64
+ __ dispatch_only(vtos);
+
+}
+
+// lookupswitch: this template only emits a stop(); per its message the bytecode is expected to have been rewritten before it is ever executed.
+void TemplateTable::lookupswitch() {
+ transition(itos, itos);
+ __ stop("lookupswitch bytecode should have been rewritten");
+}
+
+// fast_linearswitch: linear scan of the (match, offset) pairs that follow the aligned default/count words; the key arrives in R0_tos.
+void TemplateTable::fast_linearswitch() {
+ transition(itos, vtos);
+ Label loop, found, default_case, continue_execution;
+
+ const Register Rkey = R0_tos;
+ const Register Rabcp = R2_tmp; // aligned bcp
+ const Register Rdefault = R3_tmp;
+ const Register Rcount = R4_tmp;
+ const Register Roffset = R5_tmp;
+
+ // bswap Rkey, so we can avoid bswapping the table entries
+ __ byteswap_u32(Rkey, R1_tmp, Rtemp);
+
+ // align bcp: Rabcp addresses the default-offset word (assumes align_reg rounds down -- TODO confirm)
+ __ add(Rtemp, Rbcp, 1 + (BytesPerInt-1));
+ __ align_reg(Rabcp, Rtemp, BytesPerInt);
+
+ // load default & counter; Rabcp is advanced past both words to the first (match, offset) pair
+#ifdef AARCH64
+ __ ldp_w(Rdefault, Rcount, Address(Rabcp, 2*BytesPerInt, post_indexed));
+#else
+ __ ldmia(Rabcp, RegisterSet(Rdefault) | RegisterSet(Rcount), writeback);
+#endif // AARCH64
+ __ byteswap_u32(Rcount, R1_tmp, Rtemp); // only the count is swapped here; Rdefault is swapped later via Roffset
+
+#ifdef AARCH64
+ __ cbz_w(Rcount, default_case);
+#else
+ __ cmp_32(Rcount, 0);
+ __ ldr(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed), ne); // count != 0: preload the first match value and step to the next pair
+ __ b(default_case, eq);
+#endif // AARCH64
+
+ // table search
+ __ bind(loop);
+#ifdef AARCH64
+ __ ldr_s32(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed)); // load the match value and step to the next pair
+#endif // AARCH64
+ __ cmp_32(Rtemp, Rkey); // both sides are in table byte order (Rkey was swapped above)
+ __ b(found, eq);
+ __ subs(Rcount, Rcount, 1);
+#ifndef AARCH64
+ __ ldr(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed), ne); // pairs remain: load the next match value
+#endif // !AARCH64
+ __ b(loop, ne);
+
+ // default case
+ __ bind(default_case);
+ __ profile_switch_default(R0_tmp);
+ __ mov(Roffset, Rdefault);
+ __ b(continue_execution);
+
+ // entry found -> get offset
+ __ bind(found);
+ // Rabcp is already incremented and points to the next entry
+ __ ldr_s32(Roffset, Address(Rabcp, -BytesPerInt)); // offset word of the matching pair
+ if (ProfileInterpreter) {
+ // Calculate index of the selected case.
+ assert_different_registers(Roffset, Rcount, Rtemp, R0_tmp, R1_tmp, R2_tmp);
+
+ // align bcp (recomputed into R2_tmp, which is also Rabcp; Rabcp is not read again after this point)
+ __ add(Rtemp, Rbcp, 1 + (BytesPerInt-1));
+ __ align_reg(R2_tmp, Rtemp, BytesPerInt);
+
+ // load number of cases
+ __ ldr_u32(R2_tmp, Address(R2_tmp, BytesPerInt));
+ __ byteswap_u32(R2_tmp, R1_tmp, Rtemp);
+
+ // Selected index = <number of cases> - <current loop count>
+ __ sub(R1_tmp, R2_tmp, Rcount);
+ __ profile_switch_case(R0_tmp, R1_tmp, Rtemp, R1_tmp); // NOTE(review): R1_tmp is passed twice (index and last argument) -- confirm against profile_switch_case
+ }
+
+ // continue execution
+ __ bind(continue_execution);
+ __ byteswap_u32(Roffset, R1_tmp, Rtemp); // Roffset (default or matched) is still unswapped on both paths
+
+ // load the next bytecode to R3_bytecode and advance Rbcp
+#ifdef AARCH64
+ __ add(Rbcp, Rbcp, Roffset, ex_sxtw);
+ __ ldrb(R3_bytecode, Address(Rbcp));
+#else
+ __ ldrb(R3_bytecode, Address(Rbcp, Roffset, lsl, 0, pre_indexed)); // pre_indexed: Rbcp += Roffset, then load from the new Rbcp
+#endif // AARCH64
+ __ dispatch_only(vtos);
+}
+
+// fast_binaryswitch: binary search of the (match, offset) pairs for the key in R0_tos, then dispatch through the matching or the default offset.
+void TemplateTable::fast_binaryswitch() {
+ transition(itos, vtos);
+ // Implementation using the following core algorithm:
+ //
+ // int binary_search(int key, LookupswitchPair* array, int n) {
+ // // Binary search according to "Methodik des Programmierens" by
+ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
+ // int i = 0;
+ // int j = n;
+ // while (i+1 < j) {
+ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
+ // // with Q: for all i: 0 <= i < n: key < a[i]
+ // // where a stands for the array and assuming that the (nonexistent)
+ // // element a[n] is infinitely big.
+ // int h = (i + j) >> 1;
+ // // i < h < j
+ // if (key < array[h].fast_match()) {
+ // j = h;
+ // } else {
+ // i = h;
+ // }
+ // }
+ // // R: a[i] <= key < a[i+1] or Q
+ // // (i.e., if key is within array, i is the correct index)
+ // return i;
+ // }
+
+ // register allocation
+ const Register key = R0_tos; // already set (tosca)
+ const Register array = R1_tmp;
+ const Register i = R2_tmp;
+ const Register j = R3_tmp;
+ const Register h = R4_tmp;
+ const Register val = R5_tmp;
+ const Register temp1 = Rtemp;
+ const Register temp2 = LR_tmp;
+ const Register offset = R3_tmp; // shares R3_tmp with j; only written after the search loop has finished
+
+ // set 'array' = aligned bcp + 2 ints
+ __ add(temp1, Rbcp, 1 + (BytesPerInt-1) + 2*BytesPerInt);
+ __ align_reg(array, temp1, BytesPerInt);
+
+ // initialize i & j
+ __ mov(i, 0); // i = 0;
+ __ ldr_s32(j, Address(array, -BytesPerInt)); // j = length(array);
+ // Convert j into native byteordering
+ __ byteswap_u32(j, temp1, temp2);
+
+ // and start
+ Label entry;
+ __ b(entry); // enter at the loop condition, matching the 'while' in the algorithm above
+
+ // binary search loop
+ { Label loop;
+ __ bind(loop);
+ // int h = (i + j) >> 1;
+ __ add(h, i, j); // h = i + j;
+ __ logical_shift_right(h, h, 1); // h = (i + j) >> 1;
+ // if (key < array[h].fast_match()) {
+ // j = h;
+ // } else {
+ // i = h;
+ // }
+#ifdef AARCH64
+ __ add(temp1, array, AsmOperand(h, lsl, 1+LogBytesPerInt));
+ __ ldr_s32(val, Address(temp1));
+#else
+ __ ldr_s32(val, Address(array, h, lsl, 1+LogBytesPerInt)); // each pair is two ints wide, hence the shift by 1+LogBytesPerInt
+#endif // AARCH64
+ // Convert array[h].match to native byte-ordering before compare
+ __ byteswap_u32(val, temp1, temp2);
+ __ cmp_32(key, val);
+ __ mov(j, h, lt); // j = h if (key < array[h].fast_match())
+ __ mov(i, h, ge); // i = h if (key >= array[h].fast_match())
+ // while (i+1 < j)
+ __ bind(entry);
+ __ add(temp1, i, 1); // i+1
+ __ cmp(temp1, j); // i+1 < j
+ __ b(loop, lt);
+ }
+
+ // end of binary search, result index is i (must check again!)
+ Label default_case;
+ // Convert array[i].match to native byte-ordering before compare
+#ifdef AARCH64
+ __ add(temp1, array, AsmOperand(i, lsl, 1+LogBytesPerInt));
+ __ ldr_s32(val, Address(temp1));
+#else
+ __ ldr_s32(val, Address(array, i, lsl, 1+LogBytesPerInt));
+#endif // AARCH64
+ __ byteswap_u32(val, temp1, temp2);
+ __ cmp_32(key, val);
+ __ b(default_case, ne);
+
+ // entry found
+ __ add(temp1, array, AsmOperand(i, lsl, 1+LogBytesPerInt)); // temp1 = address of pair i
+ __ ldr_s32(offset, Address(temp1, 1*BytesPerInt)); // second int of the pair is the branch offset
+ __ profile_switch_case(R0, i, R1, i); // R0/R1 are the registers behind key/array (R0_tos/R1_tmp per the aliases above), no longer needed
+ __ byteswap_u32(offset, temp1, temp2);
+#ifdef AARCH64
+ __ add(Rbcp, Rbcp, offset, ex_sxtw);
+ __ ldrb(R3_bytecode, Address(Rbcp));
+#else
+ __ ldrb(R3_bytecode, Address(Rbcp, offset, lsl, 0, pre_indexed)); // pre_indexed: Rbcp += offset, then load from the new Rbcp
+#endif // AARCH64
+ __ dispatch_only(vtos);
+
+ // default case
+ __ bind(default_case);
+ __ profile_switch_default(R0);
+ __ ldr_s32(offset, Address(array, -2*BytesPerInt)); // default offset is stored two ints before the pair array
+ __ byteswap_u32(offset, temp1, temp2);
+#ifdef AARCH64
+ __ add(Rbcp, Rbcp, offset, ex_sxtw);
+ __ ldrb(R3_bytecode, Address(Rbcp));
+#else
+ __ ldrb(R3_bytecode, Address(Rbcp, offset, lsl, 0, pre_indexed)); // pre_indexed: Rbcp += offset, then load from the new Rbcp
+#endif // AARCH64
+ __ dispatch_only(vtos);
+}
+
+// Return templates: optionally registers a finalizer, narrows int results, removes the activation and returns through LR.
+void TemplateTable::_return(TosState state) {
+ transition(state, state);
+ assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
+
+ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
+ Label skip_register_finalizer;
+ assert(state == vtos, "only valid state");
+ __ ldr(R1, aaddress(0)); // local 0; NOTE(review): presumably the receiver of the method being returned from
+ __ load_klass(Rtemp, R1);
+ __ ldr_u32(Rtemp, Address(Rtemp, Klass::access_flags_offset()));
+ __ tbz(Rtemp, exact_log2(JVM_ACC_HAS_FINALIZER), skip_register_finalizer); // flag bit clear: no VM call needed
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), R1);
+
+ __ bind(skip_register_finalizer);
+ }
+
+ // Narrow result if state is itos but result type is smaller.
+ // Need to narrow in the return bytecode rather than in generate_return_entry
+ // since compiled code callers expect the result to already be narrowed.
+ if (state == itos) {
+ __ narrow(R0_tos);
+ }
+ __ remove_activation(state, LR);
+
+ __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
+
+#ifndef AARCH64
+ // According to interpreter calling conventions, result is returned in R0/R1,
+ // so ftos (S0) and dtos (D0) are moved to R0/R1.
+ // This conversion should be done after remove_activation, as it uses
+ // push(state) & pop(state) to preserve return value.
+ __ convert_tos_to_retval(state);
+#endif // !AARCH64
+
+ __ ret();
+
+ __ nop(); // to avoid filling CPU pipeline with invalid instructions
+ __ nop();
+}
+
+
+// ----------------------------------------------------------------------------
+// Volatile variables demand their effects be made known to all CPU's in
+// order. Store buffers on most chips allow reads & writes to reorder; the
+// JMM's ReadAfterWrite.java test fails in -Xint mode without some kind of
+// memory barrier (i.e., it's not sufficient that the interpreter does not
+// reorder volatile references, the hardware also must not reorder them).
+//
+// According to the new Java Memory Model (JMM):
+// (1) All volatiles are serialized wrt each other.
+// ALSO reads & writes act as acquire & release, so:
+// (2) A read cannot let unrelated NON-volatile memory refs that happen after
+// the read float up to before the read. It's OK for non-volatile memory refs
+// that happen before the volatile read to float down below it.
+// (3) Similarly, a volatile write cannot let unrelated NON-volatile memory refs
+// that happen BEFORE the write float down to after the write. It's OK for
+// non-volatile memory refs that happen after the volatile write to float up
+// before it.
+//
+// We only put in barriers around volatile refs (they are expensive), not
+// _between_ memory refs (that would require us to track the flavor of the
+// previous memory refs). Requirements (2) and (3) require some barriers
+// before volatile stores and after volatile loads. These nearly cover
+// requirement (1) but miss the volatile-store-volatile-load case. This final
+// case is placed after volatile-stores although it could just as well go
+// before volatile-loads.
+// TODO-AARCH64: consider removing extra unused parameters
+void TemplateTable::volatile_barrier(MacroAssembler::Membar_mask_bits order_constraint,
+ Register tmp,
+ bool preserve_flags,
+ Register load_tgt) {
+#ifdef AARCH64
+ __ membar(order_constraint); // tmp, preserve_flags and load_tgt are ignored on AArch64
+#else
+ __ membar(order_constraint, tmp, preserve_flags, load_tgt); // 32-bit ARM: all four arguments are forwarded unchanged
+#endif
+}
+// Loads Rcache/Rindex for the current bytecode; if the cache entry does not yet record this bytecode, calls the VM to resolve it and reloads them.
+// Blows all volatile registers: R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR.
+void TemplateTable::resolve_cache_and_index(int byte_no,
+ Register Rcache,
+ Register Rindex,
+ size_t index_size) {
+ assert_different_registers(Rcache, Rindex, Rtemp);
+
+ Label resolved;
+ Bytecodes::Code code = bytecode();
+ switch (code) { // the _nofast_ variants are looked up and resolved under their ordinary bytecode
+ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+ }
+
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, Rindex, Rtemp, byte_no, 1, index_size); // Rtemp receives the bytecode recorded in the entry
+ __ cmp(Rtemp, code); // have we resolved this bytecode?
+ __ b(resolved, eq);
+
+ // resolve first time through
+ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+ __ mov(R1, code);
+ __ call_VM(noreg, entry, R1);
+ // Update registers with resolved info
+ __ get_cache_and_index_at_bcp(Rcache, Rindex, 1, index_size); // reloaded because the VM call clobbers volatile registers (see header)
+ __ bind(resolved);
+}
+
+// Loads the field offset (f2) and flags of a cache entry into Roffset/Rflags; for statics also loads f1 and replaces it with the value at Klass::java_mirror_offset() into Robj.
+// The Rcache and Rindex registers must be set before call
+void TemplateTable::load_field_cp_cache_entry(Register Rcache,
+ Register Rindex,
+ Register Roffset,
+ Register Rflags,
+ Register Robj,
+ bool is_static = false) {
+
+ assert_different_registers(Rcache, Rindex, Rtemp);
+ assert_different_registers(Roffset, Rflags, Robj, Rtemp);
+
+ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+ __ add(Rtemp, Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord)); // Rtemp = Rcache + (Rindex << LogBytesPerWord); base_offset is added per field below
+
+ // Field offset
+ __ ldr(Roffset, Address(Rtemp,
+ cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
+
+ // Flags
+ __ ldr_u32(Rflags, Address(Rtemp,
+ cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
+
+ if (is_static) {
+ __ ldr(Robj, Address(Rtemp,
+ cp_base_offset + ConstantPoolCacheEntry::f1_offset())); // f1 is then dereferenced at Klass::java_mirror_offset(), i.e. it is treated as a Klass*
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+ __ ldr(Robj, Address(Robj, mirror_offset)); // Robj = java mirror, used as the base object for static field access
+ }
+}
+
+// Resolves the invoke's cache entry (via resolve_cache_and_index) and loads its method word (f1 or f2), optional itable index (f2) and flags.
+// Blows all volatile registers: R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR.
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+ Register method,
+ Register itable_index,
+ Register flags,
+ bool is_invokevirtual,
+ bool is_invokevfinal/*unused*/,
+ bool is_invokedynamic) {
+ // setup registers
+ const Register cache = R2_tmp;
+ const Register index = R3_tmp;
+ const Register temp_reg = Rtemp;
+ assert_different_registers(cache, index, temp_reg);
+ assert_different_registers(method, itable_index, temp_reg);
+
+ // determine constant pool cache field offsets
+ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+ const int method_offset = in_bytes(
+ ConstantPoolCache::base_offset() +
+ ((byte_no == f2_byte)
+ ? ConstantPoolCacheEntry::f2_offset()
+ : ConstantPoolCacheEntry::f1_offset()
+ )
+ );
+ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::flags_offset());
+ // offset of f2, read below as the itable index when one is requested
+ const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::f2_offset());
+
+ size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); // invokedynamic uses a 4-byte cache index operand, others 2 bytes
+ resolve_cache_and_index(byte_no, cache, index, index_size);
+ __ add(temp_reg, cache, AsmOperand(index, lsl, LogBytesPerWord)); // temp_reg = cache + (index << LogBytesPerWord)
+ __ ldr(method, Address(temp_reg, method_offset));
+
+ if (itable_index != noreg) { // callers pass noreg when they do not need the index
+ __ ldr(itable_index, Address(temp_reg, index_offset));
+ }
+ __ ldr_u32(flags, Address(temp_reg, flags_offset));
+}
+
+// Emits the JVMTI field-access hook: if the global access-watch count is non-zero, calls InterpreterRuntime::post_field_access. 'has_tos' is not used.
+// The registers cache and index expected to be set before call, and should not be Rtemp.
+// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR,
+// except cache and index registers which are preserved.
+void TemplateTable::jvmti_post_field_access(Register Rcache,
+ Register Rindex,
+ bool is_static,
+ bool has_tos) {
+ assert_different_registers(Rcache, Rindex, Rtemp);
+
+ if (__ can_post_field_access()) { // generation-time check: nothing is emitted when it is false
+ // Check to see if a field access watch has been set before we take
+ // the time to call into the VM.
+
+ Label Lcontinue;
+
+ __ ldr_global_s32(Rtemp, (address)JvmtiExport::get_field_access_count_addr());
+ __ cbz(Rtemp, Lcontinue); // count == 0: skip the VM call
+
+ // cache entry pointer
+ __ add(R2, Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ add(R2, R2, in_bytes(ConstantPoolCache::base_offset()));
+ if (is_static) {
+ __ mov(R1, 0); // NULL object reference
+ } else {
+ __ pop(atos); // Get the object
+ __ mov(R1, R0_tos);
+ __ verify_oop(R1);
+ __ push(atos); // Restore stack state
+ }
+ // R1: object pointer or NULL
+ // R2: cache entry pointer
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
+ R1, R2);
+ __ get_cache_and_index_at_bcp(Rcache, Rindex, 1); // reload: this is how cache/index are "preserved" across the call
+
+ __ bind(Lcontinue);
+ }
+}
+
+// Pops a reference from the expression stack into 'r', emits a null check (Rtemp as scratch) and verifies the oop.
+void TemplateTable::pop_and_check_object(Register r) {
+ __ pop_ptr(r);
+ __ null_check(r, Rtemp); // for field access must check obj.
+ __ verify_oop(r);
+}
+
+// getfield/getstatic template: resolves the field, loads it according to the tos state in the cache-entry flags, pushes it, optionally rewrites the bytecode, and adds a barrier after volatile loads.
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
+ transition(vtos, vtos);
+
+ const Register Roffset = R2_tmp;
+ const Register Robj = R3_tmp;
+ const Register Rcache = R4_tmp;
+ const Register Rflagsav = Rtmp_save0; // R4/R19
+ const Register Rindex = R5_tmp;
+ const Register Rflags = R5_tmp; // shares R5_tmp with Rindex; written by load_field_cp_cache_entry after Rindex was consumed
+
+ const bool gen_volatile_check = os::is_MP(); // the volatile barrier is only generated when os::is_MP() is true
+
+ resolve_cache_and_index(byte_no, Rcache, Rindex, sizeof(u2));
+ jvmti_post_field_access(Rcache, Rindex, is_static, false);
+ load_field_cp_cache_entry(Rcache, Rindex, Roffset, Rflags, Robj, is_static);
+
+ if (gen_volatile_check) {
+ __ mov(Rflagsav, Rflags); // keep the full flags: Rflags is shifted down to the tos state below
+ }
+
+ if (!is_static) pop_and_check_object(Robj);
+
+ Label Done, Lint, Ltable, shouldNotReachHere;
+ Label Lbtos, Lztos, Lctos, Lstos, Litos, Lltos, Lftos, Ldtos, Latos;
+
+ // compute type
+ __ logical_shift_right(Rflags, Rflags, ConstantPoolCacheEntry::tos_state_shift);
+ // Make sure we don't need to mask flags after the above shift
+ ConstantPoolCacheEntry::verify_tos_state_shift();
+
+ // There are actually two versions of implementation of getfield/getstatic:
+ //
+ // 32-bit ARM:
+ // 1) Table switch using add(PC,...) instruction (fast_version)
+ // 2) Table switch using ldr(PC,...) instruction
+ //
+ // AArch64:
+ // 1) Table switch using adr/add/br instructions (fast_version)
+ // 2) Table switch using adr/ldr/br instructions
+ //
+ // First version requires fixed size of code block for each case and
+ // can not be used in RewriteBytecodes and VerifyOops
+ // modes.
+
+ // Size of fixed size code block for fast_version
+ const int log_max_block_size = 2;
+ const int max_block_size = 1 << log_max_block_size;
+
+ // Decide if fast version is enabled
+ bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops && !VerifyInterpreterStackTop;
+
+ // On 32-bit ARM atos and itos cases can be merged only for fast version, because
+ // atos requires additional processing in slow version.
+ // On AArch64 atos and itos cannot be merged.
+ bool atos_merged_with_itos = AARCH64_ONLY(false) NOT_AARCH64(fast_version);
+
+ assert(number_of_states == 10, "number of tos states should be equal to 10");
+
+ __ cmp(Rflags, itos);
+#ifdef AARCH64
+ __ b(Lint, eq);
+
+ if(fast_version) {
+ __ adr(Rtemp, Lbtos);
+ __ add(Rtemp, Rtemp, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize)); // target = Lbtos + state * fixed block size
+ __ br(Rtemp);
+ } else {
+ __ adr(Rtemp, Ltable);
+ __ ldr(Rtemp, Address::indexed_ptr(Rtemp, Rflags)); // target = Ltable[state]
+ __ br(Rtemp);
+ }
+#else
+ if(atos_merged_with_itos) {
+ __ cmp(Rflags, atos, ne); // conditional compare: flags end up 'eq' for either itos or atos
+ }
+
+ // table switch by type
+ if(fast_version) {
+ __ add(PC, PC, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize), ne);
+ } else {
+ __ ldr(PC, Address(PC, Rflags, lsl, LogBytesPerWord), ne);
+ }
+
+ // jump to itos/atos case
+ __ b(Lint);
+#endif // AARCH64
+
+ // table with addresses for slow version
+ if (fast_version) {
+ // nothing to do
+ } else {
+ AARCH64_ONLY(__ align(wordSize));
+ __ bind(Ltable);
+ __ emit_address(Lbtos);
+ __ emit_address(Lztos);
+ __ emit_address(Lctos);
+ __ emit_address(Lstos);
+ __ emit_address(Litos);
+ __ emit_address(Lltos);
+ __ emit_address(Lftos);
+ __ emit_address(Ldtos);
+ __ emit_address(Latos);
+ }
+
+#ifdef ASSERT
+ int seq = 0; // debug-only counter: checks that the blocks below follow the numeric order of the tos states
+#endif
+ // btos
+ {
+ assert(btos == seq++, "btos has unexpected value");
+ FixedSizeCodeBlock btos_block(_masm, max_block_size, fast_version);
+ __ bind(Lbtos);
+ __ ldrsb(R0_tos, Address(Robj, Roffset));
+ __ push(btos);
+ // Rewrite bytecode to be faster
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_bgetfield, R0_tmp, Rtemp);
+ }
+ __ b(Done);
+ }
+
+ // ztos (same as btos for getfield)
+ {
+ assert(ztos == seq++, "ztos has unexpected value");
+ FixedSizeCodeBlock ztos_block(_masm, max_block_size, fast_version);
+ __ bind(Lztos);
+ __ ldrsb(R0_tos, Address(Robj, Roffset));
+ __ push(ztos);
+ // Rewrite bytecode to be faster (use btos fast getfield)
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_bgetfield, R0_tmp, Rtemp);
+ }
+ __ b(Done);
+ }
+
+ // ctos
+ {
+ assert(ctos == seq++, "ctos has unexpected value");
+ FixedSizeCodeBlock ctos_block(_masm, max_block_size, fast_version);
+ __ bind(Lctos);
+ __ ldrh(R0_tos, Address(Robj, Roffset));
+ __ push(ctos);
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_cgetfield, R0_tmp, Rtemp);
+ }
+ __ b(Done);
+ }
+
+ // stos
+ {
+ assert(stos == seq++, "stos has unexpected value");
+ FixedSizeCodeBlock stos_block(_masm, max_block_size, fast_version);
+ __ bind(Lstos);
+ __ ldrsh(R0_tos, Address(Robj, Roffset));
+ __ push(stos);
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_sgetfield, R0_tmp, Rtemp);
+ }
+ __ b(Done);
+ }
+
+ // itos (placeholder slot only: itos is dispatched to Lint before the table switch)
+ {
+ assert(itos == seq++, "itos has unexpected value");
+ FixedSizeCodeBlock itos_block(_masm, max_block_size, fast_version);
+ __ bind(Litos);
+ __ b(shouldNotReachHere);
+ }
+
+ // ltos
+ {
+ assert(ltos == seq++, "ltos has unexpected value");
+ FixedSizeCodeBlock ltos_block(_masm, max_block_size, fast_version);
+ __ bind(Lltos);
+#ifdef AARCH64
+ __ ldr(R0_tos, Address(Robj, Roffset));
+#else
+ __ add(Roffset, Robj, Roffset); // ldmia takes a plain base register, so form the field address first
+ __ ldmia(Roffset, RegisterSet(R0_tos_lo, R1_tos_hi));
+#endif // AARCH64
+ __ push(ltos);
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_lgetfield, R0_tmp, Rtemp);
+ }
+ __ b(Done);
+ }
+
+ // ftos
+ {
+ assert(ftos == seq++, "ftos has unexpected value");
+ FixedSizeCodeBlock ftos_block(_masm, max_block_size, fast_version);
+ __ bind(Lftos);
+ // floats and ints are placed on stack in same way, so
+ // we can use push(itos) to transfer value without using VFP
+ __ ldr_u32(R0_tos, Address(Robj, Roffset));
+ __ push(itos);
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_fgetfield, R0_tmp, Rtemp);
+ }
+ __ b(Done);
+ }
+
+ // dtos
+ {
+ assert(dtos == seq++, "dtos has unexpected value");
+ FixedSizeCodeBlock dtos_block(_masm, max_block_size, fast_version);
+ __ bind(Ldtos);
+ // doubles and longs are placed on stack in the same way, so
+ // we can use push(ltos) to transfer value without using VFP
+#ifdef AARCH64
+ __ ldr(R0_tos, Address(Robj, Roffset));
+#else
+ __ add(Rtemp, Robj, Roffset);
+ __ ldmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
+#endif // AARCH64
+ __ push(ltos);
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_dgetfield, R0_tmp, Rtemp);
+ }
+ __ b(Done);
+ }
+
+ // atos
+ {
+ assert(atos == seq++, "atos has unexpected value");
+
+ // atos case for AArch64 and slow version on 32-bit ARM
+ if(!atos_merged_with_itos) {
+ __ bind(Latos);
+ __ load_heap_oop(R0_tos, Address(Robj, Roffset));
+ __ push(atos);
+ // Rewrite bytecode to be faster
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_agetfield, R0_tmp, Rtemp);
+ }
+ __ b(Done);
+ }
+ }
+
+ assert(vtos == seq++, "vtos has unexpected value");
+
+ __ bind(shouldNotReachHere);
+ __ should_not_reach_here();
+
+ // itos and atos cases are frequent so it makes sense to move them out of table switch
+ // atos case can be merged with itos case (and thus moved out of table switch) on 32-bit ARM, fast version only
+
+ __ bind(Lint);
+ __ ldr_s32(R0_tos, Address(Robj, Roffset));
+ __ push(itos);
+ // Rewrite bytecode to be faster
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_igetfield, R0_tmp, Rtemp);
+ }
+
+ __ bind(Done);
+
+ if (gen_volatile_check) {
+ // Check for volatile field
+ Label notVolatile;
+ __ tbz(Rflagsav, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); // tests the flags copy saved before Rflags was shifted
+
+ volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
+
+ __ bind(notVolatile);
+ }
+
+}
+
+void TemplateTable::getfield(int byte_no) {
+ getfield_or_static(byte_no, false); // is_static = false; rc takes its declared default (presumably may_rewrite -- declaration not in this chunk)
+}
+
+void TemplateTable::nofast_getfield(int byte_no) {
+ getfield_or_static(byte_no, false, may_not_rewrite); // is_static = false; never patches the bytecode to a _fast_ variant
+}
+
+void TemplateTable::getstatic(int byte_no) {
+ getfield_or_static(byte_no, true); // is_static = true; the rewrite in getfield_or_static is guarded by !is_static, so it never applies here
+}
+
+
+// Emits the JVMTI field-modification hook: when a modification watch is set, calls InterpreterRuntime::post_field_modification.
+// The registers cache and index are expected to be set before the call, and should not be R1 or Rtemp.
+// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR; cache and index are reloaded after the VM call.
+void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rindex, bool is_static) {
+ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+ assert_different_registers(Rcache, Rindex, R1, Rtemp);
+
+ if (__ can_post_field_modification()) {
+ // Check to see if a field modification watch has been set before we take
+ // the time to call into the VM.
+ Label Lcontinue;
+
+ __ ldr_global_s32(Rtemp, (address)JvmtiExport::get_field_modification_count_addr());
+ __ cbz(Rtemp, Lcontinue); // count is zero: skip the notification entirely
+
+ if (is_static) {
+ // Life is simple. Null out the object pointer.
+ __ mov(R1, 0);
+ } else {
+ // Life is harder. The stack holds the value on top, followed by the object.
+ // We don't know the size of the value, though; it could be one or two words
+ // depending on its type. As a result, we must find the type to determine where
+ // the object is.
+
+ __ add(Rtemp, Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ ldr_u32(Rtemp, Address(Rtemp, cp_base_offset + ConstantPoolCacheEntry::flags_offset())); // Rtemp = flags of the cache entry
+
+ __ logical_shift_right(Rtemp, Rtemp, ConstantPoolCacheEntry::tos_state_shift); // Rtemp = tos state of the field
+ // Make sure we don't need to mask Rtemp after the above shift
+ ConstantPoolCacheEntry::verify_tos_state_shift();
+
+ __ cmp(Rtemp, ltos);
+ __ cond_cmp(Rtemp, dtos, ne); // dtos compare is conditional on 'ne'; 'eq' below is used for the ltos/dtos case
+#ifdef AARCH64
+ __ mov(Rtemp, Interpreter::expr_offset_in_bytes(2));
+ __ mov(R1, Interpreter::expr_offset_in_bytes(1));
+ __ mov(R1, Rtemp, eq); // two word value: the object is at expression slot 2 rather than 1
+ __ ldr(R1, Address(Rstack_top, R1));
+#else
+ // two word value (ltos/dtos)
+ __ ldr(R1, Address(SP, Interpreter::expr_offset_in_bytes(2)), eq);
+
+ // one word value (not ltos, dtos)
+ __ ldr(R1, Address(SP, Interpreter::expr_offset_in_bytes(1)), ne);
+#endif // AARCH64
+ }
+
+ // cache entry pointer
+ __ add(R2, Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ add(R2, R2, in_bytes(cp_base_offset));
+
+ // address of the value on top of the expression stack
+ __ mov(R3, Rstack_top);
+
+ // R1: object pointer set up above (NULL if static)
+ // R2: cache entry pointer
+ // R3: value object on the stack
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
+ R1, R2, R3);
+ __ get_cache_and_index_at_bcp(Rcache, Rindex, 1); // reload cache/index so the caller still sees them after the call
+
+ __ bind(Lcontinue);
+ }
+}
+
+
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { // shared generator behind putfield, nofast_putfield and putstatic
+ transition(vtos, vtos);
+
+ const Register Roffset = R2_tmp;
+ const Register Robj = R3_tmp;
+ const Register Rcache = R4_tmp;
+ const Register Rflagsav = Rtmp_save0; // R4/R19
+ const Register Rindex = R5_tmp;
+ const Register Rflags = R5_tmp; // shares R5 with Rindex
+
+ const bool gen_volatile_check = os::is_MP();
+
+ resolve_cache_and_index(byte_no, Rcache, Rindex, sizeof(u2));
+ jvmti_post_field_mod(Rcache, Rindex, is_static);
+ load_field_cp_cache_entry(Rcache, Rindex, Roffset, Rflags, Robj, is_static);
+
+ if (gen_volatile_check) {
+ // Check for volatile field
+ Label notVolatile;
+ __ mov(Rflagsav, Rflags); // keep a copy: Rflags is shifted down to the tos state below, the flags are tested again after the store
+ __ tbz(Rflagsav, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+
+ volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp);
+
+ __ bind(notVolatile);
+ }
+
+ Label Done, Lint, shouldNotReachHere;
+ Label Ltable, Lbtos, Lztos, Lctos, Lstos, Litos, Lltos, Lftos, Ldtos, Latos;
+
+ // compute type
+ __ logical_shift_right(Rflags, Rflags, ConstantPoolCacheEntry::tos_state_shift);
+ // Make sure we don't need to mask flags after the above shift
+ ConstantPoolCacheEntry::verify_tos_state_shift();
+
+ // There are actually two versions of implementation of putfield/putstatic:
+ //
+ // 32-bit ARM:
+ // 1) Table switch using add(PC,...) instruction (fast_version)
+ // 2) Table switch using ldr(PC,...) instruction
+ //
+ // AArch64:
+ // 1) Table switch using adr/add/br instructions (fast_version)
+ // 2) Table switch using adr/ldr/br instructions
+ //
+ // First version requires fixed size of code block for each case and
+ // can not be used in RewriteBytecodes and VerifyOops
+ // modes (nor with ZapHighNonSignificantBits, see fast_version below).
+
+ // Size of fixed size code block for fast_version (in instructions)
+ const int log_max_block_size = AARCH64_ONLY(is_static ? 2 : 3) NOT_AARCH64(3);
+ const int max_block_size = 1 << log_max_block_size;
+
+ // Decide if fast version is enabled
+ bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops && !ZapHighNonSignificantBits;
+
+ assert(number_of_states == 10, "number of tos states should be equal to 10");
+
+ // itos case is frequent and is moved outside table switch
+ __ cmp(Rflags, itos);
+
+#ifdef AARCH64
+ __ b(Lint, eq);
+
+ if (fast_version) {
+ __ adr(Rtemp, Lbtos);
+ __ add(Rtemp, Rtemp, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize));
+ __ br(Rtemp);
+ } else {
+ __ adr(Rtemp, Ltable);
+ __ ldr(Rtemp, Address::indexed_ptr(Rtemp, Rflags));
+ __ br(Rtemp);
+ }
+#else
+ // table switch by type
+ if (fast_version) {
+ __ add(PC, PC, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize), ne); // only when not itos (flags from the cmp above)
+ } else {
+ __ ldr(PC, Address(PC, Rflags, lsl, LogBytesPerWord), ne); // only when not itos (flags from the cmp above)
+ }
+
+ // jump to itos case
+ __ b(Lint);
+#endif // AARCH64
+
+ // table with addresses for slow version
+ if (fast_version) {
+ // nothing to do
+ } else {
+ AARCH64_ONLY(__ align(wordSize));
+ __ bind(Ltable);
+ __ emit_address(Lbtos);
+ __ emit_address(Lztos);
+ __ emit_address(Lctos);
+ __ emit_address(Lstos);
+ __ emit_address(Litos);
+ __ emit_address(Lltos);
+ __ emit_address(Lftos);
+ __ emit_address(Ldtos);
+ __ emit_address(Latos);
+ }
+
+#ifdef ASSERT
+ int seq = 0; // debug builds only: checks that the case blocks below are emitted in tos-state order
+#endif
+ // btos
+ {
+ assert(btos == seq++, "btos has unexpected value");
+ FixedSizeCodeBlock btos_block(_masm, max_block_size, fast_version);
+ __ bind(Lbtos);
+ __ pop(btos);
+ if (!is_static) pop_and_check_object(Robj);
+ __ strb(R0_tos, Address(Robj, Roffset));
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_bputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+ __ b(Done);
+ }
+
+ // ztos
+ {
+ assert(ztos == seq++, "ztos has unexpected value");
+ FixedSizeCodeBlock ztos_block(_masm, max_block_size, fast_version);
+ __ bind(Lztos);
+ __ pop(ztos);
+ if (!is_static) pop_and_check_object(Robj);
+ __ and_32(R0_tos, R0_tos, 1); // only the lowest bit of the value is stored
+ __ strb(R0_tos, Address(Robj, Roffset));
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_zputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+ __ b(Done);
+ }
+
+ // ctos
+ {
+ assert(ctos == seq++, "ctos has unexpected value");
+ FixedSizeCodeBlock ctos_block(_masm, max_block_size, fast_version);
+ __ bind(Lctos);
+ __ pop(ctos);
+ if (!is_static) pop_and_check_object(Robj);
+ __ strh(R0_tos, Address(Robj, Roffset));
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_cputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+ __ b(Done);
+ }
+
+ // stos
+ {
+ assert(stos == seq++, "stos has unexpected value");
+ FixedSizeCodeBlock stos_block(_masm, max_block_size, fast_version);
+ __ bind(Lstos);
+ __ pop(stos);
+ if (!is_static) pop_and_check_object(Robj);
+ __ strh(R0_tos, Address(Robj, Roffset));
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_sputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+ __ b(Done);
+ }
+
+ // itos (placeholder slot only: itos is dispatched to Lint before the table switch)
+ {
+ assert(itos == seq++, "itos has unexpected value");
+ FixedSizeCodeBlock itos_block(_masm, max_block_size, fast_version);
+ __ bind(Litos);
+ __ b(shouldNotReachHere);
+ }
+
+ // ltos
+ {
+ assert(ltos == seq++, "ltos has unexpected value");
+ FixedSizeCodeBlock ltos_block(_masm, max_block_size, fast_version);
+ __ bind(Lltos);
+ __ pop(ltos);
+ if (!is_static) pop_and_check_object(Robj);
+#ifdef AARCH64
+ __ str(R0_tos, Address(Robj, Roffset));
+#else
+ __ add(Roffset, Robj, Roffset);
+ __ stmia(Roffset, RegisterSet(R0_tos_lo, R1_tos_hi));
+#endif // AARCH64
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_lputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+ __ b(Done);
+ }
+
+ // ftos
+ {
+ assert(ftos == seq++, "ftos has unexpected value");
+ FixedSizeCodeBlock ftos_block(_masm, max_block_size, fast_version);
+ __ bind(Lftos);
+ // floats and ints are placed on stack in the same way, so
+ // we can use pop(itos) to transfer value without using VFP
+ __ pop(itos);
+ if (!is_static) pop_and_check_object(Robj);
+ __ str_32(R0_tos, Address(Robj, Roffset));
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_fputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+ __ b(Done);
+ }
+
+ // dtos
+ {
+ assert(dtos == seq++, "dtos has unexpected value");
+ FixedSizeCodeBlock dtos_block(_masm, max_block_size, fast_version);
+ __ bind(Ldtos);
+ // doubles and longs are placed on stack in the same way, so
+ // we can use pop(ltos) to transfer value without using VFP
+ __ pop(ltos);
+ if (!is_static) pop_and_check_object(Robj);
+#ifdef AARCH64
+ __ str(R0_tos, Address(Robj, Roffset));
+#else
+ __ add(Rtemp, Robj, Roffset);
+ __ stmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
+#endif // AARCH64
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_dputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+ __ b(Done);
+ }
+
+ // atos (last table entry, so it is not wrapped in a FixedSizeCodeBlock)
+ {
+ assert(atos == seq++, "atos has unexpected value");
+ __ bind(Latos);
+ __ pop(atos);
+ if (!is_static) pop_and_check_object(Robj);
+ // Store into the field
+ do_oop_store(_masm, Address(Robj, Roffset), R0_tos, Rtemp, R1_tmp, R5_tmp, _bs->kind(), false, false);
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_aputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+ __ b(Done);
+ }
+
+ __ bind(shouldNotReachHere);
+ __ should_not_reach_here();
+
+ // itos case is frequent and is moved outside table switch
+ __ bind(Lint);
+ __ pop(itos);
+ if (!is_static) pop_and_check_object(Robj);
+ __ str_32(R0_tos, Address(Robj, Roffset));
+ if (!is_static && rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_iputfield, R0_tmp, Rtemp, true, byte_no);
+ }
+
+ __ bind(Done); // the itos path falls through to here; every other case branches here
+
+ if (gen_volatile_check) {
+ Label notVolatile;
+ if (is_static) {
+ // Just check for volatile. Memory barrier for static final field
+ // is handled by class initialization.
+ __ tbz(Rflagsav, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+ volatile_barrier(MacroAssembler::StoreLoad, Rtemp);
+ __ bind(notVolatile);
+ } else {
+ // Check for volatile field and final field
+ Label skipMembar;
+
+ __ tst(Rflagsav, 1 << ConstantPoolCacheEntry::is_volatile_shift |
+ 1 << ConstantPoolCacheEntry::is_final_shift);
+ __ b(skipMembar, eq); // neither volatile nor final: no barrier after the store
+
+ __ tbz(Rflagsav, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+
+ // StoreLoad barrier after volatile field write
+ volatile_barrier(MacroAssembler::StoreLoad, Rtemp);
+ __ b(skipMembar);
+
+ // StoreStore barrier after final field write
+ __ bind(notVolatile);
+ volatile_barrier(MacroAssembler::StoreStore, Rtemp);
+
+ __ bind(skipMembar);
+ }
+ }
+
+}
+
+void TemplateTable::putfield(int byte_no) { // template for putfield: instance-field flavour of putfield_or_static
+ putfield_or_static(byte_no, false); // is_static == false; rewrite control is left at its default
+}
+
+void TemplateTable::nofast_putfield(int byte_no) { // template for nofast_putfield: like putfield, but the bytecode is never rewritten
+ putfield_or_static(byte_no, false, may_not_rewrite); // instance field; may_not_rewrite disables the patch_bytecode() calls
+}
+
+void TemplateTable::putstatic(int byte_no) { // template for putstatic: static-field flavour of putfield_or_static
+ putfield_or_static(byte_no, true); // is_static == true
+}
+
+
+void TemplateTable::jvmti_post_fast_field_mod() { // argument-less overload, deliberately left unimplemented
+ // This version of jvmti_post_fast_field_mod() is not used on ARM;
+ Unimplemented(); // the ARM templates call the overload taking a TosState instead
+}
+
+// JVMTI field-modification hook for the fast putfield templates. Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR,
+// but preserves tosca with the given state.
+void TemplateTable::jvmti_post_fast_field_mod(TosState state) {
+ if (__ can_post_field_modification()) {
+ // Check to see if a field modification watch has been set before we take
+ // the time to call into the VM.
+ Label done;
+
+ __ ldr_global_s32(R2, (address)JvmtiExport::get_field_modification_count_addr());
+ __ cbz(R2, done); // count is zero: nothing to report
+
+ __ pop_ptr(R3); // copy the object pointer from tos
+ __ verify_oop(R3);
+ __ push_ptr(R3); // put the object pointer back on tos
+
+ __ push(state); // save value on the stack
+
+ // access constant pool cache entry
+ __ get_cache_entry_pointer_at_bcp(R2, R1, 1);
+
+ __ mov(R1, R3);
+ assert(Interpreter::expr_offset_in_bytes(0) == 0, "adjust this code");
+ __ mov(R3, Rstack_top); // put tos addr into R3
+
+ // R1: object pointer copied above
+ // R2: cache entry pointer
+ // R3: jvalue object on the stack
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), R1, R2, R3);
+
+ __ pop(state); // restore value
+
+ __ bind(done);
+ }
+}
+
+
+void TemplateTable::fast_storefield(TosState state) { // generator for the _fast_*putfield bytecodes: value is cached in tos, object is on the stack
+ transition(state, vtos);
+
+ ByteSize base = ConstantPoolCache::base_offset();
+
+ jvmti_post_fast_field_mod(state);
+
+ const Register Rcache = R2_tmp;
+ const Register Rindex = R3_tmp;
+ const Register Roffset = R3_tmp; // shares R3 with Rindex: the index is replaced by the field offset below
+ const Register Rflags = Rtmp_save0; // R4/R19
+ const Register Robj = R5_tmp;
+
+ const bool gen_volatile_check = os::is_MP();
+
+ // access constant pool cache
+ __ get_cache_and_index_at_bcp(Rcache, Rindex, 1);
+
+ __ add(Rcache, Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord)); // Rcache now addresses the entry (before adding base)
+
+ if (gen_volatile_check) {
+ // load flags to test volatile
+ __ ldr_u32(Rflags, Address(Rcache, base + ConstantPoolCacheEntry::flags_offset()));
+ }
+
+ // replace index with field offset from cache entry
+ __ ldr(Roffset, Address(Rcache, base + ConstantPoolCacheEntry::f2_offset()));
+
+ if (gen_volatile_check) {
+ // Check for volatile store
+ Label notVolatile;
+ __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+
+ // TODO-AARCH64 on AArch64, store-release instructions can be used to get rid of this explicit barrier
+ volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp);
+
+ __ bind(notVolatile);
+ }
+
+ // Get object from stack
+ pop_and_check_object(Robj);
+
+ // access field (the switch runs at code-generation time and selects which store is emitted)
+ switch (bytecode()) {
+ case Bytecodes::_fast_zputfield: __ and_32(R0_tos, R0_tos, 1);
+ // fall through
+ case Bytecodes::_fast_bputfield: __ strb(R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_sputfield: // fall through
+ case Bytecodes::_fast_cputfield: __ strh(R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_iputfield: __ str_32(R0_tos, Address(Robj, Roffset)); break;
+#ifdef AARCH64
+ case Bytecodes::_fast_lputfield: __ str (R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_fputfield: __ str_s(S0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_dputfield: __ str_d(D0_tos, Address(Robj, Roffset)); break;
+#else
+ case Bytecodes::_fast_lputfield: __ add(Robj, Robj, Roffset);
+ __ stmia(Robj, RegisterSet(R0_tos_lo, R1_tos_hi)); break;
+
+#ifdef __SOFTFP__
+ case Bytecodes::_fast_fputfield: __ str(R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_dputfield: __ add(Robj, Robj, Roffset);
+ __ stmia(Robj, RegisterSet(R0_tos_lo, R1_tos_hi)); break;
+#else
+ case Bytecodes::_fast_fputfield: __ add(Robj, Robj, Roffset);
+ __ fsts(S0_tos, Address(Robj)); break;
+ case Bytecodes::_fast_dputfield: __ add(Robj, Robj, Roffset);
+ __ fstd(D0_tos, Address(Robj)); break;
+#endif // __SOFTFP__
+#endif // AARCH64
+
+ case Bytecodes::_fast_aputfield:
+ do_oop_store(_masm, Address(Robj, Roffset), R0_tos, Rtemp, R1_tmp, R2_tmp, _bs->kind(), false, false);
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ if (gen_volatile_check) {
+ Label notVolatile;
+ Label skipMembar;
+ __ tst(Rflags, 1 << ConstantPoolCacheEntry::is_volatile_shift |
+ 1 << ConstantPoolCacheEntry::is_final_shift);
+ __ b(skipMembar, eq); // neither volatile nor final: no barrier after the store
+
+ __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+
+ // StoreLoad barrier after volatile field write
+ volatile_barrier(MacroAssembler::StoreLoad, Rtemp);
+ __ b(skipMembar);
+
+ // StoreStore barrier after final field write
+ __ bind(notVolatile);
+ volatile_barrier(MacroAssembler::StoreStore, Rtemp);
+
+ __ bind(skipMembar);
+ }
+}
+
+
+void TemplateTable::fast_accessfield(TosState state) { // generator for the _fast_*getfield bytecodes: object is cached in tos, result replaces it
+ transition(atos, state);
+
+ // do the JVMTI work here to avoid disturbing the register state below
+ if (__ can_post_field_access()) {
+ // Check to see if a field access watch has been set before we take
+ // the time to call into the VM.
+ Label done;
+ __ ldr_global_s32(R2, (address) JvmtiExport::get_field_access_count_addr());
+ __ cbz(R2, done); // count is zero: nothing to report
+ // access constant pool cache entry
+ __ get_cache_entry_pointer_at_bcp(R2, R1, 1);
+ __ push_ptr(R0_tos); // save object pointer before call_VM() clobbers it
+ __ verify_oop(R0_tos);
+ __ mov(R1, R0_tos);
+ // R1: object pointer copied above
+ // R2: cache entry pointer
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), R1, R2);
+ __ pop_ptr(R0_tos); // restore object pointer
+
+ __ bind(done);
+ }
+
+ const Register Robj = R0_tos;
+ const Register Rcache = R2_tmp;
+ const Register Rflags = R2_tmp; // shares R2 with Rcache: flags are loaded once the entry address is in Rtemp
+ const Register Rindex = R3_tmp;
+ const Register Roffset = R3_tmp; // shares R3 with Rindex
+
+ const bool gen_volatile_check = os::is_MP();
+
+ // access constant pool cache
+ __ get_cache_and_index_at_bcp(Rcache, Rindex, 1);
+ // replace index with field offset from cache entry
+ __ add(Rtemp, Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ ldr(Roffset, Address(Rtemp, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
+
+ if (gen_volatile_check) {
+ // load flags to test volatile
+ __ ldr_u32(Rflags, Address(Rtemp, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
+ }
+
+ __ verify_oop(Robj);
+ __ null_check(Robj, Rtemp);
+
+ // access field (the switch runs at code-generation time and selects which load is emitted)
+ switch (bytecode()) {
+ case Bytecodes::_fast_bgetfield: __ ldrsb(R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_sgetfield: __ ldrsh(R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_cgetfield: __ ldrh (R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_igetfield: __ ldr_s32(R0_tos, Address(Robj, Roffset)); break;
+#ifdef AARCH64
+ case Bytecodes::_fast_lgetfield: __ ldr (R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_fgetfield: __ ldr_s(S0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_dgetfield: __ ldr_d(D0_tos, Address(Robj, Roffset)); break;
+#else
+ case Bytecodes::_fast_lgetfield: __ add(Roffset, Robj, Roffset);
+ __ ldmia(Roffset, RegisterSet(R0_tos_lo, R1_tos_hi)); break;
+#ifdef __SOFTFP__
+ case Bytecodes::_fast_fgetfield: __ ldr (R0_tos, Address(Robj, Roffset)); break;
+ case Bytecodes::_fast_dgetfield: __ add(Roffset, Robj, Roffset);
+ __ ldmia(Roffset, RegisterSet(R0_tos_lo, R1_tos_hi)); break;
+#else
+ case Bytecodes::_fast_fgetfield: __ add(Roffset, Robj, Roffset); __ flds(S0_tos, Address(Roffset)); break;
+ case Bytecodes::_fast_dgetfield: __ add(Roffset, Robj, Roffset); __ fldd(D0_tos, Address(Roffset)); break;
+#endif // __SOFTFP__
+#endif // AARCH64
+ case Bytecodes::_fast_agetfield: __ load_heap_oop(R0_tos, Address(Robj, Roffset)); __ verify_oop(R0_tos); break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ if (gen_volatile_check) {
+ // Check for volatile load
+ Label notVolatile;
+ __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+
+ // TODO-AARCH64 on AArch64, load-acquire instructions can be used to get rid of this explicit barrier
+ volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
+
+ __ bind(notVolatile);
+ }
+}
+
+
+void TemplateTable::fast_xaccess(TosState state) { // loads local 0 as receiver and performs the field load of the getfield that follows (itos/atos/ftos only)
+ transition(vtos, state);
+
+ const Register Robj = R1_tmp;
+ const Register Rcache = R2_tmp;
+ const Register Rindex = R3_tmp;
+ const Register Roffset = R3_tmp; // shares R3 with Rindex
+ const Register Rflags = R4_tmp;
+ Label done;
+
+ // get receiver
+ __ ldr(Robj, aaddress(0));
+
+ // access constant pool cache (index operand is read at bcp + 2)
+ __ get_cache_and_index_at_bcp(Rcache, Rindex, 2);
+ __ add(Rtemp, Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ ldr(Roffset, Address(Rtemp, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()));
+
+ const bool gen_volatile_check = os::is_MP();
+
+ if (gen_volatile_check) {
+ // load flags to test volatile
+ __ ldr_u32(Rflags, Address(Rtemp, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
+ }
+
+ // make sure exception is reported in correct bcp range (getfield is next instruction)
+ __ add(Rbcp, Rbcp, 1);
+ __ null_check(Robj, Rtemp);
+ __ sub(Rbcp, Rbcp, 1);
+
+#ifdef AARCH64
+ if (gen_volatile_check) {
+ Label notVolatile;
+ __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+
+ __ add(Rtemp, Robj, Roffset); // volatile field: load through ldar/ldar_w from the computed address
+
+ if (state == itos) {
+ __ ldar_w(R0_tos, Rtemp);
+ } else if (state == atos) {
+ if (UseCompressedOops) {
+ __ ldar_w(R0_tos, Rtemp);
+ __ decode_heap_oop(R0_tos);
+ } else {
+ __ ldar(R0_tos, Rtemp);
+ }
+ __ verify_oop(R0_tos);
+ } else if (state == ftos) {
+ __ ldar_w(R0_tos, Rtemp);
+ __ fmov_sw(S0_tos, R0_tos);
+ } else {
+ ShouldNotReachHere();
+ }
+ __ b(done); // skip the plain load below
+
+ __ bind(notVolatile);
+ }
+#endif // AARCH64
+
+ if (state == itos) {
+ __ ldr_s32(R0_tos, Address(Robj, Roffset));
+ } else if (state == atos) {
+ __ load_heap_oop(R0_tos, Address(Robj, Roffset));
+ __ verify_oop(R0_tos);
+ } else if (state == ftos) {
+#ifdef AARCH64
+ __ ldr_s(S0_tos, Address(Robj, Roffset));
+#else
+#ifdef __SOFTFP__
+ __ ldr(R0_tos, Address(Robj, Roffset));
+#else
+ __ add(Roffset, Robj, Roffset);
+ __ flds(S0_tos, Address(Roffset));
+#endif // __SOFTFP__
+#endif // AARCH64
+ } else {
+ ShouldNotReachHere();
+ }
+
+#ifndef AARCH64
+ if (gen_volatile_check) {
+ // Check for volatile load
+ Label notVolatile;
+ __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+
+ volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
+
+ __ bind(notVolatile);
+ }
+#endif // !AARCH64
+
+ __ bind(done);
+}
+
+
+
+//----------------------------------------------------------------------------------------------------
+// Calls
+
+void TemplateTable::count_calls(Register method, Register temp) { // not expected to be called in this port
+ // implemented elsewhere
+ ShouldNotReachHere(); // reaching this generator is treated as an error
+}
+
+
+void TemplateTable::prepare_invoke(int byte_no,
+ Register method, // linked method (or i-klass)
+ Register index, // itable index, MethodType, etc.
+ Register recv, // if caller wants to see it
+ Register flags // if caller wants to test it
+ ) { // common invoke prologue: saves bcp, loads the cp cache entry, pushes the appendix, loads the receiver, sets LR to the return entry
+ // determine flags
+ const Bytecodes::Code code = bytecode();
+ const bool is_invokeinterface = code == Bytecodes::_invokeinterface;
+ const bool is_invokedynamic = code == Bytecodes::_invokedynamic;
+ const bool is_invokehandle = code == Bytecodes::_invokehandle;
+ const bool is_invokevirtual = code == Bytecodes::_invokevirtual;
+ const bool is_invokespecial = code == Bytecodes::_invokespecial;
+ const bool load_receiver = (recv != noreg);
+ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+ assert(recv == noreg || recv == R2, "");
+ assert(flags == noreg || flags == R3, "");
+
+ // setup registers & access constant pool cache
+ if (recv == noreg) recv = R2; // caller did not ask for the receiver: R2 is still reserved for it
+ if (flags == noreg) flags = R3; // caller did not ask for the flags: R3 is used internally
+ const Register temp = Rtemp;
+ const Register ret_type = R1_tmp;
+ assert_different_registers(method, index, flags, recv, LR, ret_type, temp);
+
+ // save 'interpreter return address'
+ __ save_bcp();
+
+ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
+
+ // maybe push extra argument
+ if (is_invokedynamic || is_invokehandle) {
+ Label L_no_push;
+ __ tbz(flags, ConstantPoolCacheEntry::has_appendix_shift, L_no_push);
+ __ mov(temp, index);
+ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
+ __ load_resolved_reference_at_index(index, temp);
+ __ verify_oop(index);
+ __ push_ptr(index); // push appendix (MethodType, CallSite, etc.)
+ __ bind(L_no_push);
+ }
+
+ // load receiver if needed (after extra argument is pushed so parameter size is correct)
+ if (load_receiver) {
+ __ andr(temp, flags, (uintx)ConstantPoolCacheEntry::parameter_size_mask); // get parameter size
+ Address recv_addr = __ receiver_argument_address(Rstack_top, temp, recv);
+ __ ldr(recv, recv_addr);
+ __ verify_oop(recv);
+ }
+
+ // compute return type
+ __ logical_shift_right(ret_type, flags, ConstantPoolCacheEntry::tos_state_shift);
+ // Make sure we don't need to mask flags after the above shift
+ ConstantPoolCacheEntry::verify_tos_state_shift();
+ // load return address
+ { const address table = (address) Interpreter::invoke_return_entry_table_for(code);
+ __ mov_slow(temp, table);
+ __ ldr(LR, Address::indexed_ptr(temp, ret_type)); // LR = table[ret_type]
+ }
+}
+
+
+void TemplateTable::invokevirtual_helper(Register index, // vtable index, or the Method* itself when the call is vfinal
+ Register recv, // receiver
+ Register flags) { // cp cache entry flags; emits the final/virtual dispatch shared by invokevirtual and invokeinterface
+
+ const Register recv_klass = R2_tmp;
+
+ assert_different_registers(index, recv, flags, Rtemp);
+ assert_different_registers(index, recv_klass, R0_tmp, Rtemp);
+
+ // Test for an invoke of a final method
+ Label notFinal;
+ __ tbz(flags, ConstantPoolCacheEntry::is_vfinal_shift, notFinal);
+
+ assert(index == Rmethod, "Method* must be Rmethod, for interpreter calling convention");
+
+ // do the call - the index is actually the method to call
+
+ // It's final, need a null check here!
+ __ null_check(recv, Rtemp);
+
+ // profile this call
+ __ profile_final_call(R0_tmp);
+
+ __ jump_from_interpreted(Rmethod);
+
+ __ bind(notFinal);
+
+ // get receiver klass
+ __ null_check(recv, Rtemp, oopDesc::klass_offset_in_bytes());
+ __ load_klass(recv_klass, recv);
+
+ // profile this call
+ __ profile_virtual_call(R0_tmp, recv_klass);
+
+ // get target Method* & entry point
+ const int base = in_bytes(Klass::vtable_start_offset());
+ assert(vtableEntry::size() == 1, "adjust the scaling in the code below");
+ __ add(Rtemp, recv_klass, AsmOperand(index, lsl, LogHeapWordSize)); // Rtemp = klass + index * word size
+ __ ldr(Rmethod, Address(Rtemp, base + vtableEntry::method_offset_in_bytes())); // Rmethod = vtable[index].method
+ __ jump_from_interpreted(Rmethod);
+}
+
+void TemplateTable::invokevirtual(int byte_no) {
+ transition(vtos, vtos);
+ assert(byte_no == f2_byte, "use this argument");
+
+ const Register Rreceiver = R2_tmp; // receiver, filled in by prepare_invoke
+ const Register Rcp_flags = R3_tmp; // cp cache entry flags, filled in by prepare_invoke
+
+ // After the prologue below the registers hold:
+ // Rmethod - index (the method itself for a vfinal call, see invokevirtual_helper)
+ // Rreceiver - receiver
+ // Rcp_flags - flags, LR - return address
+ prepare_invoke(byte_no, Rmethod, noreg, Rreceiver, Rcp_flags);
+
+ // Hand over to the shared final/virtual dispatch code.
+ invokevirtual_helper(Rmethod, Rreceiver, Rcp_flags);
+}
+
+
+void TemplateTable::invokespecial(int byte_no) {
+ transition(vtos, vtos);
+ assert(byte_no == f1_byte, "use this argument");
+ const Register Rreceiver = R2_tmp; // receiver, loaded by prepare_invoke
+ prepare_invoke(byte_no, Rmethod, noreg, Rreceiver);
+ __ verify_oop(Rreceiver);
+ __ null_check(Rreceiver, Rtemp); // receiver is null-checked before the call
+ // profile, then transfer control to the resolved method
+ __ profile_call(Rreceiver);
+ __ jump_from_interpreted(Rmethod);
+}
+
+
+void TemplateTable::invokestatic(int byte_no) {
+ transition(vtos, vtos);
+ assert(byte_no == f1_byte, "use this argument");
+ const Register Rprofile_reg = R2_tmp; // register handed to profile_call
+ prepare_invoke(byte_no, Rmethod); // no receiver or flags requested
+ __ profile_call(Rprofile_reg);
+ __ jump_from_interpreted(Rmethod); // do the call
+}
+
+
+void TemplateTable::fast_invokevfinal(int byte_no) { // placeholder template: this bytecode is not used on ARM
+ transition(vtos, vtos);
+ assert(byte_no == f2_byte, "use this argument");
+ __ stop("fast_invokevfinal is not used on ARM"); // generated code stops with this message if it is ever executed
+}
+
+
+void TemplateTable::invokeinterface(int byte_no) { // generator for invokeinterface: itable search for the interface, then call through the itable method entry
+ transition(vtos, vtos);
+ assert(byte_no == f1_byte, "use this argument");
+
+ const Register Ritable = R1_tmp;
+ const Register Rrecv = R2_tmp;
+ const Register Rinterf = R5_tmp;
+ const Register Rindex = R4_tmp;
+ const Register Rflags = R3_tmp;
+ const Register Rklass = R3_tmp; // shares R3 with Rflags; the flags are not read after the forced-virtual test
+
+ prepare_invoke(byte_no, Rinterf, Rindex, Rrecv, Rflags);
+
+ // Special case of invokeinterface called for virtual method of
+ // java.lang.Object. See cpCacheOop.cpp for details.
+ // This code isn't produced by javac, but could be produced by
+ // another compliant java compiler.
+ Label notMethod;
+ __ tbz(Rflags, ConstantPoolCacheEntry::is_forced_virtual_shift, notMethod);
+
+ __ mov(Rmethod, Rindex);
+ invokevirtual_helper(Rmethod, Rrecv, Rflags); // both helper paths end in jump_from_interpreted
+ __ bind(notMethod);
+
+ // Get receiver klass into Rklass - also a null check
+ __ load_klass(Rklass, Rrecv);
+
+ // profile this call
+ __ profile_virtual_call(R0_tmp, Rklass);
+
+ // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+ const int base = in_bytes(Klass::vtable_start_offset());
+ assert(vtableEntry::size() == 1, "adjust the scaling in the code below");
+ __ ldr_s32(Rtemp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
+ __ add(Ritable, Rklass, base);
+ __ add(Ritable, Ritable, AsmOperand(Rtemp, lsl, LogBytesPerWord));
+
+ Label entry, search, interface_ok;
+
+ __ b(entry); // first iteration: test the first entry without advancing
+
+ __ bind(search);
+ __ add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize); // advance to the next itableOffsetEntry
+
+ __ bind(entry);
+
+ // Check that the entry is non-null. A null entry means that the receiver
+ // class doesn't implement the interface, and wasn't the same as the
+ // receiver class checked when the interface was resolved.
+
+ __ ldr(Rtemp, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes()));
+ __ cbnz(Rtemp, interface_ok);
+
+ // throw exception
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_IncompatibleClassChangeError));
+
+ // the call_VM checks for exception, so we should never return here.
+ __ should_not_reach_here();
+
+ __ bind(interface_ok);
+
+ __ cmp(Rinterf, Rtemp);
+ __ b(search, ne); // not the interface we are looking for: keep scanning
+
+ __ ldr_s32(Rtemp, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes()));
+ __ add(Rtemp, Rtemp, Rklass); // Add offset to Klass*
+ assert(itableMethodEntry::size() == 1, "adjust the scaling in the code below");
+
+ __ ldr(Rmethod, Address::indexed_ptr(Rtemp, Rindex));
+
+ // Rmethod: Method* to call
+
+ // Check for abstract method error
+ // Note: This should be done more efficiently via a throw_abstract_method_error
+ // interpreter entry point and a conditional jump to it in case of a null
+ // method.
+ { Label L;
+ __ cbnz(Rmethod, L);
+ // throw exception
+ // note: must restore interpreter registers to canonical
+ // state for exception handling to work correctly!
+ __ restore_method();
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+ // the call_VM checks for exception, so we should never return here.
+ __ should_not_reach_here();
+ __ bind(L);
+ }
+
+ // do the call
+ __ jump_from_interpreted(Rmethod);
+}
+
+void TemplateTable::invokehandle(int byte_no) {
+ transition(vtos, vtos);
+
+ // TODO-AARCH64 review register usage
+ const Register Rreceiver = R2_tmp;
+ const Register Rmethod_type = R4_tmp;
+ const Register Rtarget = R5_tmp; // can't reuse Rmethod!
+
+ prepare_invoke(byte_no, Rtarget, Rmethod_type, Rreceiver);
+ __ null_check(Rreceiver, Rtemp);
+
+ // Rmethod_type: MethodType object (from cpool->resolved_references[f1], if necessary)
+ // Rtarget: MH.invokeExact_MT method (from f2)
+
+ // Note: Rmethod_type is already pushed (if necessary) by prepare_invoke
+
+ // profile, move the target into Rmethod and do the call
+ __ profile_final_call(R3_tmp); // FIXME: profile the LambdaForm also
+ __ mov(Rmethod, Rtarget);
+ __ jump_from_interpreted(Rmethod);
+}
+
+void TemplateTable::invokedynamic(int byte_no) {
+ transition(vtos, vtos);
+
+ // TODO-AARCH64 review register usage
+ const Register Rcall_site = R4_tmp;
+ const Register Rtarget = R5_tmp; // can't reuse Rmethod!
+
+ prepare_invoke(byte_no, Rtarget, Rcall_site);
+
+ // Rcall_site: CallSite object (from cpool->resolved_references[f1])
+ // Rtarget: MH.linkToCallSite method (from f2)
+
+ // Note: Rcall_site is already pushed by prepare_invoke
+
+ if (ProfileInterpreter) {
+ __ profile_call(R2_tmp);
+ }
+
+ // move the target into Rmethod and do the call
+ __ mov(Rmethod, Rtarget);
+ __ jump_from_interpreted(Rmethod);
+}
+
+//----------------------------------------------------------------------------------------------------
+// Allocation
+
+// Emits the interpreter template for the 'new' bytecode.
+//
+// Fast path (generated only if UseTLAB or inline contiguous allocation is
+// available): check that the constant pool entry is a resolved class and that
+// the klass is fully initialized and has no slow-path bit set, allocate from
+// the TLAB and/or the shared heap top, zero the fields, then write the header.
+// Every failed check branches to slow_case, which calls InterpreterRuntime::_new.
+// Both paths join at 'done', where a StoreStore barrier is emitted.
+void TemplateTable::_new() {
+ transition(vtos, atos);
+
+ const Register Robj = R0_tos;
+ const Register Rcpool = R1_tmp;
+ const Register Rindex = R2_tmp;
+ // Rtags and Rsize deliberately share R3_tmp: the tags pointer is last used
+ // when computing the tag address, before the size is loaded.
+ const Register Rtags = R3_tmp;
+ const Register Rsize = R3_tmp;
+
+ // Not const: reset to noreg after store_klass() to mark it as no longer valid.
+ Register Rklass = R4_tmp;
+ assert_different_registers(Rcpool, Rindex, Rtags, Rklass, Rtemp);
+ assert_different_registers(Rcpool, Rindex, Rklass, Rsize);
+
+ Label slow_case;
+ Label done;
+ Label initialize_header;
+ Label initialize_object; // including clearing the fields
+ Label allocate_shared;
+
+ const bool allow_shared_alloc =
+ Universe::heap()->supports_inline_contig_alloc();
+
+ // Literals
+ // Address of the heap top pointer; only meaningful (and only bound below)
+ // when allow_shared_alloc is true.
+ InlinedAddress Lheap_top_addr(allow_shared_alloc ? (address)Universe::heap()->top_addr() : NULL);
+
+ __ get_unsigned_2_byte_index_at_bcp(Rindex, 1);
+ __ get_cpool_and_tags(Rcpool, Rtags);
+
+ // Make sure the class we're about to instantiate has been resolved.
+ // This is done before loading InstanceKlass to be consistent with the order
+ // how Constant Pool is updated (see ConstantPool::klass_at_put)
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
+ __ add(Rtemp, Rtags, Rindex);
+
+#ifdef AARCH64
+ __ add(Rtemp, Rtemp, tags_offset);
+ __ ldarb(Rtemp, Rtemp);
+#else
+ __ ldrb(Rtemp, Address(Rtemp, tags_offset));
+
+ // use Rklass as a scratch
+ volatile_barrier(MacroAssembler::LoadLoad, Rklass);
+#endif // AARCH64
+
+ // get InstanceKlass
+ // The klass is loaded before the tag loaded above is compared.
+ __ add(Rklass, Rcpool, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ ldr(Rklass, Address(Rklass, sizeof(ConstantPool)));
+ __ cmp(Rtemp, JVM_CONSTANT_Class);
+ __ b(slow_case, ne);
+
+ // make sure klass is fully initialized
+ // (the finalizer / slow-path check is done on the layout helper below)
+ __ ldrb(Rtemp, Address(Rklass, InstanceKlass::init_state_offset()));
+ __ cmp(Rtemp, InstanceKlass::fully_initialized);
+ __ b(slow_case, ne);
+
+ // get instance_size in InstanceKlass (scaled to a count of bytes)
+ // From here on R3_tmp holds the size, no longer the tags array.
+ __ ldr_u32(Rsize, Address(Rklass, Klass::layout_helper_offset()));
+
+ // test to see if it has a finalizer or is malformed in some way
+ // Klass::_lh_instance_slow_path_bit is really a bit mask, not bit number
+ __ tbnz(Rsize, exact_log2(Klass::_lh_instance_slow_path_bit), slow_case);
+
+ //
+ // Allocate the instance
+ // 1) Try to allocate in the TLAB
+ // 2) if fail and the object is large allocate in the shared Eden
+ // 3) if the above fails (or is not applicable), go to a slow case
+ // (creates a new TLAB, etc.)
+
+ if (UseTLAB) {
+ // These reuse the registers of Rcpool and Rindex; the slow case reloads both.
+ const Register Rtlab_top = R1_tmp;
+ const Register Rtlab_end = R2_tmp;
+ assert_different_registers(Robj, Rsize, Rklass, Rtlab_top, Rtlab_end);
+
+ __ ldr(Robj, Address(Rthread, JavaThread::tlab_top_offset()));
+ __ ldr(Rtlab_end, Address(Rthread, in_bytes(JavaThread::tlab_end_offset())));
+ __ add(Rtlab_top, Robj, Rsize);
+ __ cmp(Rtlab_top, Rtlab_end);
+ // TLAB too small: try the shared heap if allowed, otherwise the slow case.
+ __ b(allow_shared_alloc ? allocate_shared : slow_case, hi);
+ __ str(Rtlab_top, Address(Rthread, JavaThread::tlab_top_offset()));
+ if (ZeroTLAB) {
+ // the fields have been already cleared
+ __ b(initialize_header);
+ } else {
+ // initialize both the header and fields
+ __ b(initialize_object);
+ }
+ }
+
+ // Allocation in the shared Eden, if allowed.
+ if (allow_shared_alloc) {
+ __ bind(allocate_shared);
+
+ const Register Rheap_top_addr = R2_tmp;
+ const Register Rheap_top = R5_tmp;
+ const Register Rheap_end = Rtemp;
+ assert_different_registers(Robj, Rklass, Rsize, Rheap_top_addr, Rheap_top, Rheap_end, LR);
+
+ // heap_end now (re)loaded in the loop since also used as a scratch register in the CAS
+ __ ldr_literal(Rheap_top_addr, Lheap_top_addr);
+
+ Label retry;
+ __ bind(retry);
+
+#ifdef AARCH64
+ __ ldxr(Robj, Rheap_top_addr);
+#else
+ __ ldr(Robj, Address(Rheap_top_addr));
+#endif // AARCH64
+
+ // The heap end pointer is addressed relative to the heap top pointer.
+ __ ldr(Rheap_end, Address(Rheap_top_addr, (intptr_t)Universe::heap()->end_addr()-(intptr_t)Universe::heap()->top_addr()));
+ __ add(Rheap_top, Robj, Rsize);
+ __ cmp(Rheap_top, Rheap_end);
+ __ b(slow_case, hi);
+
+ // Update heap top atomically.
+ // If someone beats us on the allocation, try again, otherwise continue.
+#ifdef AARCH64
+ __ stxr(Rtemp2, Rheap_top, Rheap_top_addr);
+ __ cbnz_w(Rtemp2, retry);
+#else
+ __ atomic_cas_bool(Robj, Rheap_top, Rheap_top_addr, 0, Rheap_end/*scratched*/);
+ __ b(retry, ne);
+#endif // AARCH64
+
+ __ incr_allocated_bytes(Rsize, Rtemp);
+ }
+
+ if (UseTLAB || allow_shared_alloc) {
+ const Register Rzero0 = R1_tmp;
+ const Register Rzero1 = R2_tmp;
+ const Register Rzero_end = R5_tmp;
+ const Register Rzero_cur = Rtemp;
+ assert_different_registers(Robj, Rsize, Rklass, Rzero0, Rzero1, Rzero_cur, Rzero_end);
+
+ // The object is initialized before the header. If the object size is
+ // zero, go directly to the header initialization.
+ __ bind(initialize_object);
+ // Rsize becomes the number of bytes after the header; flags from subs
+ // are consumed by the conditional branch two instructions below.
+ __ subs(Rsize, Rsize, sizeof(oopDesc));
+ __ add(Rzero_cur, Robj, sizeof(oopDesc));
+ __ b(initialize_header, eq);
+
+#ifdef ASSERT
+ // make sure Rsize is a multiple of 8
+ Label L;
+ __ tst(Rsize, 0x07);
+ __ b(L, eq);
+ __ stop("object size is not multiple of 8 - adjust this code");
+ __ bind(L);
+#endif
+
+#ifdef AARCH64
+ {
+ Label loop;
+ // Step back by 1 word if object size is not a multiple of 2*wordSize.
+ assert(wordSize <= sizeof(oopDesc), "oop header should contain at least one word");
+ __ andr(Rtemp2, Rsize, (uintx)wordSize);
+ __ sub(Rzero_cur, Rzero_cur, Rtemp2);
+
+ // Zero by 2 words per iteration.
+ __ bind(loop);
+ __ subs(Rsize, Rsize, 2*wordSize);
+ __ stp(ZR, ZR, Address(Rzero_cur, 2*wordSize, post_indexed));
+ __ b(loop, gt);
+ }
+#else
+ __ mov(Rzero0, 0);
+ __ mov(Rzero1, 0);
+ __ add(Rzero_end, Rzero_cur, Rsize);
+
+ // initialize remaining object fields: Rsize was a multiple of 8
+ { Label loop;
+ // loop is unrolled 2 times
+ // The second store/compare pair is conditional (ne), so it is skipped
+ // once the first compare has reached Rzero_end.
+ __ bind(loop);
+ // #1
+ __ stmia(Rzero_cur, RegisterSet(Rzero0) | RegisterSet(Rzero1), writeback);
+ __ cmp(Rzero_cur, Rzero_end);
+ // #2
+ __ stmia(Rzero_cur, RegisterSet(Rzero0) | RegisterSet(Rzero1), writeback, ne);
+ __ cmp(Rzero_cur, Rzero_end, ne);
+ __ b(loop, ne);
+ }
+#endif // AARCH64
+
+ // initialize object header only.
+ __ bind(initialize_header);
+ if (UseBiasedLocking) {
+ __ ldr(Rtemp, Address(Rklass, Klass::prototype_header_offset()));
+ } else {
+ __ mov_slow(Rtemp, (intptr_t)markOopDesc::prototype());
+ }
+ // mark
+ __ str(Rtemp, Address(Robj, oopDesc::mark_offset_in_bytes()));
+
+ // klass
+#ifdef AARCH64
+ __ store_klass_gap(Robj);
+#endif // AARCH64
+ __ store_klass(Rklass, Robj); // blows Rklass:
+ Rklass = noreg;
+
+ // Note: Disable DTrace runtime check for now to eliminate overhead on each allocation
+ // The probe code below is emitted only if DTraceAllocProbes is set when
+ // this template is generated; the flag is then re-read at run time.
+ if (DTraceAllocProbes) {
+ // Trigger dtrace event for fastpath
+ Label Lcontinue;
+
+ __ ldrb_global(Rtemp, (address)&DTraceAllocProbes);
+ __ cbz(Rtemp, Lcontinue);
+
+ __ push(atos);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), Robj);
+ __ pop(atos);
+
+ __ bind(Lcontinue);
+ }
+
+ __ b(done);
+ } else {
+ // jump over literals
+ __ b(slow_case);
+ }
+
+ // The inlined literal is placed where straight-line execution cannot reach it:
+ // the code above always ends in a branch.
+ if (allow_shared_alloc) {
+ __ bind_literal(Lheap_top_addr);
+ }
+
+ // slow case
+ // Rcpool and Rindex are reloaded because the fast path reuses their registers.
+ __ bind(slow_case);
+ __ get_constant_pool(Rcpool);
+ __ get_unsigned_2_byte_index_at_bcp(Rindex, 1);
+ __ call_VM(Robj, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), Rcpool, Rindex);
+
+ // continue
+ __ bind(done);
+
+ // StoreStore barrier required after complete initialization
+ // (headers + content zeroing), before the object may escape.
+ __ membar(MacroAssembler::StoreStore, R1_tmp);
+}
+
+
+// Emits the interpreter template for the newarray bytecode.
+// The byte operand at bcp+1 (presumably the element type code) and the int
+// on top of stack are handed to InterpreterRuntime::newarray; the runtime
+// result is delivered in R0_tos.
+void TemplateTable::newarray() {
+  transition(itos, atos);
+
+  const Register Rbcp_operand = R1;  // first runtime argument: byte at bcp+1
+  const Register Rtos_value   = R2;  // second runtime argument: copy of R0_tos
+
+  __ ldrb(Rbcp_operand, at_bcp(1));
+  __ mov(Rtos_value, R0_tos);
+  call_VM(R0_tos,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
+          Rbcp_operand, Rtos_value);
+  // No explicit MacroAssembler::StoreStore is emitted here: it is useless
+  // because the runtime exit path already includes it.
+}
+
+
+// Emits the interpreter template for the anewarray bytecode.
+// The constant pool, the 2-byte index operand at bcp+1 and the int on top of
+// stack are handed to InterpreterRuntime::anewarray; the runtime result is
+// delivered in R0_tos.
+void TemplateTable::anewarray() {
+  transition(itos, atos);
+
+  const Register Rcpool_arg = R1;  // first runtime argument: constant pool
+  const Register Rindex_arg = R2;  // second runtime argument: 2-byte operand
+  const Register Rtos_value = R3;  // third runtime argument: copy of R0_tos
+
+  __ get_unsigned_2_byte_index_at_bcp(Rindex_arg, 1);
+  __ get_constant_pool(Rcpool_arg);
+  __ mov(Rtos_value, R0_tos);
+  call_VM(R0_tos,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
+          Rcpool_arg, Rindex_arg, Rtos_value);
+  // No explicit MacroAssembler::StoreStore is emitted here: it is useless
+  // because the runtime exit path already includes it.
+}
+
+
+// Emits the interpreter template for the arraylength bytecode:
+// null-checks the array reference in R0_tos and replaces it with the signed
+// 32-bit value loaded from the array's length field.
+void TemplateTable::arraylength() {
+  transition(atos, itos);
+
+  const Register Rarray = R0_tos;
+  const int length_offset = arrayOopDesc::length_offset_in_bytes();
+
+  __ null_check(Rarray, Rtemp, length_offset);
+  __ ldr_s32(R0_tos, Address(Rarray, length_offset));
+}
+
+
+// Emits the interpreter template for the checkcast bytecode.
+// A NULL reference passes unchanged. Otherwise the target klass is taken from
+// the constant pool when the tag is already JVM_CONSTANT_Class, or obtained
+// via InterpreterRuntime::quicken_io_cc; a failed subtype check jumps to the
+// ClassCastException entry with the object in R2_ClassCastException_obj.
+void TemplateTable::checkcast() {
+ transition(atos, atos);
+ Label done, is_null, quicked, resolved, throw_exception;
+
+ const Register Robj = R0_tos;
+ const Register Rcpool = R2_tmp;
+ // Rsuper reuses Rtags' register and Rsub reuses Rindex's register; each
+ // pair is live at different times.
+ const Register Rtags = R3_tmp;
+ const Register Rindex = R4_tmp;
+ const Register Rsuper = R3_tmp;
+ const Register Rsub = R4_tmp;
+ const Register Rsubtype_check_tmp1 = R1_tmp;
+ const Register Rsubtype_check_tmp2 = LR_tmp;
+
+ __ cbz(Robj, is_null);
+
+ // Get cpool & tags index
+ __ get_cpool_and_tags(Rcpool, Rtags);
+ __ get_unsigned_2_byte_index_at_bcp(Rindex, 1);
+
+ // See if bytecode has already been quicked
+ __ add(Rtemp, Rtags, Rindex);
+#ifdef AARCH64
+ // TODO-AARCH64: investigate if LoadLoad barrier is needed here or control dependency is enough
+ __ add(Rtemp, Rtemp, Array<u1>::base_offset_in_bytes());
+ __ ldarb(Rtemp, Rtemp); // acts as LoadLoad memory barrier
+#else
+ __ ldrb(Rtemp, Address(Rtemp, Array<u1>::base_offset_in_bytes()));
+#endif // AARCH64
+
+ __ cmp(Rtemp, JVM_CONSTANT_Class);
+
+#ifndef AARCH64
+ // Emitted between the compare and the branch; the third argument asks the
+ // barrier helper to preserve the condition flags.
+ volatile_barrier(MacroAssembler::LoadLoad, Rtemp, true);
+#endif // !AARCH64
+
+ __ b(quicked, eq);
+
+ // Not yet quickened: save the object across the runtime call.
+ __ push(atos);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+ // vm_result_2 has metadata result
+ __ get_vm_result_2(Rsuper, Robj);
+ __ pop_ptr(Robj);
+ __ b(resolved);
+
+ // Out-of-line failure handler; reached only by branch from the subtype check.
+ __ bind(throw_exception);
+ // Come here on failure of subtype check
+ __ profile_typecheck_failed(R1_tmp);
+ __ mov(R2_ClassCastException_obj, Robj); // convention with generate_ClassCastException_handler()
+ __ b(Interpreter::_throw_ClassCastException_entry);
+
+ // Get superklass in Rsuper and subklass in Rsub
+ __ bind(quicked);
+ __ add(Rtemp, Rcpool, AsmOperand(Rindex, lsl, LogBytesPerWord));
+ __ ldr(Rsuper, Address(Rtemp, sizeof(ConstantPool)));
+
+ __ bind(resolved);
+ __ load_klass(Rsub, Robj);
+
+ // Generate subtype check. Blows both tmps and Rtemp.
+ assert_different_registers(Robj, Rsub, Rsuper, Rsubtype_check_tmp1, Rsubtype_check_tmp2, Rtemp);
+ __ gen_subtype_check(Rsub, Rsuper, throw_exception, Rsubtype_check_tmp1, Rsubtype_check_tmp2);
+
+ // Come here on success
+
+ // Collect counts on whether this check-cast sees NULLs a lot or not.
+ if (ProfileInterpreter) {
+ __ b(done);
+ __ bind(is_null);
+ __ profile_null_seen(R1_tmp);
+ } else {
+ __ bind(is_null); // same as 'done'
+ }
+ __ bind(done);
+}
+
+
+// Emits the interpreter template for the instanceof bytecode.
+//
+// result = 0: obj == NULL or obj is not an instanceof the specified klass
+// result = 1: obj != NULL and obj is an instanceof the specified klass
+//
+// The target klass is taken from the constant pool when the tag is already
+// JVM_CONSTANT_Class, or obtained via InterpreterRuntime::quicken_io_cc.
+void TemplateTable::instanceof() {
+  transition(atos, itos);
+  Label done, is_null, not_subtype, quicked, resolved;
+
+  const Register Robj = R0_tos;
+  const Register Rcpool = R2_tmp;
+  // Rsuper reuses Rtags' register and Rsub reuses Rindex's register; each
+  // pair is live at different times.
+  const Register Rtags = R3_tmp;
+  const Register Rindex = R4_tmp;
+  const Register Rsuper = R3_tmp;
+  const Register Rsub = R4_tmp;
+  // NOTE(review): R0_tmp presumably names the same register as Robj (R0_tos);
+  // that is fine only because R0_tos is overwritten with the result right
+  // after the subtype check - confirm.
+  const Register Rsubtype_check_tmp1 = R0_tmp;
+  const Register Rsubtype_check_tmp2 = R1_tmp;
+
+  // A NULL reference leaves R0_tos == 0, which is already the right result.
+  __ cbz(Robj, is_null);
+
+  // The subklass is loaded only at 'resolved' below. Rsub shares its register
+  // with Rindex, so a load placed here would be overwritten by the index
+  // fetch before it could ever be used.
+
+  // Get cpool & tags index
+  __ get_cpool_and_tags(Rcpool, Rtags);
+  __ get_unsigned_2_byte_index_at_bcp(Rindex, 1);
+
+  // See if bytecode has already been quicked
+  __ add(Rtemp, Rtags, Rindex);
+#ifdef AARCH64
+  // TODO-AARCH64: investigate if LoadLoad barrier is needed here or control dependency is enough
+  __ add(Rtemp, Rtemp, Array<u1>::base_offset_in_bytes());
+  __ ldarb(Rtemp, Rtemp); // acts as LoadLoad memory barrier
+#else
+  __ ldrb(Rtemp, Address(Rtemp, Array<u1>::base_offset_in_bytes()));
+#endif // AARCH64
+  __ cmp(Rtemp, JVM_CONSTANT_Class);
+
+#ifndef AARCH64
+  // Emitted between the compare and the branch; the third argument asks the
+  // barrier helper to preserve the condition flags.
+  volatile_barrier(MacroAssembler::LoadLoad, Rtemp, true);
+#endif // !AARCH64
+
+  __ b(quicked, eq);
+
+  // Not yet quickened: save the object across the runtime call.
+  __ push(atos);
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  // vm_result_2 has metadata result
+  __ get_vm_result_2(Rsuper, Robj);
+  __ pop_ptr(Robj);
+  __ b(resolved);
+
+  // Get superklass in Rsuper and subklass in Rsub
+  __ bind(quicked);
+  __ add(Rtemp, Rcpool, AsmOperand(Rindex, lsl, LogBytesPerWord));
+  __ ldr(Rsuper, Address(Rtemp, sizeof(ConstantPool)));
+
+  __ bind(resolved);
+  __ load_klass(Rsub, Robj);
+
+  // Generate subtype check. Blows both tmps and Rtemp.
+  __ gen_subtype_check(Rsub, Rsuper, not_subtype, Rsubtype_check_tmp1, Rsubtype_check_tmp2);
+
+  // Come here on success
+  __ mov(R0_tos, 1);
+  __ b(done);
+
+  __ bind(not_subtype);
+  // Come here on failure
+  __ profile_typecheck_failed(R1_tmp);
+  __ mov(R0_tos, 0);
+
+  // Collect counts on whether this test sees NULLs a lot or not.
+  if (ProfileInterpreter) {
+    __ b(done);
+    __ bind(is_null);
+    __ profile_null_seen(R1_tmp);
+  } else {
+    __ bind(is_null);   // same as 'done'
+  }
+  __ bind(done);
+}
+
+
+//----------------------------------------------------------------------------------------------------
+// Breakpoints
+// Emits the interpreter template for the breakpoint pseudo-bytecode:
+// fetches the original (unpatched) bytecode from the runtime, posts the
+// breakpoint event, then dispatches the original bytecode.
+void TemplateTable::_breakpoint() {
+
+ // Note: We get here even if we are single stepping..
+ // jbug insists on setting breakpoints at every bytecode
+ // even if we are in single step mode.
+
+ transition(vtos, vtos);
+
+ // get the unpatched byte code
+ __ mov(R1, Rmethod);
+ __ mov(R2, Rbcp);
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), R1, R2);
+ // Keep the returned bytecode in Rtmp_save0 across the next runtime call
+ // (presumably a register that call_VM preserves - confirm).
+#ifdef AARCH64
+ __ sxtw(Rtmp_save0, R0);
+#else
+ __ mov(Rtmp_save0, R0);
+#endif // AARCH64
+
+ // post the breakpoint event
+ __ mov(R1, Rmethod);
+ __ mov(R2, Rbcp);
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), R1, R2);
+
+ // complete the execution of original bytecode
+ __ mov(R3_bytecode, Rtmp_save0);
+ __ dispatch_only_normal(vtos);
+}
+
+
+//----------------------------------------------------------------------------------------------------
+// Exceptions
+
+// Emits the interpreter template for the athrow bytecode:
+// moves the reference from R0_tos into Rexception_obj, null-checks it and
+// branches to the interpreter's throw_exception entry.
+void TemplateTable::athrow() {
+ transition(atos, vtos);
+ __ mov(Rexception_obj, R0_tos);
+ __ null_check(Rexception_obj, Rtemp);
+ __ b(Interpreter::throw_exception_entry());
+}
+
+
+//----------------------------------------------------------------------------------------------------
+// Synchronization
+//
+// Note: monitorenter & exit are symmetric routines; which is reflected
+// in the assembly code structure as well
+//
+// Stack layout:
+//
+// [expressions ] <--- Rstack_top = expression stack top
+// ..
+// [expressions ]
+// [monitor entry] <--- monitor block top = expression stack bot
+// ..
+// [monitor entry]
+// [frame data ] <--- monitor block bot
+// ...
+// [saved FP ] <--- FP
+
+
+// Emits the interpreter template for the monitorenter bytecode.
+//
+// 1. Null-check the object.
+// 2. Search the frame's monitor block for a free entry; the search stops
+//    early at an entry that already holds the same object.
+// 3. If no free entry was found, grow the monitor block by one entry,
+//    moving the expression stack contents down by entry_size.
+// 4. Store the object in the entry, lock it, check for stack overflow and
+//    dispatch to the next bytecode.
+void TemplateTable::monitorenter() {
+  transition(atos, vtos);
+
+  const Register Robj = R0_tos;
+  const Register Rentry = R1_tmp;
+
+  // check for NULL object
+  __ null_check(Robj, Rtemp);
+
+  const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
+  assert (entry_size % StackAlignmentInBytes == 0, "keep stack alignment");
+  Label allocate_monitor, allocated;
+
+  // initialize entry pointer
+  __ mov(Rentry, 0);                             // points to free slot or NULL
+
+  // find a free slot in the monitor block (result in Rentry)
+  { Label loop, exit;
+    const Register Rcur = R2_tmp;
+    const Register Rcur_obj = Rtemp;
+    const Register Rbottom = R3_tmp;
+    assert_different_registers(Robj, Rentry, Rcur, Rbottom, Rcur_obj);
+
+    __ ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+                                 // points to current entry, starting with top-most entry
+    __ sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize);
+                                 // points to word before bottom of monitor block
+
+    __ cmp(Rcur, Rbottom);                       // check if there are no monitors
+#ifndef AARCH64
+    __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
+                                 // prefetch monitor's object for the first iteration
+#endif // !AARCH64
+    __ b(allocate_monitor, eq);                  // there are no monitors, skip searching
+
+    __ bind(loop);
+#ifdef AARCH64
+    __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
+#endif // AARCH64
+    __ cmp(Rcur_obj, 0);                         // check if current entry is used
+    __ mov(Rentry, Rcur, eq);                    // if not used then remember entry
+
+    __ cmp(Rcur_obj, Robj);                      // check if current entry is for same object
+    __ b(exit, eq);                              // if same object then stop searching
+
+    __ add(Rcur, Rcur, entry_size);              // otherwise advance to next entry
+
+    __ cmp(Rcur, Rbottom);                       // check if bottom reached
+#ifndef AARCH64
+    __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
+                                 // prefetch monitor's object for the next iteration
+#endif // !AARCH64
+    __ b(loop, ne);                              // if not at bottom then check this entry
+    __ bind(exit);
+  }
+
+  __ cbnz(Rentry, allocated);                    // check if a slot has been found; if found, continue with that one
+
+  __ bind(allocate_monitor);
+
+  // allocate one if there's no free slot
+  { Label loop;
+    assert_different_registers(Robj, Rentry, R2_tmp, Rtemp);
+
+    // 1. compute new pointers
+
+#ifdef AARCH64
+    __ check_extended_sp(Rtemp);
+    __ sub(SP, SP, entry_size);                  // adjust extended SP
+    __ mov(Rtemp, SP);
+    __ str(Rtemp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
+#endif // AARCH64
+
+    __ ldr(Rentry, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+                                 // old monitor block top / expression stack bottom
+
+    __ sub(Rstack_top, Rstack_top, entry_size);  // move expression stack top
+    __ check_stack_top_on_expansion();
+
+    __ sub(Rentry, Rentry, entry_size);          // move expression stack bottom
+
+    __ mov(R2_tmp, Rstack_top);                  // set start value for copy loop
+
+    __ str(Rentry, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+                                 // set new monitor block top
+
+    // 2. move expression stack contents
+
+    __ cmp(R2_tmp, Rentry);                      // check if expression stack is empty
+#ifndef AARCH64
+    __ ldr(Rtemp, Address(R2_tmp, entry_size), ne); // load expression stack word from old location
+#endif // !AARCH64
+    __ b(allocated, eq);
+
+    __ bind(loop);
+#ifdef AARCH64
+    __ ldr(Rtemp, Address(R2_tmp, entry_size));  // load expression stack word from old location
+#endif // AARCH64
+    __ str(Rtemp, Address(R2_tmp, wordSize, post_indexed)); // store expression stack word at new location
+                                                            // and advance to next word
+    __ cmp(R2_tmp, Rentry);                      // check if bottom reached
+#ifndef AARCH64
+    // Uses R2_tmp like every other access to the copy cursor in this loop.
+    __ ldr(Rtemp, Address(R2_tmp, entry_size), ne); // load expression stack word from old location
+#endif // !AARCH64
+    __ b(loop, ne);                              // if not at bottom then copy next word
+  }
+
+  // call run-time routine
+
+  // Rentry: points to monitor entry
+  __ bind(allocated);
+
+  // Increment bcp to point to the next bytecode, so exception handling for async. exceptions work correctly.
+  // The object has already been popped from the stack, so the expression stack looks correct.
+  __ add(Rbcp, Rbcp, 1);
+
+  __ str(Robj, Address(Rentry, BasicObjectLock::obj_offset_in_bytes())); // store object
+  __ lock_object(Rentry);
+
+  // check to make sure this monitor doesn't cause stack overflow after locking
+  __ save_bcp();  // in case of exception
+  __ arm_stack_overflow_check(0, Rtemp);
+
+  // The bcp has already been incremented. Just need to dispatch to next instruction.
+  __ dispatch_next(vtos);
+}
+
+
+// Emits the interpreter template for the monitorexit bytecode.
+// Null-checks the object, searches the frame's monitor block for the entry
+// holding it and unlocks that entry. If no entry matches, the runtime is
+// called to throw an illegal monitor state exception.
+void TemplateTable::monitorexit() {
+ transition(atos, vtos);
+
+ const Register Robj = R0_tos;
+ const Register Rcur = R1_tmp;
+ const Register Rbottom = R2_tmp;
+ const Register Rcur_obj = Rtemp;
+
+ // check for NULL object
+ __ null_check(Robj, Rtemp);
+
+ const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
+ Label found, throw_exception;
+
+ // find matching slot
+ { Label loop;
+ assert_different_registers(Robj, Rcur, Rbottom, Rcur_obj);
+
+ __ ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+ // points to current entry, starting with top-most entry
+ __ sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize);
+ // points to word before bottom of monitor block
+
+ __ cmp(Rcur, Rbottom); // check if bottom reached
+#ifndef AARCH64
+ __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
+ // prefetch monitor's object for the first iteration
+#endif // !AARCH64
+ __ b(throw_exception, eq); // throw exception if there are no monitors
+
+ __ bind(loop);
+#ifdef AARCH64
+ __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
+#endif // AARCH64
+ // check if current entry is for same object
+ __ cmp(Rcur_obj, Robj);
+ __ b(found, eq); // if same object then stop searching
+ __ add(Rcur, Rcur, entry_size); // otherwise advance to next entry
+ __ cmp(Rcur, Rbottom); // check if bottom reached
+#ifndef AARCH64
+ __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
+ // prefetch monitor's object for the next iteration
+#endif // !AARCH64
+ __ b (loop, ne); // if not at bottom then check this entry
+ }
+
+ // error handling. Unlocking was not block-structured
+ // Reached by branch when the block is empty, or by fall-through when the
+ // search hits the bottom without a match.
+ __ bind(throw_exception);
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
+ __ should_not_reach_here();
+
+ // call run-time routine
+ // Rcur: points to monitor entry
+ __ bind(found);
+ __ push_ptr(Robj); // make sure object is on stack (contract with oopMaps)
+ __ unlock_object(Rcur);
+ __ pop_ptr(Robj); // discard object
+}
+
+
+//----------------------------------------------------------------------------------------------------
+// Wide instructions
+
+// Emits the interpreter template for the wide prefix bytecode:
+// loads the bytecode that follows the prefix (at bcp+1) and jumps through
+// the Interpreter::_wentry_point table indexed by that bytecode.
+void TemplateTable::wide() {
+ transition(vtos, vtos);
+ __ ldrb(R3_bytecode, at_bcp(1));
+
+ // The table address is kept as an inlined literal, bound after the jump.
+ InlinedAddress Ltable((address)Interpreter::_wentry_point);
+ __ ldr_literal(Rtemp, Ltable);
+ __ indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
+
+ __ nop(); // to avoid filling CPU pipeline with invalid instructions
+ __ nop();
+ __ bind_literal(Ltable);
+}
+
+
+//----------------------------------------------------------------------------------------------------
+// Multi arrays
+
+// Emits the interpreter template for the multianewarray bytecode.
+// The dimension count is the byte at bcp+3. The address of the first
+// dimension on the expression stack is passed to
+// InterpreterRuntime::multianewarray, and afterwards the dimensions are
+// removed from the expression stack.
+void TemplateTable::multianewarray() {
+  transition(vtos, atos);
+
+  // Still needed after the runtime call, hence Rtmp_save0
+  // (presumably preserved across call_VM - confirm).
+  const Register Rndims     = Rtmp_save0;
+  const Register Rfirst_dim = R1;
+
+  __ ldrb(Rndims, at_bcp(3));   // get number of dimensions
+
+  // The last dimension is on top of stack; the runtime wants the address of
+  // the first one:
+  //   first_addr = last_addr + ndims * stackElementSize - 1*wordsize
+  // where the final wordSize is subtracted to point to the beginning of the array.
+  __ add(Rtemp, Rstack_top, AsmOperand(Rndims, lsl, Interpreter::logStackElementSize));
+  __ sub(Rfirst_dim, Rtemp, wordSize);
+
+  call_VM(R0, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), Rfirst_dim);
+
+  // Drop the ndims dimension slots from the expression stack.
+  __ add(Rstack_top, Rstack_top, AsmOperand(Rndims, lsl, Interpreter::logStackElementSize));
+  // No explicit MacroAssembler::StoreStore is emitted here: it is useless
+  // because the runtime exit path already includes it.
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/templateTable_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_TEMPLATETABLE_ARM_HPP
+#define CPU_ARM_VM_TEMPLATETABLE_ARM_HPP
+
+ // ARM-specific static helper declarations for TemplateTable.
+ // NOTE(review): this header has no class wrapper of its own; presumably it
+ // is included inside the TemplateTable class body - confirm.
+
+ // Common set-up for the invoke bytecodes. Only 'method' is mandatory; the
+ // remaining registers default to noreg when the caller does not need them.
+ static void prepare_invoke(int byte_no,
+ Register method, // linked method (or i-klass)
+ Register index = noreg, // itable index, MethodType, etc.
+ Register recv = noreg, // if caller wants to see it
+ Register flags = noreg // if caller wants to test it
+ );
+
+ static void invokevirtual_helper(Register index, Register recv,
+ Register flags);
+
+ // Emits a memory barrier for the given ordering constraint using 'tmp'.
+ // Callers pass preserve_flags = true when the barrier sits between a
+ // compare and the branch that consumes it.
+ // NOTE(review): the role of load_tgt is not visible here - confirm.
+ static void volatile_barrier(MacroAssembler::Membar_mask_bits order_constraint,
+ Register tmp,
+ bool preserve_flags = false,
+ Register load_tgt = noreg);
+
+ // Helpers
+ static void index_check(Register array, Register index);
+ static void index_check_without_pop(Register array, Register index);
+
+ static void get_local_base_addr(Register r, Register index);
+
+ // Address builders for local variable slots, one per value kind
+ // (i/a/f/d, going by the names).
+ static Address load_iaddress(Register index, Register scratch);
+ static Address load_aaddress(Register index, Register scratch);
+ static Address load_faddress(Register index, Register scratch);
+ static Address load_daddress(Register index, Register scratch);
+
+ static void load_category2_local(Register Rlocal_index, Register tmp);
+ static void store_category2_local(Register Rlocal_index, Register tmp);
+
+ static Address get_array_elem_addr(BasicType elemType, Register array, Register index, Register temp);
+
+ static void jvmti_post_fast_field_mod(TosState state);
+
+#endif // CPU_ARM_VM_TEMPLATETABLE_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/vmStructs_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_VMSTRUCTS_ARM_HPP
+#define CPU_ARM_VM_VMSTRUCTS_ARM_HPP
+
+// These are the CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+// CPU-specific fields: the only entry is JavaFrameAnchor::_last_Java_fp,
+// declared as a volatile non-static field of type intptr_t*. The
+// JavaCallWrapper section is intentionally empty.
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ \
+ /******************************/ \
+ /* JavaCallWrapper */ \
+ /******************************/ \
+ /******************************/ \
+ /* JavaFrameAnchor */ \
+ /******************************/ \
+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*)
+
+// No CPU-specific types are declared for this port.
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+
+// No CPU-specific int constants are declared for this port.
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+// No CPU-specific long constants are declared for this port.
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // CPU_ARM_VM_VMSTRUCTS_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/vm_version_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_VM_VERSION_ARM_HPP
+#define CPU_ARM_VM_VM_VERSION_ARM_HPP
+
+#include "runtime/globals_extension.hpp"
+#include "runtime/vm_version.hpp"
+
+// CPU feature queries for the unified ARM port.
+// The AARCH64 build answers most queries with constants; the 32-bit build
+// derives them from the detected architecture version (_arm_arch), the
+// feature bits in _features and the kernel user-helper version.
+class VM_Version: public Abstract_VM_Version {
+ friend class JVMCIVMStructs;
+
+ // Backs has_simd() in the AARCH64 build; the 32-bit build tests the
+ // simd_m bit of _features instead.
+ static bool _has_simd;
+
+ protected:
+ // Are we done with vm version initialization
+ static bool _is_initialized;
+
+ public:
+ static void initialize();
+ static bool is_initialized() { return _is_initialized; }
+
+#ifdef AARCH64
+
+ public:
+ // These instruction-support queries are always true in the AARCH64 build.
+ static bool supports_ldrex() { return true; }
+ static bool supports_ldrexd() { return true; }
+ static bool supports_movw() { return true; }
+
+ // Override Abstract_VM_Version implementation
+ static bool use_biased_locking();
+
+ static bool has_simd() { return _has_simd; }
+ // In this build VFP support is reported exactly when SIMD is.
+ static bool has_vfp() { return has_simd(); }
+ static bool simd_math_is_compliant() { return true; }
+
+ static bool prefer_moves_over_load_literal() { return true; }
+
+#else
+
+ protected:
+ // Bit positions of the individual features.
+ enum Feature_Flag {
+ vfp = 0,
+ vfp3_32 = 1,
+ simd = 2,
+ };
+
+ // Bit masks built from the positions above, tested against _features.
+ enum Feature_Flag_Set {
+ unknown_m = 0,
+ all_features_m = -1,
+
+ vfp_m = 1 << vfp,
+ vfp3_32_m = 1 << vfp3_32,
+ simd_m = 1 << simd,
+ };
+
+ // The value stored by "STR PC, [addr]" instruction can be either
+ // (address of this instruction + 8) or (address of this instruction + 12)
+ // depending on hardware implementation.
+ // This adjustment is calculated in runtime.
+ static int _stored_pc_adjustment;
+
+ // ARM architecture version: 5 = ARMv5, 6 = ARMv6, 7 = ARMv7 etc.
+ static int _arm_arch;
+
+ // linux kernel atomic helper function version info
+ // __kuser_cmpxchg() if version >= 2
+ // __kuser_cmpxchg64() if version >= 5
+ static int _kuser_helper_version;
+
+ // NOTE: these are preprocessor macros, so they are not scoped to the class.
+#define KUSER_HELPER_VERSION_ADDR 0xffff0ffc
+#define KUSER_VERSION_CMPXCHG32 2
+#define KUSER_VERSION_CMPXCHG64 5
+
+ // Read additional info using OS-specific interfaces
+ static void get_os_cpu_info();
+
+ public:
+ static void early_initialize();
+
+ static int arm_arch() { return _arm_arch; }
+ static int stored_pc_adjustment() { return _stored_pc_adjustment; }
+ // Instruction-support queries, keyed on the architecture version:
+ // rev and ldrex need ARMv6 or later, movw and ldrexd need ARMv7 or later.
+ static bool supports_rev() { return _arm_arch >= 6; }
+ static bool supports_ldrex() { return _arm_arch >= 6; }
+ static bool supports_movw() { return _arm_arch >= 7; }
+ static bool supports_ldrexd() { return _arm_arch >= 7; }
+ static bool supports_compare_and_exchange() { return true; }
+ static bool supports_kuser_cmpxchg32() { return _kuser_helper_version >= KUSER_VERSION_CMPXCHG32; }
+ static bool supports_kuser_cmpxchg64() { return _kuser_helper_version >= KUSER_VERSION_CMPXCHG64; }
+ // Override Abstract_VM_Version implementation
+ static bool use_biased_locking();
+ static const char* vm_info_string();
+
+ // Feature queries backed by the corresponding mask bit in _features.
+ static bool has_vfp() { return (_features & vfp_m) != 0; }
+ static bool has_vfp3_32() { return (_features & vfp3_32_m) != 0; }
+ static bool has_simd() { return (_features & simd_m) != 0; }
+
+ static bool simd_math_is_compliant() { return false; }
+
+ // Moves are preferred over literal loads only when movw is available.
+ static bool prefer_moves_over_load_literal() { return supports_movw(); }
+
+ friend class VM_Version_StubGenerator;
+
+#endif // AARCH64
+};
+
+#endif // CPU_ARM_VM_VM_VERSION_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/java.hpp"
+#include "runtime/os.inline.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "vm_version_arm.hpp"
+
+int VM_Version::_stored_pc_adjustment = 4; // provisional value; initialize() overwrites it with the result of the get_cpu_info stub
+int VM_Version::_arm_arch = 5; // starting assumption (ARMv5); presumably refined by get_os_cpu_info() - TODO confirm
+bool VM_Version::_is_initialized = false; // becomes true as the last step of initialize()
+int VM_Version::_kuser_helper_version = 0; // 0 until initialize() reads the word at KUSER_HELPER_VERSION_ADDR
+
+extern "C" { // function-pointer types used by initialize() to call the generated probe stubs
+ typedef int (*get_cpu_info_t)(); // signature of the get_cpu_info stub
+ typedef bool (*check_vfp_t)(double *d); // shared by the check_vfp and check_vfp3_32 stubs; d is the store target
+ typedef bool (*check_simd_t)(); // signature of the check_simd stub
+}
+
+#define __ _masm->
+
+class VM_Version_StubGenerator: public StubCodeGenerator { // emits the small probe stubs that VM_Version::initialize() generates and calls
+ public:
+
+ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
+
+ address generate_get_cpu_info() { // emits a stub whose result initialize() stores in _stored_pc_adjustment; returns the stub entry
+ StubCodeMark mark(this, "VM_Version", "get_cpu_info");
+ address start = __ pc();
+
+ __ mov(R0, PC); // R0 = PC as read by a register move
+ __ push(PC); // store PC to the stack ...
+ __ pop(R1); // ... and load the stored value back into R1
+ __ sub(R0, R1, R0); // presumably R0 = R1 - R0 (ARM operand order) - TODO confirm against the assembler
+ // return the result in R0
+ __ bx(LR);
+
+ return start;
+ };
+
+ address generate_check_vfp() { // emits a stub that stores D0 through its pointer argument and returns 1; returns the stub entry
+ StubCodeMark mark(this, "VM_Version", "check_vfp");
+ address start = __ pc();
+
+ __ fstd(D0, Address(R0)); // the probe instruction; NOTE(review): presumably a fault here is recognized via check_vfp_fault_instr - confirm
+ __ mov(R0, 1); // result value 1 (true)
+ __ bx(LR);
+
+ return start;
+ };
+
+ address generate_check_vfp3_32() { // same shape as check_vfp, but the probe store uses D16 instead of D0
+ StubCodeMark mark(this, "VM_Version", "check_vfp3_32");
+ address start = __ pc();
+
+ __ fstd(D16, Address(R0)); // the probe instruction
+ __ mov(R0, 1); // result value 1 (true)
+ __ bx(LR);
+
+ return start;
+ };
+
+ address generate_check_simd() { // emits a stub that executes one vcnt instruction and returns 1; returns the stub entry
+ StubCodeMark mark(this, "VM_Version", "check_simd");
+ address start = __ pc();
+
+ __ vcnt(Stemp, Stemp); // the probe instruction; its result in Stemp is not used by the stub
+ __ mov(R0, 1); // result value 1 (true)
+ __ bx(LR);
+
+ return start;
+ };
+};
+
+#undef __
+
+
+extern "C" address check_vfp3_32_fault_instr; // set in initialize() to the check_vfp3_32 stub entry before the stub is called
+extern "C" address check_vfp_fault_instr; // set in initialize() to the check_vfp stub entry before the stub is called
+extern "C" address check_simd_fault_instr; // set in initialize() to the check_simd stub entry; NOTE(review): defined and consumed elsewhere - confirm where
+
+void VM_Version::initialize() { // detects CPU features with generated probe stubs, then sets VM flags and feature fields to match
+ ResourceMark rm;
+
+ // Making this stub must be FIRST use of assembler
+ const int stub_size = 128;
+ BufferBlob* stub_blob = BufferBlob::create("get_cpu_info", stub_size);
+ if (stub_blob == NULL) {
+ vm_exit_during_initialization("Unable to allocate get_cpu_info stub");
+ }
+
+ CodeBuffer c(stub_blob); // every probe stub below is generated into this one blob
+ VM_Version_StubGenerator g(&c);
+ address get_cpu_info_pc = g.generate_get_cpu_info();
+ get_cpu_info_t get_cpu_info = CAST_TO_FN_PTR(get_cpu_info_t, get_cpu_info_pc);
+
+ int pc_adjustment = get_cpu_info(); // run the freshly generated stub
+
+ VM_Version::_stored_pc_adjustment = pc_adjustment;
+
+#ifndef __SOFTFP__
+ address check_vfp_pc = g.generate_check_vfp();
+ check_vfp_t check_vfp = CAST_TO_FN_PTR(check_vfp_t, check_vfp_pc);
+
+ check_vfp_fault_instr = (address)check_vfp; // published before the call; NOTE(review): presumably lets a fault handler recognize the probe - confirm
+ double dummy;
+ if (check_vfp(&dummy)) {
+ _features |= vfp_m;
+ }
+
+#ifdef COMPILER2
+ if (has_vfp()) { // the two remaining probes run only when the VFP probe succeeded
+ address check_vfp3_32_pc = g.generate_check_vfp3_32();
+ check_vfp_t check_vfp3_32 = CAST_TO_FN_PTR(check_vfp_t, check_vfp3_32_pc);
+ check_vfp3_32_fault_instr = (address)check_vfp3_32;
+ double dummy; // shadows the outer dummy; only a store target for the probe
+ if (check_vfp3_32(&dummy)) {
+ _features |= vfp3_32_m;
+ }
+
+ address check_simd_pc =g.generate_check_simd();
+ check_simd_t check_simd = CAST_TO_FN_PTR(check_simd_t, check_simd_pc);
+ check_simd_fault_instr = (address)check_simd;
+ if (check_simd()) {
+ _features |= simd_m;
+ }
+ }
+#endif // COMPILER2
+#endif // !__SOFTFP__
+
+
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { // from here on: switch off intrinsics/instructions this port does not provide
+ warning("AES intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+
+ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
+ warning("AES instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAES, false);
+ }
+
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+
+ if (UseFMA) {
+ warning("FMA instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseFMA, false);
+ }
+
+ if (UseSHA) {
+ warning("SHA instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseSHA, false);
+ }
+
+ if (UseSHA1Intrinsics) {
+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+ }
+
+ if (UseSHA256Intrinsics) {
+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+ }
+
+ if (UseSHA512Intrinsics) {
+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+ }
+
+ if (UseCRC32Intrinsics) {
+ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) // warn only when the flag was set explicitly
+ warning("CRC32 intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+ }
+
+ if (UseCRC32CIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) // warn only when the flag was set explicitly
+ warning("CRC32C intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
+ }
+
+ if (UseAdler32Intrinsics) {
+ warning("Adler32 intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+ }
+
+ if (UseVectorizedMismatchIntrinsic) {
+ warning("vectorizedMismatch intrinsic is not available on this CPU.");
+ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
+ }
+
+ get_os_cpu_info();
+
+ _kuser_helper_version = *(int*)KUSER_HELPER_VERSION_ADDR; // read the version word stored at the fixed address KUSER_HELPER_VERSION_ADDR
+
+#ifdef COMPILER2
+ // C2 is only supported on v7+ VFP at this time
+ if (_arm_arch < 7 || !has_vfp()) {
+ vm_exit_during_initialization("Server VM is only supported on ARMv7+ VFP");
+ }
+#endif
+
+ // armv7 has the ldrexd instruction that can be used to implement cx8
+ // armv5 with linux >= 3.1 can use kernel helper routine
+ _supports_cx8 = (supports_ldrexd() || supports_kuser_cmpxchg64());
+ // ARM doesn't have special instructions for these but ldrex/ldrexd
+ // enable shorter instruction sequences than the ones based on cas.
+ _supports_atomic_getset4 = supports_ldrex();
+ _supports_atomic_getadd4 = supports_ldrex();
+ _supports_atomic_getset8 = supports_ldrexd();
+ _supports_atomic_getadd8 = supports_ldrexd();
+
+#ifdef COMPILER2
+ assert(_supports_cx8 && _supports_atomic_getset4 && _supports_atomic_getadd4
+ && _supports_atomic_getset8 && _supports_atomic_getadd8, "C2: atomic operations must be supported");
+#endif
+ char buf[512];
+ jio_snprintf(buf, sizeof(buf), "(ARMv%d)%s%s%s",
+ _arm_arch,
+ (has_vfp() ? ", vfp" : ""),
+ (has_vfp3_32() ? ", vfp3-32" : ""),
+ (has_simd() ? ", simd" : ""));
+
+ // buf always starts with "(ARMv<n>)"; each detected feature follows as ", <name>"
+ _features_string = os::strdup(buf);
+
+ if (has_simd()) {
+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+ FLAG_SET_DEFAULT(UsePopCountInstruction, true);
+ }
+ }
+
+ AllocatePrefetchDistance = 128;
+
+#ifdef COMPILER2
+ FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+
+ if (FLAG_IS_DEFAULT(MaxVectorSize)) {
+ // FLAG_SET_DEFAULT(MaxVectorSize, has_simd() ? 16 : 8);
+ // SIMD/NEON can use 16, but default is 8 because currently
+ // larger than 8 will disable instruction scheduling
+ FLAG_SET_DEFAULT(MaxVectorSize, 8);
+ }
+
+ if (MaxVectorSize > 16) { // a requested size above 16 is replaced by 8 (not clamped to 16)
+ FLAG_SET_DEFAULT(MaxVectorSize, 8);
+ }
+#endif
+
+ if (FLAG_IS_DEFAULT(Tier4CompileThreshold)) { // port-specific tiered-compilation thresholds, applied only over default values
+ Tier4CompileThreshold = 10000;
+ }
+ if (FLAG_IS_DEFAULT(Tier3InvocationThreshold)) {
+ Tier3InvocationThreshold = 1000;
+ }
+ if (FLAG_IS_DEFAULT(Tier3CompileThreshold)) {
+ Tier3CompileThreshold = 5000;
+ }
+ if (FLAG_IS_DEFAULT(Tier3MinInvocationThreshold)) {
+ Tier3MinInvocationThreshold = 500;
+ }
+
+ FLAG_SET_DEFAULT(TypeProfileLevel, 0); // unsupported
+
+ // This machine does not allow unaligned memory accesses
+ if (UseUnalignedAccesses) {
+ if (!FLAG_IS_DEFAULT(UseUnalignedAccesses)) // warn only when the flag was set explicitly
+ warning("Unaligned memory access is not available on this CPU");
+ FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+ }
+
+ _is_initialized = true;
+}
+
+bool VM_Version::use_biased_locking() {
+  get_os_cpu_info();
+  // Biased Locking lengthens the locking fast path slightly. On a
+  // uniprocessor running ARMv6 or later, CAS is cheap enough that
+  // testing showed no benefit from enabling it, so it stays off there.
+  // ARMv5 and multiprocessor systems behave differently and keep it on.
+  //
+  // In short: enabled unless "uniprocessor and newer than ARMv5".
+  const bool multiprocessor = os::is_MP();
+  if (multiprocessor) return true;
+  return arm_arch() <= 5;
+}
+
+#define EXP
+
+// Port-local replacement for the Abstract_VM_Version mode string, kept as a
+// temporary hook for experimental builds: EXP is appended to every string.
+const char* VM_Version::vm_info_string() {
+  const bool sharing = UseSharedSpaces;
+  // One string pair per execution mode; the ", sharing" variant is chosen by the flag.
+  switch (Arguments::mode()) {
+    case Arguments::_int:   return sharing ? "interpreted mode, sharing" EXP : "interpreted mode" EXP;
+    case Arguments::_mixed: return sharing ? "mixed mode, sharing" EXP : "mixed mode" EXP;
+    case Arguments::_comp:  return sharing ? "compiled mode, sharing" EXP : "compiled mode" EXP;
+  }
+  // Any other mode value is a programming error.
+  ShouldNotReachHere();
+  return "";
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/java.hpp"
+#include "runtime/os.inline.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "vm_version_arm.hpp"
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+#ifndef HWCAP_AES
+#define HWCAP_AES (1 << 3) // fallback when <asm/hwcap.h> lacks it; parenthesized so the macro is safe inside any expression
+#endif
+
+bool VM_Version::_is_initialized = false; // becomes true as the last step of initialize()
+bool VM_Version::_has_simd = false; // updated in initialize() from the check_simd stub (COMPILER2 builds only)
+
+extern "C" {
+ typedef bool (*check_simd_t)(); // signature used to call the generated check_simd stub
+}
+
+
+#ifdef COMPILER2
+
+#define __ _masm->
+
+class VM_Version_StubGenerator: public StubCodeGenerator { // emits the SIMD probe stub that VM_Version::initialize() calls
+ public:
+
+ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
+
+ address generate_check_simd() { // emits a stub that executes one vcnt instruction and returns 1; returns the stub entry
+ StubCodeMark mark(this, "VM_Version", "check_simd");
+ address start = __ pc();
+
+ __ vcnt(Stemp, Stemp); // the probe instruction; its result in Stemp is not used by the stub
+ __ mov(R0, 1); // result value 1 (true)
+ __ ret(LR);
+
+ return start;
+ };
+};
+
+#undef __
+
+#endif // COMPILER2
+
+
+
+extern "C" address check_simd_fault_instr; // set in initialize() to the check_simd stub entry before the stub is called; defined elsewhere
+
+
+void VM_Version::initialize() { // disables unsupported intrinsics, probes SIMD (COMPILER2) and AES (AT_HWCAP), then sets VM flags and feature fields
+ ResourceMark rm;
+
+ // Making this stub must be FIRST use of assembler
+ const int stub_size = 128;
+ BufferBlob* stub_blob = BufferBlob::create("get_cpu_info", stub_size); // the blob keeps the "get_cpu_info" name although only check_simd is generated into it below
+ if (stub_blob == NULL) {
+ vm_exit_during_initialization("Unable to allocate get_cpu_info stub");
+ }
+
+ if (UseFMA) {
+ warning("FMA instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseFMA, false);
+ }
+
+ if (UseSHA) {
+ warning("SHA instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseSHA, false);
+ }
+
+ if (UseSHA1Intrinsics) {
+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+ }
+
+ if (UseSHA256Intrinsics) {
+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+ }
+
+ if (UseSHA512Intrinsics) {
+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+ }
+
+ if (UseCRC32Intrinsics) {
+ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) // warn only when the flag was set explicitly
+ warning("CRC32 intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+ }
+
+ if (UseCRC32CIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) // warn only when the flag was set explicitly
+ warning("CRC32C intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
+ }
+
+ if (UseAdler32Intrinsics) {
+ warning("Adler32 intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+ }
+
+ if (UseVectorizedMismatchIntrinsic) {
+ warning("vectorizedMismatch intrinsic is not available on this CPU.");
+ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
+ }
+
+ CodeBuffer c(stub_blob);
+
+#ifdef COMPILER2
+ VM_Version_StubGenerator g(&c);
+
+ address check_simd_pc = g.generate_check_simd();
+ if (check_simd_pc != NULL) {
+ check_simd_t check_simd = CAST_TO_FN_PTR(check_simd_t, check_simd_pc);
+ check_simd_fault_instr = (address)check_simd; // published before the call; NOTE(review): presumably lets a fault handler recognize the probe - confirm
+ _has_simd = check_simd();
+ } else {
+ assert(! _has_simd, "default _has_simd value must be 'false'");
+ }
+#endif
+
+ unsigned long auxv = getauxval(AT_HWCAP); // hardware capability bits supplied through the auxiliary vector
+
+ char buf[512];
+ jio_snprintf(buf, sizeof(buf), "AArch64%s",
+ ((auxv & HWCAP_AES) ? ", aes" : ""));
+
+ _features_string = os::strdup(buf);
+
+#ifdef COMPILER2
+ if (auxv & HWCAP_AES) {
+ if (FLAG_IS_DEFAULT(UseAES)) {
+ FLAG_SET_DEFAULT(UseAES, true);
+ }
+ if (!UseAES) {
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ } else {
+ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+ }
+ }
+ } else
+#endif // COMPILER2: when defined, the 'if' below is the else-branch of the HWCAP_AES test; otherwise it always runs
+ if (UseAES || UseAESIntrinsics) {
+ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
+ warning("AES instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAES, false);
+ }
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("AES intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+ }
+
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+
+ _supports_cx8 = true; // all five atomic capabilities are set unconditionally in this 64-bit implementation
+ _supports_atomic_getset4 = true;
+ _supports_atomic_getadd4 = true;
+ _supports_atomic_getset8 = true;
+ _supports_atomic_getadd8 = true;
+
+ // TODO-AARCH64 revise C2 flags
+
+ if (has_simd()) {
+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+ FLAG_SET_DEFAULT(UsePopCountInstruction, true);
+ }
+ }
+
+ AllocatePrefetchDistance = 128;
+
+#ifdef COMPILER2
+ FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+
+ if (FLAG_IS_DEFAULT(MaxVectorSize)) {
+ // FLAG_SET_DEFAULT(MaxVectorSize, has_simd() ? 16 : 8);
+ // SIMD/NEON can use 16, but default is 8 because currently
+ // larger than 8 will disable instruction scheduling
+ FLAG_SET_DEFAULT(MaxVectorSize, 8);
+ }
+
+ if (MaxVectorSize > 16) { // a requested size above 16 is replaced by 8 (not clamped to 16)
+ FLAG_SET_DEFAULT(MaxVectorSize, 8);
+ }
+#endif
+
+ if (FLAG_IS_DEFAULT(Tier4CompileThreshold)) { // port-specific tiered-compilation thresholds, applied only over default values
+ Tier4CompileThreshold = 10000;
+ }
+ if (FLAG_IS_DEFAULT(Tier3InvocationThreshold)) {
+ Tier3InvocationThreshold = 1000;
+ }
+ if (FLAG_IS_DEFAULT(Tier3CompileThreshold)) {
+ Tier3CompileThreshold = 5000;
+ }
+ if (FLAG_IS_DEFAULT(Tier3MinInvocationThreshold)) {
+ Tier3MinInvocationThreshold = 500;
+ }
+
+ FLAG_SET_DEFAULT(TypeProfileLevel, 0); // unsupported
+
+ // This machine does not allow unaligned memory accesses
+ if (UseUnalignedAccesses) {
+ if (!FLAG_IS_DEFAULT(UseUnalignedAccesses)) // warn only when the flag was set explicitly
+ warning("Unaligned memory access is not available on this CPU");
+ FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+ }
+
+ _is_initialized = true;
+}
+
+bool VM_Version::use_biased_locking() {
+ // TODO-AARCH64 measure performance and revise
+
+ // Rationale (same wording as the 32-bit ARM implementation of this method):
+ // the cost of CAS on uniprocessor ARM v6 and later is low compared to the
+ // overhead of the slightly longer Biased Locking execution path, and testing
+ // shows no improvement with Biased Locking enabled on ARMv6 and higher
+ // uniprocessor systems. The situation is different on MP systems.
+ //
+ // This implementation performs no architecture-version check, so Biased
+ // Locking is enabled exactly when os::is_MP() reports a multiprocessor.
+ return os::is_MP();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/vmreg_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "code/vmreg.hpp"
+
+void VMRegImpl::set_regName() { // names every regName[] slot: GPRs first, then FPRs (non-__SOFTFP__ builds), then a filler string
+  int slot = 0;
+  Register gpr = ::as_Register(0);
+  while (slot < ConcreteRegisterImpl::max_gpr) {
+    for (int n = 1 << ConcreteRegisterImpl::log_vmregs_per_gpr; n > 0; n--) { // all slots of one register share its name
+      regName[slot++] = gpr->name();
+    }
+    gpr = gpr->successor();
+  }
+#ifndef __SOFTFP__
+  FloatRegister fpr = ::as_FloatRegister(0);
+  while (slot < ConcreteRegisterImpl::max_fpr) {
+    for (int n = 1 << ConcreteRegisterImpl::log_vmregs_per_fpr; n > 0; n--) { // same scheme for floating-point registers
+      regName[slot++] = fpr->name();
+    }
+    fpr = fpr->successor();
+  }
+#endif
+  while (slot < ConcreteRegisterImpl::number_of_registers) { // whatever remains is neither a GPR nor an FPR slot
+    regName[slot++] = "NON-GPR-FPR";
+  }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/vmreg_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_VMREG_ARM_HPP
+#define CPU_ARM_VM_VMREG_ARM_HPP
+
+ inline bool is_Register() { // true when value() falls in the general-purpose slot range below max_gpr
+ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; // unsigned compare: e.g. value() == -1 becomes a large unsigned number and is rejected
+ }
+
+ inline bool is_FloatRegister() { // true when value() falls in the half-open range [max_gpr, max_fpr)
+ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr;
+ }
+
+ inline Register as_Register() { // converts a general-purpose slot value into the Register it belongs to
+ assert(is_Register(), "must be");
+ assert(is_concrete(), "concrete register expected"); // i.e. the low log_vmregs_per_gpr bits of value() are zero
+ return ::as_Register(value() >> ConcreteRegisterImpl::log_vmregs_per_gpr); // slot value -> register number
+ }
+
+ inline FloatRegister as_FloatRegister() { // converts a floating-point slot value into the FloatRegister it belongs to
+ assert(is_FloatRegister(), "must be");
+ assert(is_concrete(), "concrete register expected"); // i.e. the slot is the first one of its register
+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> ConcreteRegisterImpl::log_vmregs_per_fpr); // rebase to the first FPR slot, then slot -> register number
+ }
+
+ inline bool is_concrete() { // true only for the first slot of a general-purpose or floating-point register
+   if (is_Register()) {
+     return (value() & right_n_bits(ConcreteRegisterImpl::log_vmregs_per_gpr)) == 0;
+   }
+   if (is_FloatRegister()) {
+     return ((value() - ConcreteRegisterImpl::max_gpr) & right_n_bits(ConcreteRegisterImpl::log_vmregs_per_fpr)) == 0;
+   }
+   return false; // neither kind of register slot
+ }
+
+#endif // CPU_ARM_VM_VMREG_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/vmreg_arm.inline.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_VMREG_ARM_INLINE_HPP
+#define CPU_ARM_VM_VMREG_ARM_INLINE_HPP
+
+inline VMReg RegisterImpl::as_VMReg() { // maps this general-purpose register to a VMReg
+ return VMRegImpl::as_VMReg(encoding() << ConcreteRegisterImpl::log_vmregs_per_gpr); // register number scaled by the number of VMReg slots per GPR
+}
+
+inline VMReg FloatRegisterImpl::as_VMReg() { // maps this floating-point register to a VMReg
+ return VMRegImpl::as_VMReg((encoding() << ConcreteRegisterImpl::log_vmregs_per_fpr) + ConcreteRegisterImpl::max_gpr); // scaled register number, offset past the GPR slots
+}
+#endif // CPU_ARM_VM_VMREG_ARM_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/arm/vm/vtableStubs_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_arm.inline.hpp"
+#include "code/vtableStubs.hpp"
+#include "interp_masm_arm.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/instanceKlass.hpp"
+#include "oops/klassVtable.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_arm.inline.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+// machine-dependent part of VtableStubs: create VtableStub of correct size and
+// initialize its code
+
+#define __ masm->
+
+#ifndef PRODUCT
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
+#endif
+
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { // builds the dispatch stub for vtable slot vtable_index; returns NULL if the stub cannot be allocated
+ const int code_length = VtableStub::pd_code_size_limit(true);
+ VtableStub* s = new(code_length) VtableStub(true, vtable_index);
+ // Can be NULL if there is no free space in the code cache.
+ if (s == NULL) {
+ return NULL;
+ }
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), code_length);
+ MacroAssembler* masm = new MacroAssembler(&cb);
+
+ assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
+
+ const Register tmp = Rtemp; // Rtemp OK, should be free at call sites
+
+ address npe_addr = __ pc(); // address of the load_klass below; registered as the first exception point at the end
+ __ load_klass(tmp, R0);
+
+ {
+ int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
+ int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset;
+
+ assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned");
+ int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff); // offset bits passed directly in the ldr address below
+ if (method_offset & ~offset_mask) { // any remaining offset bits are added to tmp first
+ __ add(tmp, tmp, method_offset & ~offset_mask);
+ }
+ __ ldr(Rmethod, Address(tmp, method_offset & offset_mask));
+ }
+
+ address ame_addr = __ pc(); // address of the from_compiled load; registered as the second exception point at the end
+#ifdef AARCH64
+ __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
+ __ br(tmp);
+#else
+ __ ldr(PC, Address(Rmethod, Method::from_compiled_offset())); // loading into PC performs the jump
+#endif // AARCH64
+
+ masm->flush();
+
+ if (PrintMiscellaneous && (WizardMode || Verbose)) {
+ tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
+ vtable_index, p2i(s->entry_point()),
+ (int)(s->code_end() - s->entry_point()),
+ (int)(s->code_end() - __ pc()));
+ }
+ guarantee(__ pc() <= s->code_end(), "overflowed buffer"); // the emitted code must fit in code_length bytes
+ // FIXME ARM: need correct 'slop' - below is x86 code
+ // shut the door on sizing bugs
+ //int slop = 8; // 32-bit offset is this much larger than a 13-bit one
+ //assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
+
+ s->set_exception_points(npe_addr, ame_addr);
+ return s;
+}
+
+VtableStub* VtableStubs::create_itable_stub(int itable_index) { // builds the dispatch stub for itable method index itable_index; returns NULL if the stub cannot be allocated
+ const int code_length = VtableStub::pd_code_size_limit(false);
+ VtableStub* s = new(code_length) VtableStub(false, itable_index);
+ // Can be NULL if there is no free space in the code cache.
+ if (s == NULL) {
+ return NULL;
+ }
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), code_length);
+ MacroAssembler* masm = new MacroAssembler(&cb);
+
+ assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
+
+ // R0-R3 / R0-R7 registers hold the arguments and cannot be spoiled
+ const Register Rclass = AARCH64_ONLY(R9) NOT_AARCH64(R4);
+ const Register Rlength = AARCH64_ONLY(R10) NOT_AARCH64(R5);
+ const Register Rscan = AARCH64_ONLY(R11) NOT_AARCH64(R6);
+ const Register tmp = Rtemp;
+
+ assert_different_registers(Ricklass, Rclass, Rlength, Rscan, tmp);
+
+ // Calculate the start of itable (itable goes after vtable)
+ const int scale = exact_log2(vtableEntry::size_in_bytes());
+ address npe_addr = __ pc(); // address of the load_klass below; registered as the first exception point at the end
+ __ load_klass(Rclass, R0);
+ __ ldr_s32(Rlength, Address(Rclass, Klass::vtable_length_offset()));
+
+ __ add(Rscan, Rclass, in_bytes(Klass::vtable_start_offset()));
+ __ add(Rscan, Rscan, AsmOperand(Rlength, lsl, scale)); // Rscan = vtable start + (vtable length << scale)
+
+ // Search through the itable for an interface equal to incoming Ricklass
+ // itable looks like [intface][offset][intface][offset][intface][offset]
+ const int entry_size = itableOffsetEntry::size() * HeapWordSize;
+ assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
+
+ Label loop;
+ __ bind(loop);
+ __ ldr(tmp, Address(Rscan, entry_size, post_indexed)); // load the interface word, then advance Rscan by one entry
+#ifdef AARCH64
+ Label found;
+ __ cmp(tmp, Ricklass);
+ __ b(found, eq);
+ __ cbnz(tmp, loop); // a zero interface word ends the scan
+#else
+ __ cmp(tmp, Ricklass); // set ZF and CF if interface is found
+ __ cmn(tmp, 0, ne); // check if tmp == 0 and clear CF if it is
+ __ b(loop, ne);
+#endif // AARCH64
+
+ assert(StubRoutines::throw_IncompatibleClassChangeError_entry() != NULL, "Check initialization order");
+#ifdef AARCH64
+ __ jump(StubRoutines::throw_IncompatibleClassChangeError_entry(), relocInfo::runtime_call_type, tmp);
+ __ bind(found);
+#else
+ // CF == 0 means we reached the end of itable without finding icklass
+ __ jump(StubRoutines::throw_IncompatibleClassChangeError_entry(), relocInfo::runtime_call_type, noreg, cc);
+#endif // !AARCH64
+
+ // Interface found at previous position of Rscan, now load the method oop
+ __ ldr_s32(tmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size)); // "- entry_size" undoes the post-indexed advance
+ {
+ const int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index +
+ itableMethodEntry::method_offset_in_bytes();
+ __ add_slow(Rmethod, Rclass, method_offset);
+ }
+ __ ldr(Rmethod, Address(Rmethod, tmp)); // tmp holds the offset value loaded from the matching itable entry
+
+ address ame_addr = __ pc(); // address of the from_compiled load; registered as the second exception point at the end
+
+#ifdef AARCH64
+ __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
+ __ br(tmp);
+#else
+ __ ldr(PC, Address(Rmethod, Method::from_compiled_offset())); // loading into PC performs the jump
+#endif // AARCH64
+
+ masm->flush();
+
+ if (PrintMiscellaneous && (WizardMode || Verbose)) {
+ tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
+ itable_index, p2i(s->entry_point()),
+ (int)(s->code_end() - s->entry_point()),
+ (int)(s->code_end() - __ pc()));
+ }
+ guarantee(__ pc() <= s->code_end(), "overflowed buffer"); // the emitted code must fit in code_length bytes
+ // FIXME ARM: need correct 'slop' - below is x86 code
+ // shut the door on sizing bugs
+ //int slop = 8; // 32-bit offset is this much larger than a 13-bit one
+ //assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
+
+ s->set_exception_points(npe_addr, ame_addr);
+ return s;
+}
+
+int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
+  // Upper bound, in instructions, on the code emitted for each stub kind.
+  // NOT_AARCH64 / AARCH64_ONLY pick the count for the port being built.
+  const int vtable_stub_instrs = NOT_AARCH64(4) AARCH64_ONLY(5);
+  const int itable_stub_instrs = NOT_AARCH64(20) AARCH64_ONLY(20);
+
+  int total_instrs = is_vtable_stub ? vtable_stub_instrs : itable_stub_instrs;
+
+#ifdef AARCH64
+  // Compressed class pointers need extra instructions to decode the klass.
+  if (UseCompressedClassPointers) {
+    total_instrs += MacroAssembler::instr_count_for_decode_klass_not_null();
+  }
+#endif // AARCH64
+
+  // The limit handed back to callers is the instruction count scaled by
+  // the size of a single instruction.
+  return total_instrs * Assembler::InstructionSize;
+}
+
+int VtableStub::pd_code_alignment() { // alignment used for stub code on this port; the same constant for every stub
+ return 8; // NOTE(review): presumably bytes - confirm against the VtableStub allocation code
+}
--- a/hotspot/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h Mon Dec 19 12:39:01 2016 -0500
@@ -72,9 +72,12 @@
#include <asm/ptrace.h>
#define user_regs_struct pt_regs
#endif
-#if defined(aarch64)
+#if defined(aarch64) || defined(arm64)
#include <asm/ptrace.h>
#define user_regs_struct user_pt_regs
+#elif defined(arm)
+#include <asm/ptrace.h>
+#define user_regs_struct pt_regs
#endif
#if defined(s390x)
#include <asm/ptrace.h>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/atomic_linux_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP
+#define OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP
+
+#include "runtime/os.hpp"
+#include "vm_version_arm.hpp"
+
+// Implementation of class atomic
+
+/*
+ * Atomic long operations on 32-bit ARM
+ * ARM v7 supports LDREXD/STREXD synchronization instructions so no problem.
+ * ARM < v7 does not have explicit 64-bit atomic load/store capability.
+ * However, gcc emits LDRD/STRD instructions on v5te and LDM/STM on v5t
+ * when loading/storing 64 bits.
+ * For non-MP machines (which is all we support for ARM < v7)
+ * under current Linux distros these instructions appear atomic.
+ * See section A3.5.3 of ARM Architecture Reference Manual for ARM v7.
+ * Also, for cmpxchg64, if ARM < v7 we check for cmpxchg64 support in the
+ * Linux kernel using _kuser_helper_version. See entry-armv.S in the Linux
+ * kernel source or kernel_user_helpers.txt in Linux Doc.
+ */
+
+inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
+inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
+inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
+// Every overload here is one plain assignment; the five below repeat the five above for volatile destinations.
+inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
+inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
+inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
+
+inline jlong Atomic::load (volatile jlong* src) {
+  assert(((intx)src & (sizeof(jlong)-1)) == 0, "Atomic load jlong mis-aligned");
+#ifndef AARCH64
+  return (*os::atomic_load_long_func)(src);  // 32-bit: use the routine installed in os::atomic_load_long_func
+#else
+  return *src;                               // AARCH64: a plain load of the aligned jlong
+#endif
+}
+
+inline void Atomic::store (jlong value, volatile jlong* dest) {
+  assert(((intx)dest & (sizeof(jlong)-1)) == 0, "Atomic store jlong mis-aligned");
+#ifndef AARCH64
+  (*os::atomic_store_long_func)(value, dest);  // 32-bit: use the routine installed in os::atomic_store_long_func
+#else
+  *dest = value;                               // AARCH64: a plain store to the aligned jlong
+#endif
+}
+
+inline void Atomic::store (jlong value, jlong* dest) {
+  Atomic::store(value, (volatile jlong*)dest);  // same path as the volatile overload
+}
+
+// As per atomic.hpp all read-modify-write operations have to provide two-way
+// barrier semantics. For AARCH64 we are using load-acquire-with-reservation and
+// store-release-with-reservation. While load-acquire combined with store-release
+// do not generally form two-way barriers, their use with reservations does - the
+// ARMv8 architecture manual Section F "Barrier Litmus Tests" indicates they
+// provide sequentially consistent semantics. All we need to add is an explicit
+// barrier in the failure path of the cmpxchg operations (as these don't execute
+// the store) - arguably this may be overly cautious as there is a very low
+// likelihood that the hardware would pull loads/stores into the region guarded
+// by the reservation.
+//
+// For ARMv7 we add explicit barriers in the stubs.
+
+inline jint Atomic::add(jint add_value, volatile jint* dest) { // atomically adds add_value to *dest; the AARCH64 path returns the sum it stored
+#ifdef AARCH64
+ jint val; // loaded value, then the sum written back and returned
+ int tmp; // status result of stlxr; non-zero sends the loop back to label 1
+ __asm__ volatile(
+ "1:\n\t"
+ " ldaxr %w[val], [%[dest]]\n\t" // load-acquire with reservation (see the barrier note above this function)
+ " add %w[val], %w[val], %w[add_val]\n\t"
+ " stlxr %w[tmp], %w[val], [%[dest]]\n\t" // store-release with reservation of the sum
+ " cbnz %w[tmp], 1b\n\t" // retry while the reserved store did not succeed
+ : [val] "=&r" (val), [tmp] "=&r" (tmp)
+ : [add_val] "r" (add_value), [dest] "r" (dest)
+ : "memory");
+ return val;
+#else
+ return (*os::atomic_add_func)(add_value, dest); // 32-bit: delegate to the routine installed in os::atomic_add_func
+#endif
+}
+
+inline void Atomic::inc(volatile jint* dest) {
+  (void)Atomic::add(1, dest);  // increment by one; the returned value is not needed
+}
+
+inline void Atomic::dec(volatile jint* dest) {
+  (void)Atomic::add(-1, dest);  // decrement by one; the returned value is not needed
+}
+
+inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) { // pointer-width variant of Atomic::add
+#ifdef AARCH64
+ intptr_t val; // loaded value, then the sum written back and returned
+ int tmp; // status result of stlxr; non-zero sends the loop back to label 1
+ __asm__ volatile(
+ "1:\n\t"
+ " ldaxr %[val], [%[dest]]\n\t" // same loop as the jint add, but without the %w operand modifier on the data
+ " add %[val], %[val], %[add_val]\n\t"
+ " stlxr %w[tmp], %[val], [%[dest]]\n\t"
+ " cbnz %w[tmp], 1b\n\t" // retry while the reserved store did not succeed
+ : [val] "=&r" (val), [tmp] "=&r" (tmp)
+ : [add_val] "r" (add_value), [dest] "r" (dest)
+ : "memory");
+ return val;
+#else
+ return (intptr_t)Atomic::add((jint)add_value, (volatile jint*)dest); // 32-bit: reuse the jint add via casts (assumes intptr_t and jint have the same width - TODO confirm)
+#endif
+}
+
+inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest) {
+  return (void*)Atomic::add_ptr(add_value, (volatile intptr_t*)dest);  // forward to the intptr_t overload
+}
+
+inline void Atomic::inc_ptr(volatile intptr_t* dest) {
+  (void)Atomic::add_ptr(1, dest);  // add one; result unused
+}
+
+inline void Atomic::dec_ptr(volatile intptr_t* dest) {
+  (void)Atomic::add_ptr(-1, dest);  // subtract one; result unused
+}
+
+inline void Atomic::inc_ptr(volatile void* dest) {
+  (void)Atomic::add_ptr(1, (volatile intptr_t*)dest);  // what the intptr_t overload of inc_ptr does
+}
+
+inline void Atomic::dec_ptr(volatile void* dest) {
+  (void)Atomic::add_ptr(-1, (volatile intptr_t*)dest);  // what the intptr_t overload of dec_ptr does
+}
+
+
+inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) { // stores exchange_value into *dest; the AARCH64 path returns the value it replaced
+#ifdef AARCH64
+ jint old_val; // value read from *dest before the store; returned
+ int tmp; // status result of stlxr; non-zero sends the loop back to label 1
+ __asm__ volatile(
+ "1:\n\t"
+ " ldaxr %w[old_val], [%[dest]]\n\t" // load-acquire with reservation of the current value
+ " stlxr %w[tmp], %w[new_val], [%[dest]]\n\t" // store-release with reservation of the new value
+ " cbnz %w[tmp], 1b\n\t" // retry while the reserved store did not succeed
+ : [old_val] "=&r" (old_val), [tmp] "=&r" (tmp)
+ : [new_val] "r" (exchange_value), [dest] "r" (dest)
+ : "memory");
+ return old_val;
+#else
+ return (*os::atomic_xchg_func)(exchange_value, dest); // 32-bit: delegate to the routine installed in os::atomic_xchg_func
+#endif
+}
+
+inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { // pointer-width variant of Atomic::xchg
+#ifdef AARCH64
+ intptr_t old_val; // value read from *dest before the store; returned
+ int tmp; // status result of stlxr; non-zero sends the loop back to label 1
+ __asm__ volatile(
+ "1:\n\t"
+ " ldaxr %[old_val], [%[dest]]\n\t" // same loop as the jint xchg, but without the %w operand modifier on the data
+ " stlxr %w[tmp], %[new_val], [%[dest]]\n\t"
+ " cbnz %w[tmp], 1b\n\t" // retry while the reserved store did not succeed
+ : [old_val] "=&r" (old_val), [tmp] "=&r" (tmp)
+ : [new_val] "r" (exchange_value), [dest] "r" (dest)
+ : "memory");
+ return old_val;
+#else
+ return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest); // 32-bit: reuse the jint xchg via casts (assumes intptr_t and jint have the same width - TODO confirm)
+#endif
+}
+
+inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
+  return (void*)Atomic::xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);  // forward to the intptr_t overload
+}
+
+// The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering
+
+inline jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value, cmpxchg_memory_order order) {
+#ifdef AARCH64
+  jint rv;   // value found at *dest; returned whether or not the swap happened
+  int tmp;   // status result of stlxr; non-zero sends the loop back to label 1
+  __asm__ volatile(
+    "1:\n\t"
+    " ldaxr %w[rv], [%[dest]]\n\t"            // load-acquire with reservation
+    " cmp %w[rv], %w[cv]\n\t"                 // writes the condition flags, hence the "cc" clobber
+    " b.ne 2f\n\t"                            // mismatch: *dest is left untouched
+    " stlxr %w[tmp], %w[ev], [%[dest]]\n\t"   // store-release with reservation
+    " cbnz %w[tmp], 1b\n\t"
+    " b 3f\n\t"
+    "2:\n\t"
+    " dmb sy\n\t"                             // explicit barrier on the failure path, which skips the store
+    "3:\n\t"
+    : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
+    : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
+    : "memory", "cc");                        // "cc" stops the compiler keeping live flags across this asm
+  return rv;
+#else
+  // Warning: Arguments are swapped to avoid moving them for kernel call
+  return (*os::atomic_cmpxchg_func)(compare_value, exchange_value, dest);
+#endif
+}
+
+inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value, cmpxchg_memory_order order) {
+#ifdef AARCH64
+  jlong rv;  // value found at *dest; returned whether or not the swap happened
+  int tmp;   // status result of stlxr; non-zero sends the loop back to label 1
+  __asm__ volatile(
+    "1:\n\t"
+    " ldaxr %[rv], [%[dest]]\n\t"             // load-acquire with reservation
+    " cmp %[rv], %[cv]\n\t"                   // writes the condition flags, hence the "cc" clobber
+    " b.ne 2f\n\t"                            // mismatch: *dest is left untouched
+    " stlxr %w[tmp], %[ev], [%[dest]]\n\t"    // store-release with reservation
+    " cbnz %w[tmp], 1b\n\t"
+    " b 3f\n\t"
+    "2:\n\t"
+    " dmb sy\n\t"                             // explicit barrier on the failure path, which skips the store
+    "3:\n\t"
+    : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
+    : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
+    : "memory", "cc");                        // "cc" stops the compiler keeping live flags across this asm
+  return rv;
+#else
+  assert(VM_Version::supports_cx8(), "Atomic compare and exchange jlong not supported on this architecture!");
+  return (*os::atomic_cmpxchg_long_func)(compare_value, exchange_value, dest);  // note the argument order: compare value first
+#endif
+}
+
+inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
+#ifndef AARCH64
+  return (intptr_t)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);     // 32-bit: jint flavour
+#else
+  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);  // AARCH64: jlong flavour
+#endif
+}
+
+inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value, cmpxchg_memory_order order) {
+  return (void*)Atomic::cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value, order);  // forward to the intptr_t overload
+}
+
+#endif // OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/bytes_linux_arm.inline.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_BYTES_LINUX_ARM_INLINE_HPP
+#define OS_CPU_LINUX_ARM_VM_BYTES_LINUX_ARM_INLINE_HPP
+
+#include <byteswap.h>
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+inline u2 Bytes::swap_u2(u2 x) {
+  const u2 reversed = bswap_16(x);  // bswap_16 comes from <byteswap.h>; TODO: ARM - optimize
+  return reversed;
+}
+
+inline u4 Bytes::swap_u4(u4 x) {
+  const u4 reversed = bswap_32(x);  // bswap_32 comes from <byteswap.h>; TODO: ARM - optimize
+  return reversed;
+}
+
+inline u8 Bytes::swap_u8(u8 x) {
+  const u8 reversed = bswap_64(x);  // bswap_64 comes from <byteswap.h>; TODO: ARM - optimize
+  return reversed;
+}
+
+#endif // OS_CPU_LINUX_ARM_VM_BYTES_LINUX_ARM_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/copy_linux_arm.inline.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_COPY_LINUX_ARM_INLINE_HPP
+#define OS_CPU_LINUX_ARM_VM_COPY_LINUX_ARM_INLINE_HPP
+
+static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+#ifndef AARCH64
+  // NOTE: on 32-bit ARM the _Copy_* functions take the destination first, then the source
+  _Copy_conjoint_words(to, from, count * HeapWordSize);
+#else
+  _Copy_conjoint_words(from, to, count * HeapWordSize);  // AARCH64: (from, to) order
+#endif
+}
+
+static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+#ifndef AARCH64
+  _Copy_disjoint_words(to, from, count * HeapWordSize);  // 32-bit: destination first
+#else
+  _Copy_disjoint_words(from, to, count * HeapWordSize);  // AARCH64: (from, to) order
+#endif // AARCH64
+}
+
+static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
+ pd_disjoint_words(from, to, count); // no separate atomic variant: same routine as pd_disjoint_words
+}
+
+static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+ pd_conjoint_words(from, to, count); // no aligned-only variant: same routine as pd_conjoint_words
+}
+
+static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+ pd_disjoint_words(from, to, count); // no aligned-only variant: same routine as pd_disjoint_words
+}
+
+static void pd_conjoint_bytes(void* from, void* to, size_t count) {
+  (void)::memmove(to, from, count);  // memmove handles overlapping ranges; its return value is not needed
+}
+
+static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
+ pd_conjoint_bytes(from, to, count); // no separate atomic variant: same routine as pd_conjoint_bytes
+}
+
+static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
+#ifndef AARCH64
+  _Copy_conjoint_jshorts_atomic(to, from, count * BytesPerShort);  // 32-bit: destination first
+#else
+  _Copy_conjoint_jshorts_atomic(from, to, count * BytesPerShort);  // AARCH64: (from, to) order
+#endif
+}
+
+static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
+#ifndef AARCH64
+  assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
+  // The word copy is word-atomic in this implementation, so it can serve jints as well.
+  pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
+#else
+  _Copy_conjoint_jints_atomic(from, to, count * BytesPerInt);
+#endif
+}
+
+static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
+#ifndef AARCH64
+  _Copy_conjoint_jlongs_atomic(to, from, count * BytesPerLong);  // 32-bit: destination first
+#else
+  assert(HeapWordSize == BytesPerLong, "64-bit architecture");
+  pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);      // AARCH64: a jlong is one heap word
+#endif
+}
+
+static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
+#ifndef AARCH64
+  assert(BytesPerHeapOop == BytesPerInt, "32-bit architecture");
+  pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);      // 32-bit: copy as jints
+#else
+  if (!UseCompressedOops) {
+    assert(BytesPerHeapOop == BytesPerLong, "64-bit architecture");
+    pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); // uncompressed: copy as jlongs
+  } else {
+    assert(BytesPerHeapOop == BytesPerInt, "compressed oops");
+    pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);    // compressed: copy as jints
+  }
+#endif
+}
+
+static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_bytes_atomic(static_cast<void*>(from), static_cast<void*>(to), count);  // array variant shares the byte copy
+}
+
+static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jshorts_atomic(reinterpret_cast<jshort*>(from), reinterpret_cast<jshort*>(to), count);  // array variant shares the jshort copy
+}
+
+static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jints_atomic(reinterpret_cast<jint*>(from), reinterpret_cast<jint*>(to), count);  // array variant shares the jint copy
+}
+
+static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jlongs_atomic(reinterpret_cast<jlong*>(from), reinterpret_cast<jlong*>(to), count);  // array variant shares the jlong copy
+}
+
+static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_oops_atomic(reinterpret_cast<oop*>(from), reinterpret_cast<oop*>(to), count);  // array variant shares the oop copy
+}
+
+#endif // OS_CPU_LINUX_ARM_VM_COPY_LINUX_ARM_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/globals_linux_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_GLOBALS_LINUX_ARM_HPP
+#define OS_CPU_LINUX_ARM_VM_GLOBALS_LINUX_ARM_HPP
+
+//
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+//
+define_pd_global(bool, DontYieldALot, false);
+#ifndef AARCH64
+// 32-bit ARM defaults for the three stack size flags.
+define_pd_global(intx, CompilerThreadStackSize, 512);
+// System default ThreadStackSize appears to be 512 which is too big.
+define_pd_global(intx, ThreadStackSize, 320);
+define_pd_global(intx, VMThreadStackSize, 512);
+#else
+define_pd_global(intx, CompilerThreadStackSize, 1024);
+define_pd_global(intx, ThreadStackSize, 1024);
+define_pd_global(intx, VMThreadStackSize, 1024);
+#endif // AARCH64
+
+define_pd_global(size_t, JVMInvokeMethodSlack, 8192);
+// Used on 64 bit platforms for UseCompressedOops base address or CDS
+define_pd_global(size_t, HeapBaseMinAddress, 2*G);
+
+#endif // OS_CPU_LINUX_ARM_VM_GLOBALS_LINUX_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/linux_arm_32.s Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,513 @@
+#
+# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+
+ # NOTE WELL! The _Copy functions are called directly
+ # from server-compiler-generated code via CallLeafNoFP,
+ # which means that they *must* either not use floating
+ # point or use it in the same manner as does the server
+ # compiler.
+ # Exported entry points: each _Copy_* symbol is made global and typed as a function.
+ .globl _Copy_conjoint_bytes
+ .type _Copy_conjoint_bytes, %function
+ .globl _Copy_arrayof_conjoint_bytes
+ .type _Copy_arrayof_conjoint_bytes, %function
+ .globl _Copy_disjoint_words
+ .type _Copy_disjoint_words, %function
+ .globl _Copy_conjoint_words
+ .type _Copy_conjoint_words, %function
+ .globl _Copy_conjoint_jshorts_atomic
+ .type _Copy_conjoint_jshorts_atomic, %function
+ .globl _Copy_arrayof_conjoint_jshorts
+ .type _Copy_arrayof_conjoint_jshorts, %function
+ .globl _Copy_conjoint_jints_atomic
+ .type _Copy_conjoint_jints_atomic, %function
+ .globl _Copy_arrayof_conjoint_jints
+ .type _Copy_arrayof_conjoint_jints, %function
+ .globl _Copy_conjoint_jlongs_atomic
+ .type _Copy_conjoint_jlongs_atomic, %function
+ .globl _Copy_arrayof_conjoint_jlongs
+ .type _Copy_arrayof_conjoint_jlongs, %function
+ # SpinPause: exported function whose whole body is "bx LR", i.e. it returns at once.
+ .text
+ .globl SpinPause
+ .type SpinPause, %function
+SpinPause:
+ bx LR
+
+ # Support for void Copy::conjoint_bytes(void* from, void* to, size_t count)
+ # There is no copy loop: the body is a single "swi 0x9f0001", so entering it traps
+ # (presumably a deliberate "not implemented" marker - confirm before calling it).
+_Copy_conjoint_bytes:
+ swi 0x9f0001
+
+ # Support for void Copy::arrayof_conjoint_bytes(void* from, void* to, size_t count)
+ # There is no copy loop: the body is a single "swi 0x9f0001", so entering it traps
+ # (presumably a deliberate "not implemented" marker - confirm before calling it).
+_Copy_arrayof_conjoint_bytes:
+ swi 0x9f0001
+
+
+ # Support for void Copy::disjoint_words(void* from, void* to, size_t count)
+ # As coded: r0 = destination (stores), r1 = source (loads), r2 = bytes left,
+ # so callers pass "to" first. r3-r9 and ip are saved on entry and restored on exit.
+_Copy_disjoint_words:
+ stmdb sp!, {r3 - r9, ip}
+ # A size of zero goes straight to the exit.
+ cmp r2, #0
+ beq disjoint_words_finish
+ # Sizes of 12 bytes or less skip the block loop.
+ pld [r1, #0]
+ cmp r2, #12
+ ble disjoint_words_small
+ # Block loop: 32 bytes (eight registers) per iteration, ascending addresses.
+ .align 3
+dw_f2b_loop_32:
+ subs r2, #32
+ blt dw_f2b_loop_32_finish
+ ldmia r1!, {r3 - r9, ip}
+ nop
+ pld [r1]
+ stmia r0!, {r3 - r9, ip}
+ bgt dw_f2b_loop_32
+dw_f2b_loop_32_finish:
+ addlts r2, #32
+ beq disjoint_words_finish
+ cmp r2, #16
+ blt disjoint_words_small
+ ldmia r1!, {r3 - r6}
+ subge r2, r2, #16
+ stmia r0!, {r3 - r6}
+ beq disjoint_words_finish
+disjoint_words_small:
+ cmp r2, #8
+ ldr r7, [r1], #4
+ ldrge r8, [r1], #4
+ ldrgt r9, [r1], #4
+ str r7, [r0], #4
+ strge r8, [r0], #4
+ strgt r9, [r0], #4
+ # Common exit: restore the saved registers and return.
+disjoint_words_finish:
+ ldmia sp!, {r3 - r9, ip}
+ bx lr
+
+
+ # Support for void Copy::conjoint_words(void* from, void* to, size_t count)
+ # As coded: r0 = destination (stores), r1 = source (loads), r2 = bytes left,
+ # so callers pass "to" first. r3-r9 and ip are saved on entry and restored on exit.
+_Copy_conjoint_words:
+ stmdb sp!, {r3 - r9, ip}
+ # A size of zero goes straight to the exit.
+ cmp r2, #0
+ beq conjoint_words_finish
+ # Sizes of 12 bytes or less use the small path, which loads every word before storing any.
+ pld [r1, #0]
+ cmp r2, #12
+ ble conjoint_words_small
+ # Destination above source by less than the size (unsigned compare): copy from the top down.
+ subs r3, r0, r1
+ cmphi r2, r3
+ bhi cw_b2f_copy
+ .align 3
+cw_f2b_loop_32:
+ subs r2, #32
+ blt cw_f2b_loop_32_finish
+ ldmia r1!, {r3 - r9, ip}
+ nop
+ pld [r1]
+ stmia r0!, {r3 - r9, ip}
+ bgt cw_f2b_loop_32
+cw_f2b_loop_32_finish:
+ addlts r2, #32
+ beq conjoint_words_finish
+ cmp r2, #16
+ blt conjoint_words_small
+ ldmia r1!, {r3 - r6}
+ subge r2, r2, #16
+ stmia r0!, {r3 - r6}
+ beq conjoint_words_finish
+conjoint_words_small:
+ cmp r2, #8
+ ldr r7, [r1], #4
+ ldrge r8, [r1], #4
+ ldrgt r9, [r1], #4
+ str r7, [r0], #4
+ strge r8, [r0], #4
+ strgt r9, [r0], #4
+ b conjoint_words_finish
+
+ # Src and dest overlap, copy in a descending order (both pointers start one past the end)
+cw_b2f_copy:
+ add r1, r2
+ pld [r1, #-32]
+ add r0, r2
+ .align 3
+cw_b2f_loop_32:
+ subs r2, #32
+ blt cw_b2f_loop_32_finish
+ ldmdb r1!, {r3-r9,ip}
+ nop
+ pld [r1, #-32]
+ stmdb r0!, {r3-r9,ip}
+ bgt cw_b2f_loop_32
+cw_b2f_loop_32_finish:
+ addlts r2, #32
+ beq conjoint_words_finish
+ cmp r2, #16
+ blt cw_b2f_copy_small
+ ldmdb r1!, {r3 - r6}
+ subge r2, r2, #16
+ stmdb r0!, {r3 - r6}
+ beq conjoint_words_finish
+cw_b2f_copy_small:
+ cmp r2, #8
+ ldr r7, [r1, #-4]!
+ ldrge r8, [r1, #-4]!
+ ldrgt r9, [r1, #-4]!
+ str r7, [r0, #-4]!
+ strge r8, [r0, #-4]!
+ strgt r9, [r0, #-4]!
+ # Common exit: restore the saved registers and return.
+conjoint_words_finish:
+ ldmia sp!, {r3 - r9, ip}
+ bx lr
+
+ # Support for void Copy::conjoint_jshorts_atomic(void* from, void* to, size_t count)
+ # As coded: r0 = destination (stores), r1 = source (loads), r2 = bytes left,
+ # so callers pass "to" first. r3-r9 and ip are saved on entry and restored on exit.
+_Copy_conjoint_jshorts_atomic:
+ stmdb sp!, {r3 - r9, ip}
+ # A size of zero goes straight to the exit.
+ cmp r2, #0
+ beq conjoint_shorts_finish
+ # Destination above source by less than the size (unsigned compare): copy from the top down.
+ subs r3, r0, r1
+ cmphi r2, r3
+ bhi cs_b2f_copy
+
+ pld [r1]
+ # Choose the forward variant from the low two address bits of dest, then of src.
+ ands r3, r0, #3
+ bne cs_f2b_dest_u
+ ands r3, r1, #3
+ bne cs_f2b_src_u
+
+ # Aligned source address
+ .align 3
+cs_f2b_loop_32:
+ subs r2, #32
+ blt cs_f2b_loop_32_finish
+ ldmia r1!, {r3 - r9, ip}
+ nop
+ pld [r1]
+ stmia r0!, {r3 - r9, ip}
+ bgt cs_f2b_loop_32
+cs_f2b_loop_32_finish:
+ addlts r2, #32
+ beq conjoint_shorts_finish
+ movs r6, r2, lsr #3
+ .align 3
+cs_f2b_8_loop:
+ beq cs_f2b_4
+ ldmia r1!, {r4-r5}
+ subs r6, #1
+ stmia r0!, {r4-r5}
+ bgt cs_f2b_8_loop
+ # Tail of the aligned path: the remainder (r2 & 7) is copied as up to three halfwords.
+cs_f2b_4:
+ ands r2, #7
+ beq conjoint_shorts_finish
+ cmp r2, #4
+ ldrh r3, [r1], #2
+ ldrgeh r4, [r1], #2
+ ldrgth r5, [r1], #2
+ strh r3, [r0], #2
+ strgeh r4, [r0], #2
+ strgth r5, [r0], #2
+ b conjoint_shorts_finish
+
+ # Destination not aligned: copy one halfword first
+cs_f2b_dest_u:
+ ldrh r3, [r1], #2
+ subs r2, #2
+ strh r3, [r0], #2
+ beq conjoint_shorts_finish
+
+ # Check to see if source is not aligned either
+ ands r3, r1, #3
+ beq cs_f2b_loop_32
+
+cs_f2b_src_u:
+ cmp r2, #16
+ blt cs_f2b_8_u
+
+ # Load 2 first bytes to r7 and make src ptr word aligned
+ bic r1, #3
+ ldr r7, [r1], #4
+
+ # Destination aligned, source not
+ mov r8, r2, lsr #4
+ .align 3
+cs_f2b_16_u_loop:
+ mov r3, r7, lsr #16
+ ldmia r1!, {r4 - r7}
+ orr r3, r3, r4, lsl #16
+ mov r4, r4, lsr #16
+ pld [r1]
+ orr r4, r4, r5, lsl #16
+ mov r5, r5, lsr #16
+ orr r5, r5, r6, lsl #16
+ mov r6, r6, lsr #16
+ orr r6, r6, r7, lsl #16
+ stmia r0!, {r3 - r6}
+ subs r8, #1
+ bgt cs_f2b_16_u_loop
+ ands r2, #0xf
+ beq conjoint_shorts_finish
+ sub r1, #2
+ # Fewer than 16 bytes left, source still not word aligned: one 8-byte step if possible.
+cs_f2b_8_u:
+ cmp r2, #8
+ blt cs_f2b_4_u
+ ldrh r4, [r1], #2
+ ldr r5, [r1], #4
+ ldrh r6, [r1], #2
+ orr r4, r4, r5, lsl #16
+ mov r5, r5, lsr #16
+ orr r5, r5, r6, lsl #16
+ subs r2, #8
+ stmia r0!, {r4 - r5}
+cs_f2b_4_u:
+ beq conjoint_shorts_finish
+ cmp r2, #4
+ ldrh r3, [r1], #2
+ ldrgeh r4, [r1], #2
+ ldrgth r5, [r1], #2
+ strh r3, [r0], #2
+ strgeh r4, [r0], #2
+ strgth r5, [r0], #2
+ b conjoint_shorts_finish
+
+ # Src and dest overlap, copy in a descending order
+cs_b2f_copy:
+ add r1, r2
+ pld [r1, #-32]
+ add r0, r2
+ # Both pointers now address one past the end; test their alignment as in the forward case.
+ ands r3, r0, #3
+ bne cs_b2f_dest_u
+ ands r3, r1, #3
+ bne cs_b2f_src_u
+ .align 3
+cs_b2f_loop_32:
+ subs r2, #32
+ blt cs_b2f_loop_32_finish
+ ldmdb r1!, {r3-r9,ip}
+ nop
+ pld [r1, #-32]
+ stmdb r0!, {r3-r9,ip}
+ bgt cs_b2f_loop_32
+cs_b2f_loop_32_finish:
+ addlts r2, #32
+ beq conjoint_shorts_finish
+ cmp r2, #24
+ blt cs_b2f_16
+ ldmdb r1!, {r3-r8}
+ sub r2, #24
+ stmdb r0!, {r3-r8}
+ beq conjoint_shorts_finish
+cs_b2f_16:
+ cmp r2, #16
+ blt cs_b2f_8
+ ldmdb r1!, {r3-r6}
+ sub r2, #16
+ stmdb r0!, {r3-r6}
+ beq conjoint_shorts_finish
+cs_b2f_8:
+ cmp r2, #8
+ blt cs_b2f_all_copy
+ ldmdb r1!, {r3-r4}
+ sub r2, #8
+ stmdb r0!, {r3-r4}
+ beq conjoint_shorts_finish
+ # Descending tail: up to three halfwords.
+cs_b2f_all_copy:
+ cmp r2, #4
+ ldrh r3, [r1, #-2]!
+ ldrgeh r4, [r1, #-2]!
+ ldrgth r5, [r1, #-2]!
+ strh r3, [r0, #-2]!
+ strgeh r4, [r0, #-2]!
+ strgth r5, [r0, #-2]!
+ b conjoint_shorts_finish
+
+ # Destination not aligned: copy one halfword first
+cs_b2f_dest_u:
+ ldrh r3, [r1, #-2]!
+ strh r3, [r0, #-2]!
+ sub r2, #2
+ # Check source alignment as well
+ ands r3, r1, #3
+ beq cs_b2f_loop_32
+
+ # Source not aligned: round r1 down to a word boundary and merge halves with shifts
+cs_b2f_src_u:
+ bic r1, #3
+ .align 3
+cs_b2f_16_loop_u:
+ subs r2, #16
+ blt cs_b2f_16_loop_u_finished
+ ldr r7, [r1]
+ mov r3, r7
+ ldmdb r1!, {r4 - r7}
+ mov r4, r4, lsr #16
+ orr r4, r4, r5, lsl #16
+ pld [r1, #-32]
+ mov r5, r5, lsr #16
+ orr r5, r5, r6, lsl #16
+ mov r6, r6, lsr #16
+ orr r6, r6, r7, lsl #16
+ mov r7, r7, lsr #16
+ orr r7, r7, r3, lsl #16
+ stmdb r0!, {r4 - r7}
+ bgt cs_b2f_16_loop_u
+ beq conjoint_shorts_finish
+cs_b2f_16_loop_u_finished:
+ addlts r2, #16
+ ldr r3, [r1]
+ cmp r2, #10
+ blt cs_b2f_2_u_loop
+ ldmdb r1!, {r4 - r5}
+ mov r6, r4, lsr #16
+ orr r6, r6, r5, lsl #16
+ mov r7, r5, lsr #16
+ orr r7, r7, r3, lsl #16
+ stmdb r0!, {r6-r7}
+ sub r2, #8
+ .align 3
+cs_b2f_2_u_loop:
+ subs r2, #2
+ ldrh r3, [r1], #-2
+ strh r3, [r0, #-2]!
+ bgt cs_b2f_2_u_loop
+ # Common exit: restore the saved registers and return.
+conjoint_shorts_finish:
+ ldmia sp!, {r3 - r9, ip}
+ bx lr
+
+
+ # Support for void Copy::arrayof_conjoint_jshorts(void* from, void* to, size_t count)
+ # There is no copy loop: the body is a single "swi 0x9f0001", so entering it traps
+ # (presumably a deliberate "not implemented" marker - confirm before calling it).
+_Copy_arrayof_conjoint_jshorts:
+ swi 0x9f0001
+
+ # Support for void Copy::conjoint_jints_atomic(void* from, void* to, size_t count)
+ # Both labels below share one body, a single "swi 0x9f0001", so entering either traps
+ # (presumably a deliberate "not implemented" marker - confirm before calling it).
+_Copy_conjoint_jints_atomic:
+_Copy_arrayof_conjoint_jints:
+ swi 0x9f0001
+
+ # Support for void Copy::conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count)
+ # As coded: r0 = destination (stores), r1 = source (loads), r2 = bytes left,
+ # so callers pass "to" first. Both labels share this body; r3-r9 and ip are preserved.
+_Copy_conjoint_jlongs_atomic:
+_Copy_arrayof_conjoint_jlongs:
+ stmdb sp!, {r3 - r9, ip}
+ # A size of zero goes straight to the exit.
+ cmp r2, #0
+ beq conjoint_longs_finish
+ # Sizes of 24 bytes or less use the small path, which loads everything before storing.
+ pld [r1, #0]
+ cmp r2, #24
+ ble conjoint_longs_small
+ # Destination above source by less than the size (unsigned compare): copy from the top down.
+ subs r3, r0, r1
+ cmphi r2, r3
+ bhi cl_b2f_copy
+ .align 3
+cl_f2b_loop_32:
+ subs r2, #32
+ blt cl_f2b_loop_32_finish
+ ldmia r1!, {r3 - r9, ip}
+ nop
+ pld [r1]
+ stmia r0!, {r3 - r9, ip}
+ bgt cl_f2b_loop_32
+cl_f2b_loop_32_finish:
+ addlts r2, #32
+ beq conjoint_longs_finish
+conjoint_longs_small:
+ cmp r2, #16
+ blt cl_f2b_copy_8
+ bgt cl_f2b_copy_24
+ ldmia r1!, {r3 - r6}
+ stmia r0!, {r3 - r6}
+ b conjoint_longs_finish
+cl_f2b_copy_8:
+ ldmia r1!, {r3 - r4}
+ stmia r0!, {r3 - r4}
+ b conjoint_longs_finish
+cl_f2b_copy_24:
+ ldmia r1!, {r3 - r8}
+ stmia r0!, {r3 - r8}
+ b conjoint_longs_finish
+
+ # Src and dest overlap, copy in a descending order (both pointers start one past the end)
+cl_b2f_copy:
+ add r1, r2
+ pld [r1, #-32]
+ add r0, r2
+ .align 3
+cl_b2f_loop_32:
+ subs r2, #32
+ blt cl_b2f_loop_32_finish
+ ldmdb r1!, {r3 - r9, ip}
+ nop
+ pld [r1]
+ stmdb r0!, {r3 - r9, ip}
+ bgt cl_b2f_loop_32
+cl_b2f_loop_32_finish:
+ addlts r2, #32
+ beq conjoint_longs_finish
+ cmp r2, #16
+ blt cl_b2f_copy_8
+ bgt cl_b2f_copy_24
+ ldmdb r1!, {r3 - r6}
+ stmdb r0!, {r3 - r6}
+ b conjoint_longs_finish
+cl_b2f_copy_8:
+ ldmdb r1!, {r3 - r4}
+ stmdb r0!, {r3 - r4}
+ b conjoint_longs_finish
+cl_b2f_copy_24:
+ ldmdb r1!, {r3 - r8}
+ stmdb r0!, {r3 - r8}
+ # Common exit: restore the saved registers and return.
+conjoint_longs_finish:
+ ldmia sp!, {r3 - r9, ip}
+ bx lr
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/linux_arm_64.s Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,542 @@
+#
+# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+ # TODO-AARCH64
+
+ # NOTE WELL! The _Copy functions are called directly
+ # from server-compiler-generated code via CallLeafNoFP,
+ # which means that they *must* either not use floating
+ # point or use it in the same manner as does the server
+ # compiler.
+
+ .globl _Copy_conjoint_bytes
+ .type _Copy_conjoint_bytes, %function
+ .globl _Copy_arrayof_conjoint_bytes
+ .type _Copy_arrayof_conjoint_bytes, %function
+ .globl _Copy_disjoint_words
+ .type _Copy_disjoint_words, %function
+ .globl _Copy_conjoint_words
+ .type _Copy_conjoint_words, %function
+ .globl _Copy_conjoint_jshorts_atomic
+ .type _Copy_conjoint_jshorts_atomic, %function
+ .globl _Copy_arrayof_conjoint_jshorts
+ .type _Copy_arrayof_conjoint_jshorts, %function
+ .globl _Copy_conjoint_jints_atomic
+ .type _Copy_conjoint_jints_atomic, %function
+ .globl _Copy_arrayof_conjoint_jints
+ .type _Copy_arrayof_conjoint_jints, %function
+ .globl _Copy_conjoint_jlongs_atomic
+ .type _Copy_conjoint_jlongs_atomic, %function
+ .globl _Copy_arrayof_conjoint_jlongs
+ .type _Copy_arrayof_conjoint_jlongs, %function
+
+ .text
+ .globl SpinPause
+ .type SpinPause, %function
+SpinPause:
+ yield // hint instruction only; no register is written before the ret
+ ret
+
+ # Support for void Copy::conjoint_bytes(void* from, void* to, size_t count)
+ # No copy code is provided: the body is a lone "hlt 1002" with no return.
+ # NOTE(review): presumably a marker for an unimplemented entry point; confirm.
+_Copy_conjoint_bytes:
+ hlt 1002
+
+ # Support for void Copy::arrayof_conjoint_bytes(void* from, void* to, size_t count)
+ # No copy code is provided: the body is a lone "hlt 1003" with no return.
+ # NOTE(review): presumably a marker for an unimplemented entry point; confirm.
+_Copy_arrayof_conjoint_bytes:
+ hlt 1003
+
+
+ # Support for void Copy::disjoint_words(void* from, void* to, size_t count)
+ # As coded: x0 = source address, x1 = destination address, x2 = length in bytes.
+ # Ascending copy; 128 bytes or more first run a 64-byte-per-iteration loop, the rest uses a ldp/stp table.
+_Copy_disjoint_words:
+ # These and further memory prefetches may hit out of array ranges.
+ # Experiments showed that prefetching of inaccessible memory doesn't result in exceptions.
+ prfm pldl1keep, [x0, #0]
+ prfm pstl1keep, [x1, #0]
+ prfm pldl1keep, [x0, #64]
+ prfm pstl1keep, [x1, #64]
+
+ subs x18, x2, #128
+ b.ge dw_large
+
+dw_lt_128:
+ # Copy [x0, x0 + x2) to [x1, x1 + x2)
+
+ adr x15, dw_tail_table_base
+ and x16, x2, #~8
+
+ # Calculate address to jump and store it to x15:
+ # Each pair of instructions before dw_tail_table_base copies 16 bytes.
+ # x16 is count of bytes to copy aligned down by 16.
+ # So x16/16 pairs of instructions should be executed.
+ # Each pair takes 8 bytes, so x15 = dw_tail_table_base - (x16/16)*8 = x15 - x16/2
+ sub x15, x15, x16, lsr #1
+ prfm plil1keep, [x15]
+
+ add x17, x0, x2
+ add x18, x1, x2
+
+ # If x2 = x16 + 8, then copy 8 bytes and x16 bytes after that.
+ # Otherwise x2 = x16, so proceed to copy x16 bytes.
+ tbz x2, #3, dw_lt_128_even
+ ldr x3, [x0]
+ str x3, [x1]
+dw_lt_128_even:
+ # Copy [x17 - x16, x17) to [x18 - x16, x18)
+ # x16 is aligned by 16 and less than 128
+
+ # Execute (x16/16) ldp-stp pairs; each pair copies 16 bytes
+ br x15
+
+ ldp x3, x4, [x17, #-112]
+ stp x3, x4, [x18, #-112]
+ ldp x5, x6, [x17, #-96]
+ stp x5, x6, [x18, #-96]
+ ldp x7, x8, [x17, #-80]
+ stp x7, x8, [x18, #-80]
+ ldp x9, x10, [x17, #-64]
+ stp x9, x10, [x18, #-64]
+ ldp x11, x12, [x17, #-48]
+ stp x11, x12, [x18, #-48]
+ ldp x13, x14, [x17, #-32]
+ stp x13, x14, [x18, #-32]
+ ldp x15, x16, [x17, #-16]
+ stp x15, x16, [x18, #-16]
+dw_tail_table_base:
+ ret
+
+.p2align 6
+.rept 12
+ nop
+.endr
+dw_large:
+ # x18 >= 0;
+ # Copy [x0, x0 + x18 + 128) to [x1, x1 + x18 + 128)
+
+ ldp x3, x4, [x0], #64
+ ldp x5, x6, [x0, #-48]
+ ldp x7, x8, [x0, #-32]
+ ldp x9, x10, [x0, #-16]
+
+ # Before and after each iteration of loop registers x3-x10 contain [x0 - 64, x0),
+ # and x1 is a place to copy this data;
+ # x18 contains number of bytes to be stored minus 128
+
+ # Exactly 16 instructions from p2align, so dw_loop starts from cache line boundary
+ # Checking it explicitly by aligning with "hlt 1000" instructions
+.p2alignl 6, 0xd4407d00
+dw_loop:
+ prfm pldl1keep, [x0, #64]
+ # Next line actually hurt memory copy performance (for interpreter) - JDK-8078120
+ # prfm pstl1keep, [x1, #64]
+
+ subs x18, x18, #64
+
+ stp x3, x4, [x1, #0]
+ ldp x3, x4, [x0, #0]
+ stp x5, x6, [x1, #16]
+ ldp x5, x6, [x0, #16]
+ stp x7, x8, [x1, #32]
+ ldp x7, x8, [x0, #32]
+ stp x9, x10, [x1, #48]
+ ldp x9, x10, [x0, #48]
+
+ add x1, x1, #64
+ add x0, x0, #64
+
+ b.ge dw_loop
+
+ # 13 instructions from dw_loop, so the loop body fits in one cache line
+
+dw_loop_end:
+ adds x2, x18, #64
+
+ stp x3, x4, [x1], #64
+ stp x5, x6, [x1, #-48]
+ stp x7, x8, [x1, #-32]
+ stp x9, x10, [x1, #-16]
+
+ # Increased x18 by 64, but stored 64 bytes, so x2 contains exact number of bytes to be stored
+
+ # If this number is not zero, also copy remaining bytes
+ b.ne dw_lt_128
+ ret
+
+
+ # Support for void Copy::conjoint_words(void* from, void* to, size_t count)
+ # As coded: x0 = source address, x1 = destination address, x2 = length in bytes.
+ # Branches to _Copy_disjoint_words unless a descending copy is required.
+_Copy_conjoint_words:
+ subs x3, x1, x0
+ # hi condition is met <=> from < to
+ ccmp x2, x3, #0, hi
+ # hi condition is met <=> (from < to) and (to - from < count)
+ # otherwise _Copy_disjoint_words may be used, because it performs forward copying,
+ # so it also works when ranges overlap but to <= from
+ b.ls _Copy_disjoint_words
+
+ # Overlapping case should be the rare one, it is not worth optimizing
+
+ ands x3, x2, #~8
+ # x3 is count aligned down by 2*wordSize
+ add x0, x0, x2
+ add x1, x1, x2
+ sub x3, x3, #16
+ # Skip loop if 0 or 1 words (b.eq consumes the flags from ands; add/sub above do not set flags)
+ b.eq cw_backward_loop_end
+
+ # x3 >= 0
+ # Copy [x0 - x3 - 16, x0) to [x1 - x3 - 16, x1) backward
+cw_backward_loop:
+ subs x3, x3, #16
+ ldp x4, x5, [x0, #-16]!
+ stp x4, x5, [x1, #-16]!
+ b.ge cw_backward_loop
+
+cw_backward_loop_end:
+ # Copy remaining 0 or 1 words
+ tbz x2, #3, cw_finish
+ ldr x3, [x0, #-8]
+ str x3, [x1, #-8]
+
+cw_finish:
+ ret
+
+
+ # Support for void Copy::conjoint_jshorts_atomic(void* from, void* to, size_t count)
+ # As coded: x0 = source address, x1 = destination address, x2 = length in bytes;
+ # x17/x18 start as the source/destination end addresses; all data moves are ldrh/strh.
+_Copy_conjoint_jshorts_atomic:
+ add x17, x0, x2
+ add x18, x1, x2
+
+ subs x3, x1, x0
+ # hi is met <=> (from < to) and (to - from < count)
+ ccmp x2, x3, #0, hi
+ b.hi cs_backward
+
+ subs x3, x2, #14
+ b.ge cs_forward_loop
+
+ # Copy x2 < 14 bytes from x0 to x1
+cs_forward_lt14:
+ ands x7, x2, #7
+ tbz x2, #3, cs_forward_lt8
+ ldrh w3, [x0, #0]
+ ldrh w4, [x0, #2]
+ ldrh w5, [x0, #4]
+ ldrh w6, [x0, #6]
+
+ strh w3, [x1, #0]
+ strh w4, [x1, #2]
+ strh w5, [x1, #4]
+ strh w6, [x1, #6]
+
+ # Copy x7 < 8 bytes from x17 - x7 to x18 - x7
+cs_forward_lt8:
+ b.eq cs_forward_0
+ cmp x7, #4
+ b.lt cs_forward_2
+ b.eq cs_forward_4
+
+cs_forward_6:
+ ldrh w3, [x17, #-6]
+ strh w3, [x18, #-6]
+cs_forward_4:
+ ldrh w4, [x17, #-4]
+ strh w4, [x18, #-4]
+cs_forward_2:
+ ldrh w5, [x17, #-2]
+ strh w5, [x18, #-2]
+cs_forward_0:
+ ret
+
+
+ # Copy [x0, x0 + x3 + 14) to [x1, x1 + x3 + 14)
+ # x3 >= 0
+.p2align 6
+cs_forward_loop:
+ subs x3, x3, #14
+
+ ldrh w4, [x0], #14
+ ldrh w5, [x0, #-12]
+ ldrh w6, [x0, #-10]
+ ldrh w7, [x0, #-8]
+ ldrh w8, [x0, #-6]
+ ldrh w9, [x0, #-4]
+ ldrh w10, [x0, #-2]
+
+ strh w4, [x1], #14
+ strh w5, [x1, #-12]
+ strh w6, [x1, #-10]
+ strh w7, [x1, #-8]
+ strh w8, [x1, #-6]
+ strh w9, [x1, #-4]
+ strh w10, [x1, #-2]
+
+ b.ge cs_forward_loop
+ # Exactly 16 instructions from cs_forward_loop, so loop fits into one cache line
+
+ adds x2, x3, #14
+ # x2 bytes should be copied from x0 to x1
+ b.ne cs_forward_lt14
+ ret
+
+ # Very similar to forward copying
+cs_backward:
+ subs x3, x2, #14
+ b.ge cs_backward_loop
+
+cs_backward_lt14:
+ ands x7, x2, #7
+ tbz x2, #3, cs_backward_lt8
+
+ ldrh w3, [x17, #-8]
+ ldrh w4, [x17, #-6]
+ ldrh w5, [x17, #-4]
+ ldrh w6, [x17, #-2]
+
+ strh w3, [x18, #-8]
+ strh w4, [x18, #-6]
+ strh w5, [x18, #-4]
+ strh w6, [x18, #-2]
+
+cs_backward_lt8:
+ b.eq cs_backward_0
+ cmp x7, #4
+ b.lt cs_backward_2
+ b.eq cs_backward_4
+
+cs_backward_6:
+ ldrh w3, [x0, #4]
+ strh w3, [x1, #4]
+
+cs_backward_4:
+ ldrh w4, [x0, #2]
+ strh w4, [x1, #2]
+
+cs_backward_2:
+ ldrh w5, [x0, #0]
+ strh w5, [x1, #0]
+
+cs_backward_0:
+ ret
+
+
+.p2align 6
+cs_backward_loop:
+ subs x3, x3, #14
+
+ ldrh w4, [x17, #-14]!
+ ldrh w5, [x17, #2]
+ ldrh w6, [x17, #4]
+ ldrh w7, [x17, #6]
+ ldrh w8, [x17, #8]
+ ldrh w9, [x17, #10]
+ ldrh w10, [x17, #12]
+
+ strh w4, [x18, #-14]!
+ strh w5, [x18, #2]
+ strh w6, [x18, #4]
+ strh w7, [x18, #6]
+ strh w8, [x18, #8]
+ strh w9, [x18, #10]
+ strh w10, [x18, #12]
+
+ b.ge cs_backward_loop
+ adds x2, x3, #14
+ b.ne cs_backward_lt14
+ ret
+
+
+ # Support for void Copy::arrayof_conjoint_jshorts(void* from, void* to, size_t count)
+ # No copy code is provided: the body is a lone "hlt 1007" with no return.
+ # NOTE(review): presumably a marker for an unimplemented entry point; confirm.
+_Copy_arrayof_conjoint_jshorts:
+ hlt 1007
+
+
+ # Support for void Copy::conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count)
+ # Shared by both labels below. No copy code is provided: the body is a lone "hlt 1009"
+ # with no return. NOTE(review): presumably marks an unimplemented entry point; confirm.
+_Copy_conjoint_jlongs_atomic:
+_Copy_arrayof_conjoint_jlongs:
+ hlt 1009
+
+
+ # Support for void Copy::conjoint_jints_atomic(void* from, void* to, size_t count)
+ # As coded: x0 = source address, x1 = destination address, x2 = length in bytes.
+ # Shared by both labels below; every access is a 32-bit w-register load/store or pair.
+_Copy_conjoint_jints_atomic:
+_Copy_arrayof_conjoint_jints:
+ # These and further memory prefetches may hit out of array ranges.
+ # Experiments showed that prefetching of inaccessible memory doesn't result in exceptions.
+ prfm pldl1keep, [x0, #0]
+ prfm pstl1keep, [x1, #0]
+ prfm pldl1keep, [x0, #32]
+ prfm pstl1keep, [x1, #32]
+
+ subs x3, x1, x0
+ # hi condition is met <=> from < to
+ ccmp x2, x3, #0, hi
+ # hi condition is met <=> (from < to) and (to - from < count)
+ b.hi ci_backward
+
+ subs x18, x2, #64
+ b.ge ci_forward_large
+
+ci_forward_lt_64:
+ # Copy [x0, x0 + x2) to [x1, x1 + x2)
+
+ adr x15, ci_forward_tail_table_base
+ and x16, x2, #~4
+
+ # Calculate address to jump and store it to x15:
+ # Each pair of instructions before ci_forward_tail_table_base copies 8 bytes.
+ # x16 is count of bytes to copy aligned down by 8.
+ # So x16/8 pairs of instructions should be executed.
+ # Each pair takes 8 bytes, so x15 = ci_forward_tail_table_base - (x16/8)*8 = x15 - x16
+ sub x15, x15, x16
+ prfm plil1keep, [x15]
+
+ add x17, x0, x2
+ add x18, x1, x2
+
+ # If x2 = x16 + 4, then copy 4 bytes and x16 bytes after that.
+ # Otherwise x2 = x16, so proceed to copy x16 bytes.
+ tbz x2, #2, ci_forward_lt_64_even
+ ldr w3, [x0]
+ str w3, [x1]
+ci_forward_lt_64_even:
+ # Copy [x17 - x16, x17) to [x18 - x16, x18)
+ # x16 is aligned by 8 and less than 64
+
+ # Execute (x16/8) ldp-stp pairs; each pair copies 8 bytes
+ br x15
+
+ ldp w3, w4, [x17, #-56]
+ stp w3, w4, [x18, #-56]
+ ldp w5, w6, [x17, #-48]
+ stp w5, w6, [x18, #-48]
+ ldp w7, w8, [x17, #-40]
+ stp w7, w8, [x18, #-40]
+ ldp w9, w10, [x17, #-32]
+ stp w9, w10, [x18, #-32]
+ ldp w11, w12, [x17, #-24]
+ stp w11, w12, [x18, #-24]
+ ldp w13, w14, [x17, #-16]
+ stp w13, w14, [x18, #-16]
+ ldp w15, w16, [x17, #-8]
+ stp w15, w16, [x18, #-8]
+ci_forward_tail_table_base:
+ ret
+
+.p2align 6
+.rept 12
+ nop
+.endr
+ci_forward_large:
+ # x18 >= 0;
+ # Copy [x0, x0 + x18 + 64) to [x1, x1 + x18 + 64)
+
+ ldp w3, w4, [x0], #32
+ ldp w5, w6, [x0, #-24]
+ ldp w7, w8, [x0, #-16]
+ ldp w9, w10, [x0, #-8]
+
+ # Before and after each iteration of loop registers w3-w10 contain [x0 - 32, x0),
+ # and x1 is a place to copy this data;
+ # x18 contains number of bytes to be stored minus 64
+
+ # Exactly 16 instructions from p2align, so ci_forward_loop starts from cache line boundary
+ # Checking it explicitly by aligning with "hlt 1000" instructions
+.p2alignl 6, 0xd4407d00
+ci_forward_loop:
+ prfm pldl1keep, [x0, #32]
+ prfm pstl1keep, [x1, #32]
+
+ subs x18, x18, #32
+
+ stp w3, w4, [x1, #0]
+ ldp w3, w4, [x0, #0]
+ stp w5, w6, [x1, #8]
+ ldp w5, w6, [x0, #8]
+ stp w7, w8, [x1, #16]
+ ldp w7, w8, [x0, #16]
+ stp w9, w10, [x1, #24]
+ ldp w9, w10, [x0, #24]
+
+ add x1, x1, #32
+ add x0, x0, #32
+
+ b.ge ci_forward_loop
+
+ # 14 instructions from ci_forward_loop, so the loop body fits in one cache line
+
+ci_forward_loop_end:
+ adds x2, x18, #32
+
+ stp w3, w4, [x1], #32
+ stp w5, w6, [x1, #-24]
+ stp w7, w8, [x1, #-16]
+ stp w9, w10, [x1, #-8]
+
+ # Increased x18 by 32, but stored 32 bytes, so x2 contains exact number of bytes to be stored
+
+ # If this number is not zero, also copy remaining bytes
+ b.ne ci_forward_lt_64
+ ret
+
+ci_backward:
+
+ # Overlapping case should be the rare one, it is not worth optimizing
+
+ ands x3, x2, #~4
+ # x3 is count aligned down by 2*jintSize
+ add x0, x0, x2
+ add x1, x1, x2
+ sub x3, x3, #8
+ # Skip loop if 0 or 1 jints
+ b.eq ci_backward_loop_end
+
+ # x3 >= 0
+ # Copy [x0 - x3 - 8, x0) to [x1 - x3 - 8, x1) backward
+ci_backward_loop:
+ subs x3, x3, #8
+ ldp w4, w5, [x0, #-8]!
+ stp w4, w5, [x1, #-8]!
+ b.ge ci_backward_loop
+
+ci_backward_loop_end:
+ # Copy remaining 0 or 1 jints
+ tbz x2, #2, ci_backward_finish
+ ldr w3, [x0, #-4]
+ str w3, [x1, #-4]
+
+ci_backward_finish:
+ ret
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/macroAssembler_linux_arm_32.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "runtime/os.hpp"
+
+void MacroAssembler::breakpoint(AsmCondition cond) {
+  if (cond != al) {
+    call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type, cond);
+    return;
+  }
+  emit_int32(0xe7f001f0);  // unconditional case: a single fixed instruction word
+}
+
+// atomic_cas_bool
+//
+// Perform an atomic compare and exchange and return bool result
+//
+// inputs:
+//   oldval  value to compare to
+//   newval  value to store if *(base+offset) == oldval
+//   base    base address of storage location
+//   offset  offset added to base to form dest address
+//   tmpreg  scratch for the ldrex/strex path; with noreg, LR is pushed, used and popped instead
+// output:
+//   Z flag is set on success
+
+void MacroAssembler::atomic_cas_bool(Register oldval, Register newval, Register base, int offset, Register tmpreg) {
+  if (VM_Version::supports_ldrex()) {
+    Register tmp_reg;
+    if (tmpreg == noreg) {
+      push(LR);
+      tmp_reg = LR;
+    } else {
+      tmp_reg = tmpreg;
+    }
+    assert_different_registers(tmp_reg, oldval, newval, base);
+    Label loop;
+    bind(loop);
+    ldrex(tmp_reg, Address(base, offset));
+    subs(tmp_reg, tmp_reg, oldval);                     // zero (eq) iff memory held oldval
+    strex(tmp_reg, newval, Address(base, offset), eq);
+    cmp(tmp_reg, 1, eq);                                // store status 1: retry
+    b(loop, eq);
+    cmp(tmp_reg, 0);                                    // final Z: set on success, clear on mismatch
+    if (tmpreg == noreg) {
+      pop(tmp_reg);
+    }
+  } else if (VM_Version::supports_kuser_cmpxchg32()) {
+    // On armv5 platforms we must use the Linux kernel helper
+    // function for atomic cas operations since ldrex/strex is
+    // not supported.
+    //
+    // This is a special routine at a fixed address 0xffff0fc0
+    // with these arguments and results
+    //
+    // input:
+    //  r0 = oldval, r1 = newval, r2 = ptr, lr = return address
+    // output:
+    //  r0 = 0 carry set on success
+    //  r0 != 0 carry clear on failure
+    //
+    // r3, ip and flags are clobbered
+    //
+
+    push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
+
+    Register tmp_reg = LR; // ignore the argument
+
+    assert_different_registers(tmp_reg, oldval, newval, base);
+
+    // Shuffle registers for kernel call
+    if (oldval != R0) {
+      if (newval == R0) {
+        mov(tmp_reg, newval);
+        newval = tmp_reg;
+      }
+      if (base == R0) {
+        mov(tmp_reg, base);
+        base = tmp_reg;
+      }
+      mov(R0, oldval);
+    }
+    if (newval != R1) {
+      if (base == R1) {
+        if (newval == R2) {
+          mov(tmp_reg, base);
+          base = tmp_reg;
+        } else {
+          mov(R2, base);
+          base = R2;
+        }
+      }
+      mov(R1, newval);
+    }
+    if (base != R2) {
+      mov(R2, base);
+    }
+
+    if (offset != 0) {
+      add(R2, R2, offset);
+    }
+
+    mvn(R3, 0xf000);     // R3 = ~0xf000 = 0xffff0fff
+    mov(LR, PC);
+    sub(PC, R3, 0x3f);   // branch to 0xffff0fff - 0x3f = 0xffff0fc0
+    cmp(R0, 0);          // Z set iff the helper returned 0
+
+    pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
+  } else {
+    // Should never run on a platform so old that it does not have kernel helper
+    stop("Atomic cmpxchg32 unsupported on this platform");
+  }
+}
+
+// atomic_cas
+//
+// Perform an atomic compare and exchange and return previous value
+//
+// inputs:
+// temp1, temp2 temporary registers (may be destroyed)
+// oldval value to compare to
+// newval value to store if *(base+offset) == oldval
+// base base address of storage location
+// offset offset added to base to form dest address
+// output:
+// returns previous value from *(base+offset) in R0
+
+void MacroAssembler::atomic_cas(Register temp1, Register temp2, Register oldval, Register newval, Register base, int offset) {
+ if (temp1 != R0) {
+ // try to read the previous value directly in R0
+ if (temp2 == R0) {
+ // R0 declared free
+ temp2 = temp1;
+ temp1 = R0;
+ } else if ((oldval != R0) && (newval != R0) && (base != R0)) {
+ // free, and scratched on return
+ temp1 = R0;
+ }
+ }
+ if (VM_Version::supports_ldrex()) {
+ Label loop;
+ assert_different_registers(temp1, temp2, oldval, newval, base);
+
+ bind(loop);
+ ldrex(temp1, Address(base, offset));
+ cmp(temp1, oldval);
+ strex(temp2, newval, Address(base, offset), eq);
+ cmp(temp2, 1, eq);
+ b(loop, eq);
+ if (temp1 != R0) {
+ mov(R0, temp1);
+ }
+ } else if (VM_Version::supports_kuser_cmpxchg32()) {
+ // On armv5 platforms we must use the Linux kernel helper
+ // function for atomic cas operations since ldrex/strex is
+ // not supported.
+ //
+ // This is a special routine at a fixed address 0xffff0fc0
+ //
+ // input:
+ // r0 = oldval, r1 = newval, r2 = ptr, lr = return address
+ // output:
+ // r0 = 0 carry set on success
+ // r0 != 0 carry clear on failure
+ //
+ // r3, ip and flags are clobbered
+ //
+ Label done;
+ Label loop;
+
+ push(RegisterSet(R1, R4) | RegisterSet(R12) | RegisterSet(LR));
+ // Move the three operands into R0/R1/R2 by way of the stack
+ if ( oldval != R0 || newval != R1 || base != R2 ) {
+ push(oldval);
+ push(newval);
+ push(base);
+ pop(R2);
+ pop(R1);
+ pop(R0);
+ }
+
+ if (offset != 0) {
+ add(R2, R2, offset);
+ }
+ // R4 keeps oldval: R0 is reloaded from memory in the loop and receives the helper's result
+ mov(R4, R0);
+ bind(loop);
+ ldr(R0, Address(R2));
+ cmp(R0, R4);
+ b(done, ne);
+ mvn(R12, 0xf000);
+ mov(LR, PC);
+ sub(PC, R12, 0x3f);
+ b(loop, cc);
+ mov(R0, R4);
+ bind(done);
+
+ pop(RegisterSet(R1, R4) | RegisterSet(R12) | RegisterSet(LR));
+ } else {
+ // Should never run on a platform so old that it does not have kernel helper
+ stop("Atomic cmpxchg32 unsupported on this platform");
+ }
+}
+
+// atomic_cas64
+//
+// Perform a 64 bit atomic compare and exchange and return previous value
+// as well as returning status in 'result' register
+//
+// inputs:
+// oldval_lo, oldval_hi value to compare to
+// newval_lo, newval_hi value to store if *(base+offset) == oldval
+// base base address of storage location
+// offset offset added to base to form dest address
+// output:
+// memval_lo, memval_hi, result
+// returns previous value from *(base+offset) in memval_lo/hi
+// returns status in result, 1==success, 0==failure
+// C1 just uses status result
+// VM code uses previous value returned in memval_lo/hi
+
+void MacroAssembler::atomic_cas64(Register memval_lo, Register memval_hi, Register result, Register oldval_lo, Register oldval_hi, Register newval_lo, Register newval_hi, Register base, int offset) {
+ if (VM_Version::supports_ldrexd()) {
+ Label loop;
+ assert_different_registers(memval_lo, memval_hi, result, oldval_lo,
+ oldval_hi, newval_lo, newval_hi, base);
+ assert(memval_hi == memval_lo + 1 && memval_lo < R9, "cmpxchg_long: illegal registers");
+ assert(oldval_hi == oldval_lo + 1 && oldval_lo < R9, "cmpxchg_long: illegal registers");
+ assert(newval_hi == newval_lo + 1 && newval_lo < R9, "cmpxchg_long: illegal registers");
+ assert(result != R10, "cmpxchg_long: illegal registers");
+ assert(base != R10, "cmpxchg_long: illegal registers");
+ // result starts at 0 (failure); rsbs below computes 1 - strexd status when the compare matched
+ mov(result, 0);
+ bind(loop);
+ ldrexd(memval_lo, Address(base, offset));
+ cmp(memval_lo, oldval_lo);
+ cmp(memval_hi, oldval_hi, eq);
+ strexd(result, newval_lo, Address(base, offset), eq);
+ rsbs(result, result, 1, eq);
+ b(loop, eq);
+ } else if (VM_Version::supports_kuser_cmpxchg64()) {
+ // On armv5 platforms we must use the Linux kernel helper
+ // function for atomic cas64 operations since ldrexd/strexd is
+ // not supported.
+ //
+ // This is a special routine at a fixed address 0xffff0f60
+ //
+ // input:
+ // r0 = (long long *)oldval, r1 = (long long *)newval,
+ // r2 = ptr, lr = return address
+ // output:
+ // r0 = 0 carry set on success
+ // r0 != 0 carry clear on failure
+ //
+ // r3, and flags are clobbered
+ //
+ Label done;
+ Label loop;
+
+ if (result != R12) {
+ push(R12);
+ }
+ push(RegisterSet(R10) | RegisterSet(LR));
+ mov(R10, SP); // Save SP
+
+ bic(SP, SP, StackAlignmentInBytes - 1); // align stack
+ push(RegisterSet(oldval_lo, oldval_hi));
+ push(RegisterSet(newval_lo, newval_hi));
+ // R12 holds the target address (base + offset) for the rest of this path
+ if ((offset != 0) || (base != R12)) {
+ add(R12, base, offset);
+ }
+ push(RegisterSet(R0, R3));
+ bind(loop);
+ ldrd(memval_lo, Address(R12)); //current
+ ldrd(oldval_lo, Address(SP, 24));
+ cmp(memval_lo, oldval_lo);
+ cmp(memval_hi, oldval_hi, eq);
+ pop(RegisterSet(R0, R3), ne);
+ mov(result, 0, ne);
+ b(done, ne);
+ // Setup for kernel call
+ mov(R2, R12);
+ add(R0, SP, 24); // R0 == &oldval_lo
+ add(R1, SP, 16); // R1 == &newval_lo
+ mvn(R3, 0xf000); // call kernel helper at 0xffff0f60
+ mov(LR, PC);
+ sub(PC, R3, 0x9f);
+ b(loop, cc); // if Carry clear then oldval != current
+ // try again. Otherwise, return oldval
+ // Here on success
+ pop(RegisterSet(R0, R3));
+ mov(result, 1);
+ ldrd(memval_lo, Address(SP, 8));
+ bind(done);
+ pop(RegisterSet(newval_lo, newval_hi));
+ pop(RegisterSet(oldval_lo, oldval_hi));
+ mov(SP, R10); // restore SP
+ pop(RegisterSet(R10) | RegisterSet(LR));
+ if (result != R12) {
+ pop(R12);
+ }
+ } else {
+ stop("Atomic cmpxchg64 unsupported on this platform");
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/orderAccess_linux_arm.inline.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_INLINE_HPP
+#define OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_INLINE_HPP
+
+#include "runtime/orderAccess.hpp"
+#include "runtime/os.hpp"
+#include "vm_version_arm.hpp"
+
+// Implementation of class OrderAccess.
+// - we define the high level barriers below and use the general
+// implementation in orderAccess.inline.hpp, with customizations
+// on AARCH64 via the specialized_* template functions
+#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
+
+// Memory Ordering on ARM is weak.
+//
+// Implement all 4 memory ordering barriers by DMB, since it is a
+// lighter version of DSB.
+// dmb_sy implies full system shareability domain. RD/WR access type.
+// dmb_st implies full system shareability domain. WR only access type.
+//
+// NOP on < ARMv6 (MP not supported)
+//
+// Non mcr instructions can be used if we build for armv7 or higher arch
+// __asm__ __volatile__ ("dmb" : : : "memory");
+// __asm__ __volatile__ ("dsb" : : : "memory");
+//
+// inline void _OrderAccess_dsb() {
+// volatile intptr_t dummy = 0;
+// if (os::is_MP()) {
+// __asm__ volatile (
+// "mcr p15, 0, %0, c7, c10, 4"
+// : : "r" (dummy) : "memory");
+// }
+// }
+
+inline static void dmb_sy() {
+  // "dmb sy" barrier; no barrier instruction is executed when os::is_MP() is false.
+  if (os::is_MP()) {
+#ifdef AARCH64
+    __asm__ __volatile__ ("dmb sy" : : : "memory");
+#else
+    if (VM_Version::arm_arch() < 7) {
+      // Older than ARMv7: issue the CP15 c7, c10, 5 coprocessor write instead of dmb.
+      intptr_t zero = 0;
+      __asm__ volatile (
+        "mcr p15, 0, %0, c7, c10, 5"
+        : : "r" (zero) : "memory");
+    } else {
+#ifdef __thumb__
+      __asm__ volatile ("dmb sy": : : "memory");
+#else
+      // In this configuration the instruction is supplied as a raw .word.
+      __asm__ volatile (".word 0xF57FF050 | 0xf" : : : "memory");
+#endif
+    }
+#endif
+  }
+}
+
+inline static void dmb_st() {
+  // "dmb st" barrier; no barrier instruction is executed when os::is_MP() is false.
+  if (os::is_MP()) {
+#ifdef AARCH64
+    __asm__ __volatile__ ("dmb st" : : : "memory");
+#else
+    if (VM_Version::arm_arch() < 7) {
+      // Older than ARMv7: issue the CP15 c7, c10, 5 coprocessor write instead of dmb.
+      intptr_t zero = 0;
+      __asm__ volatile (
+        "mcr p15, 0, %0, c7, c10, 5"
+        : : "r" (zero) : "memory");
+    } else {
+#ifdef __thumb__
+      __asm__ volatile ("dmb st": : : "memory");
+#else
+      // In this configuration the instruction is supplied as a raw .word.
+      __asm__ volatile (".word 0xF57FF050 | 0xe" : : : "memory");
+#endif
+    }
+#endif
+  }
+}
+
+// Load-Load / Load-Store barrier
+inline static void dmb_ld() {
+#ifndef AARCH64
+  // Non-AARCH64 build: delegate to dmb_sy().
+  dmb_sy();
+#else
+  if (os::is_MP()) {
+    __asm__ __volatile__ ("dmb ld" : : : "memory");
+  }
+#endif
+}
+
+// OrderAccess primitives: loadload/loadstore/acquire use dmb_ld, storestore uses dmb_st, the rest dmb_sy.
+inline void OrderAccess::loadload() { dmb_ld(); }
+inline void OrderAccess::loadstore() { dmb_ld(); }
+inline void OrderAccess::acquire() { dmb_ld(); }
+inline void OrderAccess::storestore() { dmb_st(); }
+inline void OrderAccess::storeload() { dmb_sy(); }
+inline void OrderAccess::release() { dmb_sy(); }
+inline void OrderAccess::fence() { dmb_sy(); }
+
+// specializations for Aarch64
+// TODO-AARCH64: evaluate effectiveness of ldar*/stlr* implementations compared to 32-bit ARM approach
+
+#ifdef AARCH64
+
+template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte>(volatile jbyte* p) {
+  // Byte load through ldarb into a w register.
+  volatile jbyte loaded;
+  __asm__ volatile(
+    "ldarb %w[res], [%[ptr]]"
+    : [res] "=&r" (loaded)
+    : [ptr] "r" (p) : "memory");
+  return loaded;
+}
+
+template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(volatile jshort* p) {
+  // Halfword load through ldarh into a w register.
+  volatile jshort loaded;
+  __asm__ volatile(
+    "ldarh %w[res], [%[ptr]]"
+    : [res] "=&r" (loaded)
+    : [ptr] "r" (p) : "memory");
+  return loaded;
+}
+
+template<> inline jint OrderAccess::specialized_load_acquire<jint>(volatile jint* p) {
+  // Word load through ldar into a w register.
+  volatile jint loaded;
+  __asm__ volatile(
+    "ldar %w[res], [%[ptr]]"
+    : [res] "=&r" (loaded)
+    : [ptr] "r" (p) : "memory");
+  return loaded;
+}
+
+template<> inline jfloat OrderAccess::specialized_load_acquire<jfloat>(volatile jfloat* p) {
+ return jfloat_cast(specialized_load_acquire((volatile jint*)p)); // load through a jint pointer, then jfloat_cast the result
+}
+
+// This is implicit as jlong and intptr_t are both "long int"
+//template<> inline jlong OrderAccess::specialized_load_acquire(volatile jlong* p) {
+// return (volatile jlong)specialized_load_acquire((volatile intptr_t*)p);
+//}
+
+template<> inline intptr_t OrderAccess::specialized_load_acquire<intptr_t>(volatile intptr_t* p) {
+  // Load through ldar using the default (non-%w) register operand.
+  volatile intptr_t loaded;
+  __asm__ volatile(
+    "ldar %[res], [%[ptr]]"
+    : [res] "=&r" (loaded)
+    : [ptr] "r" (p) : "memory");
+  return loaded;
+}
+
+template<> inline jdouble OrderAccess::specialized_load_acquire<jdouble>(volatile jdouble* p) {
+ return jdouble_cast(specialized_load_acquire((volatile intptr_t*)p)); // load through an intptr_t pointer, then jdouble_cast the result
+}
+
+
+template<> inline void OrderAccess::specialized_release_store<jbyte>(volatile jbyte* p, jbyte v) {
+  // Byte store through stlrb from a w register.
+  __asm__ volatile(
+    "stlrb %w[val], [%[ptr]]"
+    : /* no outputs */
+    : [val] "r" (v), [ptr] "r" (p) : "memory");
+}
+
+template<> inline void OrderAccess::specialized_release_store<jshort>(volatile jshort* p, jshort v) {
+  // Halfword store through stlrh from a w register.
+  __asm__ volatile(
+    "stlrh %w[val], [%[ptr]]"
+    : /* no outputs */
+    : [val] "r" (v), [ptr] "r" (p) : "memory");
+}
+
+template<> inline void OrderAccess::specialized_release_store<jint>(volatile jint* p, jint v) {
+  // Word store through stlr from a w register.
+  __asm__ volatile(
+    "stlr %w[val], [%[ptr]]"
+    : /* no outputs */
+    : [val] "r" (v), [ptr] "r" (p) : "memory");
+}
+
+template<> inline void OrderAccess::specialized_release_store<jlong>(volatile jlong* p, jlong v) {
+  // Store through stlr using the default (non-%w) register operand.
+  __asm__ volatile(
+    "stlr %[val], [%[ptr]]"
+    : /* no outputs */
+    : [val] "r" (v), [ptr] "r" (p) : "memory");
+}
+#endif // AARCH64
+
+#endif // OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/os_linux_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,804 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// no precompiled headers
+#include "assembler_arm.inline.hpp"
+#include "classfile/classLoader.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "classfile/vmSymbols.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "jvm_linux.h"
+#include "memory/allocation.inline.hpp"
+#include "nativeInst_arm.hpp"
+#include "os_share_linux.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm.h"
+#include "prims/jvm_misc.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/extendedPC.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/java.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/osThread.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/timer.hpp"
+#include "utilities/events.hpp"
+#include "utilities/vmError.hpp"
+
+// put OS-includes here
+# include <sys/types.h>
+# include <sys/mman.h>
+# include <pthread.h>
+# include <signal.h>
+# include <errno.h>
+# include <dlfcn.h>
+# include <stdlib.h>
+# include <stdio.h>
+# include <unistd.h>
+# include <sys/resource.h>
+# include <pthread.h>
+# include <sys/stat.h>
+# include <sys/time.h>
+# include <sys/utsname.h>
+# include <sys/socket.h>
+# include <sys/wait.h>
+# include <pwd.h>
+# include <poll.h>
+# include <ucontext.h>
+# include <fpu_control.h>
+# include <asm/ptrace.h>
+
+#define SPELL_REG_SP "sp" // register name handed to __asm__ in os::current_stack_pointer()
+
+// SPELL_REG_FP is deliberately not #defined for thumb: the frame pointer is not safe to use there, and leaving it undefined makes sure we never fetch it.
+#ifndef __thumb__
+#define SPELL_REG_FP AARCH64_ONLY("x29") NOT_AARCH64("fp")
+#endif
+
+address os::current_stack_pointer() {
+ register address sp __asm__ (SPELL_REG_SP); // local bound to the register named by SPELL_REG_SP ("sp")
+ return sp; // i.e. the stack pointer value as seen at this point
+}
+
+char* os::non_memory_address_word() {
+  // The sentinel must never look like an address returned by reserve_memory; -1 is used.
+  return reinterpret_cast<char*>(-1);
+}
+
+void os::initialize_thread(Thread* thr) {
+ // Intentionally empty: this port performs no per-thread initialization here; thr is unused.
+}
+
+#ifdef AARCH64
+// Map the arm_* names used by the code below onto the members of the AArch64 uc_mcontext.
+#define arm_pc pc
+#define arm_sp sp
+#define arm_fp regs[29] // x29, the same register SPELL_REG_FP names on AARCH64
+#define arm_r0 regs[0]
+#define ARM_REGS_IN_CONTEXT 31 // number of words printed starting at arm_r0 by os::print_context()
+
+#else
+
+#if NGREG == 16
+// These definitions are based on the observation that, up to a certain
+// version of GCC, mcontext_t was defined as a structure containing a
+// gregs[NGREG] array with 16 elements. In later GCC versions mcontext_t
+// was redefined as struct sigcontext and NGREG changed to 18; no macros are
+// defined in that case, so the arm_* names used below must then be real members.
+#define arm_pc gregs[15]
+#define arm_sp gregs[13]
+#define arm_fp gregs[11]
+#define arm_r0 gregs[0]
+#endif
+
+#define ARM_REGS_IN_CONTEXT 16 // number of words printed starting at arm_r0 by os::print_context()
+
+#endif // AARCH64
+
+address os::Linux::ucontext_get_pc(const ucontext_t* uc) {
+  return reinterpret_cast<address>(uc->uc_mcontext.arm_pc); // pc slot of the saved machine context
+}
+
+void os::Linux::ucontext_set_pc(ucontext_t* uc, address pc) {
+  uc->uc_mcontext.arm_pc = reinterpret_cast<uintx>(pc); // overwrite the pc slot of the saved machine context
+}
+
+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t* uc) {
+  return reinterpret_cast<intptr_t*>(uc->uc_mcontext.arm_sp); // sp slot of the saved machine context
+}
+
+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t* uc) {
+  return reinterpret_cast<intptr_t*>(uc->uc_mcontext.arm_fp); // fp slot of the saved machine context (unadjusted)
+}
+
+bool is_safe_for_fp(address pc) {
+#ifdef __thumb__
+  // Thumb build: the fp is trusted only when pc belongs to a blob in the
+  // CodeCache. For thumb C frames, given an fp we have no idea how to
+  // access the frame contents, so every other pc is rejected.
+  return CodeCache::find_blob(pc) != NULL;
+#else
+  // Calling os::address_is_in_vm() here leads to a dladdr call. Calling any libc
+  // function during os::get_native_stack() can result in a deadlock if JFR is
+  // enabled. For now, be more lenient and allow all pc's. There are other
+  // frame sanity checks in shared code, and to date they have been sufficient
+  // for other platforms.
+  //return os::address_is_in_vm(pc);
+  return true;
+#endif
+  // (pc is unused in the non-thumb variant.)
+}
+
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread
+// is currently interrupted by SIGPROF.
+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal
+// frames. Currently we don't do that on Linux, so this simply forwards to
+// os::fetch_frame_from_context() and returns its result.
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread,
+ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) {
+
+ assert(thread != NULL, "just checking"); // thread is only checked, not otherwise used
+ assert(ret_sp != NULL, "just checking"); // unlike os::fetch_frame_from_context(), both
+ assert(ret_fp != NULL, "just checking"); // out-parameters are required here
+
+ return os::fetch_frame_from_context(uc, ret_sp, ret_fp);
+}
+
+ExtendedPC os::fetch_frame_from_context(const void* ucVoid,
+                    intptr_t** ret_sp, intptr_t** ret_fp) {
+  // Returns the pc of the given context and, where requested, its sp and fp.
+  const ucontext_t* uc = (const ucontext_t*)ucVoid;
+
+  if (uc == NULL) {
+    // No context: return an empty ExtendedPC (for return value checking)
+    // and NULL out whichever of sp/fp the caller asked for.
+    if (ret_sp != NULL) *ret_sp = (intptr_t *)NULL;
+    if (ret_fp != NULL) *ret_fp = (intptr_t *)NULL;
+    return ExtendedPC(NULL);
+  }
+
+  ExtendedPC epc = ExtendedPC(os::Linux::ucontext_get_pc(uc));
+  if (ret_sp != NULL) {
+    *ret_sp = os::Linux::ucontext_get_sp(uc);
+  }
+  if (ret_fp == NULL) {
+    return epc;  // caller is not interested in fp
+  }
+
+  intptr_t* ctx_fp = os::Linux::ucontext_get_fp(uc);
+#ifndef __thumb__
+  // A pc with no blob in the CodeCache is a C frame. We need to adjust the fp.
+  if (CodeCache::find_blob(epc.pc()) == NULL) {
+    ctx_fp += os::C_frame_offset;
+  }
+#endif
+
+  // Hand out a NULL fp when stack walking from this pc is dangerous, so
+  // that the frame created from it will not be walked; otherwise hand out
+  // the (possibly adjusted) fp.
+  *ret_fp = is_safe_for_fp(epc.pc()) ? ctx_fp : (intptr_t *)NULL;
+
+  return epc;
+}
+
+frame os::fetch_frame_from_context(const void* ucVoid) {
+  // Decode sp/fp/pc from the context and wrap them in a frame.
+  intptr_t *ctx_sp, *ctx_fp;
+  const address ctx_pc = fetch_frame_from_context(ucVoid, &ctx_sp, &ctx_fp).pc();
+  return frame(ctx_sp, ctx_fp, ctx_pc);
+}
+
+frame os::get_sender_for_C_frame(frame* fr) {
+#ifdef __thumb__
+  // We can't reliably get anything from a thumb C frame.
+  return frame();
+#else
+  // Build the sender's frame from fr. Its fp is fr's link adjusted by
+  // os::C_frame_offset, or NULL when is_safe_for_fp() rejects the
+  // sender's pc.
+  const address caller_pc = fr->sender_pc();
+  intptr_t* caller_fp = is_safe_for_fp(caller_pc) ? fr->link() + os::C_frame_offset : (intptr_t *)NULL;
+  return frame(fr->sender_sp(), caller_fp, caller_pc);
+#endif
+}
+
+//
+// This actually returns two frames up. It does not return the frame of
+// os::current_frame() itself, which is the actual current frame. Nor does it
+// return the frame of os::get_native_stack(), which is the caller. It returns
+// the frame of whoever called os::get_native_stack(). Not very intuitive, but
+// consistent with how this API is implemented on other platforms.
+//
+frame os::current_frame() {
+#ifdef __thumb__
+ // We can't reliably get anything from a thumb C frame.
+ return frame();
+#else
+ register intptr_t* fp __asm__ (SPELL_REG_FP); // local bound to the register named by SPELL_REG_FP
+ // fp is for os::current_frame. We want the fp for our caller.
+ frame myframe((intptr_t*)os::current_stack_pointer(), fp + os::C_frame_offset,
+ CAST_FROM_FN_PTR(address, os::current_frame));
+ frame caller_frame = os::get_sender_for_C_frame(&myframe); // one frame up
+
+ if (os::is_first_C_frame(&caller_frame)) {
+ // stack is not walkable
+ // Assert below was added because it does not seem like this can ever happen.
+ // How can this frame ever be the first C frame since it is called from C code?
+ // If it does ever happen, undo the assert and comment here on when/why it happens.
+ assert(false, "this should never happen");
+ return frame(); // empty frame when the assert is compiled out
+ }
+
+ // return frame for our caller's caller
+ return os::get_sender_for_C_frame(&caller_frame);
+#endif
+}
+
+#ifndef AARCH64
+extern "C" address check_vfp_fault_instr; // compared with si_addr on SIGILL in JVM_handle_linux_signal()
+extern "C" address check_vfp3_32_fault_instr; // likewise
+// Both start out as NULL; presumably assigned by CPU feature probing code outside this file - TODO confirm.
+address check_vfp_fault_instr = NULL;
+address check_vfp3_32_fault_instr = NULL;
+#endif // !AARCH64
+extern "C" address check_simd_fault_instr; // same role, defined for both 32-bit and AARCH64 builds
+address check_simd_fault_instr = NULL;
+
+// Utility functions
+
+// HotSpot signal handler for Linux/ARM. Returns true (non-zero) when the signal was consumed here or by a chained
+// handler, false when it was not recognized and abort_if_unrecognized is 0; otherwise it ends in VMError::report_and_die.
+extern "C" int JVM_handle_linux_signal(int sig, siginfo_t* info,
+                                       void* ucVoid, int abort_if_unrecognized) {
+  ucontext_t* uc = (ucontext_t*) ucVoid;
+
+  Thread* t = Thread::current_or_null_safe();
+
+  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+  // (no destructors can be run)
+  os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
+  SignalHandlerMark shm(t);
+
+  if (sig == SIGILL && info != NULL && uc != NULL &&  // info/uc may be missing (see note below): check before dereferencing
+      ((info->si_addr == (caddr_t)check_simd_fault_instr)
+       NOT_AARCH64(|| info->si_addr == (caddr_t)check_vfp_fault_instr)
+       NOT_AARCH64(|| info->si_addr == (caddr_t)check_vfp3_32_fault_instr))) {
+    // skip faulty instruction + instruction that sets return value to
+    // success and set return value to failure.
+    os::Linux::ucontext_set_pc(uc, (address)info->si_addr + 8);
+    uc->uc_mcontext.arm_r0 = 0;
+    return true;
+  }
+
+  // Note: it's not uncommon that JNI code uses signal/sigset to install
+  // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
+  // or have a SIGILL handler when detecting CPU type). When that happens,
+  // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
+  // avoid unnecessary crash when libjsig is not preloaded, try handle signals
+  // that do not require siginfo/ucontext first.
+
+  if (sig == SIGPIPE || sig == SIGXFSZ) {
+    // allow chained handler to go first
+    if (os::Linux::chained_handler(sig, info, ucVoid)) {
+      return true;
+    } else {
+      // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
+      return true;
+    }
+  }
+
+  JavaThread* thread = NULL;
+  VMThread* vmthread = NULL;
+  // The current thread is only classified once the VM's signal handlers are installed.
+  if (os::Linux::signal_handlers_are_installed && t != NULL) {
+    if (t->is_Java_thread()) {
+      thread = (JavaThread*)t;
+    } else if (t->is_VM_thread()) {
+      vmthread = (VMThread *)t;
+    }
+  }
+
+  address stub = NULL;          // continuation to resume at, if the fault can be handled
+  address pc = NULL;            // faulting pc taken from the context
+  bool unsafe_access = false;   // set when the fault looks like a failed unsafe access
+
+  if (info != NULL && uc != NULL && thread != NULL) {
+    pc = (address) os::Linux::ucontext_get_pc(uc);
+
+    // Handle ALL stack overflow variations here
+    if (sig == SIGSEGV) {
+      address addr = (address) info->si_addr;
+
+      if (StubRoutines::is_safefetch_fault(pc)) {
+        os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
+        return 1;
+      }
+      // check if fault address is within thread stack
+      if (addr < thread->stack_base() &&
+          addr >= thread->stack_base() - thread->stack_size()) {
+        // stack overflow
+        if (thread->in_stack_yellow_reserved_zone(addr)) {
+          thread->disable_stack_yellow_reserved_zone();
+          if (thread->thread_state() == _thread_in_Java) {
+            // Throw a stack overflow exception.  Guard pages will be reenabled
+            // while unwinding the stack.
+            stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
+          } else {
+            // Thread was in the vm or native code.  Return and try to finish.
+            return 1;
+          }
+        } else if (thread->in_stack_red_zone(addr)) {
+          // Fatal red zone violation.  Disable the guard pages and fall through
+          // to handle_unexpected_exception way down below.
+          thread->disable_stack_red_zone();
+          tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
+        } else {
+          // Accessing stack address below sp may cause SEGV if current
+          // thread has MAP_GROWSDOWN stack. This should only happen when
+          // current thread was created by user code with MAP_GROWSDOWN flag
+          // and then attached to VM. See notes in os_linux.cpp.
+          if (thread->osthread()->expanding_stack() == 0) {
+            thread->osthread()->set_expanding_stack();
+            if (os::Linux::manually_expand_stack(thread, addr)) {
+              thread->osthread()->clear_expanding_stack();
+              return 1;
+            }
+            thread->osthread()->clear_expanding_stack();
+          } else {
+            fatal("recursive segv. expanding stack.");
+          }
+        }
+      }
+    }
+
+    if (thread->thread_state() == _thread_in_Java) {
+      // Java thread running in Java code => find exception handler if any
+      // a fault inside compiled code, the interpreter, or a stub
+
+      if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
+        stub = SharedRuntime::get_poll_stub(pc);
+      } else if (sig == SIGBUS) {
+        // BugId 4454115: A read from a MappedByteBuffer can fault
+        // here if the underlying file has been truncated.
+        // Do not crash the VM in such a case.
+        CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
+        CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
+        if (nm != NULL && nm->has_unsafe_access()) {
+          unsafe_access = true;
+        }
+      } else if (sig == SIGSEGV && !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) {
+          // Determination of interpreter/vtable stub/compiled code null exception
+          CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
+          if (cb != NULL) {
+            stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
+          }
+      } else if (sig == SIGILL && *(int *)pc == NativeInstruction::zombie_illegal_instruction) {
+        // Zombie
+        stub = SharedRuntime::get_handle_wrong_method_stub();
+      }
+    } else if (thread->thread_state() == _thread_in_vm &&
+               sig == SIGBUS && thread->doing_unsafe_access()) {
+        unsafe_access = true;
+    }
+
+    // jni_fast_Get<Primitive>Field can trap at certain pc's if a GC kicks in
+    // and the heap gets shrunk before the field access.
+    if (sig == SIGSEGV || sig == SIGBUS) {
+      address addr = JNI_FastGetField::find_slowcase_pc(pc);
+      if (addr != (address)-1) {
+        stub = addr;
+      }
+    }
+
+    // Check to see if we caught the safepoint code in the
+    // process of write protecting the memory serialization page.
+    // It write enables the page immediately after protecting it
+    // so we can just return to retry the write.
+    if (sig == SIGSEGV && os::is_memory_serialize_page(thread, (address) info->si_addr)) {
+      // Block current thread until the memory serialize page permission restored.
+      os::block_on_serialize_page_trap();
+      return true;
+    }
+  }
+
+  if (unsafe_access && stub == NULL) {
+    // it can be an unsafe access and we haven't found
+    // any other suitable exception reason,
+    // so assume it is an unsafe access.
+    address next_pc = pc + Assembler::InstructionSize;
+#ifdef __thumb__
+    if (uc->uc_mcontext.arm_cpsr & PSR_T_BIT) {
+      next_pc = (address)((intptr_t)next_pc | 0x1);
+    }
+#endif
+
+    stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
+  }
+
+  if (stub != NULL) {
+#ifdef __thumb__
+    if (uc->uc_mcontext.arm_cpsr & PSR_T_BIT) {
+      pc = (address)((intptr_t)pc | 0x1);  // PSR_T_BIT was set at the fault: record the saved pc with bit 0 set
+
+      // Clear Thumb mode bit if we're redirected into the ARM ISA based code
+      if (((intptr_t)stub & 0x1) == 0) {
+        uc->uc_mcontext.arm_cpsr &= ~PSR_T_BIT;
+      }
+    } else {
+      // No Thumb2 compiled stubs are triggered from ARM ISA compiled JIT'd code today.
+      // The support needs to be added if that changes
+      assert((((intptr_t)stub & 0x1) == 0), "can't return to Thumb code");
+    }
+#endif
+
+    // save all thread context in case we need to restore it
+    if (thread != NULL) thread->set_saved_exception_pc(pc);
+
+    // Redirect the interrupted context to the stub and report the signal as handled.
+    os::Linux::ucontext_set_pc(uc, stub);
+    return true;
+  }
+
+  // signal-chaining
+  if (os::Linux::chained_handler(sig, info, ucVoid)) {
+    return true;
+  }
+
+  if (!abort_if_unrecognized) {
+    // caller wants another chance, so give it to him
+    return false;
+  }
+
+  if (pc == NULL && uc != NULL) {
+    pc = os::Linux::ucontext_get_pc(uc);
+  }
+
+  // unmask current signal
+  sigset_t newset;
+  sigemptyset(&newset);
+  sigaddset(&newset, sig);
+  sigprocmask(SIG_UNBLOCK, &newset, NULL);
+
+  VMError::report_and_die(t, sig, pc, info, ucVoid);
+
+  ShouldNotReachHere();
+  return false;
+}
+
+void os::Linux::init_thread_fpu_state(void) {
+ os::setup_fpu(); // all FPU initialization is delegated to os::setup_fpu()
+}
+
+int os::Linux::get_fpu_control_word(void) {
+ return 0; // constant: this port does not read any FPU control word here
+}
+
+void os::Linux::set_fpu_control_word(int fpu_control) {
+ // Nothing to do: fpu_control is ignored.
+}
+
+void os::setup_fpu() {
+#ifdef AARCH64
+ __asm__ volatile ("msr fpcr, xzr"); // write the zero register to FPCR
+#else
+#if !defined(__SOFTFP__) && defined(__VFP_FP__)
+ // Turn on IEEE-754 compliant VFP mode
+ __asm__ volatile (
+ "mov %%r0, #0;"
+ "fmxr fpscr, %%r0"
+ : /* no output */ : /* no input */ : "r0"
+ ); // i.e. FPSCR := 0, using r0 as a scratch register (declared as clobbered)
+#endif
+#endif // AARCH64
+}
+
+bool os::is_allocatable(size_t bytes) {
+ return true; // unconditional: 'bytes' is not examined
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// thread stack
+// Minimum allowed stack sizes per thread kind: 48K, with 4K added where DEBUG_ONLY(...) is active.
+size_t os::Posix::_compiler_thread_min_stack_allowed = (48 DEBUG_ONLY(+ 4)) * K;
+size_t os::Posix::_java_thread_min_stack_allowed = (48 DEBUG_ONLY(+ 4)) * K;
+size_t os::Posix::_vm_internal_thread_min_stack_allowed = (48 DEBUG_ONLY(+ 4)) * K;
+
+// Returns the default stack size for the given thread type.
+size_t os::Posix::default_stack_size(os::ThreadType thr_type) {
+  // The compiler thread needs a larger stack (2M); everything else gets 512K.
+  if (thr_type == os::compiler_thread) return 2 * M;
+  return 512 * K;
+}
+
+size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
+  // Creating a guard page is very expensive. Java threads have the HotSpot guard pages, so only non-Java threads get a glibc guard page.
+  if (thr_type == java_thread) return 0;
+  return page_size();
+}
+
+// Java thread:
+//
+// Low memory addresses
+// +------------------------+
+// | |\ JavaThread created by VM does not have glibc
+// | glibc guard page | - guard, attached Java thread usually has
+// | |/ 1 page glibc guard.
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+// | |\
+// | HotSpot Guard Pages | - red and yellow pages
+// | |/
+// +------------------------+ JavaThread::stack_yellow_zone_base()
+// | |\
+// | Normal Stack | -
+// | |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// Non-Java thread:
+//
+// Low memory addresses
+// +------------------------+
+// | |\
+// | glibc guard page | - usually 1 page
+// | |/
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+// | |\
+// | Normal Stack | -
+// | |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// ** P1 (aka bottom) and size (P2 = P1 + size) are the address and stack size returned from
+// pthread_attr_getstack()
+
+static void current_stack_region(address * bottom, size_t * size) {
+  // Stores the bottom (P1 in the diagram above) and the size of the current thread's stack.
+  if (!os::Linux::is_initial_thread()) {
+    pthread_attr_t attr;
+    const int err = pthread_getattr_np(pthread_self(), &attr);
+
+    // JVM needs to know exact stack location, abort if it fails
+    if (err == ENOMEM) {
+      vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
+    } else if (err != 0) {
+      fatal("pthread_getattr_np failed");
+    }
+
+    if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) {
+      fatal("Can not locate current stack attributes!");
+    }
+
+    // attr was filled in by pthread_getattr_np() above; release it again.
+    pthread_attr_destroy(&attr);
+  } else {
+    // initial thread needs special handling because pthread_getattr_np()
+    // may return bogus value.
+    *bottom = os::Linux::initial_thread_stack_bottom();
+    *size = os::Linux::initial_thread_stack_size();
+  }
+
+  // Sanity check: the current stack pointer must lie inside the reported region.
+  assert(os::current_stack_pointer() >= *bottom &&
+         os::current_stack_pointer() < *bottom + *size, "just checking");
+}
+
+address os::current_stack_base() {
+  address region_bottom;
+  size_t region_size;
+  current_stack_region(&region_bottom, &region_size);
+  return region_bottom + region_size;  // P2 in the diagram above: bottom plus size
+}
+
+size_t os::current_stack_size() {
+  // stack size includes normal stack and HotSpot guard pages
+  address unused_bottom;  // required by current_stack_region(), not needed here
+  size_t region_size;
+  current_stack_region(&unused_bottom, &region_size);
+  return region_size;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// helper functions for fatal error handler
+
+void os::print_context(outputStream *st, const void *context) {
+  if (context == NULL) return;
+  const ucontext_t *uc = (const ucontext_t*)context;
+
+  // Registers are read as ARM_REGS_IN_CONTEXT consecutive words starting at arm_r0.
+  st->print_cr("Registers:");
+  intx* gp_regs = (intx*)&uc->uc_mcontext.arm_r0;
+  for (int idx = 0; idx < ARM_REGS_IN_CONTEXT; idx++) {
+    st->print_cr(" %-3s = " INTPTR_FORMAT, as_Register(idx)->name(), gp_regs[idx]);
+  }
+#define U64_FORMAT "0x%016llx"
+#ifdef AARCH64
+  st->print_cr(" %-3s = " U64_FORMAT, "sp", uc->uc_mcontext.sp);
+  st->print_cr(" %-3s = " U64_FORMAT, "pc", uc->uc_mcontext.pc);
+  st->print_cr(" %-3s = " U64_FORMAT, "pstate", uc->uc_mcontext.pstate);
+#else
+  st->print_cr(" %-4s = 0x%08lx", "cpsr",uc->uc_mcontext.arm_cpsr);  // now print flag register
+#endif
+  st->cr();
+
+  intptr_t *stack_top = (intptr_t *)os::Linux::ucontext_get_sp(uc);
+  st->print_cr("Top of Stack: (sp=" INTPTR_FORMAT ")", p2i(stack_top));
+  print_hex_dump(st, (address)stack_top, (address)(stack_top + 8*sizeof(intptr_t)), sizeof(intptr_t));
+  st->cr();
+
+  // Note: it may be unsafe to inspect memory near pc. For example, pc may
+  // point to garbage if entry point in an nmethod is corrupted. Leave
+  // this at the end, and hope for the best.
+  address fault_pc = os::Linux::ucontext_get_pc(uc);
+  st->print_cr("Instructions: (pc=" INTPTR_FORMAT ")", p2i(fault_pc));
+  print_hex_dump(st, fault_pc - 32, fault_pc + 32, Assembler::InstructionSize);
+}
+
+void os::print_register_info(outputStream *st, const void *context) {
+  if (context == NULL) return;
+  // For each register word starting at arm_r0, print its value and what print_location() reports for it.
+  const ucontext_t *uc = (const ucontext_t*)context;
+  intx* gp_regs = (intx*)&uc->uc_mcontext.arm_r0;
+  st->print_cr("Register to memory mapping:");
+  st->cr();
+  for (int idx = 0; idx < ARM_REGS_IN_CONTEXT; idx++) {
+    const intx value = gp_regs[idx];
+    st->print_cr(" %-3s = " INTPTR_FORMAT, as_Register(idx)->name(), value);
+    print_location(st, value);
+    st->cr();
+  }
+#ifdef AARCH64
+  st->print_cr(" %-3s = " U64_FORMAT, "pc", uc->uc_mcontext.pc);  // U64_FORMAT is #defined inside os::print_context()
+  print_location(st, uc->uc_mcontext.pc);
+  st->cr();
+#endif
+  st->cr();
+}
+
+
+#ifndef AARCH64
+
+typedef jlong cmpxchg_long_func_t(jlong, jlong, volatile jlong*);
+
+cmpxchg_long_func_t* os::atomic_cmpxchg_long_func = os::atomic_cmpxchg_long_bootstrap;
+
+jlong os::atomic_cmpxchg_long_bootstrap(jlong compare_value, jlong exchange_value, volatile jlong* dest) {
+  cmpxchg_long_func_t* stub = CAST_TO_FN_PTR(cmpxchg_long_func_t*, StubRoutines::atomic_cmpxchg_long_entry());
+  if (stub == NULL) {
+    // No stub yet: fall back to a plain compare-and-store, which the assert
+    // below restricts to the bootstrap phase (no threads registered).
+    assert(Threads::number_of_threads() == 0, "for bootstrap only");
+    const jlong seen = *dest;
+    if (seen == compare_value) *dest = exchange_value;
+    return seen;
+  }
+
+  // Stub is available: install it so later calls through the pointer skip this routine.
+  os::atomic_cmpxchg_long_func = stub;
+  return (*stub)(compare_value, exchange_value, dest);
+}
+typedef jlong load_long_func_t(volatile jlong*);
+
+load_long_func_t* os::atomic_load_long_func = os::atomic_load_long_bootstrap;
+
+jlong os::atomic_load_long_bootstrap(volatile jlong* src) {
+  load_long_func_t* stub = CAST_TO_FN_PTR(load_long_func_t*, StubRoutines::atomic_load_long_entry());
+  if (stub == NULL) {
+    // No stub yet: fall back to a plain load, which the assert below
+    // restricts to the bootstrap phase (no threads registered).
+    assert(Threads::number_of_threads() == 0, "for bootstrap only");
+    return *src;
+  }
+
+  // Stub is available: install it so later calls through the pointer skip this routine.
+  os::atomic_load_long_func = stub;
+  return (*stub)(src);
+}
+
+typedef void store_long_func_t(jlong, volatile jlong*);
+
+store_long_func_t* os::atomic_store_long_func = os::atomic_store_long_bootstrap;
+
+void os::atomic_store_long_bootstrap(jlong val, volatile jlong* dest) {
+  store_long_func_t* stub = CAST_TO_FN_PTR(store_long_func_t*, StubRoutines::atomic_store_long_entry());
+  if (stub == NULL) {
+    // No stub yet: plain store, restricted by the assert to the bootstrap phase.
+    assert(Threads::number_of_threads() == 0, "for bootstrap only");
+    *dest = val;
+    return;
+  }
+  // Stub is available: install it so later calls through the pointer skip this routine.
+  os::atomic_store_long_func = stub;
+  (*stub)(val, dest);
+}
+
+typedef jint atomic_add_func_t(jint add_value, volatile jint *dest);
+
+atomic_add_func_t * os::atomic_add_func = os::atomic_add_bootstrap;
+
+// Initial target of os::atomic_add_func: adds add_value to *dest and returns the new value, via the stub once it exists.
+jint os::atomic_add_bootstrap(jint add_value, volatile jint *dest) {
+  atomic_add_func_t * func = CAST_TO_FN_PTR(atomic_add_func_t*, StubRoutines::atomic_add_entry());
+  if (func != NULL) {
+    os::atomic_add_func = func;  // install the stub so later calls bypass this routine
+    return (*func)(add_value, dest);
+  }
+  assert(Threads::number_of_threads() == 0, "for bootstrap only");  // the read-modify-write below is not atomic
+  jint old_value = *dest;
+  *dest = old_value + add_value;
+  return (old_value + add_value);
+}
+
+typedef jint atomic_xchg_func_t(jint exchange_value, volatile jint *dest);
+
+atomic_xchg_func_t * os::atomic_xchg_func = os::atomic_xchg_bootstrap;
+
+// Initial target of os::atomic_xchg_func: stores exchange_value to *dest and returns the old value, via the stub once it exists.
+jint os::atomic_xchg_bootstrap(jint exchange_value, volatile jint *dest) {
+  atomic_xchg_func_t * func = CAST_TO_FN_PTR(atomic_xchg_func_t*, StubRoutines::atomic_xchg_entry());
+  if (func != NULL) {
+    os::atomic_xchg_func = func;  // install the stub so later calls bypass this routine
+    return (*func)(exchange_value, dest);
+  }
+  assert(Threads::number_of_threads() == 0, "for bootstrap only");  // the load/store pair below is not atomic
+  jint old_value = *dest;
+  *dest = exchange_value;
+  return (old_value);
+}
+
+typedef jint cmpxchg_func_t(jint, jint, volatile jint*);
+
+cmpxchg_func_t* os::atomic_cmpxchg_func = os::atomic_cmpxchg_bootstrap;
+
+jint os::atomic_cmpxchg_bootstrap(jint compare_value, jint exchange_value, volatile jint* dest) {
+  cmpxchg_func_t* stub = CAST_TO_FN_PTR(cmpxchg_func_t*, StubRoutines::atomic_cmpxchg_entry());
+  if (stub == NULL) {
+    // No stub yet: fall back to a plain compare-and-store, which the assert
+    // below restricts to the bootstrap phase (no threads registered).
+    assert(Threads::number_of_threads() == 0, "for bootstrap only");
+    const jint seen = *dest;
+    if (seen == compare_value) *dest = exchange_value;
+    return seen;
+  }
+
+  // Stub is available: install it so later calls through the pointer skip this routine.
+  os::atomic_cmpxchg_func = stub;
+  return (*stub)(compare_value, exchange_value, dest);
+}
+
+#endif // !AARCH64
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() { // only compiled when PRODUCT is not defined
+} // empty: no alignment check is performed in this port
+#endif
+
+int os::extra_bang_size_in_bytes() {
+ // ARM does not require an additional stack bang, so no extra bytes are requested.
+ return 0;
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/os_linux_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_OS_LINUX_ARM_HPP
+#define OS_CPU_LINUX_ARM_VM_OS_LINUX_ARM_HPP
+
+#ifndef __thumb__
+ enum {
+ // Offset to add to frame::_fp when dealing with non-thumb C frames (not defined at all for thumb builds)
+#ifdef AARCH64
+ C_frame_offset = 0,
+#else
+ C_frame_offset = -1, // added to intptr_t* values in os_linux_arm.cpp, so the unit is one intptr_t
+#endif
+ };
+#endif
+
+ static void setup_fpu(); // defined in os_linux_arm.cpp
+
+ static bool is_allocatable(size_t bytes); // defined in os_linux_arm.cpp
+
+ // Used to register dynamic code cache area with the OS
+ // Note: Currently only used in 64 bit Windows implementations
+ static bool register_code_area(char *low, char *high) { return true; } // no-op here: always reports success
+
+#ifndef AARCH64
+ static jlong (*atomic_cmpxchg_long_func)(jlong compare_value,
+ jlong exchange_value,
+ volatile jlong *dest);
+
+ static jlong (*atomic_load_long_func)(volatile jlong*);
+
+ static void (*atomic_store_long_func)(jlong, volatile jlong*);
+
+ static jint (*atomic_add_func)(jint add_value, volatile jint *dest);
+
+ static jint (*atomic_xchg_func)(jint exchange_value, volatile jint *dest);
+
+ static jint (*atomic_cmpxchg_func)(jint compare_value,
+ jint exchange_value,
+ volatile jint *dest);
+ // Bootstrap implementations: os_linux_arm.cpp initializes each pointer above to its *_bootstrap counterpart below.
+ static jlong atomic_cmpxchg_long_bootstrap(jlong, jlong, volatile jlong*);
+
+ static jlong atomic_load_long_bootstrap(volatile jlong*);
+
+ static void atomic_store_long_bootstrap(jlong, volatile jlong*);
+
+ static jint atomic_add_bootstrap(jint add_value, volatile jint *dest);
+
+ static jint atomic_xchg_bootstrap(jint exchange_value, volatile jint *dest);
+
+ static jint atomic_cmpxchg_bootstrap(jint compare_value,
+ jint exchange_value,
+ volatile jint *dest);
+#endif // !AARCH64
+
+#endif // OS_CPU_LINUX_ARM_VM_OS_LINUX_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/prefetch_linux_arm.inline.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_PREFETCH_LINUX_ARM_INLINE_HPP
+#define OS_CPU_LINUX_ARM_VM_PREFETCH_LINUX_ARM_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+inline void Prefetch::read (void *loc, intx interval) {
+  // Only loc is handed to the prefetch instruction; 'interval' is not used.
+#if defined(AARCH64)
+  __asm__ volatile ("prfm PLDL1KEEP, [%0]" : : "r" (loc));
+#elif defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_5TE__)
+  __asm__ volatile ("pld [%0]" : : "r" (loc));
+#endif
+  // For any other 32-bit target no instruction is emitted.
+}
+
+inline void Prefetch::write(void *loc, intx interval) {
+#ifdef AARCH64
+ __asm__ volatile ("prfm PSTL1KEEP, [%0]" : : "r" (loc)); // only loc is used; 'interval' is ignored
+#else
+ // Not available on 32-bit ARM (prior to ARMv7 with MP extensions), so this is a no-op there
+#endif // AARCH64
+}
+
+#endif // OS_CPU_LINUX_ARM_VM_PREFETCH_LINUX_ARM_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/thread_linux_arm.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/barrierSet.inline.hpp"
+#include "gc/shared/cardTableModRefBS.inline.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "memory/metaspaceShared.hpp"
+#include "runtime/frame.inline.hpp"
+
+void JavaThread::cache_global_variables() {
+  // Copy global addresses into this thread's fields; a field is left NULL
+  // when the heap or barrier set does not provide the matching value.
+  CollectedHeap* const heap = Universe::heap();
+  BarrierSet* const barriers = heap->barrier_set();
+
+  // The heap top is exposed only with inline contiguous allocation.
+  _heap_top_addr = heap->supports_inline_contig_alloc()
+                     ? (address) heap->top_addr()
+                     : NULL;
+
+  // The card table base is exposed only for card-table barrier sets.
+  if (barriers->is_a(BarrierSet::CardTableModRef)) {
+    CardTableModRefBS* const ct = barrier_set_cast<CardTableModRefBS>(barriers);
+    _card_table_base = (address) ct->byte_map_base;
+  } else {
+    _card_table_base = NULL;
+  }
+}
+
+// Forte Analyzer AsyncGetCallTrace profiling support: this runs on the
+// thread that SIGPROF interrupted, so it must be the current thread.
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava) {
+  assert(Thread::current() == this, "caller must be current thread");
+  const bool have_frame = pd_get_top_frame(fr_addr, ucontext, isInJava);
+  return have_frame;
+}
+
+bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) {
+ return pd_get_top_frame(fr_addr, ucontext, isInJava); // plain forward; no current-thread assert here
+}
+
+bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) {
+ assert(this->is_Java_thread(), "must be JavaThread");
+ // On success stores the top frame in *fr_addr and returns true; returns false otherwise.
+ JavaThread* jt = (JavaThread *)this;
+
+ // If we have a last_Java_frame, then we should use it even if
+ // isInJava == true. It should be more reliable than ucontext info.
+ if (jt->has_last_Java_frame() AARCH64_ONLY(&& jt->last_Java_pc() != NULL)) {
+ *fr_addr = jt->pd_last_frame();
+ return true;
+ }
+
+ // Could be in a code section that plays with the stack, like
+ // MacroAssembler::verify_heapbase()
+ if (jt->in_top_frame_unsafe_section()) {
+ return false;
+ }
+
+ // At this point, we don't have a last_Java_frame, so
+ // we try to glean some information out of the ucontext
+ // if we were running Java code when SIGPROF came in.
+ if (isInJava) {
+ ucontext_t* uc = (ucontext_t*) ucontext;
+ // ret_sp and ret_fp are passed by address to fetch_frame_from_ucontext() below.
+ intptr_t* ret_fp;
+ intptr_t* ret_sp;
+ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc,
+ &ret_sp, &ret_fp);
+ if (addr.pc() == NULL || ret_sp == NULL ) {
+ // ucontext wasn't useful
+ return false;
+ }
+
+#if INCLUDE_CDS
+ if (UseSharedSpaces && MetaspaceShared::is_in_shared_region(addr.pc(), MetaspaceShared::md)) {
+ // In the middle of a trampoline call. Bail out for safety.
+ // This happens rarely so shouldn't affect profiling.
+ return false;
+ }
+#endif
+
+ frame ret_frame(ret_sp, ret_fp, addr.pc());
+ if (!ret_frame.safe_for_sender(jt)) {
+#ifdef COMPILER2
+ // C2 can use fp as a general register (this comment used to name x86's "ebp"); see if NULL fp helps
+ frame ret_frame2(ret_sp, NULL, addr.pc());
+ if (!ret_frame2.safe_for_sender(jt)) {
+ // nothing else to try if the frame isn't good
+ return false;
+ }
+ ret_frame = ret_frame2;
+#else
+ // nothing else to try if the frame isn't good
+ return false;
+#endif /* COMPILER2 */
+ }
+ *fr_addr = ret_frame;
+ return true;
+ }
+
+ // nothing else to try
+ return false;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/thread_linux_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_THREAD_LINUX_ARM_HPP
+#define OS_CPU_LINUX_ARM_VM_THREAD_LINUX_ARM_HPP
+
+ private:
+ // The following thread-local variables replicate corresponding global variables.
+ // They are used for a quick access from compiled code via Rthread register.
+ address _heap_top_addr; // set by cache_global_variables(); NULL without inline contiguous allocation
+ address _heap_lock_addr; // NOTE(review): not assigned by cache_global_variables() - confirm it is still needed
+ address _card_table_base; // set by cache_global_variables(); NULL unless the barrier set is a card table
+
+ void pd_initialize() { // clears the frame anchor and the unsafe-section marker
+ _anchor.clear();
+ _in_top_frame_unsafe_section = NULL; // NULL never equals "this", so in_top_frame_unsafe_section() is false
+ }
+
+  frame pd_last_frame() {
+    // Build the frame described by the anchor's saved sp/fp (and pc, if any).
+    assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
+    const address anchor_pc = _anchor.last_Java_pc();
+#ifdef AARCH64
+    assert (_anchor.last_Java_pc() != NULL, "pc should be stored");
+#else
+    if (anchor_pc == NULL) {
+      // This will pick up pc from sp
+      return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp());
+    }
+#endif // AARCH64
+    return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), anchor_pc);
+  }
+
+ public:
+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } // forwarded to the frame anchor
+ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } // forwarded to the frame anchor
+ void set_last_Java_pc(address pc) { _anchor.set_last_Java_pc(pc); } // forwarded to the frame anchor
+ // Offset of _anchor within JavaThread plus the anchor's own offset of its saved fp.
+ static ByteSize last_Java_fp_offset() {
+ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();
+ }
+ // This file keeps no base-of-stack pointer: the setter and recorder do nothing, the getter returns NULL.
+ void set_base_of_stack_pointer(intptr_t* base_sp) {
+ // Nothing to do
+ }
+
+ intptr_t* base_of_stack_pointer() {
+ return NULL;
+ }
+
+ void record_base_of_stack_pointer() {
+ // Nothing to do
+ }
+ // Byte offsets of the cached address fields within JavaThread.
+ static ByteSize heap_top_addr_offset() { return byte_offset_of(JavaThread, _heap_top_addr); }
+ static ByteSize card_table_base_offset() { return byte_offset_of(JavaThread, _card_table_base); }
+
+private:
+ // Set to "this" if pd_get_top_frame should ignore this thread for now.
+ JavaThread *_in_top_frame_unsafe_section;
+
+public:
+ static ByteSize in_top_frame_unsafe_section_offset() { return byte_offset_of(JavaThread, _in_top_frame_unsafe_section); }
+ bool in_top_frame_unsafe_section() { return _in_top_frame_unsafe_section == this; } // true only while the marker holds "this"
+ // Top-frame lookup: the two public entry points and the private pd_get_top_frame().
+ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava);
+
+ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava);
+private:
+ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava);
+public:
+
+ // These routines are only used on cpu architectures that
+ // have separate register stacks (Itanium).
+ static bool register_stack_overflow() { return false; } // always false here
+ static void enable_register_stack_guard() {} // intentionally empty
+ static void disable_register_stack_guard() {} // intentionally empty
+
+#endif // OS_CPU_LINUX_ARM_VM_THREAD_LINUX_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/vmStructs_linux_arm.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_ARM_VM_VMSTRUCTS_LINUX_ARM_HPP
+#define OS_CPU_LINUX_ARM_VM_VMSTRUCTS_LINUX_ARM_HPP
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ /* Only nonstatic_field is used below; the other macro parameters are accepted but unused. */ \
+ /******************************/ \
+ /* Threads (NOTE: incomplete) */ \
+ /******************************/ \
+ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
+ nonstatic_field(OSThread, _pthread_id, pthread_t)
+
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+ /* Declares the two types named by the OSThread fields in VM_STRUCTS_OS_CPU. */ \
+ /**********************/ \
+ /* Posix Thread IDs */ \
+ /**********************/ \
+ \
+ declare_integer_type(OSThread::thread_id_t) \
+ declare_unsigned_integer_type(pthread_t)
+
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+// VM_INT_CONSTANTS_OS_CPU (above) and VM_LONG_CONSTANTS_OS_CPU (below) both expand to nothing.
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // OS_CPU_LINUX_ARM_VM_VMSTRUCTS_LINUX_ARM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/vm_version_linux_arm_32.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/os.hpp"
+#include "vm_version_arm.hpp"
+
+# include <sys/utsname.h>
+
+// Use uname() to find the architecture version and record it in _arm_arch.
+void VM_Version::get_os_cpu_info() {
+  struct utsname name;
+  static bool done = false;
+  // Support for multiple calls in the init phase
+  if (done) return;
+  done = true;
+
+  // A failed uname() (negative result) leaves 'name' unset: keep _arm_arch as it was.
+  if (uname(&name) < 0) return;
+  if (strncmp(name.machine, "aarch64", 7) == 0) {
+    _arm_arch = 8;
+  } else if (strncmp(name.machine, "armv", 4) == 0 &&
+      name.machine[4] >= '5' && name.machine[4] <= '9') {
+    _arm_arch = (int)(name.machine[4] - '0');
+  }
+}
+
+// Make sure that _arm_arch is initialized so that any calls to OrderAccess will
+// use the proper dmb instruction
+void VM_Version::early_initialize() {
+ get_os_cpu_info(); // repeated calls are fine: it does its work only once
+}
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -33,7 +33,6 @@
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/codeBlob.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "code/compiledIC.hpp"
#include "code/pcDesc.hpp"
#include "code/scopeDesc.hpp"
@@ -189,52 +188,44 @@
int frame_size;
bool must_gc_arguments;
- if (!CodeCacheExtensions::skip_compiler_support()) {
- // bypass useless code generation
- Compilation::setup_code_buffer(&code, 0);
+ Compilation::setup_code_buffer(&code, 0);
- // create assembler for code generation
- StubAssembler* sasm = new StubAssembler(&code, name_for(id), id);
- // generate code for runtime stub
- oop_maps = generate_code_for(id, sasm);
- assert(oop_maps == NULL || sasm->frame_size() != no_frame_size,
- "if stub has an oop map it must have a valid frame size");
+ // create assembler for code generation
+ StubAssembler* sasm = new StubAssembler(&code, name_for(id), id);
+ // generate code for runtime stub
+ oop_maps = generate_code_for(id, sasm);
+ assert(oop_maps == NULL || sasm->frame_size() != no_frame_size,
+ "if stub has an oop map it must have a valid frame size");
#ifdef ASSERT
- // Make sure that stubs that need oopmaps have them
- switch (id) {
- // These stubs don't need to have an oopmap
- case dtrace_object_alloc_id:
- case g1_pre_barrier_slow_id:
- case g1_post_barrier_slow_id:
- case slow_subtype_check_id:
- case fpu2long_stub_id:
- case unwind_exception_id:
- case counter_overflow_id:
+ // Make sure that stubs that need oopmaps have them
+ switch (id) {
+ // These stubs don't need to have an oopmap
+ case dtrace_object_alloc_id:
+ case g1_pre_barrier_slow_id:
+ case g1_post_barrier_slow_id:
+ case slow_subtype_check_id:
+ case fpu2long_stub_id:
+ case unwind_exception_id:
+ case counter_overflow_id:
#if defined(SPARC) || defined(PPC32)
- case handle_exception_nofpu_id: // Unused on sparc
+ case handle_exception_nofpu_id: // Unused on sparc
#endif
- break;
+ break;
- // All other stubs should have oopmaps
- default:
- assert(oop_maps != NULL, "must have an oopmap");
- }
+ // All other stubs should have oopmaps
+ default:
+ assert(oop_maps != NULL, "must have an oopmap");
+ }
#endif
- // align so printing shows nop's instead of random code at the end (SimpleStubs are aligned)
- sasm->align(BytesPerWord);
- // make sure all code is in code buffer
- sasm->flush();
+ // align so printing shows nop's instead of random code at the end (SimpleStubs are aligned)
+ sasm->align(BytesPerWord);
+ // make sure all code is in code buffer
+ sasm->flush();
- frame_size = sasm->frame_size();
- must_gc_arguments = sasm->must_gc_arguments();
- } else {
- /* ignored values */
- oop_maps = NULL;
- frame_size = 0;
- must_gc_arguments = false;
- }
+ frame_size = sasm->frame_size();
+ must_gc_arguments = sasm->must_gc_arguments();
// create blob - distinguish a few special cases
CodeBlob* blob = RuntimeStub::new_runtime_stub(name_for(id),
&code,
--- a/hotspot/src/share/vm/code/codeBlob.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/code/codeBlob.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -25,7 +25,6 @@
#include "precompiled.hpp"
#include "code/codeBlob.hpp"
#include "code/codeCache.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "code/relocInfo.hpp"
#include "compiler/disassembler.hpp"
#include "interpreter/bytecode.hpp"
@@ -228,7 +227,6 @@
BufferBlob* blob = NULL;
unsigned int size = sizeof(BufferBlob);
- CodeCacheExtensions::size_blob(name, &buffer_size);
// align the size to CodeEntryAlignment
size = CodeBlob::align_code_offset(size);
size += round_to(buffer_size, oopSize);
@@ -312,7 +310,6 @@
MethodHandlesAdapterBlob* blob = NULL;
unsigned int size = sizeof(MethodHandlesAdapterBlob);
- CodeCacheExtensions::size_blob("MethodHandles adapters", &buffer_size);
// align the size to CodeEntryAlignment
size = CodeBlob::align_code_offset(size);
size += round_to(buffer_size, oopSize);
@@ -354,13 +351,11 @@
{
RuntimeStub* stub = NULL;
ThreadInVMfromUnknown __tiv; // get to VM state in case we block on CodeCache_lock
- if (!CodeCacheExtensions::skip_code_generation()) {
- // bypass useless code generation
+ {
MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
unsigned int size = CodeBlob::allocation_size(cb, sizeof(RuntimeStub));
stub = new (size) RuntimeStub(stub_name, cb, size, frame_complete, frame_size, oop_maps, caller_must_gc_arguments);
}
- stub = (RuntimeStub*) CodeCacheExtensions::handle_generated_blob(stub, stub_name);
trace_new_stub(stub, "RuntimeStub - ", stub_name);
--- a/hotspot/src/share/vm/code/codeBlob.hpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/code/codeBlob.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -40,9 +40,8 @@
MethodProfiled = 1, // Execution level 2 and 3 (profiled) nmethods
NonNMethod = 2, // Non-nmethods like Buffers, Adapters and Runtime Stubs
All = 3, // All types (No code cache segmentation)
- Pregenerated = 4, // Special blobs, managed by CodeCacheExtensions
- AOT = 5, // AOT methods
- NumTypes = 6 // Number of CodeBlobTypes
+ AOT = 4, // AOT methods
+ NumTypes = 5 // Number of CodeBlobTypes
};
};
--- a/hotspot/src/share/vm/code/codeCache.hpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/code/codeCache.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -26,7 +26,6 @@
#define SHARE_VM_CODE_CODECACHE_HPP
#include "code/codeBlob.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "code/nmethod.hpp"
#include "memory/allocation.hpp"
#include "memory/heap.hpp"
--- a/hotspot/src/share/vm/code/codeCacheExtensions.hpp Mon Dec 19 00:49:34 2016 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_CODE_CODE_CACHE_EXTENSIONS_HPP
-#define SHARE_VM_CODE_CODE_CACHE_EXTENSIONS_HPP
-
-#include "memory/allocation.hpp"
-
-class CodeCacheExtensionsSteps: AllStatic {
-public:
- enum Step {
- // Support for optional fine grain initialization hooks
- // Note: these hooks must support refining the granularity
- // (e.g. adding intermediate steps in the ordered enum
- // if needed for future features)
- Start,
- VMVersion,
- StubRoutines1,
- Universe,
- TemplateInterpreter,
- Interpreter,
- StubRoutines2,
- InitGlobals,
- CreateVM,
- LastStep
- };
-};
-
-#include "code/codeCacheExtensions_ext.hpp"
-
-#endif // SHARE_VM_CODE_CODE_CACHE_EXTENSIONS_HPP
--- a/hotspot/src/share/vm/code/codeCacheExtensions_ext.hpp Mon Dec 19 00:49:34 2016 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_CODE_CODE_CACHE_EXTENSIONS_EXT_HPP
-#define SHARE_VM_CODE_CODE_CACHE_EXTENSIONS_EXT_HPP
-
-#include "utilities/macros.hpp"
-#include "memory/allocation.hpp"
-#include "utilities/globalDefinitions.hpp"
-#include "interpreter/bytecodes.hpp"
-
-class AdapterHandlerEntry;
-class CodeBlob;
-class CodeBuffer;
-class InterpreterMacroAssembler;
-class Template;
-
-// All the methods defined here are placeholders for possible extensions.
-
-class CodeCacheExtensions: AllStatic {
- friend class CodeCacheDumper;
-
-public:
- // init both code saving and loading
- // Must be called very early, before any code is generated.
- static void initialize() {}
-
- // Check whether the generated interpreter will be saved.
- static bool saving_generated_interpreter() { return false; }
-
- // Check whether a pregenerated interpreter is used.
- static bool use_pregenerated_interpreter() { return false; }
-
- // Placeholder for additional VM initialization code
- static void complete_step(CodeCacheExtensionsSteps::Step phase) {}
-
- // Return false for newly generated code, on systems where it is not
- // executable.
- static bool is_executable(void *pc) { return true; }
-
- // Return whether dynamically generated code can be executable
- static bool support_dynamic_code() { return true; }
-
- // Skip new code generation when known to be useless.
- static bool skip_code_generation() { return false; }
-
- // Skip stubs used only for compiled code support.
- static bool skip_compiler_support() { return false; }
-
- // Ignore UseFastSignatureHandlers when returning false
- static bool support_fast_signature_handlers() { return true; }
-
- /////////////////////////
- // Handle generated code:
- // - allow newly generated code to be shared
- // - allow pregenerated code to be used in place of the newly generated one
- // (modifying pc).
- // - support remapping when doing both save and load
- // 'remap' can be set to false if the addresses handled are not referenced
- // from code generated later.
-
- // Associate a name to a generated codelet and possibly modify the pc
- // Note: use instead the specialized versions when they exist:
- // - handle_generated_blob for CodeBlob
- // - handle_generated_handler for SignatureHandlers
- // See also the optimized calls below that handle several PCs at once.
- static void handle_generated_pc(address &pc, const char *name) {}
-
- // Adds a safe definition of the codelet, for codelets used right after
- // generation (else we would need to immediately stop the JVM and convert
- // the generated code to executable format before being able to go further).
- static void handle_generated_pc(address &pc, const char *name, address default_entry) {}
-
- // Special cases
-
- // Special case for CodeBlobs, which may require blob specific actions.
- static CodeBlob* handle_generated_blob(CodeBlob* blob, const char *name = NULL) { return blob; }
-
- // Special case for Signature Handlers.
- static void handle_generated_handler(address &handler_start, const char *name, address handler_end) {}
-
- // Support for generating different variants of the interpreter
- // that can be dynamically selected after reload.
- //
- // - init_interpreter_assembler allows to configure the assembler for
- // the current variant
- //
- // - needs_other_interpreter_variant returns true as long as other
- // variants are needed.
- //
- // - skip_template_interpreter_entries returns true if new entries
- // need not be generated for this masm setup and this bytecode
- //
- // - completed_template_interpreter_entries is called after new
- // entries have been generated and installed, for any non skipped
- // bytecode.
- static void init_interpreter_assembler(InterpreterMacroAssembler* masm, CodeBuffer* code) {}
- static bool needs_other_interpreter_variant() { return false; }
- static bool skip_template_interpreter_entries(Bytecodes::Code code) { return false; }
- static void completed_template_interpreter_entries(InterpreterMacroAssembler* masm, Bytecodes::Code code) {}
-
- // Code size optimization. May optimize the requested size.
- static void size_blob(const char* name, int *updatable_size) {}
-
- // ergonomics
- static void set_ergonomics_flags() {}
-};
-
-#endif // SHARE_VM_CODE_CODE_CACHE_EXTENSIONS_EXT_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/code/relocInfo_ext.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/codeCache.hpp"
+#include "code/relocInfo.hpp"
+#include "code/relocInfo_ext.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "memory/universe.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+#ifdef COMPILER1
+#include "c1/c1_globals.hpp"
+#endif
+
+address symbolic_Relocation::symbolic_value(symbolic_Relocation::symbolic_reference t) {
+  // Returns the address that symbolic reference 't' currently stands for,
+  // or NULL when that value is unavailable (no heap yet, no inline
+  // contiguous allocation, or no card table).
+  if (Universe::heap() == NULL) {
+    // the symbolic values are not needed so early
+    // (and most of them lead to errors if asked too early)
+    return NULL;
+  }
+  switch(t) {
+    case symbolic_Relocation::polling_page_reference: {
+      return os::get_polling_page();
+    }
+    case symbolic_Relocation::eden_top_reference: {
+      if (!Universe::heap()->supports_inline_contig_alloc()) return NULL;
+      return (address)Universe::heap()->top_addr();
+    }
+    case symbolic_Relocation::heap_end_reference: {
+      if (!Universe::heap()->supports_inline_contig_alloc()) return NULL;
+      return (address)Universe::heap()->end_addr();
+    }
+    case symbolic_Relocation::card_table_reference: {
+      BarrierSet* bs = Universe::heap()->barrier_set();
+      // Check the barrier set kind before downcasting: only a card table has a byte_map_base.
+      if (!bs->is_a(BarrierSet::CardTableModRef)) return NULL;
+      return (address)((CardTableModRefBS*)bs)->byte_map_base;
+    }
+    case symbolic_Relocation::mark_bits_reference: {
+      return (address)Universe::verify_mark_bits();
+    }
+    case symbolic_Relocation::mark_mask_reference: {
+      return (address)Universe::verify_mark_mask();
+    }
+    case symbolic_Relocation::oop_bits_reference: {
+      return (address)Universe::verify_oop_bits();
+    }
+    case symbolic_Relocation::oop_mask_reference: {
+      return (address)Universe::verify_oop_mask();
+    }
+    case symbolic_Relocation::debug_string_reference: {
+      return (address)"<Lost debug string>";
+    }
+    default: {
+      // missing declaration
+      ShouldNotReachHere();
+      return NULL;
+    }
+  }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/code/relocInfo_ext.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CODE_RELOCINFO_EXT_HPP
+#define SHARE_VM_CODE_RELOCINFO_EXT_HPP
+
+// symbolic_Relocation allows annotating some addresses in the generated code.
+//
+// This class was initially defined using the last unused relocType. The
+// new version tries to limit the impact on open source code changes.
+//
+// Without compiled code support, symbolic_Relocation need not be a real
+// relocation. To avoid using the last unused relocType, the
+// symbolic_Relocation::spec(<any symbolic type>) has been replaced
+// by additional methods using directly the symbolic type.
+//
+// Note: the order of the arguments in some methods had to be reversed
+// to avoid confusion between the relocType enum and the
+// symbolic_reference enum.
+class symbolic_Relocation : AllStatic {
+
+ public:
+ enum symbolic_reference {
+ card_table_reference,
+ eden_top_reference,
+ heap_end_reference,
+ polling_page_reference,
+ mark_bits_reference,
+ mark_mask_reference,
+ oop_bits_reference,
+ oop_mask_reference,
+ debug_string_reference,
+ last_symbolic_reference // NOTE(review): presumably an end marker; symbolic_value() has no case for it
+ };
+
+ // get the new value for a given symbolic type (the result may be NULL)
+ static address symbolic_value(symbolic_reference t);
+};
+
+#endif // SHARE_VM_CODE_RELOCINFO_EXT_HPP
--- a/hotspot/src/share/vm/code/stubs.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/code/stubs.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -262,16 +262,3 @@
}
}
-// Fixup for pregenerated code
-void StubQueue::fix_buffer(address buffer, address queue_end, address buffer_end, int number_of_stubs) {
- const int extra_bytes = CodeEntryAlignment;
- _stub_buffer = buffer;
- _queue_begin = 0;
- _queue_end = queue_end - buffer;
- _number_of_stubs = number_of_stubs;
- int size = buffer_end - buffer;
- // Note: _buffer_limit must differ from _queue_end in the iteration loops
- // => add extra space at the end (preserving alignment for asserts) if needed
- if (buffer_end == queue_end) size += extra_bytes;
- _buffer_limit = _buffer_size = size;
-}
--- a/hotspot/src/share/vm/code/stubs.hpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/code/stubs.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -217,8 +217,6 @@
void verify(); // verifies the stub queue
void print(); // prints information about the stub queue
- // Fixup for pregenerated code
- void fix_buffer(address buffer, address queue_end, address buffer_end, int number_of_stubs);
};
#endif // SHARE_VM_CODE_STUBS_HPP
--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -27,7 +27,6 @@
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/codeCache.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/collectedHeap.hpp"
@@ -1199,7 +1198,6 @@
ICache::invalidate_range(handler, insts_size);
_handler = handler + insts_size;
}
- CodeCacheExtensions::handle_generated_handler(handler, buffer->name(), _handler);
return handler;
}
@@ -1208,7 +1206,7 @@
// use slow signature handler if we can't do better
int handler_index = -1;
// check if we can use customized (fast) signature handler
- if (UseFastSignatureHandlers && CodeCacheExtensions::support_fast_signature_handlers() && method->size_of_parameters() <= Fingerprinter::max_size_of_parameters) {
+ if (UseFastSignatureHandlers && method->size_of_parameters() <= Fingerprinter::max_size_of_parameters) {
// use customized signature handler
MutexLocker mu(SignatureHandlerLibrary_lock);
// make sure data structure is initialized
@@ -1225,15 +1223,6 @@
round_to((intptr_t)_buffer, CodeEntryAlignment) - (address)_buffer;
CodeBuffer buffer((address)(_buffer + align_offset),
SignatureHandlerLibrary::buffer_size - align_offset);
- if (!CodeCacheExtensions::support_dynamic_code()) {
- // we need a name for the signature (for lookups or saving)
- const int SYMBOL_SIZE = 50;
- char *symbolName = NEW_RESOURCE_ARRAY(char, SYMBOL_SIZE);
- // support for named signatures
- jio_snprintf(symbolName, SYMBOL_SIZE,
- "native_" UINT64_FORMAT, fingerprint);
- buffer.set_name(symbolName);
- }
InterpreterRuntime::SignatureHandlerGenerator(method, &buffer).generate(fingerprint);
// copy into code heap
address handler = set_handler(&buffer);
@@ -1251,7 +1240,6 @@
fingerprint,
buffer.insts_size());
if (buffer.insts_size() > 0) {
- // buffer may be empty for pregenerated handlers
Disassembler::decode(handler, handler + buffer.insts_size());
}
#ifndef PRODUCT
--- a/hotspot/src/share/vm/interpreter/templateInterpreter.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/interpreter/templateInterpreter.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -23,7 +23,6 @@
*/
#include "precompiled.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/interp_masm.hpp"
@@ -52,29 +51,10 @@
TraceTime timer("Interpreter generation", TRACETIME_LOG(Info, startuptime));
int code_size = InterpreterCodeSize;
NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space
-#if INCLUDE_JVMTI
- if (CodeCacheExtensions::saving_generated_interpreter()) {
- // May requires several versions of the codelets.
- // Final size will automatically be optimized.
- code_size *= 2;
- }
-#endif
_code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL,
"Interpreter");
TemplateInterpreterGenerator g(_code);
}
- if (PrintInterpreter) {
- if (CodeCacheExtensions::saving_generated_interpreter() &&
- CodeCacheExtensions::use_pregenerated_interpreter()) {
- ResourceMark rm;
- tty->print("Printing the newly generated interpreter first");
- print();
- tty->print("Printing the pregenerated interpreter next");
- }
- }
-
- // Install the pregenerated interpreter code before printing it
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::TemplateInterpreter);
if (PrintInterpreter) {
ResourceMark rm;
--- a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -23,7 +23,6 @@
*/
#include "precompiled.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/interp_masm.hpp"
@@ -55,219 +54,213 @@
};
void TemplateInterpreterGenerator::generate_all() {
- // Loop, in case we need several variants of the interpreter entries
- do {
- if (!CodeCacheExtensions::skip_code_generation()) {
- // bypass code generation when useless
- { CodeletMark cm(_masm, "slow signature handler");
- AbstractInterpreter::_slow_signature_handler = generate_slow_signature_handler();
- }
+ { CodeletMark cm(_masm, "slow signature handler");
+ AbstractInterpreter::_slow_signature_handler = generate_slow_signature_handler();
+ }
- { CodeletMark cm(_masm, "error exits");
- _unimplemented_bytecode = generate_error_exit("unimplemented bytecode");
- _illegal_bytecode_sequence = generate_error_exit("illegal bytecode sequence - method not verified");
- }
+ { CodeletMark cm(_masm, "error exits");
+ _unimplemented_bytecode = generate_error_exit("unimplemented bytecode");
+ _illegal_bytecode_sequence = generate_error_exit("illegal bytecode sequence - method not verified");
+ }
#ifndef PRODUCT
- if (TraceBytecodes) {
- CodeletMark cm(_masm, "bytecode tracing support");
- Interpreter::_trace_code =
- EntryPoint(
- generate_trace_code(btos),
- generate_trace_code(ztos),
- generate_trace_code(ctos),
- generate_trace_code(stos),
- generate_trace_code(atos),
- generate_trace_code(itos),
- generate_trace_code(ltos),
- generate_trace_code(ftos),
- generate_trace_code(dtos),
- generate_trace_code(vtos)
- );
- }
+ if (TraceBytecodes) {
+ CodeletMark cm(_masm, "bytecode tracing support");
+ Interpreter::_trace_code =
+ EntryPoint(
+ generate_trace_code(btos),
+ generate_trace_code(ztos),
+ generate_trace_code(ctos),
+ generate_trace_code(stos),
+ generate_trace_code(atos),
+ generate_trace_code(itos),
+ generate_trace_code(ltos),
+ generate_trace_code(ftos),
+ generate_trace_code(dtos),
+ generate_trace_code(vtos)
+ );
+ }
#endif // !PRODUCT
- { CodeletMark cm(_masm, "return entry points");
- const int index_size = sizeof(u2);
- for (int i = 0; i < Interpreter::number_of_return_entries; i++) {
- Interpreter::_return_entry[i] =
- EntryPoint(
- generate_return_entry_for(itos, i, index_size),
- generate_return_entry_for(itos, i, index_size),
- generate_return_entry_for(itos, i, index_size),
- generate_return_entry_for(itos, i, index_size),
- generate_return_entry_for(atos, i, index_size),
- generate_return_entry_for(itos, i, index_size),
- generate_return_entry_for(ltos, i, index_size),
- generate_return_entry_for(ftos, i, index_size),
- generate_return_entry_for(dtos, i, index_size),
- generate_return_entry_for(vtos, i, index_size)
- );
- }
- }
+ { CodeletMark cm(_masm, "return entry points");
+ const int index_size = sizeof(u2);
+ for (int i = 0; i < Interpreter::number_of_return_entries; i++) {
+ Interpreter::_return_entry[i] =
+ EntryPoint(
+ generate_return_entry_for(itos, i, index_size),
+ generate_return_entry_for(itos, i, index_size),
+ generate_return_entry_for(itos, i, index_size),
+ generate_return_entry_for(itos, i, index_size),
+ generate_return_entry_for(atos, i, index_size),
+ generate_return_entry_for(itos, i, index_size),
+ generate_return_entry_for(ltos, i, index_size),
+ generate_return_entry_for(ftos, i, index_size),
+ generate_return_entry_for(dtos, i, index_size),
+ generate_return_entry_for(vtos, i, index_size)
+ );
+ }
+ }
- { CodeletMark cm(_masm, "invoke return entry points");
- // These states are in order specified in TosState, except btos/ztos/ctos/stos are
- // really the same as itos since there is no top of stack optimization for these types
- const TosState states[] = {itos, itos, itos, itos, itos, ltos, ftos, dtos, atos, vtos, ilgl};
- const int invoke_length = Bytecodes::length_for(Bytecodes::_invokestatic);
- const int invokeinterface_length = Bytecodes::length_for(Bytecodes::_invokeinterface);
- const int invokedynamic_length = Bytecodes::length_for(Bytecodes::_invokedynamic);
+ { CodeletMark cm(_masm, "invoke return entry points");
+ // These states are in order specified in TosState, except btos/ztos/ctos/stos are
+ // really the same as itos since there is no top of stack optimization for these types
+ const TosState states[] = {itos, itos, itos, itos, itos, ltos, ftos, dtos, atos, vtos, ilgl};
+ const int invoke_length = Bytecodes::length_for(Bytecodes::_invokestatic);
+ const int invokeinterface_length = Bytecodes::length_for(Bytecodes::_invokeinterface);
+ const int invokedynamic_length = Bytecodes::length_for(Bytecodes::_invokedynamic);
- for (int i = 0; i < Interpreter::number_of_return_addrs; i++) {
- TosState state = states[i];
- assert(state != ilgl, "states array is wrong above");
- Interpreter::_invoke_return_entry[i] = generate_return_entry_for(state, invoke_length, sizeof(u2));
- Interpreter::_invokeinterface_return_entry[i] = generate_return_entry_for(state, invokeinterface_length, sizeof(u2));
- Interpreter::_invokedynamic_return_entry[i] = generate_return_entry_for(state, invokedynamic_length, sizeof(u4));
- }
- }
+ for (int i = 0; i < Interpreter::number_of_return_addrs; i++) {
+ TosState state = states[i];
+ assert(state != ilgl, "states array is wrong above");
+ Interpreter::_invoke_return_entry[i] = generate_return_entry_for(state, invoke_length, sizeof(u2));
+ Interpreter::_invokeinterface_return_entry[i] = generate_return_entry_for(state, invokeinterface_length, sizeof(u2));
+ Interpreter::_invokedynamic_return_entry[i] = generate_return_entry_for(state, invokedynamic_length, sizeof(u4));
+ }
+ }
- { CodeletMark cm(_masm, "earlyret entry points");
- Interpreter::_earlyret_entry =
- EntryPoint(
- generate_earlyret_entry_for(btos),
- generate_earlyret_entry_for(ztos),
- generate_earlyret_entry_for(ctos),
- generate_earlyret_entry_for(stos),
- generate_earlyret_entry_for(atos),
- generate_earlyret_entry_for(itos),
- generate_earlyret_entry_for(ltos),
- generate_earlyret_entry_for(ftos),
- generate_earlyret_entry_for(dtos),
- generate_earlyret_entry_for(vtos)
- );
- }
+ { CodeletMark cm(_masm, "earlyret entry points");
+ Interpreter::_earlyret_entry =
+ EntryPoint(
+ generate_earlyret_entry_for(btos),
+ generate_earlyret_entry_for(ztos),
+ generate_earlyret_entry_for(ctos),
+ generate_earlyret_entry_for(stos),
+ generate_earlyret_entry_for(atos),
+ generate_earlyret_entry_for(itos),
+ generate_earlyret_entry_for(ltos),
+ generate_earlyret_entry_for(ftos),
+ generate_earlyret_entry_for(dtos),
+ generate_earlyret_entry_for(vtos)
+ );
+ }
- { CodeletMark cm(_masm, "deoptimization entry points");
- for (int i = 0; i < Interpreter::number_of_deopt_entries; i++) {
- Interpreter::_deopt_entry[i] =
- EntryPoint(
- generate_deopt_entry_for(itos, i),
- generate_deopt_entry_for(itos, i),
- generate_deopt_entry_for(itos, i),
- generate_deopt_entry_for(itos, i),
- generate_deopt_entry_for(atos, i),
- generate_deopt_entry_for(itos, i),
- generate_deopt_entry_for(ltos, i),
- generate_deopt_entry_for(ftos, i),
- generate_deopt_entry_for(dtos, i),
- generate_deopt_entry_for(vtos, i)
- );
- }
+ { CodeletMark cm(_masm, "deoptimization entry points");
+ for (int i = 0; i < Interpreter::number_of_deopt_entries; i++) {
+ Interpreter::_deopt_entry[i] =
+ EntryPoint(
+ generate_deopt_entry_for(itos, i),
+ generate_deopt_entry_for(itos, i),
+ generate_deopt_entry_for(itos, i),
+ generate_deopt_entry_for(itos, i),
+ generate_deopt_entry_for(atos, i),
+ generate_deopt_entry_for(itos, i),
+ generate_deopt_entry_for(ltos, i),
+ generate_deopt_entry_for(ftos, i),
+ generate_deopt_entry_for(dtos, i),
+ generate_deopt_entry_for(vtos, i)
+ );
+ }
+ }
+
+ { CodeletMark cm(_masm, "result handlers for native calls");
+ // The various result converter stublets.
+ int is_generated[Interpreter::number_of_result_handlers];
+ memset(is_generated, 0, sizeof(is_generated));
+
+ for (int i = 0; i < Interpreter::number_of_result_handlers; i++) {
+ BasicType type = types[i];
+ if (!is_generated[Interpreter::BasicType_as_index(type)]++) {
+ Interpreter::_native_abi_to_tosca[Interpreter::BasicType_as_index(type)] = generate_result_handler_for(type);
}
-
- { CodeletMark cm(_masm, "result handlers for native calls");
- // The various result converter stublets.
- int is_generated[Interpreter::number_of_result_handlers];
- memset(is_generated, 0, sizeof(is_generated));
-
- for (int i = 0; i < Interpreter::number_of_result_handlers; i++) {
- BasicType type = types[i];
- if (!is_generated[Interpreter::BasicType_as_index(type)]++) {
- Interpreter::_native_abi_to_tosca[Interpreter::BasicType_as_index(type)] = generate_result_handler_for(type);
- }
- }
- }
+ }
+ }
- { CodeletMark cm(_masm, "continuation entry points");
- Interpreter::_continuation_entry =
- EntryPoint(
- generate_continuation_for(btos),
- generate_continuation_for(ztos),
- generate_continuation_for(ctos),
- generate_continuation_for(stos),
- generate_continuation_for(atos),
- generate_continuation_for(itos),
- generate_continuation_for(ltos),
- generate_continuation_for(ftos),
- generate_continuation_for(dtos),
- generate_continuation_for(vtos)
- );
- }
+ { CodeletMark cm(_masm, "continuation entry points");
+ Interpreter::_continuation_entry =
+ EntryPoint(
+ generate_continuation_for(btos),
+ generate_continuation_for(ztos),
+ generate_continuation_for(ctos),
+ generate_continuation_for(stos),
+ generate_continuation_for(atos),
+ generate_continuation_for(itos),
+ generate_continuation_for(ltos),
+ generate_continuation_for(ftos),
+ generate_continuation_for(dtos),
+ generate_continuation_for(vtos)
+ );
+ }
- { CodeletMark cm(_masm, "safepoint entry points");
- Interpreter::_safept_entry =
- EntryPoint(
- generate_safept_entry_for(btos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(ztos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(ctos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(stos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(atos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(itos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(ltos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(ftos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(dtos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
- generate_safept_entry_for(vtos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint))
- );
- }
+ { CodeletMark cm(_masm, "safepoint entry points");
+ Interpreter::_safept_entry =
+ EntryPoint(
+ generate_safept_entry_for(btos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(ztos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(ctos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(stos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(atos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(itos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(ltos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(ftos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(dtos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)),
+ generate_safept_entry_for(vtos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint))
+ );
+ }
- { CodeletMark cm(_masm, "exception handling");
- // (Note: this is not safepoint safe because thread may return to compiled code)
- generate_throw_exception();
- }
+ { CodeletMark cm(_masm, "exception handling");
+ // (Note: this is not safepoint safe because thread may return to compiled code)
+ generate_throw_exception();
+ }
- { CodeletMark cm(_masm, "throw exception entrypoints");
- Interpreter::_throw_ArrayIndexOutOfBoundsException_entry = generate_ArrayIndexOutOfBounds_handler("java/lang/ArrayIndexOutOfBoundsException");
- Interpreter::_throw_ArrayStoreException_entry = generate_klass_exception_handler("java/lang/ArrayStoreException" );
- Interpreter::_throw_ArithmeticException_entry = generate_exception_handler("java/lang/ArithmeticException" , "/ by zero");
- Interpreter::_throw_ClassCastException_entry = generate_ClassCastException_handler();
- Interpreter::_throw_NullPointerException_entry = generate_exception_handler("java/lang/NullPointerException" , NULL );
- Interpreter::_throw_StackOverflowError_entry = generate_StackOverflowError_handler();
- }
+ { CodeletMark cm(_masm, "throw exception entrypoints");
+ Interpreter::_throw_ArrayIndexOutOfBoundsException_entry = generate_ArrayIndexOutOfBounds_handler("java/lang/ArrayIndexOutOfBoundsException");
+ Interpreter::_throw_ArrayStoreException_entry = generate_klass_exception_handler("java/lang/ArrayStoreException" );
+ Interpreter::_throw_ArithmeticException_entry = generate_exception_handler("java/lang/ArithmeticException" , "/ by zero");
+ Interpreter::_throw_ClassCastException_entry = generate_ClassCastException_handler();
+ Interpreter::_throw_NullPointerException_entry = generate_exception_handler("java/lang/NullPointerException" , NULL );
+ Interpreter::_throw_StackOverflowError_entry = generate_StackOverflowError_handler();
+ }
#define method_entry(kind) \
- { CodeletMark cm(_masm, "method entry point (kind = " #kind ")"); \
- Interpreter::_entry_table[Interpreter::kind] = generate_method_entry(Interpreter::kind); \
- Interpreter::update_cds_entry_table(Interpreter::kind); \
- }
+ { CodeletMark cm(_masm, "method entry point (kind = " #kind ")"); \
+ Interpreter::_entry_table[Interpreter::kind] = generate_method_entry(Interpreter::kind); \
+ Interpreter::update_cds_entry_table(Interpreter::kind); \
+ }
- // all non-native method kinds
- method_entry(zerolocals)
- method_entry(zerolocals_synchronized)
- method_entry(empty)
- method_entry(accessor)
- method_entry(abstract)
- method_entry(java_lang_math_sin )
- method_entry(java_lang_math_cos )
- method_entry(java_lang_math_tan )
- method_entry(java_lang_math_abs )
- method_entry(java_lang_math_sqrt )
- method_entry(java_lang_math_log )
- method_entry(java_lang_math_log10)
- method_entry(java_lang_math_exp )
- method_entry(java_lang_math_pow )
- method_entry(java_lang_math_fmaF )
- method_entry(java_lang_math_fmaD )
- method_entry(java_lang_ref_reference_get)
+ // all non-native method kinds
+ method_entry(zerolocals)
+ method_entry(zerolocals_synchronized)
+ method_entry(empty)
+ method_entry(accessor)
+ method_entry(abstract)
+ method_entry(java_lang_math_sin )
+ method_entry(java_lang_math_cos )
+ method_entry(java_lang_math_tan )
+ method_entry(java_lang_math_abs )
+ method_entry(java_lang_math_sqrt )
+ method_entry(java_lang_math_log )
+ method_entry(java_lang_math_log10)
+ method_entry(java_lang_math_exp )
+ method_entry(java_lang_math_pow )
+ method_entry(java_lang_math_fmaF )
+ method_entry(java_lang_math_fmaD )
+ method_entry(java_lang_ref_reference_get)
- AbstractInterpreter::initialize_method_handle_entries();
+ AbstractInterpreter::initialize_method_handle_entries();
- // all native method kinds (must be one contiguous block)
- Interpreter::_native_entry_begin = Interpreter::code()->code_end();
- method_entry(native)
- method_entry(native_synchronized)
- Interpreter::_native_entry_end = Interpreter::code()->code_end();
+ // all native method kinds (must be one contiguous block)
+ Interpreter::_native_entry_begin = Interpreter::code()->code_end();
+ method_entry(native)
+ method_entry(native_synchronized)
+ Interpreter::_native_entry_end = Interpreter::code()->code_end();
- method_entry(java_util_zip_CRC32_update)
- method_entry(java_util_zip_CRC32_updateBytes)
- method_entry(java_util_zip_CRC32_updateByteBuffer)
- method_entry(java_util_zip_CRC32C_updateBytes)
- method_entry(java_util_zip_CRC32C_updateDirectByteBuffer)
+ method_entry(java_util_zip_CRC32_update)
+ method_entry(java_util_zip_CRC32_updateBytes)
+ method_entry(java_util_zip_CRC32_updateByteBuffer)
+ method_entry(java_util_zip_CRC32C_updateBytes)
+ method_entry(java_util_zip_CRC32C_updateDirectByteBuffer)
- method_entry(java_lang_Float_intBitsToFloat);
- method_entry(java_lang_Float_floatToRawIntBits);
- method_entry(java_lang_Double_longBitsToDouble);
- method_entry(java_lang_Double_doubleToRawLongBits);
+ method_entry(java_lang_Float_intBitsToFloat);
+ method_entry(java_lang_Float_floatToRawIntBits);
+ method_entry(java_lang_Double_longBitsToDouble);
+ method_entry(java_lang_Double_doubleToRawLongBits);
#undef method_entry
- // Bytecodes
- set_entry_points_for_all_bytes();
- }
- } while (CodeCacheExtensions::needs_other_interpreter_variant());
+ // Bytecodes
+ set_entry_points_for_all_bytes();
// installation of code in other places in the runtime
// (ExcutableCodeManager calls not needed to copy the entries)
@@ -314,9 +307,6 @@
void TemplateInterpreterGenerator::set_entry_points(Bytecodes::Code code) {
- if (CodeCacheExtensions::skip_template_interpreter_entries(code)) {
- return;
- }
CodeletMark cm(_masm, Bytecodes::name(code), code);
// initialize entry points
assert(_unimplemented_bytecode != NULL, "should have been generated before");
@@ -347,7 +337,6 @@
EntryPoint entry(bep, zep, cep, sep, aep, iep, lep, fep, dep, vep);
Interpreter::_normal_table.set_entry(code, entry);
Interpreter::_wentry_point[code] = wep;
- CodeCacheExtensions::completed_template_interpreter_entries(_masm, code);
}
--- a/hotspot/src/share/vm/memory/heap.hpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/memory/heap.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -81,7 +81,6 @@
class CodeHeap : public CHeapObj<mtCode> {
friend class VMStructs;
- friend class PregeneratedCodeHeap;
protected:
VirtualSpace _memory; // the memory holding the blocks
VirtualSpace _segmap; // the memory holding the segment map
--- a/hotspot/src/share/vm/memory/virtualspace.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/memory/virtualspace.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -23,7 +23,6 @@
*/
#include "precompiled.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "memory/virtualspace.hpp"
@@ -592,7 +591,7 @@
ReservedCodeSpace::ReservedCodeSpace(size_t r_size,
size_t rs_align,
bool large) :
- ReservedSpace(r_size, rs_align, large, /*executable*/ CodeCacheExtensions::support_dynamic_code()) {
+ ReservedSpace(r_size, rs_align, large, /*executable*/ true) {
MemTracker::record_virtual_memory_type((address)base(), mtCode);
}
--- a/hotspot/src/share/vm/precompiled/precompiled.hpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/precompiled/precompiled.hpp Mon Dec 19 12:39:01 2016 -0500
@@ -66,7 +66,6 @@
# include "classfile/vmSymbols.hpp"
# include "code/codeBlob.hpp"
# include "code/codeCache.hpp"
-# include "code/codeCacheExtensions.hpp"
# include "code/compressedStream.hpp"
# include "code/debugInfo.hpp"
# include "code/debugInfoRec.hpp"
--- a/hotspot/src/share/vm/prims/methodHandles.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/prims/methodHandles.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -26,7 +26,6 @@
#include "classfile/javaClasses.inline.hpp"
#include "classfile/stringTable.hpp"
#include "code/codeCache.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "code/dependencyContext.hpp"
#include "compiler/compileBroker.hpp"
#include "interpreter/interpreter.hpp"
@@ -94,7 +93,6 @@
StubCodeMark mark(this, "MethodHandle::interpreter_entry", vmIntrinsics::name_at(iid));
address entry = MethodHandles::generate_method_handle_interpreter_entry(_masm, iid);
if (entry != NULL) {
- CodeCacheExtensions::handle_generated_pc(entry, vmIntrinsics::name_at(iid));
Interpreter::set_entry_for_kind(mk, entry);
}
// If the entry is not set, it will throw AbstractMethodError.
--- a/hotspot/src/share/vm/runtime/arguments.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/runtime/arguments.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -28,7 +28,6 @@
#include "classfile/moduleEntry.hpp"
#include "classfile/stringTable.hpp"
#include "classfile/symbolTable.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "gc/shared/cardTableRS.hpp"
#include "gc/shared/genCollectedHeap.hpp"
#include "gc/shared/referenceProcessor.hpp"
@@ -1882,7 +1881,6 @@
#endif // _LP64
#endif // !ZERO
- CodeCacheExtensions::set_ergonomics_flags();
}
void Arguments::set_parallel_gc_flags() {
--- a/hotspot/src/share/vm/runtime/init.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/runtime/init.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -25,7 +25,6 @@
#include "precompiled.hpp"
#include "classfile/stringTable.hpp"
#include "classfile/symbolTable.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "code/icBuffer.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/bytecodes.hpp"
@@ -105,20 +104,15 @@
classLoader_init1();
compilationPolicy_init();
codeCache_init();
- CodeCacheExtensions::initialize();
VM_Version_init();
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::VMVersion);
os_init_globals();
stubRoutines_init1();
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::StubRoutines1);
jint status = universe_init(); // dependent on codeCache_init and
// stubRoutines_init1 and metaspace_init.
if (status != JNI_OK)
return status;
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::Universe);
interpreter_init(); // before any methods loaded
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::Interpreter);
invocationCounter_init(); // before any methods loaded
marksweep_init();
accessFlags_init();
@@ -148,7 +142,6 @@
javaClasses_init(); // must happen after vtable initialization
stubRoutines_init2(); // note: StubRoutines need 2-phase init
MethodHandles::generate_adapters();
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::StubRoutines2);
#if INCLUDE_NMT
// Solaris stack is walkable only after stubRoutines are set up.
@@ -162,7 +155,6 @@
CommandLineFlags::printFlags(tty, false, PrintFlagsRanges);
}
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::InitGlobals);
return JNI_OK;
}
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -29,7 +29,6 @@
#include "classfile/vmSymbols.hpp"
#include "code/codeCache.hpp"
#include "code/compiledIC.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "code/scopeDesc.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/abstractCompiler.hpp"
@@ -2580,27 +2579,15 @@
if (_adapters != NULL) return;
_adapters = new AdapterHandlerTable();
- if (!CodeCacheExtensions::skip_compiler_support()) {
- // Create a special handler for abstract methods. Abstract methods
- // are never compiled so an i2c entry is somewhat meaningless, but
- // throw AbstractMethodError just in case.
- // Pass wrong_method_abstract for the c2i transitions to return
- // AbstractMethodError for invalid invocations.
- address wrong_method_abstract = SharedRuntime::get_handle_wrong_method_abstract_stub();
- _abstract_method_handler = AdapterHandlerLibrary::new_entry(new AdapterFingerPrint(0, NULL),
- StubRoutines::throw_AbstractMethodError_entry(),
- wrong_method_abstract, wrong_method_abstract);
- } else {
- // Adapters are not supposed to be used.
- // Generate a special one to cause an error if used (and store this
- // singleton in place of the useless _abstract_method_error adapter).
- address entry = (address) &unexpected_adapter_call;
- _abstract_method_handler = AdapterHandlerLibrary::new_entry(new AdapterFingerPrint(0, NULL),
- entry,
- entry,
- entry);
-
- }
+ // Create a special handler for abstract methods. Abstract methods
+ // are never compiled so an i2c entry is somewhat meaningless, but
+ // throw AbstractMethodError just in case.
+ // Pass wrong_method_abstract for the c2i transitions to return
+ // AbstractMethodError for invalid invocations.
+ address wrong_method_abstract = SharedRuntime::get_handle_wrong_method_abstract_stub();
+ _abstract_method_handler = AdapterHandlerLibrary::new_entry(new AdapterFingerPrint(0, NULL),
+ StubRoutines::throw_AbstractMethodError_entry(),
+ wrong_method_abstract, wrong_method_abstract);
}
AdapterHandlerEntry* AdapterHandlerLibrary::new_entry(AdapterFingerPrint* fingerprint,
@@ -2651,17 +2638,6 @@
// make sure data structure is initialized
initialize();
- // during dump time, always generate adapters, even if the
- // compiler has been turned off.
- if (!DumpSharedSpaces && CodeCacheExtensions::skip_compiler_support()) {
- // adapters are useless and should not be used, including the
- // abstract_method_handler. However, some callers check that
- // an adapter was installed.
- // Return the singleton adapter, stored into _abstract_method_handler
- // and modified to cause an error if we ever call it.
- return _abstract_method_handler;
- }
-
if (method->is_abstract()) {
return _abstract_method_handler;
}
--- a/hotspot/src/share/vm/runtime/stubCodeGenerator.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/runtime/stubCodeGenerator.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -26,7 +26,6 @@
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/codeCache.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "compiler/disassembler.hpp"
#include "oops/oop.inline.hpp"
#include "prims/forte.hpp"
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -24,7 +24,6 @@
#include "precompiled.hpp"
#include "asm/codeBuffer.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/interfaceSupport.hpp"
@@ -204,12 +203,6 @@
// simple tests of generated arraycopy functions
static void test_arraycopy_func(address func, int alignment) {
- if (CodeCacheExtensions::use_pregenerated_interpreter() || !CodeCacheExtensions::is_executable(func)) {
- // Exit safely if stubs were generated but cannot be used.
- // Also excluding pregenerated interpreter since the code may depend on
- // some registers being properly initialized (for instance Rthread)
- return;
- }
int v = 0xcc;
int v2 = 0x11;
jlong lbuffer[8];
--- a/hotspot/src/share/vm/runtime/thread.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/runtime/thread.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -29,7 +29,6 @@
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/codeCache.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "code/scopeDesc.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/compileTask.hpp"
@@ -3842,8 +3841,6 @@
}
}
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::CreateVM);
-
create_vm_timer.end();
#ifdef ASSERT
_vm_complete = true;
--- a/hotspot/src/share/vm/runtime/vm_operations.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/runtime/vm_operations.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -26,7 +26,6 @@
#include "classfile/symbolTable.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/codeCache.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "compiler/compileBroker.hpp"
#include "gc/shared/isGCActiveMark.hpp"
#include "logging/log.hpp"
@@ -390,7 +389,6 @@
Thread * VM_Exit::_shutdown_thread = NULL;
int VM_Exit::set_vm_exited() {
- CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::LastStep);
Thread * thr_cur = Thread::current();
--- a/hotspot/src/share/vm/runtime/vm_version.cpp Mon Dec 19 00:49:34 2016 +0100
+++ b/hotspot/src/share/vm/runtime/vm_version.cpp Mon Dec 19 12:39:01 2016 -0500
@@ -23,7 +23,6 @@
*/
#include "precompiled.hpp"
-#include "code/codeCacheExtensions.hpp"
#include "logging/log.hpp"
#include "memory/universe.hpp"
#include "oops/oop.inline.hpp"
@@ -127,9 +126,6 @@
const char* Abstract_VM_Version::vm_info_string() {
- if (CodeCacheExtensions::use_pregenerated_interpreter()) {
- return "interpreted mode, pregenerated";
- }
switch (Arguments::mode()) {
case Arguments::_int:
return UseSharedSpaces ? "interpreted mode, sharing" : "interpreted mode";