--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp Tue Sep 12 19:03:39 2017 +0200
@@ -0,0 +1,3339 @@
+/*
+ * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "ci/ciEnv.hpp"
+#include "code/nativeInst.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif
+
+// Implementation of AddressLiteral
+
+void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
+ switch (rtype) {
+ case relocInfo::oop_type:
+ // Oops are a special case. Normally they would be their own section
+ // but in cases like icBuffer they are literals in the code stream that
+ // we don't have a section for. We use none so that we get a literal address
+ // which is always patchable.
+ break;
+ case relocInfo::external_word_type:
+ _rspec = external_word_Relocation::spec(_target);
+ break;
+ case relocInfo::internal_word_type:
+ _rspec = internal_word_Relocation::spec(_target);
+ break;
+ case relocInfo::opt_virtual_call_type:
+ _rspec = opt_virtual_call_Relocation::spec();
+ break;
+ case relocInfo::static_call_type:
+ _rspec = static_call_Relocation::spec();
+ break;
+ case relocInfo::runtime_call_type:
+ _rspec = runtime_call_Relocation::spec();
+ break;
+ case relocInfo::poll_type:
+ case relocInfo::poll_return_type:
+ _rspec = Relocation::spec_simple(rtype);
+ break;
+ case relocInfo::none:
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+}
+
+// Initially added to the Assembler interface as a pure virtual:
+// RegisterConstant delayed_value(..)
+// for:
+// 6812678 macro assembler needs delayed binding of a few constants (for 6655638)
+// This was subsequently modified to its present name and return type.
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+ Register tmp,
+ int offset) {
+ ShouldNotReachHere();
+ return RegisterOrConstant(-1);
+}
+
+
+#ifdef AARCH64
+// Note: ARM32 version is OS dependent
+void MacroAssembler::breakpoint(AsmCondition cond) {
+ if (cond == al) {
+ brk();
+ } else {
+ Label L;
+ b(L, inverse(cond));
+ brk();
+ bind(L);
+ }
+}
+#endif // AARCH64
+
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+ Register vtable_index,
+ Register method_result) {
+ const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
+ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+ add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
+ ldr(method_result, Address(recv_klass, base_offset));
+}
+
+
+// Simplified, combined version, good for typical uses.
+// Falls through on failure.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp_reg2,
+ Register temp_reg3,
+ Label& L_success) {
+ Label L_failure;
+ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
+ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
+ bind(L_failure);
+}
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp_reg2,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path) {
+
+ assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
+ const Register super_check_offset = temp_reg2;
+
+ Label L_fallthrough;
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+ assert(label_nulls <= 1, "at most one NULL in the batch");
+
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
+ Address super_check_offset_addr(super_klass, sco_offset);
+
+ // If the pointers are equal, we are done (e.g., String[] elements).
+ // This self-check enables sharing of secondary supertype arrays among
+ // non-primary types such as array-of-interface. Otherwise, each such
+ // type would need its own customized SSA.
+ // We move this check to the front of the fast path because many
+ // type checks are in fact trivially successful in this manner,
+ // so we get a nicely predicted branch right at the start of the check.
+ cmp(sub_klass, super_klass);
+ b(*L_success, eq);
+
+ // Check the supertype display:
+ ldr_u32(super_check_offset, super_check_offset_addr);
+
+ Address super_check_addr(sub_klass, super_check_offset);
+  ldr(temp_reg, super_check_addr); // load displayed supertype
+  cmp(super_klass, temp_reg);
+
+ // This check has worked decisively for primary supers.
+ // Secondary supers are sought in the super_cache ('super_cache_addr').
+ // (Secondary supers are interfaces and very deeply nested subtypes.)
+ // This works in the same check above because of a tricky aliasing
+ // between the super_cache and the primary super display elements.
+ // (The 'super_check_addr' can address either, as the case requires.)
+ // Note that the cache is updated below if it does not help us find
+ // what we need immediately.
+ // So if it was a primary super, we can just fail immediately.
+ // Otherwise, it's the slow path for us (no success at this point).
+
+ b(*L_success, eq);
+ cmp_32(super_check_offset, sc_offset);
+ if (L_failure == &L_fallthrough) {
+ b(*L_slow_path, eq);
+ } else {
+ b(*L_failure, ne);
+ if (L_slow_path != &L_fallthrough) {
+ b(*L_slow_path);
+ }
+ }
+
+ bind(L_fallthrough);
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp2_reg,
+ Register temp3_reg,
+ Label* L_success,
+ Label* L_failure,
+ bool set_cond_codes) {
+#ifdef AARCH64
+ NOT_IMPLEMENTED();
+#else
+ // Note: if used by code that expects a register to be 0 on success,
+ // this register must be temp_reg and set_cond_codes must be true
+
+ Register saved_reg = noreg;
+
+ // get additional tmp registers
+ if (temp3_reg == noreg) {
+ saved_reg = temp3_reg = LR;
+ push(saved_reg);
+ }
+
+ assert(temp2_reg != noreg, "need all the temporary registers");
+ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
+
+ Register cmp_temp = temp_reg;
+ Register scan_temp = temp3_reg;
+ Register count_temp = temp2_reg;
+
+ Label L_fallthrough;
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+ assert(label_nulls <= 1, "at most one NULL in the batch");
+
+ // a couple of useful fields in sub_klass:
+ int ss_offset = in_bytes(Klass::secondary_supers_offset());
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ Address secondary_supers_addr(sub_klass, ss_offset);
+ Address super_cache_addr( sub_klass, sc_offset);
+
+#ifndef PRODUCT
+ inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
+#endif
+
+ // We will consult the secondary-super array.
+ ldr(scan_temp, Address(sub_klass, ss_offset));
+
+ assert(! UseCompressedOops, "search_key must be the compressed super_klass");
+  // without compressed oops, search_key is simply the raw super_klass pointer
+ Register search_key = super_klass;
+
+ // Load the array length.
+ ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
+ add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
+
+ add(count_temp, count_temp, 1);
+
+ Label L_loop, L_setnz_and_fail, L_fail;
+
+ // Top of search loop
+ bind(L_loop);
+ // Notes:
+ // scan_temp starts at the array elements
+ // count_temp is 1+size
+ subs(count_temp, count_temp, 1);
+ if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
+ // direct jump to L_failure if failed and no cleanup needed
+ b(*L_failure, eq); // not found and
+ } else {
+ b(L_fail, eq); // not found in the array
+ }
+
+ // Load next super to check
+  // Elements in the array of super classes are pointer sized.
+ int element_size = wordSize;
+ ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
+
+ // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+ subs(cmp_temp, cmp_temp, search_key);
+
+ // A miss means we are NOT a subtype and need to keep looping
+ b(L_loop, ne);
+
+ // Falling out the bottom means we found a hit; we ARE a subtype
+
+ // Note: temp_reg/cmp_temp is already 0 and flag Z is set
+
+ // Success. Cache the super we found and proceed in triumph.
+ str(super_klass, Address(sub_klass, sc_offset));
+
+ if (saved_reg != noreg) {
+ // Return success
+ pop(saved_reg);
+ }
+
+ b(*L_success);
+
+ bind(L_fail);
+ // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
+ if (set_cond_codes) {
+ movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
+ }
+ if (saved_reg != noreg) {
+ pop(saved_reg);
+ }
+ if (L_failure != &L_fallthrough) {
+ b(*L_failure);
+ }
+
+ bind(L_fallthrough);
+#endif
+}
+
+// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
+Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
+ assert_different_registers(params_base, params_count);
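+  // i.e., params_base + (params_count - 1) * Interpreter::stackElementSize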
+ add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
+ return Address(tmp, -Interpreter::stackElementSize);
+}
+
+
+void MacroAssembler::align(int modulus) {
+ while (offset() % modulus != 0) {
+ nop();
+ }
+}
+
+int MacroAssembler::set_last_Java_frame(Register last_java_sp,
+ Register last_java_fp,
+ bool save_last_java_pc,
+ Register tmp) {
+ int pc_offset;
+ if (last_java_fp != noreg) {
+ // optional
+ str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
+ _fp_saved = true;
+ } else {
+ _fp_saved = false;
+ }
+ if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
+#ifdef AARCH64
+ pc_offset = mov_pc_to(tmp);
+ str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
+#else
+ str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
+ pc_offset = offset() + VM_Version::stored_pc_adjustment();
+#endif
+ _pc_saved = true;
+ } else {
+ _pc_saved = false;
+ pc_offset = -1;
+ }
+  // According to the comment in javaFrameAnchor.hpp, SP must be saved last, so that other
+ // entries are valid when SP is set.
+
+  // However, this is probably not a strong constraint since for instance PC is
+ // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
+ // we now write the fields in the expected order but we have not added a StoreStore
+ // barrier.
+
+ // XXX: if the ordering is really important, PC should always be saved (without forgetting
+ // to update oop_map offsets) and a StoreStore barrier might be needed.
+
+ if (last_java_sp == noreg) {
+ last_java_sp = SP; // always saved
+ }
+#ifdef AARCH64
+ if (last_java_sp == SP) {
+ mov(tmp, SP);
+ str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+ } else {
+ str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+ }
+#else
+ str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+#endif
+
+ return pc_offset; // for oopmaps
+}
+
+void MacroAssembler::reset_last_Java_frame(Register tmp) {
+ const Register Rzero = zero_register(tmp);
+ str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
+ if (_fp_saved) {
+ str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
+ }
+ if (_pc_saved) {
+ str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
+ }
+}
+
+
+// Implementation of call_VM versions
+
+void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
+ assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+ assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
+
+#ifndef AARCH64
+ // Safer to save R9 here since callers may have been written
+ // assuming R9 survives. This is suboptimal but is not worth
+ // optimizing for the few platforms where R9 is scratched.
+ push(RegisterSet(R4) | R9ifScratched);
+ mov(R4, SP);
+ bic(SP, SP, StackAlignmentInBytes - 1);
+#endif // AARCH64
+ call(entry_point, relocInfo::runtime_call_type);
+#ifndef AARCH64
+ mov(SP, R4);
+ pop(RegisterSet(R4) | R9ifScratched);
+#endif // AARCH64
+}
+
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+ assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+ assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
+
+ const Register tmp = Rtemp;
+ assert_different_registers(oop_result, tmp);
+
+ set_last_Java_frame(SP, FP, true, tmp);
+
+#ifdef ASSERT
+ AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
+#endif // ASSERT
+
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+ // Safer to save R9 here since callers may have been written
+ // assuming R9 survives. This is suboptimal but is not worth
+ // optimizing for the few platforms where R9 is scratched.
+
+  // Note: cannot save R9 above the saved SP (some calls expect, for
+  // instance, the Java stack top at the saved SP)
+  // => once saved (with set_last_Java_frame), decrease SP before rounding to
+  // ensure the slot at SP will be free for R9.
+ sub(SP, SP, 4);
+ bic(SP, SP, StackAlignmentInBytes - 1);
+ str(R9, Address(SP, 0));
+#else
+ bic(SP, SP, StackAlignmentInBytes - 1);
+#endif // R9_IS_SCRATCHED
+#endif
+
+ mov(R0, Rthread);
+ call(entry_point, relocInfo::runtime_call_type);
+
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+ ldr(R9, Address(SP, 0));
+#endif
+ ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
+#endif
+
+ reset_last_Java_frame(tmp);
+
+ // C++ interp handles this in the interpreter
+ check_and_handle_popframe();
+ check_and_handle_earlyret();
+
+ if (check_exceptions) {
+ // check for pending exceptions
+ ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
+#ifdef AARCH64
+ Label L;
+ cbz(tmp, L);
+ mov_pc_to(Rexception_pc);
+ b(StubRoutines::forward_exception_entry());
+ bind(L);
+#else
+ cmp(tmp, 0);
+ mov(Rexception_pc, PC, ne);
+ b(StubRoutines::forward_exception_entry(), ne);
+#endif // AARCH64
+ }
+
+ // get oop result if there is one and reset the value in the thread
+ if (oop_result->is_valid()) {
+ get_vm_result(oop_result, tmp);
+ }
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
+ call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
+ assert (arg_1 == R1, "fixed register for arg_1");
+ call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+ assert (arg_1 == R1, "fixed register for arg_1");
+ assert (arg_2 == R2, "fixed register for arg_2");
+ call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+ assert (arg_1 == R1, "fixed register for arg_1");
+ assert (arg_2 == R2, "fixed register for arg_2");
+ assert (arg_3 == R3, "fixed register for arg_3");
+ call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
+ // Not used on ARM
+ Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
+ // Not used on ARM
+ Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+  // Not used on ARM
+ Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+ // Not used on ARM
+ Unimplemented();
+}
+
+// Raw call, without saving/restoring registers, exception handling, etc.
+// Mainly used from various stubs.
+void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
+ const Register tmp = Rtemp; // Rtemp free since scratched by call
+ set_last_Java_frame(SP, FP, true, tmp);
+#if R9_IS_SCRATCHED
+ if (save_R9_if_scratched) {
+ // Note: Saving also R10 for alignment.
+ push(RegisterSet(R9, R10));
+ }
+#endif
+ mov(R0, Rthread);
+ call(entry_point, relocInfo::runtime_call_type);
+#if R9_IS_SCRATCHED
+ if (save_R9_if_scratched) {
+ pop(RegisterSet(R9, R10));
+ }
+#endif
+ reset_last_Java_frame(tmp);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point) {
+ call_VM_leaf_helper(entry_point, 0);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
+ assert (arg_1 == R0, "fixed register for arg_1");
+ call_VM_leaf_helper(entry_point, 1);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
+ assert (arg_1 == R0, "fixed register for arg_1");
+ assert (arg_2 == R1, "fixed register for arg_2");
+ call_VM_leaf_helper(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+ assert (arg_1 == R0, "fixed register for arg_1");
+ assert (arg_2 == R1, "fixed register for arg_2");
+ assert (arg_3 == R2, "fixed register for arg_3");
+ call_VM_leaf_helper(entry_point, 3);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
+ assert (arg_1 == R0, "fixed register for arg_1");
+ assert (arg_2 == R1, "fixed register for arg_2");
+ assert (arg_3 == R2, "fixed register for arg_3");
+ assert (arg_4 == R3, "fixed register for arg_4");
+ call_VM_leaf_helper(entry_point, 4);
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
+ assert_different_registers(oop_result, tmp);
+ ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
+ str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
+ verify_oop(oop_result);
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
+ assert_different_registers(metadata_result, tmp);
+ ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
+ str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
+}
+
+void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
+ if (arg2.is_register()) {
+ add(dst, arg1, arg2.as_register());
+ } else {
+ add(dst, arg1, arg2.as_constant());
+ }
+}
+
+void MacroAssembler::add_slow(Register rd, Register rn, int c) {
+#ifdef AARCH64
+ if (c == 0) {
+ if (rd != rn) {
+ mov(rd, rn);
+ }
+ return;
+ }
+ if (c < 0) {
+ sub_slow(rd, rn, -c);
+ return;
+ }
+ if (c > right_n_bits(24)) {
+ guarantee(rd != rn, "no large add_slow with only one register");
+ mov_slow(rd, c);
+ add(rd, rn, rd);
+ } else {
+ int lo = c & right_n_bits(12);
+ int hi = (c >> 12) & right_n_bits(12);
+ if (lo != 0) {
+ add(rd, rn, lo, lsl0);
+ }
+ if (hi != 0) {
+ add(rd, (lo == 0) ? rn : rd, hi, lsl12);
+ }
+ }
+#else
+  // This function is used in the compiler for handling large frame offsets
+ if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
+ return sub(rd, rn, (-c));
+ }
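+  // Split the constant: the low part (c & 0x3fc) and the remaining high part are each
+  // added with a single instruction; the high part must be a valid rotated immediate.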
+ int low = c & 0x3fc;
+ if (low != 0) {
+ add(rd, rn, low);
+ rn = rd;
+ }
+ if (c & ~0x3fc) {
+ assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
+ add(rd, rn, c & ~0x3fc);
+ } else if (rd != rn) {
+ assert(c == 0, "");
+ mov(rd, rn); // need to generate at least one move!
+ }
+#endif // AARCH64
+}
+
+void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
+#ifdef AARCH64
+ if (c <= 0) {
+ add_slow(rd, rn, -c);
+ return;
+ }
+ if (c > right_n_bits(24)) {
+ guarantee(rd != rn, "no large sub_slow with only one register");
+ mov_slow(rd, c);
+ sub(rd, rn, rd);
+ } else {
+ int lo = c & right_n_bits(12);
+ int hi = (c >> 12) & right_n_bits(12);
+ if (lo != 0) {
+ sub(rd, rn, lo, lsl0);
+ }
+ if (hi != 0) {
+ sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
+ }
+ }
+#else
+  // This function is used in the compiler for handling large frame offsets
+ if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
+ return add(rd, rn, (-c));
+ }
+ int low = c & 0x3fc;
+ if (low != 0) {
+ sub(rd, rn, low);
+ rn = rd;
+ }
+ if (c & ~0x3fc) {
+ assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
+ sub(rd, rn, c & ~0x3fc);
+ } else if (rd != rn) {
+ assert(c == 0, "");
+ mov(rd, rn); // need to generate at least one move!
+ }
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_slow(Register rd, address addr) {
+ // do *not* call the non relocated mov_related_address
+ mov_slow(rd, (intptr_t)addr);
+}
+
+void MacroAssembler::mov_slow(Register rd, const char *str) {
+ mov_slow(rd, (intptr_t)str);
+}
+
+#ifdef AARCH64
+
+// Common code for mov_slow and instr_count_for_mov_slow.
+// Returns number of instructions of mov_slow pattern,
+// generating it if non-null MacroAssembler is given.
+int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
+  // This code pattern is matched in NativeInstruction::is_mov_slow.
+  // Update it when making modifications.
+
+ const intx mask = right_n_bits(16);
+ // 1 movz instruction
+ for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+ if ((c & ~(mask << base_shift)) == 0) {
+ if (masm != NULL) {
+ masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
+ }
+ return 1;
+ }
+ }
+ // 1 movn instruction
+ for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+ if (((~c) & ~(mask << base_shift)) == 0) {
+ if (masm != NULL) {
+ masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
+ }
+ return 1;
+ }
+ }
+ // 1 orr instruction
+ {
+ LogicalImmediate imm(c, false);
+ if (imm.is_encoded()) {
+ if (masm != NULL) {
+ masm->orr(rd, ZR, imm);
+ }
+ return 1;
+ }
+ }
+ // 1 movz/movn + up to 3 movk instructions
+ int zeroes = 0;
+ int ones = 0;
+ for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+ int part = (c >> base_shift) & mask;
+ if (part == 0) {
+ ++zeroes;
+ } else if (part == mask) {
+ ++ones;
+ }
+ }
+ int def_bits = 0;
+ if (ones > zeroes) {
+ def_bits = mask;
+ }
+ int inst_count = 0;
+ for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+ int part = (c >> base_shift) & mask;
+ if (part != def_bits) {
+ if (masm != NULL) {
+ if (inst_count > 0) {
+ masm->movk(rd, part, base_shift);
+ } else {
+ if (def_bits == 0) {
+ masm->movz(rd, part, base_shift);
+ } else {
+ masm->movn(rd, ~part & mask, base_shift);
+ }
+ }
+ }
+ inst_count++;
+ }
+ }
+ assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
+ return inst_count;
+}
+
+void MacroAssembler::mov_slow(Register rd, intptr_t c) {
+#ifdef ASSERT
+ int off = offset();
+#endif
+ (void) mov_slow_helper(rd, c, this);
+ assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
+}
+
+// Counts instructions generated by mov_slow(rd, c).
+int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
+ return mov_slow_helper(noreg, c, NULL);
+}
+
+int MacroAssembler::instr_count_for_mov_slow(address c) {
+ return mov_slow_helper(noreg, (intptr_t)c, NULL);
+}
+
+#else
+
+void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
+ if (AsmOperand::is_rotated_imm(c)) {
+ mov(rd, c, cond);
+ } else if (AsmOperand::is_rotated_imm(~c)) {
+ mvn(rd, ~c, cond);
+ } else if (VM_Version::supports_movw()) {
+ movw(rd, c & 0xffff, cond);
+ if ((unsigned int)c >> 16) {
+ movt(rd, (unsigned int)c >> 16, cond);
+ }
+ } else {
+ // Find first non-zero bit
+ int shift = 0;
+ while ((c & (3 << shift)) == 0) {
+ shift += 2;
+ }
+ // Put the least significant part of the constant
+ int mask = 0xff << shift;
+ mov(rd, c & mask, cond);
+ // Add up to 3 other parts of the constant;
+ // each of them can be represented as rotated_imm
+ if (c & (mask << 8)) {
+ orr(rd, rd, c & (mask << 8), cond);
+ }
+ if (c & (mask << 16)) {
+ orr(rd, rd, c & (mask << 16), cond);
+ }
+ if (c & (mask << 24)) {
+ orr(rd, rd, c & (mask << 24), cond);
+ }
+ }
+}
+
+#endif // AARCH64
+
+void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
+#ifdef AARCH64
+ bool patchable
+#else
+ AsmCondition cond
+#endif
+ ) {
+
+ if (o == NULL) {
+#ifdef AARCH64
+ if (patchable) {
+ nop();
+ }
+ mov(rd, ZR);
+#else
+ mov(rd, 0, cond);
+#endif
+ return;
+ }
+
+ if (oop_index == 0) {
+ oop_index = oop_recorder()->allocate_oop_index(o);
+ }
+ relocate(oop_Relocation::spec(oop_index));
+
+#ifdef AARCH64
+ if (patchable) {
+ nop();
+ }
+ ldr(rd, pc());
+#else
+ if (VM_Version::supports_movw()) {
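+    // movw/movt are emitted with zero placeholders; the oop relocation recorded above
+    // allows the real address to be patched in later (see NativeMovConstReg::set_data).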
+ movw(rd, 0, cond);
+ movt(rd, 0, cond);
+ } else {
+ ldr(rd, Address(PC), cond);
+ // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
+ nop();
+ }
+#endif
+}
+
+void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
+ if (o == NULL) {
+#ifdef AARCH64
+ if (patchable) {
+ nop();
+ }
+#endif
+ mov(rd, 0);
+ return;
+ }
+
+ if (metadata_index == 0) {
+ metadata_index = oop_recorder()->allocate_metadata_index(o);
+ }
+ relocate(metadata_Relocation::spec(metadata_index));
+
+#ifdef AARCH64
+ if (patchable) {
+ nop();
+ }
+#ifdef COMPILER2
+ if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
+ mov_slow(rd, (address)o);
+ return;
+ }
+#endif
+ ldr(rd, pc());
+#else
+ if (VM_Version::supports_movw()) {
+ movw(rd, ((int)o) & 0xffff);
+ movt(rd, (unsigned int)o >> 16);
+ } else {
+ ldr(rd, Address(PC));
+ // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
+ nop();
+ }
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
+ Label skip_constant;
+ union {
+ jfloat f;
+ jint i;
+ } accessor;
+ accessor.f = c;
+
+#ifdef AARCH64
+ // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
+ Label L;
+ ldr_s(fd, target(L));
+ b(skip_constant);
+ bind(L);
+ emit_int32(accessor.i);
+ bind(skip_constant);
+#else
+ flds(fd, Address(PC), cond);
+ b(skip_constant);
+ emit_int32(accessor.i);
+ bind(skip_constant);
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
+ Label skip_constant;
+ union {
+ jdouble d;
+ jint i[2];
+ } accessor;
+ accessor.d = c;
+
+#ifdef AARCH64
+ // TODO-AARCH64 - try to optimize loading of double constants with fmov
+ Label L;
+ ldr_d(fd, target(L));
+ b(skip_constant);
+ align(wordSize);
+ bind(L);
+ emit_int32(accessor.i[0]);
+ emit_int32(accessor.i[1]);
+ bind(skip_constant);
+#else
+ fldd(fd, Address(PC), cond);
+ b(skip_constant);
+ emit_int32(accessor.i[0]);
+ emit_int32(accessor.i[1]);
+ bind(skip_constant);
+#endif // AARCH64
+}
+
+void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
+ intptr_t addr = (intptr_t) address_of_global;
+#ifdef AARCH64
+ assert((addr & 0x3) == 0, "address should be aligned");
+
+ // FIXME: TODO
+ if (false && page_reachable_from_cache(address_of_global)) {
+ assert(false,"TODO: relocate");
+ //relocate();
+ adrp(reg, address_of_global);
+ ldrsw(reg, Address(reg, addr & 0xfff));
+ } else {
+ mov_slow(reg, addr & ~0x3fff);
+ ldrsw(reg, Address(reg, addr & 0x3fff));
+ }
+#else
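+  // Materialize the page-aligned part of the address, then load using the
+  // remaining low 12 bits as the immediate offset.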
+ mov_slow(reg, addr & ~0xfff);
+ ldr(reg, Address(reg, addr & 0xfff));
+#endif
+}
+
+void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
+#ifdef AARCH64
+ intptr_t addr = (intptr_t) address_of_global;
+ assert ((addr & 0x7) == 0, "address should be aligned");
+ mov_slow(reg, addr & ~0x7fff);
+ ldr(reg, Address(reg, addr & 0x7fff));
+#else
+ ldr_global_s32(reg, address_of_global);
+#endif
+}
+
+void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
+ intptr_t addr = (intptr_t) address_of_global;
+ mov_slow(reg, addr & ~0xfff);
+ ldrb(reg, Address(reg, addr & 0xfff));
+}
+
+void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
+#ifdef AARCH64
+ switch (bits) {
+ case 8: uxtb(rd, rn); break;
+ case 16: uxth(rd, rn); break;
+ case 32: mov_w(rd, rn); break;
+ default: ShouldNotReachHere();
+ }
+#else
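+  // bits <= 8: mask with an immediate; bits >= 24: clear the high bits with bic;
+  // otherwise shift left then logical-shift right to drop the high bits.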
+ if (bits <= 8) {
+ andr(rd, rn, (1 << bits) - 1);
+ } else if (bits >= 24) {
+ bic(rd, rn, -1 << bits);
+ } else {
+ mov(rd, AsmOperand(rn, lsl, 32 - bits));
+ mov(rd, AsmOperand(rd, lsr, 32 - bits));
+ }
+#endif
+}
+
+void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
+#ifdef AARCH64
+ switch (bits) {
+ case 8: sxtb(rd, rn); break;
+ case 16: sxth(rd, rn); break;
+ case 32: sxtw(rd, rn); break;
+ default: ShouldNotReachHere();
+ }
+#else
+ mov(rd, AsmOperand(rn, lsl, 32 - bits));
+ mov(rd, AsmOperand(rd, asr, 32 - bits));
+#endif
+}
+
+#ifndef AARCH64
+
+void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
+ Register rn_lo, Register rn_hi,
+ AsmCondition cond) {
+ if (rd_lo != rn_hi) {
+ if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
+ if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
+ } else if (rd_hi != rn_lo) {
+ if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
+ if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
+ } else {
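+    // rd_lo aliases rn_hi and rd_hi aliases rn_lo: swap the halves with
+    // three XORs, which needs no temporary register.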
+ eor(rd_lo, rd_hi, rd_lo, cond);
+ eor(rd_hi, rd_lo, rd_hi, cond);
+ eor(rd_lo, rd_hi, rd_lo, cond);
+ }
+}
+
+void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
+ Register rn_lo, Register rn_hi,
+ AsmShift shift, Register count) {
+ Register tmp;
+ if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
+ tmp = rd_lo;
+ } else {
+ tmp = rd_hi;
+ }
+ assert_different_registers(tmp, count, rn_lo, rn_hi);
+
+ subs(tmp, count, 32);
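+  // tmp = count - 32: PL (count >= 32) and MI (count < 32) select between the two cases below.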
+ if (shift == lsl) {
+ assert_different_registers(rd_hi, rn_lo);
+ assert_different_registers(count, rd_hi);
+ mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
+ rsb(tmp, count, 32, mi);
+ if (rd_hi == rn_hi) {
+ mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
+ orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
+ } else {
+ mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
+ orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
+ }
+ mov(rd_lo, AsmOperand(rn_lo, shift, count));
+ } else {
+ assert_different_registers(rd_lo, rn_hi);
+ assert_different_registers(rd_lo, count);
+ mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
+ rsb(tmp, count, 32, mi);
+ if (rd_lo == rn_lo) {
+ mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
+ orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
+ } else {
+ mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
+ orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
+ }
+ mov(rd_hi, AsmOperand(rn_hi, shift, count));
+ }
+}
+
+void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
+ Register rn_lo, Register rn_hi,
+ AsmShift shift, int count) {
+ assert(count != 0 && (count & ~63) == 0, "must be");
+
+ if (shift == lsl) {
+ assert_different_registers(rd_hi, rn_lo);
+ if (count >= 32) {
+ mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
+ mov(rd_lo, 0);
+ } else {
+ mov(rd_hi, AsmOperand(rn_hi, lsl, count));
+ orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
+ mov(rd_lo, AsmOperand(rn_lo, lsl, count));
+ }
+ } else {
+ assert_different_registers(rd_lo, rn_hi);
+ if (count >= 32) {
+ if (count == 32) {
+ mov(rd_lo, rn_hi);
+ } else {
+ mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
+ }
+ if (shift == asr) {
+ mov(rd_hi, AsmOperand(rn_hi, asr, 0));
+ } else {
+ mov(rd_hi, 0);
+ }
+ } else {
+ mov(rd_lo, AsmOperand(rn_lo, lsr, count));
+ orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
+ mov(rd_hi, AsmOperand(rn_hi, shift, count));
+ }
+ }
+}
+#endif // !AARCH64
+
+void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
+  // This code pattern is matched in NativeInstruction::skip_verify_oop.
+  // Update it when making modifications.
+ if (!VerifyOops) return;
+
+ char buffer[64];
+#ifdef COMPILER1
+ if (CommentedAssembly) {
+ snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
+ block_comment(buffer);
+ }
+#endif
+ const char* msg_buffer = NULL;
+ {
+ ResourceMark rm;
+ stringStream ss;
+ ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
+ msg_buffer = code_string(ss.as_string());
+ }
+
+ save_all_registers();
+
+ if (reg != R2) {
+ mov(R2, reg); // oop to verify
+ }
+ mov(R1, SP); // register save area
+
+ Label done;
+ InlinedString Lmsg(msg_buffer);
+ ldr_literal(R0, Lmsg); // message
+
+ // call indirectly to solve generation ordering problem
+ ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
+ call(Rtemp);
+
+ restore_all_registers();
+
+ b(done);
+#ifdef COMPILER2
+ int off = offset();
+#endif
+ bind_literal(Lmsg);
+#ifdef COMPILER2
+ if (offset() - off == 1 * wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+#endif
+ bind(done);
+}
+
+void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
+ if (!VerifyOops) return;
+
+ const char* msg_buffer = NULL;
+ {
+ ResourceMark rm;
+ stringStream ss;
+ if ((addr.base() == SP) && (addr.index()==noreg)) {
+ ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
+ } else {
+ ss.print("verify_oop_addr: %s", s);
+ }
+ ss.print(" (%s:%d)", file, line);
+ msg_buffer = code_string(ss.as_string());
+ }
+
+ int push_size = save_all_registers();
+
+ if (addr.base() == SP) {
+ // computes an addr that takes into account the push
+ if (addr.index() != noreg) {
+ Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
+ add(new_base, SP, push_size);
+ addr = addr.rebase(new_base);
+ } else {
+ addr = addr.plus_disp(push_size);
+ }
+ }
+
+ ldr(R2, addr); // oop to verify
+ mov(R1, SP); // register save area
+
+ Label done;
+ InlinedString Lmsg(msg_buffer);
+ ldr_literal(R0, Lmsg); // message
+
+ // call indirectly to solve generation ordering problem
+ ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
+ call(Rtemp);
+
+ restore_all_registers();
+
+ b(done);
+ bind_literal(Lmsg);
+ bind(done);
+}
+
+void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
+ if (needs_explicit_null_check(offset)) {
+#ifdef AARCH64
+ ldr(ZR, Address(reg));
+#else
+ assert_different_registers(reg, tmp);
+ if (tmp == noreg) {
+ tmp = Rtemp;
+ assert((! Thread::current()->is_Compiler_thread()) ||
+ (! (ciEnv::current()->task() == NULL)) ||
+ (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
+ "Rtemp not available in C2"); // explicit tmp register required
+ // XXX: could we mark the code buffer as not compatible with C2 ?
+ }
+ ldr(tmp, Address(reg));
+#endif
+ }
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
+ RegisterOrConstant size_expression, Label& slow_case) {
+ if (!Universe::heap()->supports_inline_contig_alloc()) {
+ b(slow_case);
+ return;
+ }
+
+ CollectedHeap* ch = Universe::heap();
+
+ const Register top_addr = tmp1;
+ const Register heap_end = tmp2;
+
+ if (size_expression.is_register()) {
+ assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
+ } else {
+ assert_different_registers(obj, obj_end, top_addr, heap_end);
+ }
+
+ bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
+ if (load_const) {
+ mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
+ } else {
+ ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
+ }
+ // Calculate new heap_top by adding the size of the object
+ Label retry;
+ bind(retry);
+
+#ifdef AARCH64
+ ldxr(obj, top_addr);
+#else
+ ldr(obj, Address(top_addr));
+#endif // AARCH64
+
+ ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
+ add_rc(obj_end, obj, size_expression);
+ // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
+ cmp(obj_end, obj);
+ b(slow_case, lo);
+ // Update heap_top if allocation succeeded
+ cmp(obj_end, heap_end);
+ b(slow_case, hi);
+
+#ifdef AARCH64
+ stxr(heap_end/*scratched*/, obj_end, top_addr);
+ cbnz_w(heap_end, retry);
+#else
+ atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
+ b(retry, ne);
+#endif // AARCH64
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
+ RegisterOrConstant size_expression, Label& slow_case) {
+ const Register tlab_end = tmp1;
+ assert_different_registers(obj, obj_end, tlab_end);
+
+ ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
+ ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
+ add_rc(obj_end, obj, size_expression);
+ cmp(obj_end, tlab_end);
+ b(slow_case, hi);
+ str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+}
+
+void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2,
+ Register tmp3, Register tmp4,
+ Label& try_eden, Label& slow_case) {
+ if (!Universe::heap()->supports_inline_contig_alloc()) {
+ b(slow_case);
+ return;
+ }
+
+ InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr());
+ Label discard_tlab, do_refill;
+ ldr(top, Address(Rthread, JavaThread::tlab_top_offset()));
+ ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
+ ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
+
+ // Calculate amount of free space
+ sub(tmp1, tmp1, top);
+ // Retain tlab and allocate in shared space
+ // if the amount of free space in tlab is too large to discard
+ cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize));
+ b(discard_tlab, ge);
+
+ // Increment waste limit to prevent getting stuck on this slow path
+ mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment());
+ add(tmp2, tmp2, tmp3);
+ str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
+ if (TLABStats) {
+ ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
+ add_32(tmp2, tmp2, 1);
+ str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
+ }
+ b(try_eden);
+ bind_literal(intArrayKlass_addr);
+
+ bind(discard_tlab);
+ if (TLABStats) {
+ ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
+ ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
+ add_32(tmp2, tmp2, 1);
+ add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize));
+ str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
+ str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
+ }
+ // If tlab is currently allocated (top or end != null)
+ // then fill [top, end + alignment_reserve) with array object
+ cbz(top, do_refill);
+
+ // Set up the mark word
+ mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
+ str(tmp2, Address(top, oopDesc::mark_offset_in_bytes()));
+ // Set klass to intArrayKlass and the length to the remaining space
+ ldr_literal(tmp2, intArrayKlass_addr);
+ add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() -
+ typeArrayOopDesc::header_size(T_INT) * HeapWordSize);
+ Register klass = tmp2;
+ ldr(klass, Address(tmp2));
+ logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint)
+ str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes()));
+ store_klass(klass, top); // blows klass:
+ klass = noreg;
+
+ ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset()));
+ sub(tmp1, top, tmp1); // size of tlab's allocated portion
+ incr_allocated_bytes(tmp1, tmp2);
+
+ bind(do_refill);
+ // Refill the tlab with an eden allocation
+ ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset()));
+ logical_shift_left(tmp4, tmp1, LogHeapWordSize);
+ eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case);
+ str(top, Address(Rthread, JavaThread::tlab_start_offset()));
+ str(top, Address(Rthread, JavaThread::tlab_top_offset()));
+
+#ifdef ASSERT
+ // Verify that tmp1 contains tlab_end
+ ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset()));
+ add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize));
+ cmp(tmp1, tmp2);
+ breakpoint(ne);
+#endif
+
+ sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+ str(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
+
+ if (ZeroTLAB) {
+ // clobbers start and tmp
+ // top must be preserved!
+ add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+ ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset()));
+ zero_memory(tmp2, tmp1, tmp3);
+ }
+}
+
+// Fills the memory region [start, end) with zeroes. Clobbers the `start` and `tmp` registers.
+void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
+ Label loop;
+ const Register ptr = start;
+
+#ifdef AARCH64
+ // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
+ const Register size = tmp;
+ Label remaining, done;
+
+ sub(size, end, start);
+
+#ifdef ASSERT
+ { Label L;
+ tst(size, wordSize - 1);
+ b(L, eq);
+ stop("size is not a multiple of wordSize");
+ bind(L);
+ }
+#endif // ASSERT
+
+ subs(size, size, wordSize);
+ b(remaining, le);
+
+ // Zero by 2 words per iteration.
+ bind(loop);
+ subs(size, size, 2*wordSize);
+ stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
+ b(loop, gt);
+
+ bind(remaining);
+ b(done, ne);
+ str(ZR, Address(ptr));
+ bind(done);
+#else
+ mov(tmp, 0);
+ bind(loop);
+ cmp(ptr, end);
+ str(tmp, Address(ptr, wordSize, post_indexed), lo);
+ b(loop, lo);
+#endif // AARCH64
+}
+
+void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
+#ifdef AARCH64
+ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+ add_rc(tmp, tmp, size_in_bytes);
+ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+#else
+ // Bump total bytes allocated by this thread
+ Label done;
+
+ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+ adds(tmp, tmp, size_in_bytes);
+ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
+ b(done, cc);
+
+  // Increment the high word and store the pair single-copy atomically. (This is an unlikely
+  // scenario on typical embedded systems, as it means more than 4GB has been allocated.)
+  // To do so, ldrd/strd instructions are used, which require an even/odd pair of registers.
+  // Such a request could be difficult to satisfy at a higher register-allocation level,
+  // so the routine is ready to allocate a pair itself.
+  Register low, high;
+  // Select either R0/R1 or R2/R3
+
+ if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
+ low = R2;
+ high = R3;
+ } else {
+ low = R0;
+ high = R1;
+ }
+ push(RegisterSet(low, high));
+
+ ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+ adds(low, low, size_in_bytes);
+ adc(high, high, 0);
+ strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+
+ pop(RegisterSet(low, high));
+
+ bind(done);
+#endif // AARCH64
+}
+
+void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
+ // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
+ if (UseStackBanging) {
+ const int page_size = os::vm_page_size();
+
+ sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
+ strb(R0, Address(tmp));
+#ifdef AARCH64
+ for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
+ sub(tmp, tmp, page_size);
+ strb(R0, Address(tmp));
+ }
+#else
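+    // Bang in steps of 0xff0 (less than a page) so that no page in the range is skipped;
+    // 0xff0 also fits the 12-bit immediate offset of strb.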
+ for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
+ strb(R0, Address(tmp, -0xff0, pre_indexed));
+ }
+#endif // AARCH64
+ }
+}
+
+void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
+ if (UseStackBanging) {
+ Label loop;
+
+ mov(tmp, SP);
+ add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
+#ifdef AARCH64
+ sub(tmp, tmp, Rsize);
+ bind(loop);
+ subs(Rsize, Rsize, os::vm_page_size());
+ strb(ZR, Address(tmp, Rsize));
+#else
+ bind(loop);
+ subs(Rsize, Rsize, 0xff0);
+ strb(R0, Address(tmp, -0xff0, pre_indexed));
+#endif // AARCH64
+ b(loop, hi);
+ }
+}
+
+void MacroAssembler::stop(const char* msg) {
+  // This code pattern is matched in NativeInstruction::is_stop.
+  // Update it when making modifications.
+#ifdef COMPILER1
+ if (CommentedAssembly) {
+ block_comment("stop");
+ }
+#endif
+
+ InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
+ InlinedString Lmsg(msg);
+
+ // save all registers for further inspection
+ save_all_registers();
+
+ ldr_literal(R0, Lmsg); // message
+ mov(R1, SP); // register save area
+
+#ifdef AARCH64
+ ldr_literal(Rtemp, Ldebug);
+ br(Rtemp);
+#else
+ ldr_literal(PC, Ldebug); // call MacroAssembler::debug
+#endif // AARCH64
+
+#if defined(COMPILER2) && defined(AARCH64)
+ int off = offset();
+#endif
+ bind_literal(Lmsg);
+ bind_literal(Ldebug);
+#if defined(COMPILER2) && defined(AARCH64)
+ if (offset() - off == 2 * wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+#endif
+}
+
+void MacroAssembler::warn(const char* msg) {
+#ifdef COMPILER1
+ if (CommentedAssembly) {
+ block_comment("warn");
+ }
+#endif
+
+ InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
+ InlinedString Lmsg(msg);
+ Label done;
+
+ int push_size = save_caller_save_registers();
+
+#ifdef AARCH64
+ // TODO-AARCH64 - get rid of extra debug parameters
+ mov(R1, LR);
+ mov(R2, FP);
+ add(R3, SP, push_size);
+#endif
+
+ ldr_literal(R0, Lmsg); // message
+ ldr_literal(LR, Lwarn); // call warning
+
+ call(LR);
+
+ restore_caller_save_registers();
+
+ b(done);
+ bind_literal(Lmsg);
+ bind_literal(Lwarn);
+ bind(done);
+}
+
+
+int MacroAssembler::save_all_registers() {
+  // This code pattern is matched in NativeInstruction::is_save_all_registers.
+  // Update it when making modifications.
+#ifdef AARCH64
+ const Register tmp = Rtemp;
+ raw_push(R30, ZR);
+ for (int i = 28; i >= 0; i -= 2) {
+ raw_push(as_Register(i), as_Register(i+1));
+ }
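+  // Overwrite the ZR slot (index 31) with the current PC, then reload tmp from its own
+  // saved slot so that every register in the save area keeps its original value.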
+ mov_pc_to(tmp);
+ str(tmp, Address(SP, 31*wordSize));
+ ldr(tmp, Address(SP, tmp->encoding()*wordSize));
+ return 32*wordSize;
+#else
+ push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
+ return 15*wordSize;
+#endif // AARCH64
+}
+
+void MacroAssembler::restore_all_registers() {
+#ifdef AARCH64
+ for (int i = 0; i <= 28; i += 2) {
+ raw_pop(as_Register(i), as_Register(i+1));
+ }
+ raw_pop(R30, ZR);
+#else
+ pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
+ add(SP, SP, wordSize); // discard saved PC
+#endif // AARCH64
+}
+
+int MacroAssembler::save_caller_save_registers() {
+#ifdef AARCH64
+ for (int i = 0; i <= 16; i += 2) {
+ raw_push(as_Register(i), as_Register(i+1));
+ }
+ raw_push(R18, LR);
+ return 20*wordSize;
+#else
+#if R9_IS_SCRATCHED
+ // Save also R10 to preserve alignment
+ push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
+ return 8*wordSize;
+#else
+ push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
+ return 6*wordSize;
+#endif
+#endif // AARCH64
+}
+
+void MacroAssembler::restore_caller_save_registers() {
+#ifdef AARCH64
+ raw_pop(R18, LR);
+ for (int i = 16; i >= 0; i -= 2) {
+ raw_pop(as_Register(i), as_Register(i+1));
+ }
+#else
+#if R9_IS_SCRATCHED
+ pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
+#else
+ pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
+#endif
+#endif // AARCH64
+}
+
+void MacroAssembler::debug(const char* msg, const intx* registers) {
+ // In order to get locks to work, we need to fake a in_VM state
+ JavaThread* thread = JavaThread::current();
+ thread->set_thread_state(_thread_in_vm);
+
+ if (ShowMessageBoxOnError) {
+ ttyLocker ttyl;
+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+ BytecodeCounter::print();
+ }
+ if (os::message_box(msg, "Execution stopped, print registers?")) {
+#ifdef AARCH64
+ // saved registers: R0-R30, PC
+ const int nregs = 32;
+#else
+ // saved registers: R0-R12, LR, PC
+ const int nregs = 15;
+ const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
+#endif // AARCH64
+
+ for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
+ tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
+ }
+
+#ifdef AARCH64
+ tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
+#endif // AARCH64
+
+ // derive original SP value from the address of register save area
+ tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(®isters[nregs]));
+ }
+ BREAKPOINT;
+ } else {
+ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
+ }
+ assert(false, "DEBUG MESSAGE: %s", msg);
+ fatal("%s", msg); // returning from MacroAssembler::debug is not supported
+}
+
+void MacroAssembler::unimplemented(const char* what) {
+ const char* buf = NULL;
+ {
+ ResourceMark rm;
+ stringStream ss;
+ ss.print("unimplemented: %s", what);
+ buf = code_string(ss.as_string());
+ }
+ stop(buf);
+}
+
+
+// Implementation of FixedSizeCodeBlock
+
+FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
+_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
+}
+
+FixedSizeCodeBlock::~FixedSizeCodeBlock() {
+ if (_enabled) {
+ address curr_pc = _masm->pc();
+
+ assert(_start < curr_pc, "invalid current pc");
+ guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
+
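+    // Pad the emitted code with nops up to exactly _size_in_instrs instructions.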
+ int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
+ for (int i = 0; i < nops_count; i++) {
+ _masm->nop();
+ }
+ }
+}
+
+#ifdef AARCH64
+
+// Serializes memory.
+// The tmp register is not used on AArch64; the parameter is provided solely for better compatibility with 32-bit ARM
+void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
+ if (!os::is_MP()) return;
+
+ // TODO-AARCH64 investigate dsb vs dmb effects
+ if (order_constraint == StoreStore) {
+ dmb(DMB_st);
+ } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
+ dmb(DMB_ld);
+ } else {
+ dmb(DMB_all);
+ }
+}
+
+#else
+
+// Serializes memory. Potentially blows flags and reg.
+// tmp is a scratch for the v6 co-processor write op (could be noreg for other architecture versions)
+// preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve status flags. Optional.
+// load_tgt is an ordered load target in the LoadStore case only, to create a dependency between the load operation and the conditional branch. Optional.
+void MacroAssembler::membar(Membar_mask_bits order_constraint,
+ Register tmp,
+ bool preserve_flags,
+ Register load_tgt) {
+ if (!os::is_MP()) return;
+
+ if (order_constraint == StoreStore) {
+ dmb(DMB_st, tmp);
+ } else if ((order_constraint & StoreLoad) ||
+ (order_constraint & LoadLoad) ||
+ (order_constraint & StoreStore) ||
+ (load_tgt == noreg) ||
+ preserve_flags) {
+ dmb(DMB_all, tmp);
+ } else {
+    // LoadStore: speculative store reordering is prohibited
+
+ // By providing an ordered load target register, we avoid an extra memory load reference
+ Label not_taken;
+ bind(not_taken);
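+    // The cmp of load_tgt against itself never sets NE, so the branch is never taken;
+    // it only creates a control dependency on the load that produced load_tgt, which
+    // prevents later stores from being speculated ahead of that load.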
+ cmp(load_tgt, load_tgt);
+ b(not_taken, ne);
+ }
+}
+
+#endif // AARCH64
+
+// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
+// on failure, so fall-through can only mean success.
+// "one_shot" controls whether we loop and retry to mitigate spurious failures.
+// This is only needed for C2, which for some reason does not retry,
+// while C1/interpreter does.
+// TODO: measure if it makes a difference
+
+void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
+ Register base, Register tmp, Label &slow_case,
+ bool allow_fallthrough_on_failure, bool one_shot)
+{
+
+ bool fallthrough_is_success = false;
+
+ // ARM Litmus Test example does prefetching here.
+ // TODO: investigate if it helps performance
+
+ // The last store was to the displaced header, so to prevent
+ // reordering we must issue a StoreStore or Release barrier before
+ // the CAS store.
+
+#ifdef AARCH64
+
+ Register Rscratch = tmp;
+ Register Roop = base;
+ Register mark = oldval;
+ Register Rbox = newval;
+ Label loop;
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
+
+ // Instead of StoreStore here, we use store-release-exclusive below
+
+ bind(loop);
+
+ ldaxr(tmp, base); // acquire
+ cmp(tmp, oldval);
+ b(slow_case, ne);
+ stlxr(tmp, newval, base); // release
+ if (one_shot) {
+ cmp_w(tmp, 0);
+ } else {
+ cbnz_w(tmp, loop);
+ fallthrough_is_success = true;
+ }
+
+ // MemBarAcquireLock would normally go here, but
+ // we already do ldaxr+stlxr above, which has
+ // Sequential Consistency
+
+#else
+ membar(MacroAssembler::StoreStore, noreg);
+
+ if (one_shot) {
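+    // Single attempt: strex (executed only on a matching compare) writes 0 to tmp on
+    // success, so the final conditional cmp leaves EQ set iff the CAS succeeded.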
+ ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
+ cmp(tmp, oldval);
+ strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
+ cmp(tmp, 0, eq);
+ } else {
+ atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
+ }
+
+ // MemBarAcquireLock barrier
+ // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
+ // but that doesn't prevent a load or store from floating up between
+ // the load and store in the CAS sequence, so play it safe and
+ // do a full fence.
+ membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
+#endif
+ if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
+ b(slow_case, ne);
+ }
+}
+
+void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
+ Register base, Register tmp, Label &slow_case,
+ bool allow_fallthrough_on_failure, bool one_shot)
+{
+
+ bool fallthrough_is_success = false;
+
+ assert_different_registers(oldval,newval,base,tmp);
+
+#ifdef AARCH64
+ Label loop;
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
+
+ bind(loop);
+ ldxr(tmp, base);
+ cmp(tmp, oldval);
+ b(slow_case, ne);
+ // MemBarReleaseLock barrier
+ stlxr(tmp, newval, base);
+ if (one_shot) {
+ cmp_w(tmp, 0);
+ } else {
+ cbnz_w(tmp, loop);
+ fallthrough_is_success = true;
+ }
+#else
+ // MemBarReleaseLock barrier
+ // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
+ // but that doesn't prevent a load or store from floating down between
+ // the load and store in the CAS sequence, so play it safe and
+ // do a full fence.
+ membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
+
+ if (one_shot) {
+ ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
+ cmp(tmp, oldval);
+ strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
+ cmp(tmp, 0, eq);
+ } else {
+ atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
+ }
+#endif
+ if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
+ b(slow_case, ne);
+ }
+
+ // ExitEnter
+ // According to JSR-133 Cookbook, this should be StoreLoad, the same
+ // barrier that follows volatile store.
+ // TODO: Should be able to remove on armv8 if volatile loads
+ // use the load-acquire instruction.
+ membar(StoreLoad, noreg);
+}
+
+#ifndef PRODUCT
+
+// Preserves flags and all registers.
+ // On SMP the updated value might not be visible to external observers without a synchronization barrier
+void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
+ if (counter_addr != NULL) {
+ InlinedAddress counter_addr_literal((address)counter_addr);
+ Label done, retry;
+ if (cond != al) {
+ b(done, inverse(cond));
+ }
+
+#ifdef AARCH64
+ raw_push(R0, R1);
+ raw_push(R2, ZR);
+
+ ldr_literal(R0, counter_addr_literal);
+
+ bind(retry);
+ ldxr_w(R1, R0);
+ add_w(R1, R1, 1);
+ stxr_w(R2, R1, R0);
+ cbnz_w(R2, retry);
+
+ raw_pop(R2, ZR);
+ raw_pop(R0, R1);
+#else
+ push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
+ ldr_literal(R0, counter_addr_literal);
+
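+ // Save the caller's condition flags; the CAS loop below clobbers them
+ // (they are restored by the msr after the loop).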
+ mrs(CPSR, Rtemp);
+
+ bind(retry);
+ ldr_s32(R1, Address(R0));
+ add(R2, R1, 1);
+ atomic_cas_bool(R1, R2, R0, 0, R3);
+ b(retry, ne);
+
+ msr(CPSR_fsxc, Rtemp);
+
+ pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
+#endif // AARCH64
+
+ b(done);
+ bind_literal(counter_addr_literal);
+
+ bind(done);
+ }
+}
+
+#endif // !PRODUCT
+
+
+// Building block for CAS cases of biased locking: makes CAS and records statistics.
+// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
+void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
+ Register tmp, Label& slow_case, int* counter_addr) {
+
+ cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
+#ifdef ASSERT
+ breakpoint(ne); // Fallthrough only on success
+#endif
+#ifndef PRODUCT
+ if (counter_addr != NULL) {
+ cond_atomic_inc32(al, counter_addr);
+ }
+#endif // !PRODUCT
+}
+
+int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
+ bool swap_reg_contains_mark,
+ Register tmp2,
+ Label& done, Label& slow_case,
+ BiasedLockingCounters* counters) {
+ // obj_reg must be preserved (at least) if the bias locking fails
+ // tmp_reg is a temporary register
+ // swap_reg was used as a temporary but contained a value
+ // that was used afterwards in some call paths. Callers
+ // have been fixed so that swap_reg no longer needs to be
+ // saved.
+ // Rtemp is no longer scratched
+
+ assert(UseBiasedLocking, "why call this otherwise?");
+ assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
+ guarantee(swap_reg != tmp_reg, "invariant");
+ assert(tmp_reg != noreg, "must supply tmp_reg");
+
+#ifndef PRODUCT
+ if (PrintBiasedLockingStatistics && (counters == NULL)) {
+ counters = BiasedLocking::counters();
+ }
+#endif
+
+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+ Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
+
+ // Biased locking
+ // See whether the lock is currently biased toward our thread and
+ // whether the epoch is still valid
+ // Note that the runtime guarantees sufficient alignment of JavaThread
+ // pointers to allow age to be placed into low bits
+ // First check to see whether biasing is even enabled for this object
+ Label cas_label;
+
+ // The null check applies to the mark load, if we need to load it.
+ // If the mark has already been loaded into swap_reg, the null check has
+ // already been performed and the offset is irrelevant.
+ int null_check_offset = offset();
+ if (!swap_reg_contains_mark) {
+ ldr(swap_reg, mark_addr);
+ }
+
+ // On MP platforms, loads could return 'stale' values in some cases.
+ // That is acceptable since either CAS or slow case path is taken in the worst case.
+
+ andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+ cmp(tmp_reg, markOopDesc::biased_lock_pattern);
+
+ b(cas_label, ne);
+
+ // The bias pattern is present in the object's header. Need to check
+ // whether the bias owner and the epoch are both still current.
+ load_klass(tmp_reg, obj_reg);
+ ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+ orr(tmp_reg, tmp_reg, Rthread);
+ eor(tmp_reg, tmp_reg, swap_reg);
+
+#ifdef AARCH64
+ ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
+#else
+ bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
+#endif // AARCH64
+
+#ifndef PRODUCT
+ if (counters != NULL) {
+ cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
+ }
+#endif // !PRODUCT
+
+ b(done, eq);
+
+ Label try_revoke_bias;
+ Label try_rebias;
+
+ // At this point we know that the header has the bias pattern and
+ // that we are not the bias owner in the current epoch. We need to
+ // figure out more details about the state of the header in order to
+ // know what operations can be legally performed on the object's
+ // header.
+
+ // If the low three bits in the xor result aren't clear, that means
+ // the prototype header is no longer biased and we have to revoke
+ // the bias on this object.
+ tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+ b(try_revoke_bias, ne);
+
+ // Biasing is still enabled for this data type. See whether the
+ // epoch of the current bias is still valid, meaning that the epoch
+ // bits of the mark word are equal to the epoch bits of the
+ // prototype header. (Note that the prototype header's epoch bits
+ // only change at a safepoint.) If not, attempt to rebias the object
+ // toward the current thread. Note that we must be absolutely sure
+ // that the current epoch is invalid in order to do this because
+ // otherwise the manipulations it performs on the mark word are
+ // illegal.
+ tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
+ b(try_rebias, ne);
+
+ // tmp_reg has the age, epoch and pattern bits cleared
+ // The remaining (owner) bits are (Thread ^ current_owner)
+
+ // The epoch of the current bias is still valid but we know nothing
+ // about the owner; it might be set or it might be clear. Try to
+ // acquire the bias of the object using an atomic operation. If this
+ // fails we will go in to the runtime to revoke the object's bias.
+ // Note that we first construct the presumed unbiased header so we
+ // don't accidentally blow away another thread's valid bias.
+
+ // Note that we know the owner is not ourselves. Hence, success can
+ // only happen when the owner bits are 0
+
+#ifdef AARCH64
+ // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
+ // a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
+ mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+ andr(swap_reg, swap_reg, tmp2);
+#else
+ // until the assembler can be made smarter, we need to make some assumptions about the values
+ // so we can optimize this:
+ assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
+
+ mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
+ mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
+#endif // AARCH64
+
+ orr(tmp_reg, swap_reg, Rthread); // new mark
+
+ biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
+ (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
+
+ // If the biasing toward our thread failed, this means that
+ // another thread succeeded in biasing it toward itself and we
+ // need to revoke that bias. The revocation will occur in the
+ // interpreter runtime in the slow case.
+
+ b(done);
+
+ bind(try_rebias);
+
+ // At this point we know the epoch has expired, meaning that the
+ // current "bias owner", if any, is actually invalid. Under these
+ // circumstances _only_, we are allowed to use the current header's
+ // value as the comparison value when doing the cas to acquire the
+ // bias in the current epoch. In other words, we allow transfer of
+ // the bias from one thread to another directly in this situation.
+
+ // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
+
+ eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
+
+ // owner bits 'random'. Set them to Rthread.
+#ifdef AARCH64
+ mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+ andr(tmp_reg, tmp_reg, tmp2);
+#else
+ mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
+ mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
+#endif // AARCH64
+
+ orr(tmp_reg, tmp_reg, Rthread); // new mark
+
+ biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
+ (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
+
+ // If the biasing toward our thread failed, then another thread
+ // succeeded in biasing it toward itself and we need to revoke that
+ // bias. The revocation will occur in the runtime in the slow case.
+
+ b(done);
+
+ bind(try_revoke_bias);
+
+ // The prototype mark in the klass doesn't have the bias bit set any
+ // more, indicating that objects of this data type are not supposed
+ // to be biased any more. We are going to try to reset the mark of
+ // this object to the prototype value and fall through to the
+ // CAS-based locking scheme. Note that if our CAS fails, it means
+ // that another thread raced us for the privilege of revoking the
+ // bias of this particular object, so it's okay to continue in the
+ // normal locking code.
+
+ // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
+
+ eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
+
+ // owner bits 'random'. Clear them
+#ifdef AARCH64
+ mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+ andr(tmp_reg, tmp_reg, tmp2);
+#else
+ mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
+ mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
+#endif // AARCH64
+
+ biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
+ (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
+
+ // Fall through to the normal CAS-based lock, because no matter what
+ // the result of the above CAS, some thread must have succeeded in
+ // removing the bias bit from the object's header.
+
+ bind(cas_label);
+
+ return null_check_offset;
+}
+
+
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
+ assert(UseBiasedLocking, "why call this otherwise?");
+
+ // Check for biased locking unlock case, which is a no-op
+ // Note: we do not have to check the thread ID for two reasons.
+ // First, the interpreter checks for IllegalMonitorStateException at
+ // a higher level. Second, if the bias was revoked while we held the
+ // lock, the object could not be rebiased toward another thread, so
+ // the bias bit would be clear.
+ ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+
+ andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+ cmp(tmp_reg, markOopDesc::biased_lock_pattern);
+ b(done, eq);
+}
+
+
+void MacroAssembler::resolve_jobject(Register value,
+ Register tmp1,
+ Register tmp2) {
+ assert_different_registers(value, tmp1, tmp2);
+ Label done, not_weak;
+ cbz(value, done); // Use NULL as-is.
+ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
+ tbz(value, 0, not_weak); // Test for jweak tag.
+ // Resolve jweak.
+ ldr(value, Address(value, -JNIHandles::weak_tag_value));
+ verify_oop(value);
+#if INCLUDE_ALL_GCS
+ if (UseG1GC) {
+ g1_write_barrier_pre(noreg, // store_addr
+ noreg, // new_val
+ value, // pre_val
+ tmp1, // tmp1
+ tmp2); // tmp2
+ }
+#endif // INCLUDE_ALL_GCS
+ b(done);
+ bind(not_weak);
+ // Resolve (untagged) jobject.
+ ldr(value, Address(value));
+ verify_oop(value);
+ bind(done);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////
+
+#if INCLUDE_ALL_GCS
+
+// G1 pre-barrier.
+// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+ // If store_addr != noreg, then the previous value is loaded from [store_addr];
+ // in that case the store_addr and new_val registers are preserved;
+ // otherwise the pre_val register is preserved.
+void MacroAssembler::g1_write_barrier_pre(Register store_addr,
+ Register new_val,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ Label runtime;
+
+ if (store_addr != noreg) {
+ assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
+ } else {
+ assert (new_val == noreg, "should be");
+ assert_different_registers(pre_val, tmp1, tmp2, noreg);
+ }
+
+ Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_active()));
+ Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_index()));
+ Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_buf()));
+
+ // Is marking active?
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
+ ldrb(tmp1, in_progress);
+ cbz(tmp1, done);
+
+ // Do we need to load the previous value?
+ if (store_addr != noreg) {
+ load_heap_oop(pre_val, Address(store_addr, 0));
+ }
+
+ // Is the previous value null?
+ cbz(pre_val, done);
+
+ // Can we store original value in the thread's buffer?
+ // Is index == 0?
+ // (The index field is typed as size_t.)
+
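+ // The index counts down in bytes; index == 0 means the buffer is full and we
+ // must call into the runtime. Otherwise store pre_val at buffer[index - wordSize]
+ // and update the index.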
+ ldr(tmp1, index); // tmp1 := *index_adr
+ ldr(tmp2, buffer);
+
+ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
+ b(runtime, lt); // If negative, goto runtime
+
+ str(tmp1, index); // *index_adr := tmp1
+
+ // Record the previous value
+ str(pre_val, Address(tmp2, tmp1));
+ b(done);
+
+ bind(runtime);
+
+ // save the live input values
+#ifdef AARCH64
+ if (store_addr != noreg) {
+ raw_push(store_addr, new_val);
+ } else {
+ raw_push(pre_val, ZR);
+ }
+#else
+ if (store_addr != noreg) {
+ // avoid raw_push to support any ordering of store_addr and new_val
+ push(RegisterSet(store_addr) | RegisterSet(new_val));
+ } else {
+ push(pre_val);
+ }
+#endif // AARCH64
+
+ if (pre_val != R0) {
+ mov(R0, pre_val);
+ }
+ mov(R1, Rthread);
+
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);
+
+#ifdef AARCH64
+ if (store_addr != noreg) {
+ raw_pop(store_addr, new_val);
+ } else {
+ raw_pop(pre_val, ZR);
+ }
+#else
+ if (store_addr != noreg) {
+ pop(RegisterSet(store_addr) | RegisterSet(new_val));
+ } else {
+ pop(pre_val);
+ }
+#endif // AARCH64
+
+ bind(done);
+}
+
+// G1 post-barrier.
+// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+void MacroAssembler::g1_write_barrier_post(Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3) {
+
+ Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
+ DirtyCardQueue::byte_offset_of_index()));
+ Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
+ DirtyCardQueue::byte_offset_of_buf()));
+
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+ Label done;
+ Label runtime;
+
+ // Does store cross heap regions?
+
+ eor(tmp1, store_addr, new_val);
+#ifdef AARCH64
+ logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
+ cbz(tmp1, done);
+#else
+ movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
+ b(done, eq);
+#endif
+
+ // crosses regions, storing NULL?
+
+ cbz(new_val, done);
+
+ // storing region crossing non-NULL, is card already dirty?
+ const Register card_addr = tmp1;
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
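+ // card_addr = byte_map_base + (store_addr >> card_shift)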
+ mov_address(tmp2, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
+ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTableModRefBS::card_shift));
+
+ ldrb(tmp2, Address(card_addr));
+ cmp(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+ b(done, eq);
+
+ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
+
+ assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code");
+ ldrb(tmp2, Address(card_addr));
+ cbz(tmp2, done);
+
+ // storing a region crossing, non-NULL oop, card is clean.
+ // dirty card and log.
+
+ strb(zero_register(tmp2), Address(card_addr));
+
+ ldr(tmp2, queue_index);
+ ldr(tmp3, buffer);
+
+ subs(tmp2, tmp2, wordSize);
+ b(runtime, lt); // go to runtime if now negative
+
+ str(tmp2, queue_index);
+
+ str(card_addr, Address(tmp3, tmp2));
+ b(done);
+
+ bind(runtime);
+
+ if (card_addr != R0) {
+ mov(R0, card_addr);
+ }
+ mov(R1, Rthread);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);
+
+ bind(done);
+}
+
+#endif // INCLUDE_ALL_GCS
+
+//////////////////////////////////////////////////////////////////////////////////
+
+#ifdef AARCH64
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
+ switch (size_in_bytes) {
+ case 8: ldr(dst, src); break;
+ case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
+ case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
+ case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
+ default: ShouldNotReachHere();
+ }
+}
+
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
+ switch (size_in_bytes) {
+ case 8: str(src, dst); break;
+ case 4: str_32(src, dst); break;
+ case 2: strh(src, dst); break;
+ case 1: strb(src, dst); break;
+ default: ShouldNotReachHere();
+ }
+}
+
+#else
+
+void MacroAssembler::load_sized_value(Register dst, Address src,
+ size_t size_in_bytes, bool is_signed, AsmCondition cond) {
+ switch (size_in_bytes) {
+ case 4: ldr(dst, src, cond); break;
+ case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
+ case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
+ default: ShouldNotReachHere();
+ }
+}
+
+
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
+ switch (size_in_bytes) {
+ case 4: str(src, dst, cond); break;
+ case 2: strh(src, dst, cond); break;
+ case 1: strb(src, dst, cond); break;
+ default: ShouldNotReachHere();
+ }
+}
+#endif // AARCH64
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <Rinterf, Rindex>.
+// The receiver klass is in Rklass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register Rklass,
+ Register Rinterf,
+ Register Rindex,
+ Register method_result,
+ Register temp_reg1,
+ Register temp_reg2,
+ Label& L_no_such_interface) {
+
+ assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex);
+
+ Register Ritable = temp_reg1;
+
+ // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+ const int base = in_bytes(Klass::vtable_start_offset());
+ const int scale = exact_log2(vtableEntry::size_in_bytes());
+ ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
+ add(Ritable, Rklass, base);
+ add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale));
+
+ Label entry, search;
+
+ b(entry);
+
+ bind(search);
+ add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize);
+
+ bind(entry);
+
+ // Check that the entry is non-null. A null entry means that the receiver
+ // class doesn't implement the interface, and wasn't the same as the
+ // receiver class checked when the interface was resolved.
+
+ ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes()));
+ cbz(temp_reg2, L_no_such_interface);
+
+ cmp(Rinterf, temp_reg2);
+ b(search, ne);
+
+ ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes()));
+ add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass*
+ assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
+ assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
+
+ ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex));
+}
+
+#ifdef COMPILER2
+// TODO: 8 bytes at a time? pre-fetch?
+// Compare char[] arrays aligned to 4 bytes.
+void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
+ Register limit, Register result,
+ Register chr1, Register chr2, Label& Ldone) {
+ Label Lvector, Lloop;
+
+ // Note: limit contains number of bytes (2*char_elements) != 0.
+ tst(limit, 0x2); // trailing character ?
+ b(Lvector, eq);
+
+ // compare the trailing char
+ sub(limit, limit, sizeof(jchar));
+ ldrh(chr1, Address(ary1, limit));
+ ldrh(chr2, Address(ary2, limit));
+ cmp(chr1, chr2);
+ mov(result, 0, ne); // not equal
+ b(Ldone, ne);
+
+ // only one char ?
+ tst(limit, limit);
+ mov(result, 1, eq);
+ b(Ldone, eq);
+
+ // word by word compare, don't need alignment check
+ bind(Lvector);
+
+ // Shift ary1 and ary2 to the end of the arrays, negate limit
+ add(ary1, limit, ary1);
+ add(ary2, limit, ary2);
+ neg(limit, limit);
+
+ bind(Lloop);
+ ldr_u32(chr1, Address(ary1, limit));
+ ldr_u32(chr2, Address(ary2, limit));
+ cmp_32(chr1, chr2);
+ mov(result, 0, ne); // not equal
+ b(Ldone, ne);
+ adds(limit, limit, 2*sizeof(jchar));
+ b(Lloop, ne);
+
+ // Caller should set it:
+ // mov(result_reg, 1); //equal
+}
+#endif
+
+void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
+ mov_slow(tmpreg1, counter_addr);
+ ldr_s32(tmpreg2, tmpreg1);
+ add_32(tmpreg2, tmpreg2, 1);
+ str_32(tmpreg2, tmpreg1);
+}
+
+void MacroAssembler::floating_cmp(Register dst) {
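+ // Produces -1, 0 or +1 in dst from the flags of a preceding floating-point
+ // compare: +1 for '>', 0 for '==', -1 for '<' or unordered.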
+#ifdef AARCH64
+ NOT_TESTED();
+ cset(dst, gt); // 1 if '>', else 0
+ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
+#else
+ vmrs(dst, FPSCR);
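+ // FPSCR holds N,Z,C,V in bits 31..28. Setting bit 27 and xoring with (dst << 3)
+ // moves (N^V) into bit 31 and !Z into bit 30; asr #30 then yields -1, 0 or +1.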
+ orr(dst, dst, 0x08000000);
+ eor(dst, dst, AsmOperand(dst, lsl, 3));
+ mov(dst, AsmOperand(dst, asr, 30));
+#endif
+}
+
+void MacroAssembler::restore_default_fp_mode() {
+#ifdef AARCH64
+ msr(SysReg_FPCR, ZR);
+#else
+#ifndef __SOFTFP__
+ // Round to Near mode, IEEE compatible, masked exceptions
+ mov(Rtemp, 0);
+ vmsr(FPSCR, Rtemp);
+#endif // !__SOFTFP__
+#endif // AARCH64
+}
+
+#ifndef AARCH64
+// 24-bit word range == 26-bit byte range
+bool check26(int offset) {
+ // this could be simplified, but it mimics encoding and decoding
+ // an actual branch instruction
+ int off1 = offset << 6 >> 8;
+ int encoded = off1 & ((1<<24)-1);
+ int decoded = encoded << 8 >> 6;
+ return offset == decoded;
+}
+#endif // !AARCH64
+
+// Perform some slight adjustments so the default 32MB code cache
+// is fully reachable.
+static inline address first_cache_address() {
+ return CodeCache::low_bound() + sizeof(HeapBlock::Header);
+}
+static inline address last_cache_address() {
+ return CodeCache::high_bound() - Assembler::InstructionSize;
+}
+
+#ifdef AARCH64
+// Can we reach target using ADRP?
+bool MacroAssembler::page_reachable_from_cache(address target) {
+ intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
+ intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
+ intptr_t addr = (intptr_t)target & ~0xfff;
+
+ intptr_t loffset = addr - cl;
+ intptr_t hoffset = addr - ch;
+ return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
+}
+#endif
+
+// Can we reach target using unconditional branch or call from anywhere
+// in the code cache (because code can be relocated)?
+bool MacroAssembler::_reachable_from_cache(address target) {
+#ifdef __thumb__
+ if ((1 & (intptr_t)target) != 0) {
+ // Return false to avoid 'b' if we would need to switch to THUMB mode.
+ return false;
+ }
+#endif
+
+ address cl = first_cache_address();
+ address ch = last_cache_address();
+
+ if (ForceUnreachable) {
+ // Only addresses from CodeCache can be treated as reachable.
+ if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
+ return false;
+ }
+ }
+
+ intptr_t loffset = (intptr_t)target - (intptr_t)cl;
+ intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
+
+#ifdef AARCH64
+ return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
+#else
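+ // ARM branch offsets are relative to PC, which reads as the address of the
+ // current instruction plus 8.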
+ return check26(loffset - 8) && check26(hoffset - 8);
+#endif
+}
+
+bool MacroAssembler::reachable_from_cache(address target) {
+ assert(CodeCache::contains(pc()), "not supported");
+ return _reachable_from_cache(target);
+}
+
+// Can we reach the entire code cache from anywhere else in the code cache?
+bool MacroAssembler::_cache_fully_reachable() {
+ address cl = first_cache_address();
+ address ch = last_cache_address();
+ return _reachable_from_cache(cl) && _reachable_from_cache(ch);
+}
+
+bool MacroAssembler::cache_fully_reachable() {
+ assert(CodeCache::contains(pc()), "not supported");
+ return _cache_fully_reachable();
+}
+
+void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+ assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
+ if (reachable_from_cache(target)) {
+ relocate(rtype);
+ b(target NOT_AARCH64_ARG(cond));
+ return;
+ }
+
+ // Note: relocate is not needed for the code below,
+ // since it encodes targets in absolute format.
+ if (ignore_non_patchable_relocations()) {
+ rtype = relocInfo::none;
+ }
+
+#ifdef AARCH64
+ assert (scratch != noreg, "should be specified");
+ InlinedAddress address_literal(target, rtype);
+ ldr_literal(scratch, address_literal);
+ br(scratch);
+ int off = offset();
+ bind_literal(address_literal);
+#ifdef COMPILER2
+ if (offset() - off == wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+#endif
+#else
+ if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
+ // Note: this version cannot be (atomically) patched
+ mov_slow(scratch, (intptr_t)target, cond);
+ bx(scratch, cond);
+ } else {
+ Label skip;
+ InlinedAddress address_literal(target);
+ if (cond != al) {
+ b(skip, inverse(cond));
+ }
+ relocate(rtype);
+ ldr_literal(PC, address_literal);
+ bind_literal(address_literal);
+ bind(skip);
+ }
+#endif // AARCH64
+}
+
+ // Similar to jump except that:
+ // - near jumps are valid only if every possible destination in the code cache is near
+ // - no movt/movw (not atomically patchable)
+void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+ assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
+ if (cache_fully_reachable()) {
+ // Note: this assumes that all possible targets (the initial one
+ // and the addresses patched to) are all in the code cache.
+ assert(CodeCache::contains(target), "target might be too far");
+ relocate(rtype);
+ b(target NOT_AARCH64_ARG(cond));
+ return;
+ }
+
+ // Discard the relocation information if not needed for CacheCompiledCode
+ // since the next encodings are all in absolute format.
+ if (ignore_non_patchable_relocations()) {
+ rtype = relocInfo::none;
+ }
+
+#ifdef AARCH64
+ assert (scratch != noreg, "should be specified");
+ InlinedAddress address_literal(target);
+ relocate(rtype);
+ ldr_literal(scratch, address_literal);
+ br(scratch);
+ int off = offset();
+ bind_literal(address_literal);
+#ifdef COMPILER2
+ if (offset() - off == wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+#endif
+#else
+ {
+ Label skip;
+ InlinedAddress address_literal(target);
+ if (cond != al) {
+ b(skip, inverse(cond));
+ }
+ relocate(rtype);
+ ldr_literal(PC, address_literal);
+ bind_literal(address_literal);
+ bind(skip);
+ }
+#endif // AARCH64
+}
+
+void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
+ Register scratch = LR;
+ assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
+ if (reachable_from_cache(target)) {
+ relocate(rspec);
+ bl(target NOT_AARCH64_ARG(cond));
+ return;
+ }
+
+ // Note: relocate is not needed for the code below,
+ // since it encodes targets in absolute format.
+ if (ignore_non_patchable_relocations()) {
+ // This assumes the information was needed only for relocating the code.
+ rspec = RelocationHolder::none;
+ }
+
+#ifndef AARCH64
+ if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
+ // Note: this version cannot be (atomically) patched
+ mov_slow(scratch, (intptr_t)target, cond);
+ blx(scratch, cond);
+ return;
+ }
+#endif
+
+ {
+ Label ret_addr;
+#ifndef AARCH64
+ if (cond != al) {
+ b(ret_addr, inverse(cond));
+ }
+#endif
+
+
+#ifdef AARCH64
+ // TODO-AARCH64: make more optimal implementation
+ // [ Keep in sync with MacroAssembler::call_size ]
+ assert(rspec.type() == relocInfo::none, "call reloc not implemented");
+ mov_slow(scratch, target);
+ blr(scratch);
+#else
+ InlinedAddress address_literal(target);
+ relocate(rspec);
+ adr(LR, ret_addr);
+ ldr_literal(PC, address_literal);
+
+ bind_literal(address_literal);
+ bind(ret_addr);
+#endif
+ }
+}
+
+#if defined(AARCH64) && defined(COMPILER2)
+int MacroAssembler::call_size(address target, bool far, bool patchable) {
+ // FIXME: mov_slow is variable-length
+ if (!far) return 1; // bl
+ if (patchable) return 2; // ldr; blr
+ return instr_count_for_mov_slow((intptr_t)target) + 1;
+}
+#endif
+
+int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
+ assert(rspec.type() == relocInfo::static_call_type ||
+ rspec.type() == relocInfo::none ||
+ rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
+
+ // Always generate the relocation information, needed for patching
+ relocate(rspec); // used by NativeCall::is_call_before()
+ if (cache_fully_reachable()) {
+ // Note: this assumes that all possible targets (the initial one
+ // and the addresses patched to) are all in the code cache.
+ assert(CodeCache::contains(target), "target might be too far");
+ bl(target);
+ } else {
+#if defined(AARCH64) && defined(COMPILER2)
+ if (c2) {
+ // return address needs to match call_size().
+ // no need to trash Rtemp
+ int off = offset();
+ Label skip_literal;
+ InlinedAddress address_literal(target);
+ ldr_literal(LR, address_literal);
+ blr(LR);
+ int ret_addr_offset = offset();
+ assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
+ b(skip_literal);
+ int off2 = offset();
+ bind_literal(address_literal);
+ if (offset() - off2 == wordSize) {
+ // no padding, so insert nop for worst-case sizing
+ nop();
+ }
+ bind(skip_literal);
+ return ret_addr_offset;
+ }
+#endif
+ Label ret_addr;
+ InlinedAddress address_literal(target);
+#ifdef AARCH64
+ ldr_literal(Rtemp, address_literal);
+ adr(LR, ret_addr);
+ br(Rtemp);
+#else
+ adr(LR, ret_addr);
+ ldr_literal(PC, address_literal);
+#endif
+ bind_literal(address_literal);
+ bind(ret_addr);
+ }
+ return offset();
+}
+
+// ((OopHandle)result).resolve();
+void MacroAssembler::resolve_oop_handle(Register result) {
+ // OopHandle::resolve is an indirection.
+ ldr(result, Address(result, 0));
+}
+
+void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+ ldr(tmp, Address(method, Method::const_offset()));
+ ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
+ ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
+ ldr(mirror, Address(tmp, mirror_offset));
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Compressed pointers
+
+#ifdef AARCH64
+
+void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
+ if (UseCompressedClassPointers) {
+ ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
+ decode_klass_not_null(dst_klass);
+ } else {
+ ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
+ }
+}
+
+#else
+
+void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
+ ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
+}
+
+#endif // AARCH64
+
+// Blows src_klass.
+void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
+#ifdef AARCH64
+ if (UseCompressedClassPointers) {
+ assert(src_klass != dst_oop, "not enough registers");
+ encode_klass_not_null(src_klass);
+ str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
+ return;
+ }
+#endif // AARCH64
+ str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
+}
+
+#ifdef AARCH64
+
+void MacroAssembler::store_klass_gap(Register dst) {
+ if (UseCompressedClassPointers) {
+ str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
+ }
+}
+
+#endif // AARCH64
+
+
+void MacroAssembler::load_heap_oop(Register dst, Address src) {
+#ifdef AARCH64
+ if (UseCompressedOops) {
+ ldr_w(dst, src);
+ decode_heap_oop(dst);
+ return;
+ }
+#endif // AARCH64
+ ldr(dst, src);
+}
+
+// Blows src and flags.
+void MacroAssembler::store_heap_oop(Register src, Address dst) {
+#ifdef AARCH64
+ if (UseCompressedOops) {
+ assert(!dst.uses(src), "not enough registers");
+ encode_heap_oop(src);
+ str_w(src, dst);
+ return;
+ }
+#endif // AARCH64
+ str(src, dst);
+}
+
+void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
+#ifdef AARCH64
+ if (UseCompressedOops) {
+ str_w(src, dst);
+ return;
+ }
+#endif // AARCH64
+ str(src, dst);
+}
+
+
+#ifdef AARCH64
+
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+void MacroAssembler::encode_heap_oop(Register dst, Register src) {
+ // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
+ // Update it at modifications.
+ assert (UseCompressedOops, "must be compressed");
+ assert (Universe::heap() != NULL, "java heap should be initialized");
+#ifdef ASSERT
+ verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+ verify_oop(src);
+ if (Universe::narrow_oop_base() == NULL) {
+ if (Universe::narrow_oop_shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ _lsr(dst, src, Universe::narrow_oop_shift());
+ } else if (dst != src) {
+ mov(dst, src);
+ }
+ } else {
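+ // NULL must encode to 0: if src is NULL, select Rheap_base so that the
+ // subtraction below yields 0; otherwise encode (src - base), shifted if needed.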
+ tst(src, src);
+ csel(dst, Rheap_base, src, eq);
+ sub(dst, dst, Rheap_base);
+ if (Universe::narrow_oop_shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ _lsr(dst, dst, Universe::narrow_oop_shift());
+ }
+ }
+}
+
+// Same algorithm as oop.inline.hpp decode_heap_oop.
+void MacroAssembler::decode_heap_oop(Register dst, Register src) {
+#ifdef ASSERT
+ verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+ assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ if (Universe::narrow_oop_base() != NULL) {
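+ // A narrow oop of 0 must decode back to NULL: the csel zeroes dst when src == 0.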
+ tst(src, src);
+ add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
+ csel(dst, dst, ZR, ne);
+ } else {
+ _lsl(dst, src, Universe::narrow_oop_shift());
+ }
+ verify_oop(dst);
+}
+
+#ifdef COMPILER2
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
+// must be changed.
+void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
+ assert (UseCompressedOops, "must be compressed");
+ assert (Universe::heap() != NULL, "java heap should be initialized");
+#ifdef ASSERT
+ verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+ verify_oop(src);
+ if (Universe::narrow_oop_base() == NULL) {
+ if (Universe::narrow_oop_shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ _lsr(dst, src, Universe::narrow_oop_shift());
+ } else if (dst != src) {
+ mov(dst, src);
+ }
+ } else {
+ sub(dst, src, Rheap_base);
+ if (Universe::narrow_oop_shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ _lsr(dst, dst, Universe::narrow_oop_shift());
+ }
+ }
+}
+
+ // Same algorithm as oop.inline.hpp decode_heap_oop.
+// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
+// must be changed.
+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+#ifdef ASSERT
+ verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+ assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+ if (Universe::narrow_oop_base() != NULL) {
+ add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
+ } else {
+ _lsl(dst, src, Universe::narrow_oop_shift());
+ }
+ verify_oop(dst);
+}
+
+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+ assert(UseCompressedClassPointers, "should only be used for compressed header");
+ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+ int klass_index = oop_recorder()->find_index(k);
+ RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+
+ // Relocation with special format (see relocInfo_arm.hpp).
+ relocate(rspec);
+ narrowKlass encoded_k = Klass::encode_klass(k);
+ movz(dst, encoded_k & 0xffff, 0);
+ movk(dst, (encoded_k >> 16) & 0xffff, 16);
+}
+
+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+ assert(UseCompressedOops, "should only be used for compressed header");
+ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+ int oop_index = oop_recorder()->find_index(obj);
+ RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+ relocate(rspec);
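+ // The 0xffff immediates are placeholders; the oop relocation recorded above
+ // allows the movz/movk pair to be patched later with the actual encoded oop.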
+ movz(dst, 0xffff, 0);
+ movk(dst, 0xffff, 16);
+}
+
+#endif // COMPILER2
+
+// Must preserve condition codes, or C2 encodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::encode_klass_not_null(Register r) {
+ if (Universe::narrow_klass_base() != NULL) {
+ // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
+ assert(r != Rheap_base, "Encoding a klass in Rheap_base");
+ mov_slow(Rheap_base, Universe::narrow_klass_base());
+ sub(r, r, Rheap_base);
+ }
+ if (Universe::narrow_klass_shift() != 0) {
+ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+ _lsr(r, r, Universe::narrow_klass_shift());
+ }
+ if (Universe::narrow_klass_base() != NULL) {
+ reinit_heapbase();
+ }
+}
+
+// Must preserve condition codes, or C2 encodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+ if (dst == src) {
+ encode_klass_not_null(src);
+ return;
+ }
+ if (Universe::narrow_klass_base() != NULL) {
+ mov_slow(dst, (int64_t)Universe::narrow_klass_base());
+ sub(dst, src, dst);
+ if (Universe::narrow_klass_shift() != 0) {
+ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+ _lsr(dst, dst, Universe::narrow_klass_shift());
+ }
+ } else {
+ if (Universe::narrow_klass_shift() != 0) {
+ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+ _lsr(dst, src, Universe::narrow_klass_shift());
+ } else {
+ mov(dst, src);
+ }
+ }
+}
+
+// Function instr_count_for_decode_klass_not_null() counts the instructions
+ // generated by decode_klass_not_null(Register r) and reinit_heapbase(),
+// when (Universe::heap() != NULL). Hence, if the instructions they
+// generate change, then this method needs to be updated.
+int MacroAssembler::instr_count_for_decode_klass_not_null() {
+ assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+ assert(Universe::heap() != NULL, "java heap should be initialized");
+ if (Universe::narrow_klass_base() != NULL) {
+ return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
+ 1 + // add
+ instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow
+ } else {
+ if (Universe::narrow_klass_shift() != 0) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+// Must preserve condition codes, or C2 decodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::decode_klass_not_null(Register r) {
+ int off = offset();
+ assert(UseCompressedClassPointers, "should only be used for compressed headers");
+ assert(Universe::heap() != NULL, "java heap should be initialized");
+ assert(r != Rheap_base, "Decoding a klass in Rheap_base");
+ // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
+ // Also do not verify_oop as this is called by verify_oop.
+ if (Universe::narrow_klass_base() != NULL) {
+ // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
+ mov_slow(Rheap_base, Universe::narrow_klass_base());
+ add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
+ reinit_heapbase();
+ } else {
+ if (Universe::narrow_klass_shift() != 0) {
+ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+ _lsl(r, r, Universe::narrow_klass_shift());
+ }
+ }
+ assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
+}
+
+// Must preserve condition codes, or C2 decodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+ if (src == dst) {
+ decode_klass_not_null(src);
+ return;
+ }
+
+ assert(UseCompressedClassPointers, "should only be used for compressed headers");
+ assert(Universe::heap() != NULL, "java heap should be initialized");
+ assert(src != Rheap_base, "Decoding a klass in Rheap_base");
+ assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
+ // Also do not verify_oop as this is called by verify_oop.
+ if (Universe::narrow_klass_base() != NULL) {
+ mov_slow(dst, Universe::narrow_klass_base());
+ add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
+ } else {
+ _lsl(dst, src, Universe::narrow_klass_shift());
+ }
+}
+
+
+void MacroAssembler::reinit_heapbase() {
+ if (UseCompressedOops || UseCompressedClassPointers) {
+ if (Universe::heap() != NULL) {
+ mov_slow(Rheap_base, Universe::narrow_ptrs_base());
+ } else {
+ ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
+ }
+ }
+}
+
+#ifdef ASSERT
+void MacroAssembler::verify_heapbase(const char* msg) {
+ // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
+ // Update it at modifications.
+ assert (UseCompressedOops, "should be compressed");
+ assert (Universe::heap() != NULL, "java heap should be initialized");
+ if (CheckCompressedOops) {
+ Label ok;
+ str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
+ raw_push(Rtemp, ZR);
+ mrs(Rtemp, Assembler::SysReg_NZCV);
+ str(Rtemp, Address(SP, 1 * wordSize));
+ mov_slow(Rtemp, Universe::narrow_ptrs_base());
+ cmp(Rheap_base, Rtemp);
+ b(ok, eq);
+ stop(msg);
+ bind(ok);
+ ldr(Rtemp, Address(SP, 1 * wordSize));
+ msr(Assembler::SysReg_NZCV, Rtemp);
+ raw_pop(Rtemp, ZR);
+ str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
+ }
+}
+#endif // ASSERT
+
+#endif // AARCH64
+
+#ifdef COMPILER2
+void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
+{
+ assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+ Register Rmark = Rscratch2;
+
+ assert(Roop != Rscratch, "");
+ assert(Roop != Rmark, "");
+ assert(Rbox != Rscratch, "");
+ assert(Rbox != Rmark, "");
+
+ Label fast_lock, done;
+
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
+ Label failed;
+#ifdef AARCH64
+ biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
+#else
+ biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
+#endif
+ bind(failed);
+ }
+
+ ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
+ tst(Rmark, markOopDesc::unlocked_value);
+ b(fast_lock, ne);
+
+ // Check for recursive lock
+ // See comments in InterpreterMacroAssembler::lock_object for
+ // explanations on the fast recursive locking check.
+#ifdef AARCH64
+ intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
+ Assembler::LogicalImmediate imm(mask, false);
+ mov(Rscratch, SP);
+ sub(Rscratch, Rmark, Rscratch);
+ ands(Rscratch, Rscratch, imm);
+ b(done, ne); // exit with failure
+ str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
+ b(done);
+
+#else
+ // -1- test low 2 bits
+ movs(Rscratch, AsmOperand(Rmark, lsl, 30));
+ // -2- test (hdr - SP) if the low two bits are 0
+ sub(Rscratch, Rmark, SP, eq);
+ movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
+ // If still 'eq' then recursive locking OK
+ str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
+ b(done);
+#endif
+
+ bind(fast_lock);
+ str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+
+ bool allow_fallthrough_on_failure = true;
+ bool one_shot = true;
+ cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
+
+ bind(done);
+
+}
+
+void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
+{
+ assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+ Register Rmark = Rscratch2;
+
+ assert(Roop != Rscratch, "");
+ assert(Roop != Rmark, "");
+ assert(Rbox != Rscratch, "");
+ assert(Rbox != Rmark, "");
+
+ Label done;
+
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
+ biased_locking_exit(Roop, Rscratch, done);
+ }
+
+ ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+ // If hdr is NULL, we've got recursive locking and there's nothing more to do
+ cmp(Rmark, 0);
+ b(done, eq);
+
+ // Restore the object header
+ bool allow_fallthrough_on_failure = true;
+ bool one_shot = true;
+ cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
+
+ bind(done);
+
+}
+#endif // COMPILER2
+