hotspot/src/cpu/arm/vm/macroAssembler_arm.cpp
changeset 42664 29142a56c193
       
     1 /*
       
     2  * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20  * or visit www.oracle.com if you need additional information or have any
       
    21  * questions.
       
    22  *
       
    23  */
       
    24 
       
    25 #include "precompiled.hpp"
       
    26 #include "asm/assembler.hpp"
       
    27 #include "asm/assembler.inline.hpp"
       
    28 #include "asm/macroAssembler.hpp"
       
    29 #include "ci/ciEnv.hpp"
       
    30 #include "code/nativeInst.hpp"
       
    31 #include "compiler/disassembler.hpp"
       
    32 #include "gc/shared/cardTableModRefBS.hpp"
       
    33 #include "gc/shared/collectedHeap.inline.hpp"
       
    34 #include "interpreter/interpreter.hpp"
       
    35 #include "memory/resourceArea.hpp"
       
    36 #include "oops/klass.inline.hpp"
       
    37 #include "prims/methodHandles.hpp"
       
    38 #include "runtime/biasedLocking.hpp"
       
    39 #include "runtime/interfaceSupport.hpp"
       
    40 #include "runtime/objectMonitor.hpp"
       
    41 #include "runtime/os.hpp"
       
    42 #include "runtime/sharedRuntime.hpp"
       
    43 #include "runtime/stubRoutines.hpp"
       
    44 #include "utilities/macros.hpp"
       
    45 #if INCLUDE_ALL_GCS
       
    46 #include "gc/g1/g1CollectedHeap.inline.hpp"
       
    47 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
       
    48 #include "gc/g1/heapRegion.hpp"
       
    49 #endif
       
    50 
       
    51 // Implementation of AddressLiteral
       
    52 
       
    53 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
       
    54   switch (rtype) {
       
    55   case relocInfo::oop_type:
       
    56     // Oops are a special case. Normally they would be their own section
       
    57     // but in cases like icBuffer they are literals in the code stream that
       
    58     // we don't have a section for. We use none so that we get a literal address
       
    59     // which is always patchable.
       
    60     break;
       
    61   case relocInfo::external_word_type:
       
    62     _rspec = external_word_Relocation::spec(_target);
       
    63     break;
       
    64   case relocInfo::internal_word_type:
       
    65     _rspec = internal_word_Relocation::spec(_target);
       
    66     break;
       
    67   case relocInfo::opt_virtual_call_type:
       
    68     _rspec = opt_virtual_call_Relocation::spec();
       
    69     break;
       
    70   case relocInfo::static_call_type:
       
    71     _rspec = static_call_Relocation::spec();
       
    72     break;
       
    73   case relocInfo::runtime_call_type:
       
    74     _rspec = runtime_call_Relocation::spec();
       
    75     break;
       
    76   case relocInfo::poll_type:
       
    77   case relocInfo::poll_return_type:
       
    78     _rspec = Relocation::spec_simple(rtype);
       
    79     break;
       
    80   case relocInfo::none:
       
    81     break;
       
    82   default:
       
    83     ShouldNotReachHere();
       
    84     break;
       
    85   }
       
    86 }
       
    87 
       
    88 // Initially added to the Assembler interface as a pure virtual:
       
    89 //   RegisterConstant delayed_value(..)
       
    90 // for:
       
    91 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
       
    92 // this was subsequently modified to its present name and return type
       
    93 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
       
    94                                                       Register tmp,
       
    95                                                       int offset) {
       
    96   ShouldNotReachHere();
       
    97   return RegisterOrConstant(-1);
       
    98 }
       
    99 
       
   100 
       
   101 #ifdef AARCH64
       
   102 // Note: ARM32 version is OS dependent
       
   103 void MacroAssembler::breakpoint(AsmCondition cond) {
       
   104   if (cond == al) {
       
   105     brk();
       
   106   } else {
       
   107     Label L;
       
   108     b(L, inverse(cond));
       
   109     brk();
       
   110     bind(L);
       
   111   }
       
   112 }
       
   113 #endif // AARCH64
       
   114 
       
   115 
       
   116 // virtual method calling
       
   117 void MacroAssembler::lookup_virtual_method(Register recv_klass,
       
   118                                            Register vtable_index,
       
   119                                            Register method_result) {
       
   120   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
       
   121   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
       
   122   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
       
   123   ldr(method_result, Address(recv_klass, base_offset));
       
   124 }
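       // Illustrative sketch (not generated code): the two instructions above
       // amount to the following pointer arithmetic, assuming the one-word
       // vtableEntry layout checked by the assert:
       //
       //   address entry = (address)recv_klass
       //                 + in_bytes(Klass::vtable_start_offset())
       //                 + vtable_index * wordSize;
       //   Method* method_result =
       //       *(Method**)(entry + vtableEntry::method_offset_in_bytes());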
       
   125 
       
   126 
       
   127 // Simplified, combined version, good for typical uses.
       
   128 // Falls through on failure.
       
   129 void MacroAssembler::check_klass_subtype(Register sub_klass,
       
   130                                          Register super_klass,
       
   131                                          Register temp_reg,
       
   132                                          Register temp_reg2,
       
   133                                          Register temp_reg3,
       
   134                                          Label& L_success) {
       
   135   Label L_failure;
       
   136   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
       
   137   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
       
   138   bind(L_failure);
       
   139 }
       
   140 
       
   141 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
       
   142                                                    Register super_klass,
       
   143                                                    Register temp_reg,
       
   144                                                    Register temp_reg2,
       
   145                                                    Label* L_success,
       
   146                                                    Label* L_failure,
       
   147                                                    Label* L_slow_path) {
       
   148 
       
   149   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
       
   150   const Register super_check_offset = temp_reg2;
       
   151 
       
   152   Label L_fallthrough;
       
   153   int label_nulls = 0;
       
   154   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
       
   155   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
       
   156   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
       
   157   assert(label_nulls <= 1, "at most one NULL in the batch");
       
   158 
       
   159   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
       
   160   int sco_offset = in_bytes(Klass::super_check_offset_offset());
       
   161   Address super_check_offset_addr(super_klass, sco_offset);
       
   162 
       
   163   // If the pointers are equal, we are done (e.g., String[] elements).
       
   164   // This self-check enables sharing of secondary supertype arrays among
       
   165   // non-primary types such as array-of-interface.  Otherwise, each such
       
   166   // type would need its own customized SSA.
       
   167   // We move this check to the front of the fast path because many
       
   168   // type checks are in fact trivially successful in this manner,
       
   169   // so we get a nicely predicted branch right at the start of the check.
       
   170   cmp(sub_klass, super_klass);
       
   171   b(*L_success, eq);
       
   172 
       
   173   // Check the supertype display:
       
   174   ldr_u32(super_check_offset, super_check_offset_addr);
       
   175 
       
   176   Address super_check_addr(sub_klass, super_check_offset);
       
   177   ldr(temp_reg, super_check_addr); // load displayed supertype

   178   cmp(super_klass, temp_reg);
       
   179 
       
   180   // This check has worked decisively for primary supers.
       
   181   // Secondary supers are sought in the super_cache ('super_cache_addr').
       
   182   // (Secondary supers are interfaces and very deeply nested subtypes.)
       
   183   // This works in the same check above because of a tricky aliasing
       
   184   // between the super_cache and the primary super display elements.
       
   185   // (The 'super_check_addr' can address either, as the case requires.)
       
   186   // Note that the cache is updated below if it does not help us find
       
   187   // what we need immediately.
       
   188   // So if it was a primary super, we can just fail immediately.
       
   189   // Otherwise, it's the slow path for us (no success at this point).
       
   190 
       
   191   b(*L_success, eq);
       
   192   cmp_32(super_check_offset, sc_offset);
       
   193   if (L_failure == &L_fallthrough) {
       
   194     b(*L_slow_path, eq);
       
   195   } else {
       
   196     b(*L_failure, ne);
       
   197     if (L_slow_path != &L_fallthrough) {
       
   198       b(*L_slow_path);
       
   199     }
       
   200   }
       
   201 
       
   202   bind(L_fallthrough);
       
   203 }
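       // Illustrative sketch (not generated code) of the fast-path decision
       // above, in pseudo-C; it relies on the display/super-cache aliasing
       // described in the comment:
       //
       //   if (sub_klass == super_klass)                            goto success;
       //   juint off = super_klass->super_check_offset();
       //   if (*(Klass**)((address)sub_klass + off) == super_klass) goto success;
       //   if (off != in_bytes(Klass::secondary_super_cache_offset()))
       //     goto failure;    // primary display miss
       //   goto slow_path;    // must scan the secondary supers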
       
   204 
       
   205 
       
   206 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
       
   207                                                    Register super_klass,
       
   208                                                    Register temp_reg,
       
   209                                                    Register temp2_reg,
       
   210                                                    Register temp3_reg,
       
   211                                                    Label* L_success,
       
   212                                                    Label* L_failure,
       
   213                                                    bool set_cond_codes) {
       
   214 #ifdef AARCH64
       
   215   NOT_IMPLEMENTED();
       
   216 #else
       
   217   // Note: if used by code that expects a register to be 0 on success,
       
   218   // this register must be temp_reg and set_cond_codes must be true
       
   219 
       
   220   Register saved_reg = noreg;
       
   221 
       
   222   // get additional tmp registers
       
   223   if (temp3_reg == noreg) {
       
   224     saved_reg = temp3_reg = LR;
       
   225     push(saved_reg);
       
   226   }
       
   227 
       
   228   assert(temp2_reg != noreg, "need all the temporary registers");
       
   229   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
       
   230 
       
   231   Register cmp_temp = temp_reg;
       
   232   Register scan_temp = temp3_reg;
       
   233   Register count_temp = temp2_reg;
       
   234 
       
   235   Label L_fallthrough;
       
   236   int label_nulls = 0;
       
   237   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
       
   238   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
       
   239   assert(label_nulls <= 1, "at most one NULL in the batch");
       
   240 
       
   241   // a couple of useful fields in sub_klass:
       
   242   int ss_offset = in_bytes(Klass::secondary_supers_offset());
       
   243   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
       
   244   Address secondary_supers_addr(sub_klass, ss_offset);
       
   245   Address super_cache_addr(     sub_klass, sc_offset);
       
   246 
       
   247 #ifndef PRODUCT
       
   248   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
       
   249 #endif
       
   250 
       
   251   // We will consult the secondary-super array.
       
   252   ldr(scan_temp, Address(sub_klass, ss_offset));
       
   253 
       
   254   assert(! UseCompressedOops, "search_key must be the compressed super_klass");
       
   255   // else search_key is the uncompressed super_klass.
       
   256   Register search_key = super_klass;
       
   257 
       
   258   // Load the array length.
       
   259   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
       
   260   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
       
   261 
       
   262   add(count_temp, count_temp, 1);
       
   263 
       
   264   Label L_loop, L_setnz_and_fail, L_fail;
       
   265 
       
   266   // Top of search loop
       
   267   bind(L_loop);
       
   268   // Notes:
       
   269   //  scan_temp starts at the array elements
       
   270   //  count_temp is 1+size
       
   271   subs(count_temp, count_temp, 1);
       
   272   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
       
   273     // direct jump to L_failure if failed and no cleanup needed
       
   274     b(*L_failure, eq); // not found and no cleanup needed
       
   275   } else {
       
   276     b(L_fail, eq); // not found in the array
       
   277   }
       
   278 
       
   279   // Load next super to check
       
   280   // In the array of super classes elements are pointer sized.
       
   281   int element_size = wordSize;
       
   282   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
       
   283 
       
   284   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
       
   285   subs(cmp_temp, cmp_temp, search_key);
       
   286 
       
   287   // A miss means we are NOT a subtype and need to keep looping
       
   288   b(L_loop, ne);
       
   289 
       
   290   // Falling out the bottom means we found a hit; we ARE a subtype
       
   291 
       
   292   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
       
   293 
       
   294   // Success.  Cache the super we found and proceed in triumph.
       
   295   str(super_klass, Address(sub_klass, sc_offset));
       
   296 
       
   297   if (saved_reg != noreg) {
       
   298     // Return success
       
   299     pop(saved_reg);
       
   300   }
       
   301 
       
   302   b(*L_success);
       
   303 
       
   304   bind(L_fail);
       
   305   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
       
   306   if (set_cond_codes) {
       
   307     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
       
   308   }
       
   309   if (saved_reg != noreg) {
       
   310     pop(saved_reg);
       
   311   }
       
   312   if (L_failure != &L_fallthrough) {
       
   313     b(*L_failure);
       
   314   }
       
   315 
       
   316   bind(L_fallthrough);
       
   317 #endif
       
   318 }
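       // Illustrative sketch (not generated code) of the scan above, in
       // pseudo-C; it assumes uncompressed Klass* elements, as asserted:
       //
       //   Array<Klass*>* ss = sub_klass->secondary_supers();
       //   for (int i = 0; i < ss->length(); i++) {
       //     if (ss->at(i) == super_klass) {
       //       sub_klass->set_secondary_super_cache(super_klass);  // remember the hit
       //       goto success;
       //     }
       //   }
       //   goto failure;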
       
   319 
       
   320 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
       
   321 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
       
   322   assert_different_registers(params_base, params_count);
       
   323   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
       
   324   return Address(tmp, -Interpreter::stackElementSize);
       
   325 }
       
   326 
       
   327 
       
   328 void MacroAssembler::align(int modulus) {
       
   329   while (offset() % modulus != 0) {
       
   330     nop();
       
   331   }
       
   332 }
       
   333 
       
   334 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
       
   335                                         Register last_java_fp,
       
   336                                         bool save_last_java_pc,
       
   337                                         Register tmp) {
       
   338   int pc_offset;
       
   339   if (last_java_fp != noreg) {
       
   340     // optional
       
   341     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
       
   342     _fp_saved = true;
       
   343   } else {
       
   344     _fp_saved = false;
       
   345   }
       
   346   if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
       
   347 #ifdef AARCH64
       
   348     pc_offset = mov_pc_to(tmp);
       
   349     str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
       
   350 #else
       
   351     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
       
   352     pc_offset = offset() + VM_Version::stored_pc_adjustment();
       
   353 #endif
       
   354     _pc_saved = true;
       
   355   } else {
       
   356     _pc_saved = false;
       
   357     pc_offset = -1;
       
   358   }
       
   359   // According to the comment in javaFrameAnchor.hpp, SP must be saved last, so that other
       
   360   // entries are valid when SP is set.
       
   361 
       
   362   // However, this is probably not a strong constraint since, for instance, PC is
       
   363   // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
       
   364   // we now write the fields in the expected order but we have not added a StoreStore
       
   365   // barrier.
       
   366 
       
   367   // XXX: if the ordering is really important, PC should always be saved (without forgetting
       
   368   // to update oop_map offsets) and a StoreStore barrier might be needed.
       
   369 
       
   370   if (last_java_sp == noreg) {
       
   371     last_java_sp = SP; // always saved
       
   372   }
       
   373 #ifdef AARCH64
       
   374   if (last_java_sp == SP) {
       
   375     mov(tmp, SP);
       
   376     str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
       
   377   } else {
       
   378     str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
       
   379   }
       
   380 #else
       
   381   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
       
   382 #endif
       
   383 
       
   384   return pc_offset; // for oopmaps
       
   385 }
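       // For illustration only: the stores above amount to filling the
       // thread's JavaFrameAnchor in this order (field names approximate):
       //
       //   anchor->_last_Java_fp = last_java_fp;   // optional
       //   anchor->_last_Java_pc = saved_pc;       // optional on 32-bit ARM
       //   anchor->_last_Java_sp = last_java_sp;   // intentionally written last
       //
       // so that a stack walker observing a non-NULL last_Java_sp sees valid
       // fp/pc values.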
       
   386 
       
   387 void MacroAssembler::reset_last_Java_frame(Register tmp) {
       
   388   const Register Rzero = zero_register(tmp);
       
   389   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
       
   390   if (_fp_saved) {
       
   391     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
       
   392   }
       
   393   if (_pc_saved) {
       
   394     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
       
   395   }
       
   396 }
       
   397 
       
   398 
       
   399 // Implementation of call_VM versions
       
   400 
       
   401 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
       
   402   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
       
   403   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
       
   404 
       
   405 #ifndef AARCH64
       
   406   // Safer to save R9 here since callers may have been written
       
   407   // assuming R9 survives. This is suboptimal but is not worth
       
   408   // optimizing for the few platforms where R9 is scratched.
       
   409   push(RegisterSet(R4) | R9ifScratched);
       
   410   mov(R4, SP);
       
   411   bic(SP, SP, StackAlignmentInBytes - 1);
       
   412 #endif // AARCH64
       
   413   call(entry_point, relocInfo::runtime_call_type);
       
   414 #ifndef AARCH64
       
   415   mov(SP, R4);
       
   416   pop(RegisterSet(R4) | R9ifScratched);
       
   417 #endif // AARCH64
       
   418 }
       
   419 
       
   420 
       
   421 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
       
   422   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
       
   423   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
       
   424 
       
   425   const Register tmp = Rtemp;
       
   426   assert_different_registers(oop_result, tmp);
       
   427 
       
   428   set_last_Java_frame(SP, FP, true, tmp);
       
   429 
       
   430 #ifdef ASSERT
       
   431   AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
       
   432 #endif // ASSERT
       
   433 
       
   434 #ifndef AARCH64
       
   435 #if R9_IS_SCRATCHED
       
   436   // Safer to save R9 here since callers may have been written
       
   437   // assuming R9 survives. This is suboptimal but is not worth
       
   438   // optimizing for the few platforms where R9 is scratched.
       
   439 
       
   440   // Note: cannot save R9 above the saved SP (some calls expect for
       
   441   // instance the Java stack top at the saved SP)
       
   442   // => once saved (with set_last_Java_frame), decrease SP before rounding to
       
   443   // ensure the slot at SP will be free for R9.
       
   444   sub(SP, SP, 4);
       
   445   bic(SP, SP, StackAlignmentInBytes - 1);
       
   446   str(R9, Address(SP, 0));
       
   447 #else
       
   448   bic(SP, SP, StackAlignmentInBytes - 1);
       
   449 #endif // R9_IS_SCRATCHED
       
   450 #endif
       
   451 
       
   452   mov(R0, Rthread);
       
   453   call(entry_point, relocInfo::runtime_call_type);
       
   454 
       
   455 #ifndef AARCH64
       
   456 #if R9_IS_SCRATCHED
       
   457   ldr(R9, Address(SP, 0));
       
   458 #endif
       
   459   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
       
   460 #endif
       
   461 
       
   462   reset_last_Java_frame(tmp);
       
   463 
       
   464   // C++ interp handles this in the interpreter
       
   465   check_and_handle_popframe();
       
   466   check_and_handle_earlyret();
       
   467 
       
   468   if (check_exceptions) {
       
   469     // check for pending exceptions
       
   470     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
       
   471 #ifdef AARCH64
       
   472     Label L;
       
   473     cbz(tmp, L);
       
   474     mov_pc_to(Rexception_pc);
       
   475     b(StubRoutines::forward_exception_entry());
       
   476     bind(L);
       
   477 #else
       
   478     cmp(tmp, 0);
       
   479     mov(Rexception_pc, PC, ne);
       
   480     b(StubRoutines::forward_exception_entry(), ne);
       
   481 #endif // AARCH64
       
   482   }
       
   483 
       
   484   // get oop result if there is one and reset the value in the thread
       
   485   if (oop_result->is_valid()) {
       
   486     get_vm_result(oop_result, tmp);
       
   487   }
       
   488 }
       
   489 
       
   490 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
       
   491   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
       
   492 }
       
   493 
       
   494 
       
   495 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
       
   496   assert (arg_1 == R1, "fixed register for arg_1");
       
   497   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
       
   498 }
       
   499 
       
   500 
       
   501 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
       
   502   assert (arg_1 == R1, "fixed register for arg_1");
       
   503   assert (arg_2 == R2, "fixed register for arg_2");
       
   504   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
       
   505 }
       
   506 
       
   507 
       
   508 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
       
   509   assert (arg_1 == R1, "fixed register for arg_1");
       
   510   assert (arg_2 == R2, "fixed register for arg_2");
       
   511   assert (arg_3 == R3, "fixed register for arg_3");
       
   512   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
       
   513 }
       
   514 
       
   515 
       
   516 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
       
   517   // Not used on ARM
       
   518   Unimplemented();
       
   519 }
       
   520 
       
   521 
       
   522 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
       
   523   // Not used on ARM
       
   524   Unimplemented();
       
   525 }
       
   526 
       
   527 
       
   528 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
       
   529   // Not used on ARM
       
   530   Unimplemented();
       
   531 }
       
   532 
       
   533 
       
   534 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
       
   535   // Not used on ARM
       
   536   Unimplemented();
       
   537 }
       
   538 
       
   539 // Raw call, without saving/restoring registers, exception handling, etc.
       
   540 // Mainly used from various stubs.
       
   541 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
       
   542   const Register tmp = Rtemp; // Rtemp free since scratched by call
       
   543   set_last_Java_frame(SP, FP, true, tmp);
       
   544 #if R9_IS_SCRATCHED
       
   545   if (save_R9_if_scratched) {
       
   546     // Note: Saving also R10 for alignment.
       
   547     push(RegisterSet(R9, R10));
       
   548   }
       
   549 #endif
       
   550   mov(R0, Rthread);
       
   551   call(entry_point, relocInfo::runtime_call_type);
       
   552 #if R9_IS_SCRATCHED
       
   553   if (save_R9_if_scratched) {
       
   554     pop(RegisterSet(R9, R10));
       
   555   }
       
   556 #endif
       
   557   reset_last_Java_frame(tmp);
       
   558 }
       
   559 
       
   560 void MacroAssembler::call_VM_leaf(address entry_point) {
       
   561   call_VM_leaf_helper(entry_point, 0);
       
   562 }
       
   563 
       
   564 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
       
   565   assert (arg_1 == R0, "fixed register for arg_1");
       
   566   call_VM_leaf_helper(entry_point, 1);
       
   567 }
       
   568 
       
   569 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
       
   570   assert (arg_1 == R0, "fixed register for arg_1");
       
   571   assert (arg_2 == R1, "fixed register for arg_2");
       
   572   call_VM_leaf_helper(entry_point, 2);
       
   573 }
       
   574 
       
   575 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
       
   576   assert (arg_1 == R0, "fixed register for arg_1");
       
   577   assert (arg_2 == R1, "fixed register for arg_2");
       
   578   assert (arg_3 == R2, "fixed register for arg_3");
       
   579   call_VM_leaf_helper(entry_point, 3);
       
   580 }
       
   581 
       
   582 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
       
   583   assert (arg_1 == R0, "fixed register for arg_1");
       
   584   assert (arg_2 == R1, "fixed register for arg_2");
       
   585   assert (arg_3 == R2, "fixed register for arg_3");
       
   586   assert (arg_4 == R3, "fixed register for arg_4");
       
   587   call_VM_leaf_helper(entry_point, 4);
       
   588 }
       
   589 
       
   590 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
       
   591   assert_different_registers(oop_result, tmp);
       
   592   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
       
   593   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
       
   594   verify_oop(oop_result);
       
   595 }
       
   596 
       
   597 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
       
   598   assert_different_registers(metadata_result, tmp);
       
   599   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
       
   600   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
       
   601 }
       
   602 
       
   603 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
       
   604   if (arg2.is_register()) {
       
   605     add(dst, arg1, arg2.as_register());
       
   606   } else {
       
   607     add(dst, arg1, arg2.as_constant());
       
   608   }
       
   609 }
       
   610 
       
   611 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
       
   612 #ifdef AARCH64
       
   613   if (c == 0) {
       
   614     if (rd != rn) {
       
   615       mov(rd, rn);
       
   616     }
       
   617     return;
       
   618   }
       
   619   if (c < 0) {
       
   620     sub_slow(rd, rn, -c);
       
   621     return;
       
   622   }
       
   623   if (c > right_n_bits(24)) {
       
   624     guarantee(rd != rn, "no large add_slow with only one register");
       
   625     mov_slow(rd, c);
       
   626     add(rd, rn, rd);
       
   627   } else {
       
   628     int lo = c & right_n_bits(12);
       
   629     int hi = (c >> 12) & right_n_bits(12);
       
   630     if (lo != 0) {
       
   631       add(rd, rn, lo, lsl0);
       
   632     }
       
   633     if (hi != 0) {
       
   634       add(rd, (lo == 0) ? rn : rd, hi, lsl12);
       
   635     }
       
   636   }
       
   637 #else
       
   638   // This function is used in the compiler for handling large frame offsets
       
   639   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
       
   640     return sub(rd, rn, (-c));
       
   641   }
       
   642   int low = c & 0x3fc;
       
   643   if (low != 0) {
       
   644     add(rd, rn, low);
       
   645     rn = rd;
       
   646   }
       
   647   if (c & ~0x3fc) {
       
   648     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
       
   649     add(rd, rn, c & ~0x3fc);
       
   650   } else if (rd != rn) {
       
   651     assert(c == 0, "");
       
   652     mov(rd, rn); // need to generate at least one move!
       
   653   }
       
   654 #endif // AARCH64
       
   655 }
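       // Worked example (illustration only): add_slow(rd, rn, 0x1234)
       //   32-bit ARM path:  0x1234 & 0x3fc  == 0x234  -> add(rd, rn, 0x234)
       //                     0x1234 & ~0x3fc == 0x1000 -> add(rd, rd, 0x1000)   (rotated imm)
       //   AArch64 path:     lo == 0x234, hi == 0x1    -> add(rd, rn, 0x234, lsl0)
       //                                                  add(rd, rd, 0x1, lsl12)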
       
   656 
       
   657 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
       
   658 #ifdef AARCH64
       
   659   if (c <= 0) {
       
   660     add_slow(rd, rn, -c);
       
   661     return;
       
   662   }
       
   663   if (c > right_n_bits(24)) {
       
   664     guarantee(rd != rn, "no large sub_slow with only one register");
       
   665     mov_slow(rd, c);
       
   666     sub(rd, rn, rd);
       
   667   } else {
       
   668     int lo = c & right_n_bits(12);
       
   669     int hi = (c >> 12) & right_n_bits(12);
       
   670     if (lo != 0) {
       
   671       sub(rd, rn, lo, lsl0);
       
   672     }
       
   673     if (hi != 0) {
       
   674       sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
       
   675     }
       
   676   }
       
   677 #else
       
   678   // This function is used in the compiler for handling large frame offsets
       
   679   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
       
   680     return add(rd, rn, (-c));
       
   681   }
       
   682   int low = c & 0x3fc;
       
   683   if (low != 0) {
       
   684     sub(rd, rn, low);
       
   685     rn = rd;
       
   686   }
       
   687   if (c & ~0x3fc) {
       
   688     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
       
   689     sub(rd, rn, c & ~0x3fc);
       
   690   } else if (rd != rn) {
       
   691     assert(c == 0, "");
       
   692     mov(rd, rn); // need to generate at least one move!
       
   693   }
       
   694 #endif // AARCH64
       
   695 }
       
   696 
       
   697 void MacroAssembler::mov_slow(Register rd, address addr) {
       
   698   // do *not* call the non relocated mov_related_address
       
   699   mov_slow(rd, (intptr_t)addr);
       
   700 }
       
   701 
       
   702 void MacroAssembler::mov_slow(Register rd, const char *str) {
       
   703   mov_slow(rd, (intptr_t)str);
       
   704 }
       
   705 
       
   706 #ifdef AARCH64
       
   707 
       
   708 // Common code for mov_slow and instr_count_for_mov_slow.
       
   709 // Returns number of instructions of mov_slow pattern,
       
   710 // generating it if non-null MacroAssembler is given.
       
   711 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
       
   712   // This code pattern is matched in NativeInstruction::is_mov_slow.
       
   713   // Update it at modifications.
       
   714 
       
   715   const intx mask = right_n_bits(16);
       
   716   // 1 movz instruction
       
   717   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
       
   718     if ((c & ~(mask << base_shift)) == 0) {
       
   719       if (masm != NULL) {
       
   720         masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
       
   721       }
       
   722       return 1;
       
   723     }
       
   724   }
       
   725   // 1 movn instruction
       
   726   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
       
   727     if (((~c) & ~(mask << base_shift)) == 0) {
       
   728       if (masm != NULL) {
       
   729         masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
       
   730       }
       
   731       return 1;
       
   732     }
       
   733   }
       
   734   // 1 orr instruction
       
   735   {
       
   736     LogicalImmediate imm(c, false);
       
   737     if (imm.is_encoded()) {
       
   738       if (masm != NULL) {
       
   739         masm->orr(rd, ZR, imm);
       
   740       }
       
   741       return 1;
       
   742     }
       
   743   }
       
   744   // 1 movz/movn + up to 3 movk instructions
       
   745   int zeroes = 0;
       
   746   int ones = 0;
       
   747   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
       
   748     int part = (c >> base_shift) & mask;
       
   749     if (part == 0) {
       
   750       ++zeroes;
       
   751     } else if (part == mask) {
       
   752       ++ones;
       
   753     }
       
   754   }
       
   755   int def_bits = 0;
       
   756   if (ones > zeroes) {
       
   757     def_bits = mask;
       
   758   }
       
   759   int inst_count = 0;
       
   760   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
       
   761     int part = (c >> base_shift) & mask;
       
   762     if (part != def_bits) {
       
   763       if (masm != NULL) {
       
   764         if (inst_count > 0) {
       
   765           masm->movk(rd, part, base_shift);
       
   766         } else {
       
   767           if (def_bits == 0) {
       
   768             masm->movz(rd, part, base_shift);
       
   769           } else {
       
   770             masm->movn(rd, ~part & mask, base_shift);
       
   771           }
       
   772         }
       
   773       }
       
   774       inst_count++;
       
   775     }
       
   776   }
       
   777   assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
       
   778   return inst_count;
       
   779 }
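       // Worked examples (illustration only) of the pattern selection above:
       //   c = 0x12340000         -> movz rd, #0x1234, lsl #16                      (1 instruction)
       //   c = 0xFFFFFFFFFFFF1234 -> movn rd, #0xEDCB                               (1 instruction)
       //   c = 0x0000123400005678 -> movz rd, #0x5678; movk rd, #0x1234, lsl #32    (2 instructions)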
       
   780 
       
   781 void MacroAssembler::mov_slow(Register rd, intptr_t c) {
       
   782 #ifdef ASSERT
       
   783   int off = offset();
       
   784 #endif
       
   785   (void) mov_slow_helper(rd, c, this);
       
   786   assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
       
   787 }
       
   788 
       
   789 // Counts instructions generated by mov_slow(rd, c).
       
   790 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
       
   791   return mov_slow_helper(noreg, c, NULL);
       
   792 }
       
   793 
       
   794 int MacroAssembler::instr_count_for_mov_slow(address c) {
       
   795   return mov_slow_helper(noreg, (intptr_t)c, NULL);
       
   796 }
       
   797 
       
   798 #else
       
   799 
       
   800 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
       
   801   if (AsmOperand::is_rotated_imm(c)) {
       
   802     mov(rd, c, cond);
       
   803   } else if (AsmOperand::is_rotated_imm(~c)) {
       
   804     mvn(rd, ~c, cond);
       
   805   } else if (VM_Version::supports_movw()) {
       
   806     movw(rd, c & 0xffff, cond);
       
   807     if ((unsigned int)c >> 16) {
       
   808       movt(rd, (unsigned int)c >> 16, cond);
       
   809     }
       
   810   } else {
       
   811     // Find first non-zero bit
       
   812     int shift = 0;
       
   813     while ((c & (3 << shift)) == 0) {
       
   814       shift += 2;
       
   815     }
       
   816     // Put the least significant part of the constant
       
   817     int mask = 0xff << shift;
       
   818     mov(rd, c & mask, cond);
       
   819     // Add up to 3 other parts of the constant;
       
   820     // each of them can be represented as rotated_imm
       
   821     if (c & (mask << 8)) {
       
   822       orr(rd, rd, c & (mask << 8), cond);
       
   823     }
       
   824     if (c & (mask << 16)) {
       
   825       orr(rd, rd, c & (mask << 16), cond);
       
   826     }
       
   827     if (c & (mask << 24)) {
       
   828       orr(rd, rd, c & (mask << 24), cond);
       
   829     }
       
   830   }
       
   831 }
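       // Worked example (illustration only) of the fallback path above, for a
       // core without movw/movt and a constant that is not a rotated immediate:
       //   c = 0x00FF00FF: shift == 0, mask == 0xff
       //     mov(rd, 0x000000ff);       // least significant part
       //     orr(rd, rd, 0x00ff0000);   // c & (mask << 16); the remaining parts are zero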
       
   832 
       
   833 #endif // AARCH64
       
   834 
       
   835 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
       
   836 #ifdef AARCH64
       
   837                              bool patchable
       
   838 #else
       
   839                              AsmCondition cond
       
   840 #endif
       
   841                              ) {
       
   842 
       
   843   if (o == NULL) {
       
   844 #ifdef AARCH64
       
   845     if (patchable) {
       
   846       nop();
       
   847     }
       
   848     mov(rd, ZR);
       
   849 #else
       
   850     mov(rd, 0, cond);
       
   851 #endif
       
   852     return;
       
   853   }
       
   854 
       
   855   if (oop_index == 0) {
       
   856     oop_index = oop_recorder()->allocate_oop_index(o);
       
   857   }
       
   858   relocate(oop_Relocation::spec(oop_index));
       
   859 
       
   860 #ifdef AARCH64
       
   861   if (patchable) {
       
   862     nop();
       
   863   }
       
   864   ldr(rd, pc());
       
   865 #else
       
   866   if (VM_Version::supports_movw()) {
       
   867     movw(rd, 0, cond);
       
   868     movt(rd, 0, cond);
       
   869   } else {
       
   870     ldr(rd, Address(PC), cond);
       
   871     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
       
   872     nop();
       
   873   }
       
   874 #endif
       
   875 }
       
   876 
       
   877 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
       
   878   if (o == NULL) {
       
   879 #ifdef AARCH64
       
   880     if (patchable) {
       
   881       nop();
       
   882     }
       
   883 #endif
       
   884     mov(rd, 0);
       
   885     return;
       
   886   }
       
   887 
       
   888   if (metadata_index == 0) {
       
   889     metadata_index = oop_recorder()->allocate_metadata_index(o);
       
   890   }
       
   891   relocate(metadata_Relocation::spec(metadata_index));
       
   892 
       
   893 #ifdef AARCH64
       
   894   if (patchable) {
       
   895     nop();
       
   896   }
       
   897 #ifdef COMPILER2
       
   898   if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
       
   899     mov_slow(rd, (address)o);
       
   900     return;
       
   901   }
       
   902 #endif
       
   903   ldr(rd, pc());
       
   904 #else
       
   905   if (VM_Version::supports_movw()) {
       
   906     movw(rd, ((int)o) & 0xffff);
       
   907     movt(rd, (unsigned int)o >> 16);
       
   908   } else {
       
   909     ldr(rd, Address(PC));
       
   910     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
       
   911     nop();
       
   912   }
       
   913 #endif // AARCH64
       
   914 }
       
   915 
       
   916 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
       
   917   Label skip_constant;
       
   918   union {
       
   919     jfloat f;
       
   920     jint i;
       
   921   } accessor;
       
   922   accessor.f = c;
       
   923 
       
   924 #ifdef AARCH64
       
   925   // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
       
   926   Label L;
       
   927   ldr_s(fd, target(L));
       
   928   b(skip_constant);
       
   929   bind(L);
       
   930   emit_int32(accessor.i);
       
   931   bind(skip_constant);
       
   932 #else
       
   933   flds(fd, Address(PC), cond);
       
   934   b(skip_constant);
       
   935   emit_int32(accessor.i);
       
   936   bind(skip_constant);
       
   937 #endif // AARCH64
       
   938 }
       
   939 
       
   940 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
       
   941   Label skip_constant;
       
   942   union {
       
   943     jdouble d;
       
   944     jint i[2];
       
   945   } accessor;
       
   946   accessor.d = c;
       
   947 
       
   948 #ifdef AARCH64
       
   949   // TODO-AARCH64 - try to optimize loading of double constants with fmov
       
   950   Label L;
       
   951   ldr_d(fd, target(L));
       
   952   b(skip_constant);
       
   953   align(wordSize);
       
   954   bind(L);
       
   955   emit_int32(accessor.i[0]);
       
   956   emit_int32(accessor.i[1]);
       
   957   bind(skip_constant);
       
   958 #else
       
   959   fldd(fd, Address(PC), cond);
       
   960   b(skip_constant);
       
   961   emit_int32(accessor.i[0]);
       
   962   emit_int32(accessor.i[1]);
       
   963   bind(skip_constant);
       
   964 #endif // AARCH64
       
   965 }
       
   966 
       
   967 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
       
   968   intptr_t addr = (intptr_t) address_of_global;
       
   969 #ifdef AARCH64
       
   970   assert((addr & 0x3) == 0, "address should be aligned");
       
   971 
       
   972   // FIXME: TODO
       
   973   if (false && page_reachable_from_cache(address_of_global)) {
       
   974     assert(false,"TODO: relocate");
       
   975     //relocate();
       
   976     adrp(reg, address_of_global);
       
   977     ldrsw(reg, Address(reg, addr & 0xfff));
       
   978   } else {
       
   979     mov_slow(reg, addr & ~0x3fff);
       
   980     ldrsw(reg, Address(reg, addr & 0x3fff));
       
   981   }
       
   982 #else
       
   983   mov_slow(reg, addr & ~0xfff);
       
   984   ldr(reg, Address(reg, addr & 0xfff));
       
   985 #endif
       
   986 }
       
   987 
       
   988 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
       
   989 #ifdef AARCH64
       
   990   intptr_t addr = (intptr_t) address_of_global;
       
   991   assert ((addr & 0x7) == 0, "address should be aligned");
       
   992   mov_slow(reg, addr & ~0x7fff);
       
   993   ldr(reg, Address(reg, addr & 0x7fff));
       
   994 #else
       
   995   ldr_global_s32(reg, address_of_global);
       
   996 #endif
       
   997 }
       
   998 
       
   999 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
       
  1000   intptr_t addr = (intptr_t) address_of_global;
       
  1001   mov_slow(reg, addr & ~0xfff);
       
  1002   ldrb(reg, Address(reg, addr & 0xfff));
       
  1003 }
       
  1004 
       
  1005 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
       
  1006 #ifdef AARCH64
       
  1007   switch (bits) {
       
  1008     case  8: uxtb(rd, rn); break;
       
  1009     case 16: uxth(rd, rn); break;
       
  1010     case 32: mov_w(rd, rn); break;
       
  1011     default: ShouldNotReachHere();
       
  1012   }
       
  1013 #else
       
  1014   if (bits <= 8) {
       
  1015     andr(rd, rn, (1 << bits) - 1);
       
  1016   } else if (bits >= 24) {
       
  1017     bic(rd, rn, -1 << bits);
       
  1018   } else {
       
  1019     mov(rd, AsmOperand(rn, lsl, 32 - bits));
       
  1020     mov(rd, AsmOperand(rd, lsr, 32 - bits));
       
  1021   }
       
  1022 #endif
       
  1023 }
       
  1024 
       
  1025 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
       
  1026 #ifdef AARCH64
       
  1027   switch (bits) {
       
  1028     case  8: sxtb(rd, rn); break;
       
  1029     case 16: sxth(rd, rn); break;
       
  1030     case 32: sxtw(rd, rn); break;
       
  1031     default: ShouldNotReachHere();
       
  1032   }
       
  1033 #else
       
  1034   mov(rd, AsmOperand(rn, lsl, 32 - bits));
       
  1035   mov(rd, AsmOperand(rd, asr, 32 - bits));
       
  1036 #endif
       
  1037 }
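       // For illustration only: on 32-bit ARM the shift pair above computes,
       // e.g. for bits == 16,
       //   rd = (int32_t)((uint32_t)rn << 16) >> 16;
       // i.e. bit 15 of rn is replicated into the upper half of rd.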
       
  1038 
       
  1039 #ifndef AARCH64
       
  1040 
       
  1041 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
       
  1042                                Register rn_lo, Register rn_hi,
       
  1043                                AsmCondition cond) {
       
  1044   if (rd_lo != rn_hi) {
       
  1045     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
       
  1046     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
       
  1047   } else if (rd_hi != rn_lo) {
       
  1048     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
       
  1049     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
       
  1050   } else {
       
  1051     eor(rd_lo, rd_hi, rd_lo, cond);
       
  1052     eor(rd_hi, rd_lo, rd_hi, cond);
       
  1053     eor(rd_lo, rd_hi, rd_lo, cond);
       
  1054   }
       
  1055 }
       
  1056 
       
  1057 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
       
  1058                                 Register rn_lo, Register rn_hi,
       
  1059                                 AsmShift shift, Register count) {
       
  1060   Register tmp;
       
  1061   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
       
  1062     tmp = rd_lo;
       
  1063   } else {
       
  1064     tmp = rd_hi;
       
  1065   }
       
  1066   assert_different_registers(tmp, count, rn_lo, rn_hi);
       
  1067 
       
  1068   subs(tmp, count, 32);
       
  1069   if (shift == lsl) {
       
  1070     assert_different_registers(rd_hi, rn_lo);
       
  1071     assert_different_registers(count, rd_hi);
       
  1072     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
       
  1073     rsb(tmp, count, 32, mi);
       
  1074     if (rd_hi == rn_hi) {
       
  1075       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
       
  1076       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
       
  1077     } else {
       
  1078       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
       
  1079       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
       
  1080     }
       
  1081     mov(rd_lo, AsmOperand(rn_lo, shift, count));
       
  1082   } else {
       
  1083     assert_different_registers(rd_lo, rn_hi);
       
  1084     assert_different_registers(rd_lo, count);
       
  1085     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
       
  1086     rsb(tmp, count, 32, mi);
       
  1087     if (rd_lo == rn_lo) {
       
  1088       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
       
  1089       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
       
  1090     } else {
       
  1091       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
       
  1092       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
       
  1093     }
       
  1094     mov(rd_hi, AsmOperand(rn_hi, shift, count));
       
  1095   }
       
  1096 }
       
  1097 
       
  1098 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
       
  1099                                 Register rn_lo, Register rn_hi,
       
  1100                                 AsmShift shift, int count) {
       
  1101   assert(count != 0 && (count & ~63) == 0, "must be");
       
  1102 
       
  1103   if (shift == lsl) {
       
  1104     assert_different_registers(rd_hi, rn_lo);
       
  1105     if (count >= 32) {
       
  1106       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
       
  1107       mov(rd_lo, 0);
       
  1108     } else {
       
  1109       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
       
  1110       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
       
  1111       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
       
  1112     }
       
  1113   } else {
       
  1114     assert_different_registers(rd_lo, rn_hi);
       
  1115     if (count >= 32) {
       
  1116       if (count == 32) {
       
  1117         mov(rd_lo, rn_hi);
       
  1118       } else {
       
  1119         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
       
  1120       }
       
  1121       if (shift == asr) {
       
  1122         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
       
  1123       } else {
       
  1124         mov(rd_hi, 0);
       
  1125       }
       
  1126     } else {
       
  1127       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
       
  1128       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
       
  1129       mov(rd_hi, AsmOperand(rn_hi, shift, count));
       
  1130     }
       
  1131   }
       
  1132 }
       
  1133 #endif // !AARCH64
       
  1134 
       
  1135 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
       
  1136   // This code pattern is matched in NativeInstruction::skip_verify_oop.
       
  1137   // Update it at modifications.
       
  1138   if (!VerifyOops) return;
       
  1139 
       
  1140   char buffer[64];
       
  1141 #ifdef COMPILER1
       
  1142   if (CommentedAssembly) {
       
  1143     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
       
  1144     block_comment(buffer);
       
  1145   }
       
  1146 #endif
       
  1147   const char* msg_buffer = NULL;
       
  1148   {
       
  1149     ResourceMark rm;
       
  1150     stringStream ss;
       
  1151     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
       
  1152     msg_buffer = code_string(ss.as_string());
       
  1153   }
       
  1154 
       
  1155   save_all_registers();
       
  1156 
       
  1157   if (reg != R2) {
       
  1158       mov(R2, reg);                              // oop to verify
       
  1159   }
       
  1160   mov(R1, SP);                                   // register save area
       
  1161 
       
  1162   Label done;
       
  1163   InlinedString Lmsg(msg_buffer);
       
  1164   ldr_literal(R0, Lmsg);                         // message
       
  1165 
       
  1166   // call indirectly to solve generation ordering problem
       
  1167   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
       
  1168   call(Rtemp);
       
  1169 
       
  1170   restore_all_registers();
       
  1171 
       
  1172   b(done);
       
  1173 #ifdef COMPILER2
       
  1174   int off = offset();
       
  1175 #endif
       
  1176   bind_literal(Lmsg);
       
  1177 #ifdef COMPILER2
       
  1178   if (offset() - off == 1 * wordSize) {
       
  1179     // no padding, so insert nop for worst-case sizing
       
  1180     nop();
       
  1181   }
       
  1182 #endif
       
  1183   bind(done);
       
  1184 }
       
  1185 
       
  1186 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
       
  1187   if (!VerifyOops) return;
       
  1188 
       
  1189   const char* msg_buffer = NULL;
       
  1190   {
       
  1191     ResourceMark rm;
       
  1192     stringStream ss;
       
  1193     if ((addr.base() == SP) && (addr.index()==noreg)) {
       
  1194       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
       
  1195     } else {
       
  1196       ss.print("verify_oop_addr: %s", s);
       
  1197     }
       
  1198     ss.print(" (%s:%d)", file, line);
       
  1199     msg_buffer = code_string(ss.as_string());
       
  1200   }
       
  1201 
       
  1202   int push_size = save_all_registers();
       
  1203 
       
  1204   if (addr.base() == SP) {
       
  1205     // computes an addr that takes into account the push
       
  1206     if (addr.index() != noreg) {
       
  1207       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
       
  1208       add(new_base, SP, push_size);
       
  1209       addr = addr.rebase(new_base);
       
  1210     } else {
       
  1211       addr = addr.plus_disp(push_size);
       
  1212     }
       
  1213   }
       
  1214 
       
  1215   ldr(R2, addr);                                 // oop to verify
       
  1216   mov(R1, SP);                                   // register save area
       
  1217 
       
  1218   Label done;
       
  1219   InlinedString Lmsg(msg_buffer);
       
  1220   ldr_literal(R0, Lmsg);                         // message
       
  1221 
       
  1222   // call indirectly to solve generation ordering problem
       
  1223   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
       
  1224   call(Rtemp);
       
  1225 
       
  1226   restore_all_registers();
       
  1227 
       
  1228   b(done);
       
  1229   bind_literal(Lmsg);
       
  1230   bind(done);
       
  1231 }
       
  1232 
       
  1233 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
       
  1234   if (needs_explicit_null_check(offset)) {
       
  1235 #ifdef AARCH64
       
  1236     ldr(ZR, Address(reg));
       
  1237 #else
       
  1238     assert_different_registers(reg, tmp);
       
  1239     if (tmp == noreg) {
       
  1240       tmp = Rtemp;
       
  1241       assert((! Thread::current()->is_Compiler_thread()) ||
       
  1242              (! (ciEnv::current()->task() == NULL)) ||
       
  1243              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
       
  1244              "Rtemp not available in C2"); // explicit tmp register required
       
  1245       // XXX: could we mark the code buffer as not compatible with C2 ?
       
  1246     }
       
  1247     ldr(tmp, Address(reg));
       
  1248 #endif
       
  1249   }
       
  1250 }
       
  1251 
       
  1252 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
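       // The shared eden top pointer is bumped with a CAS retry loop (ldxr/stxr on AArch64,
       // atomic_cas_bool on 32-bit ARM); contention simply retries, while pointer overflow or
       // exhaustion of the contiguous eden space branches to `slow_case`.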
       
  1253 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
       
  1254                                  RegisterOrConstant size_expression, Label& slow_case) {
       
  1255   if (!Universe::heap()->supports_inline_contig_alloc()) {
       
  1256     b(slow_case);
       
  1257     return;
       
  1258   }
       
  1259 
       
  1260   CollectedHeap* ch = Universe::heap();
       
  1261 
       
  1262   const Register top_addr = tmp1;
       
  1263   const Register heap_end = tmp2;
       
  1264 
       
  1265   if (size_expression.is_register()) {
       
  1266     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
       
  1267   } else {
       
  1268     assert_different_registers(obj, obj_end, top_addr, heap_end);
       
  1269   }
       
  1270 
       
  1271   bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
       
  1272   if (load_const) {
       
  1273     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
       
  1274   } else {
       
  1275     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
       
  1276   }
       
  1277   // Calculate new heap_top by adding the size of the object
       
  1278   Label retry;
       
  1279   bind(retry);
       
  1280 
       
  1281 #ifdef AARCH64
       
  1282   ldxr(obj, top_addr);
       
  1283 #else
       
  1284   ldr(obj, Address(top_addr));
       
  1285 #endif // AARCH64
       
  1286 
       
  1287   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
       
  1288   add_rc(obj_end, obj, size_expression);
       
  1289   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
       
  1290   cmp(obj_end, obj);
       
  1291   b(slow_case, lo);
       
  1292   // Update heap_top if allocation succeeded
       
  1293   cmp(obj_end, heap_end);
       
  1294   b(slow_case, hi);
       
  1295 
       
  1296 #ifdef AARCH64
       
  1297   stxr(heap_end/*scratched*/, obj_end, top_addr);
       
  1298   cbnz_w(heap_end, retry);
       
  1299 #else
       
  1300   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
       
  1301   b(retry, ne);
       
  1302 #endif // AARCH64
       
  1303 }
       
  1304 
       
  1305 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
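       // Unlike eden_allocate, no atomic update is needed: the TLAB is private to the current
       // thread, so the new top is simply stored back unless the request does not fit, in which
       // case control transfers to `slow_case`.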
       
  1306 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
       
  1307                                  RegisterOrConstant size_expression, Label& slow_case) {
       
  1308   const Register tlab_end = tmp1;
       
  1309   assert_different_registers(obj, obj_end, tlab_end);
       
  1310 
       
  1311   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
       
  1312   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
       
  1313   add_rc(obj_end, obj, size_expression);
       
  1314   cmp(obj_end, tlab_end);
       
  1315   b(slow_case, hi);
       
  1316   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
       
  1317 }
       
  1318 
       
  1319 void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2,
       
  1320                                  Register tmp3, Register tmp4,
       
  1321                                Label& try_eden, Label& slow_case) {
       
  1322   if (!Universe::heap()->supports_inline_contig_alloc()) {
       
  1323     b(slow_case);
       
  1324     return;
       
  1325   }
       
  1326 
       
  1327   InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr());
       
  1328   Label discard_tlab, do_refill;
       
  1329   ldr(top,  Address(Rthread, JavaThread::tlab_top_offset()));
       
  1330   ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
       
  1331   ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
       
  1332 
       
  1333   // Calculate amount of free space
       
  1334   sub(tmp1, tmp1, top);
       
  1335   // Retain tlab and allocate in shared space
       
  1336   // if the amount of free space in tlab is too large to discard
       
  1337   cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize));
       
  1338   b(discard_tlab, ge);
       
  1339 
       
  1340   // Increment waste limit to prevent getting stuck on this slow path
       
  1341   mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment());
       
  1342   add(tmp2, tmp2, tmp3);
       
  1343   str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
       
  1344   if (TLABStats) {
       
  1345     ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
       
  1346     add_32(tmp2, tmp2, 1);
       
  1347     str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
       
  1348   }
       
  1349   b(try_eden);
       
  1350   bind_literal(intArrayKlass_addr);
       
  1351 
       
  1352   bind(discard_tlab);
       
  1353   if (TLABStats) {
       
  1354     ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
       
  1355     ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
       
  1356     add_32(tmp2, tmp2, 1);
       
  1357     add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize));
       
  1358     str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
       
  1359     str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
       
  1360   }
       
  1361   // If tlab is currently allocated (top or end != null)
       
  1362   // then fill [top, end + alignment_reserve) with array object
       
  1363   cbz(top, do_refill);
       
  1364 
       
  1365   // Set up the mark word
       
  1366   mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
       
  1367   str(tmp2, Address(top, oopDesc::mark_offset_in_bytes()));
       
  1368   // Set klass to intArrayKlass and the length to the remaining space
       
  1369   ldr_literal(tmp2, intArrayKlass_addr);
       
  1370   add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() -
       
  1371       typeArrayOopDesc::header_size(T_INT) * HeapWordSize);
       
  1372   Register klass = tmp2;
       
  1373   ldr(klass, Address(tmp2));
       
  1374   logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint)
       
  1375   str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes()));
       
  1376   store_klass(klass, top); // blows klass:
       
  1377   klass = noreg;
       
  1378 
       
  1379   ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset()));
       
  1380   sub(tmp1, top, tmp1); // size of tlab's allocated portion
       
  1381   incr_allocated_bytes(tmp1, tmp2);
       
  1382 
       
  1383   bind(do_refill);
       
  1384   // Refill the tlab with an eden allocation
       
  1385   ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset()));
       
  1386   logical_shift_left(tmp4, tmp1, LogHeapWordSize);
       
  1387   eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case);
       
  1388   str(top, Address(Rthread, JavaThread::tlab_start_offset()));
       
  1389   str(top, Address(Rthread, JavaThread::tlab_top_offset()));
       
  1390 
       
  1391 #ifdef ASSERT
       
  1392   // Verify that tmp1 contains tlab_end
       
  1393   ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset()));
       
  1394   add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize));
       
  1395   cmp(tmp1, tmp2);
       
  1396   breakpoint(ne);
       
  1397 #endif
       
  1398 
       
  1399   sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
       
  1400   str(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
       
  1401 
       
  1402   if (ZeroTLAB) {
       
  1403     // clobbers start and tmp
       
  1404     // top must be preserved!
       
  1405     add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
       
  1406     ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset()));
       
  1407     zero_memory(tmp2, tmp1, tmp3);
       
  1408   }
       
  1409 }
       
  1410 
       
  1411 // Fills the memory region [start, end) with zeroes. Clobbers the `start` and `tmp` registers.
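       // Both paths expect a size that is a multiple of wordSize (asserted explicitly on
       // AArch64); `start` doubles as the running store pointer.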
       
  1412 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
       
  1413   Label loop;
       
  1414   const Register ptr = start;
       
  1415 
       
  1416 #ifdef AARCH64
       
  1417   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
       
  1418   const Register size = tmp;
       
  1419   Label remaining, done;
       
  1420 
       
  1421   sub(size, end, start);
       
  1422 
       
  1423 #ifdef ASSERT
       
  1424   { Label L;
       
  1425     tst(size, wordSize - 1);
       
  1426     b(L, eq);
       
  1427     stop("size is not a multiple of wordSize");
       
  1428     bind(L);
       
  1429   }
       
  1430 #endif // ASSERT
       
  1431 
       
  1432   subs(size, size, wordSize);
       
  1433   b(remaining, le);
       
  1434 
       
  1435   // Zero by 2 words per iteration.
       
  1436   bind(loop);
       
  1437   subs(size, size, 2*wordSize);
       
  1438   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
       
  1439   b(loop, gt);
       
  1440 
       
  1441   bind(remaining);
       
  1442   b(done, ne);
       
  1443   str(ZR, Address(ptr));
       
  1444   bind(done);
       
  1445 #else
       
  1446   mov(tmp, 0);
       
  1447   bind(loop);
       
  1448   cmp(ptr, end);
       
  1449   str(tmp, Address(ptr, wordSize, post_indexed), lo);
       
  1450   b(loop, lo);
       
  1451 #endif // AARCH64
       
  1452 }
       
  1453 
       
  1454 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
       
  1455 #ifdef AARCH64
       
  1456   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
       
  1457   add_rc(tmp, tmp, size_in_bytes);
       
  1458   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
       
  1459 #else
       
  1460   // Bump total bytes allocated by this thread
       
  1461   Label done;
       
  1462 
       
  1463   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
       
  1464   adds(tmp, tmp, size_in_bytes);
       
  1465   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
       
  1466   b(done, cc);
       
  1467 
       
  1468   // Increment the high word and store it single-copy atomically (an unlikely scenario on typical embedded systems, as it means >4GB has been allocated).

  1469   // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers. Such a request could be difficult to satisfy by

  1470   // allocating those registers at a higher level, therefore the routine is ready to allocate a pair itself.
       
  1471   Register low, high;
       
  1472   // Select either R0/R1 or R2/R3
       
  1473 
       
  1474   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
       
  1475     low = R2;
       
  1476     high  = R3;
       
  1477   } else {
       
  1478     low = R0;
       
  1479     high  = R1;
       
  1480   }
       
  1481   push(RegisterSet(low, high));
       
  1482 
       
  1483   ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
       
  1484   adds(low, low, size_in_bytes);
       
  1485   adc(high, high, 0);
       
  1486   strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
       
  1487 
       
  1488   pop(RegisterSet(low, high));
       
  1489 
       
  1490   bind(done);
       
  1491 #endif // AARCH64
       
  1492 }
       
  1493 
       
  1494 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
       
  1495   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
       
  1496   if (UseStackBanging) {
       
  1497     const int page_size = os::vm_page_size();
       
  1498 
       
  1499     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
       
  1500     strb(R0, Address(tmp));
       
  1501 #ifdef AARCH64
       
  1502     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
       
  1503       sub(tmp, tmp, page_size);
       
  1504       strb(R0, Address(tmp));
       
  1505     }
       
  1506 #else
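           // The 0xff0 stride (presumably chosen because it fits a pre-indexed strb offset and
           // is just under the usual 4K page size) guarantees that successive bangs cannot skip
           // an entire page.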
       
  1507     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
       
  1508       strb(R0, Address(tmp, -0xff0, pre_indexed));
       
  1509     }
       
  1510 #endif // AARCH64
       
  1511   }
       
  1512 }
       
  1513 
       
  1514 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
       
  1515   if (UseStackBanging) {
       
  1516     Label loop;
       
  1517 
       
  1518     mov(tmp, SP);
       
  1519     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
       
  1520 #ifdef AARCH64
       
  1521     sub(tmp, tmp, Rsize);
       
  1522     bind(loop);
       
  1523     subs(Rsize, Rsize, os::vm_page_size());
       
  1524     strb(ZR, Address(tmp, Rsize));
       
  1525 #else
       
  1526     bind(loop);
       
  1527     subs(Rsize, Rsize, 0xff0);
       
  1528     strb(R0, Address(tmp, -0xff0, pre_indexed));
       
  1529 #endif // AARCH64
       
  1530     b(loop, hi);
       
  1531   }
       
  1532 }
       
  1533 
       
  1534 void MacroAssembler::stop(const char* msg) {
       
  1535   // This code pattern is matched in NativeInstruction::is_stop.

  1536   // Keep that check in sync whenever this code is modified.
       
  1537 #ifdef COMPILER1
       
  1538   if (CommentedAssembly) {
       
  1539     block_comment("stop");
       
  1540   }
       
  1541 #endif
       
  1542 
       
  1543   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
       
  1544   InlinedString Lmsg(msg);
       
  1545 
       
  1546   // save all registers for further inspection
       
  1547   save_all_registers();
       
  1548 
       
  1549   ldr_literal(R0, Lmsg);                     // message
       
  1550   mov(R1, SP);                               // register save area
       
  1551 
       
  1552 #ifdef AARCH64
       
  1553   ldr_literal(Rtemp, Ldebug);
       
  1554   br(Rtemp);
       
  1555 #else
       
  1556   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
       
  1557 #endif // AARCH64
       
  1558 
       
  1559 #if defined(COMPILER2) && defined(AARCH64)
       
  1560   int off = offset();
       
  1561 #endif
       
  1562   bind_literal(Lmsg);
       
  1563   bind_literal(Ldebug);
       
  1564 #if defined(COMPILER2) && defined(AARCH64)
       
  1565   if (offset() - off == 2 * wordSize) {
       
  1566     // no padding, so insert nop for worst-case sizing
       
  1567     nop();
       
  1568   }
       
  1569 #endif
       
  1570 }
       
  1571 
       
  1572 void MacroAssembler::warn(const char* msg) {
       
  1573 #ifdef COMPILER1
       
  1574   if (CommentedAssembly) {
       
  1575     block_comment("warn");
       
  1576   }
       
  1577 #endif
       
  1578 
       
  1579   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
       
  1580   InlinedString Lmsg(msg);
       
  1581   Label done;
       
  1582 
       
  1583   int push_size = save_caller_save_registers();
       
  1584 
       
  1585 #ifdef AARCH64
       
  1586   // TODO-AARCH64 - get rid of extra debug parameters
       
  1587   mov(R1, LR);
       
  1588   mov(R2, FP);
       
  1589   add(R3, SP, push_size);
       
  1590 #endif
       
  1591 
       
  1592   ldr_literal(R0, Lmsg);                    // message
       
  1593   ldr_literal(LR, Lwarn);                   // call warning
       
  1594 
       
  1595   call(LR);
       
  1596 
       
  1597   restore_caller_save_registers();
       
  1598 
       
  1599   b(done);
       
  1600   bind_literal(Lmsg);
       
  1601   bind_literal(Lwarn);
       
  1602   bind(done);
       
  1603 }
       
  1604 
       
  1605 
       
  1606 int MacroAssembler::save_all_registers() {
       
  1607   // This code pattern is matched in NativeInstruction::is_save_all_registers.

  1608   // Keep that check in sync whenever this code is modified.
       
  1609 #ifdef AARCH64
       
  1610   const Register tmp = Rtemp;
       
  1611   raw_push(R30, ZR);
       
  1612   for (int i = 28; i >= 0; i -= 2) {
       
  1613       raw_push(as_Register(i), as_Register(i+1));
       
  1614   }
       
  1615   mov_pc_to(tmp);
       
  1616   str(tmp, Address(SP, 31*wordSize));
       
  1617   ldr(tmp, Address(SP, tmp->encoding()*wordSize));
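         // tmp (Rtemp) was clobbered above to capture the PC; reloading it from its own slot
         // in the save area leaves the caller's Rtemp value intact.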
       
  1618   return 32*wordSize;
       
  1619 #else
       
  1620   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
       
  1621   return 15*wordSize;
       
  1622 #endif // AARCH64
       
  1623 }
       
  1624 
       
  1625 void MacroAssembler::restore_all_registers() {
       
  1626 #ifdef AARCH64
       
  1627   for (int i = 0; i <= 28; i += 2) {
       
  1628     raw_pop(as_Register(i), as_Register(i+1));
       
  1629   }
       
  1630   raw_pop(R30, ZR);
       
  1631 #else
       
  1632   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
       
  1633   add(SP, SP, wordSize);                         // discard saved PC
       
  1634 #endif // AARCH64
       
  1635 }
       
  1636 
       
  1637 int MacroAssembler::save_caller_save_registers() {
       
  1638 #ifdef AARCH64
       
  1639   for (int i = 0; i <= 16; i += 2) {
       
  1640     raw_push(as_Register(i), as_Register(i+1));
       
  1641   }
       
  1642   raw_push(R18, LR);
       
  1643   return 20*wordSize;
       
  1644 #else
       
  1645 #if R9_IS_SCRATCHED
       
  1646   // Save also R10 to preserve alignment
       
  1647   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
       
  1648   return 8*wordSize;
       
  1649 #else
       
  1650   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
       
  1651   return 6*wordSize;
       
  1652 #endif
       
  1653 #endif // AARCH64
       
  1654 }
       
  1655 
       
  1656 void MacroAssembler::restore_caller_save_registers() {
       
  1657 #ifdef AARCH64
       
  1658   raw_pop(R18, LR);
       
  1659   for (int i = 16; i >= 0; i -= 2) {
       
  1660     raw_pop(as_Register(i), as_Register(i+1));
       
  1661   }
       
  1662 #else
       
  1663 #if R9_IS_SCRATCHED
       
  1664   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
       
  1665 #else
       
  1666   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
       
  1667 #endif
       
  1668 #endif // AARCH64
       
  1669 }
       
  1670 
       
  1671 void MacroAssembler::debug(const char* msg, const intx* registers) {
       
  1672   // In order to get locks to work, we need to fake an in_VM state
       
  1673   JavaThread* thread = JavaThread::current();
       
  1674   thread->set_thread_state(_thread_in_vm);
       
  1675 
       
  1676   if (ShowMessageBoxOnError) {
       
  1677     ttyLocker ttyl;
       
  1678     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
       
  1679       BytecodeCounter::print();
       
  1680     }
       
  1681     if (os::message_box(msg, "Execution stopped, print registers?")) {
       
  1682 #ifdef AARCH64
       
  1683       // saved registers: R0-R30, PC
       
  1684       const int nregs = 32;
       
  1685 #else
       
  1686       // saved registers: R0-R12, LR, PC
       
  1687       const int nregs = 15;
       
  1688       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
       
  1689 #endif // AARCH64
       
  1690 
       
  1691       for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
       
  1692         tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
       
  1693       }
       
  1694 
       
  1695 #ifdef AARCH64
       
  1696       tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
       
  1697 #endif // AARCH64
       
  1698 
       
  1699       // derive original SP value from the address of register save area
       
  1700       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
       
  1701     }
       
  1702     BREAKPOINT;
       
  1703   } else {
       
  1704     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
       
  1705   }
       
  1706   assert(false, "DEBUG MESSAGE: %s", msg);
       
  1707   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
       
  1708 }
       
  1709 
       
  1710 void MacroAssembler::unimplemented(const char* what) {
       
  1711   const char* buf = NULL;
       
  1712   {
       
  1713     ResourceMark rm;
       
  1714     stringStream ss;
       
  1715     ss.print("unimplemented: %s", what);
       
  1716     buf = code_string(ss.as_string());
       
  1717   }
       
  1718   stop(buf);
       
  1719 }
       
  1720 
       
  1721 
       
  1722 // Implementation of FixedSizeCodeBlock
       
  1723 
       
  1724 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
       
  1725 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
       
  1726 }
       
  1727 
       
  1728 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
       
  1729   if (_enabled) {
       
  1730     address curr_pc = _masm->pc();
       
  1731 
       
  1732     assert(_start < curr_pc, "invalid current pc");
       
  1733     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
       
  1734 
       
  1735     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
       
  1736     for (int i = 0; i < nops_count; i++) {
       
  1737       _masm->nop();
       
  1738     }
       
  1739   }
       
  1740 }
       
  1741 
       
  1742 #ifdef AARCH64
       
  1743 
       
  1744 // Serializes memory.
       
  1745 // The tmp register is not used on AArch64; the parameter is provided solely for better compatibility with 32-bit ARM.
       
  1746 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
       
  1747   if (!os::is_MP()) return;
       
  1748 
       
  1749   // TODO-AARCH64 investigate dsb vs dmb effects
       
  1750   if (order_constraint == StoreStore) {
       
  1751     dmb(DMB_st);
       
  1752   } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
       
  1753     dmb(DMB_ld);
       
  1754   } else {
       
  1755     dmb(DMB_all);
       
  1756   }
       
  1757 }
       
  1758 
       
  1759 #else
       
  1760 
       
  1761 // Serializes memory. Potentially blows flags and reg.
       
  1762 // tmp is a scratch for the v6 co-processor write op (could be noreg for other architecture versions)
       
  1763 // preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve the status flags. Optional.
       
  1764 // load_tgt is an ordered load target, used in the LoadStore case only to create a dependency between the load operation and the conditional branch. Optional.
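       // For reference, typical invocations used later in this file:
       //   membar(MacroAssembler::StoreStore, noreg);                                      // order prior stores before a CAS
       //   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);   // conservative full fence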
       
  1765 void MacroAssembler::membar(Membar_mask_bits order_constraint,
       
  1766                             Register tmp,
       
  1767                             bool preserve_flags,
       
  1768                             Register load_tgt) {
       
  1769   if (!os::is_MP()) return;
       
  1770 
       
  1771   if (order_constraint == StoreStore) {
       
  1772     dmb(DMB_st, tmp);
       
  1773   } else if ((order_constraint & StoreLoad)  ||
       
  1774              (order_constraint & LoadLoad)   ||
       
  1775              (order_constraint & StoreStore) ||
       
  1776              (load_tgt == noreg)             ||
       
  1777              preserve_flags) {
       
  1778     dmb(DMB_all, tmp);
       
  1779   } else {
       
  1780     // LoadStore: speculative store reordering is prohibited
       
  1781 
       
  1782     // By providing an ordered load target register, we avoid an extra memory load reference
       
  1783     Label not_taken;
       
  1784     bind(not_taken);
       
  1785     cmp(load_tgt, load_tgt);
       
  1786     b(not_taken, ne);
       
  1787   }
       
  1788 }
       
  1789 
       
  1790 #endif // AARCH64
       
  1791 
       
  1792 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
       
  1793 // on failure, so fall-through can only mean success.
       
  1794 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
       
  1795 // This is only needed for C2, which for some reason does not retry,
       
  1796 // while C1/interpreter does.
       
  1797 // TODO: measure if it makes a difference
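// After the generated sequence the condition flags still reflect the CAS attempt (ne on
// failure), except in the AArch64 retrying variant where fall-through already implies success;
// callers passing allow_fallthrough_on_failure = true are expected to test the flags themselves.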
       
  1798 
       
  1799 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
       
  1800   Register base, Register tmp, Label &slow_case,
       
  1801   bool allow_fallthrough_on_failure, bool one_shot)
       
  1802 {
       
  1803 
       
  1804   bool fallthrough_is_success = false;
       
  1805 
       
  1806   // ARM Litmus Test example does prefetching here.
       
  1807   // TODO: investigate if it helps performance
       
  1808 
       
  1809   // The last store was to the displaced header, so to prevent
       
  1810   // reordering we must issue a StoreStore or Release barrier before
       
  1811   // the CAS store.
       
  1812 
       
  1813 #ifdef AARCH64
       
  1814 
       
  1815   Register Rscratch = tmp;
       
  1816   Register Roop = base;
       
  1817   Register mark = oldval;
       
  1818   Register Rbox = newval;
       
  1819   Label loop;
       
  1820 
       
  1821   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
       
  1822 
       
  1823   // Instead of StoreStore here, we use store-release-exclusive below
       
  1824 
       
  1825   bind(loop);
       
  1826 
       
  1827   ldaxr(tmp, base);  // acquire
       
  1828   cmp(tmp, oldval);
       
  1829   b(slow_case, ne);
       
  1830   stlxr(tmp, newval, base); // release
       
  1831   if (one_shot) {
       
  1832     cmp_w(tmp, 0);
       
  1833   } else {
       
  1834     cbnz_w(tmp, loop);
       
  1835     fallthrough_is_success = true;
       
  1836   }
       
  1837 
       
  1838   // MemBarAcquireLock would normally go here, but
       
  1839   // we already do ldaxr+stlxr above, which has
       
  1840   // Sequential Consistency
       
  1841 
       
  1842 #else
       
  1843   membar(MacroAssembler::StoreStore, noreg);
       
  1844 
       
  1845   if (one_shot) {
       
  1846     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
       
  1847     cmp(tmp, oldval);
       
  1848     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
       
  1849     cmp(tmp, 0, eq);
       
  1850   } else {
       
  1851     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
       
  1852   }
       
  1853 
       
  1854   // MemBarAcquireLock barrier
       
  1855   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
       
  1856   // but that doesn't prevent a load or store from floating up between
       
  1857   // the load and store in the CAS sequence, so play it safe and
       
  1858   // do a full fence.
       
  1859   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
       
  1860 #endif
       
  1861   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
       
  1862     b(slow_case, ne);
       
  1863   }
       
  1864 }
       
  1865 
       
  1866 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
       
  1867   Register base, Register tmp, Label &slow_case,
       
  1868   bool allow_fallthrough_on_failure, bool one_shot)
       
  1869 {
       
  1870 
       
  1871   bool fallthrough_is_success = false;
       
  1872 
       
  1873   assert_different_registers(oldval,newval,base,tmp);
       
  1874 
       
  1875 #ifdef AARCH64
       
  1876   Label loop;
       
  1877 
       
  1878   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
       
  1879 
       
  1880   bind(loop);
       
  1881   ldxr(tmp, base);
       
  1882   cmp(tmp, oldval);
       
  1883   b(slow_case, ne);
       
  1884   // MemBarReleaseLock barrier
       
  1885   stlxr(tmp, newval, base);
       
  1886   if (one_shot) {
       
  1887     cmp_w(tmp, 0);
       
  1888   } else {
       
  1889     cbnz_w(tmp, loop);
       
  1890     fallthrough_is_success = true;
       
  1891   }
       
  1892 #else
       
  1893   // MemBarReleaseLock barrier
       
  1894   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
       
  1895   // but that doesn't prevent a load or store from floating down between
       
  1896   // the load and store in the CAS sequence, so play it safe and
       
  1897   // do a full fence.
       
  1898   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
       
  1899 
       
  1900   if (one_shot) {
       
  1901     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
       
  1902     cmp(tmp, oldval);
       
  1903     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
       
  1904     cmp(tmp, 0, eq);
       
  1905   } else {
       
  1906     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
       
  1907   }
       
  1908 #endif
       
  1909   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
       
  1910     b(slow_case, ne);
       
  1911   }
       
  1912 
       
  1913   // ExitEnter
       
  1914   // According to JSR-133 Cookbook, this should be StoreLoad, the same
       
  1915   // barrier that follows volatile store.
       
  1916   // TODO: Should be able to remove on armv8 if volatile loads
       
  1917   // use the load-acquire instruction.
       
  1918   membar(StoreLoad, noreg);
       
  1919 }
       
  1920 
       
  1921 #ifndef PRODUCT
       
  1922 
       
  1923 // Preserves flags and all registers.
       
  1924 // On SMP the updated value might not be visible to external observers without a synchronization barrier.
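// On 32-bit ARM the CPSR is saved in Rtemp and restored after the CAS loop, which is how the
// flag-preservation part of the contract is met; the AArch64 path uses ldxr/stxr and cbnz,
// which do not modify the flags at all.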
       
  1925 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
       
  1926   if (counter_addr != NULL) {
       
  1927     InlinedAddress counter_addr_literal((address)counter_addr);
       
  1928     Label done, retry;
       
  1929     if (cond != al) {
       
  1930       b(done, inverse(cond));
       
  1931     }
       
  1932 
       
  1933 #ifdef AARCH64
       
  1934     raw_push(R0, R1);
       
  1935     raw_push(R2, ZR);
       
  1936 
       
  1937     ldr_literal(R0, counter_addr_literal);
       
  1938 
       
  1939     bind(retry);
       
  1940     ldxr_w(R1, R0);
       
  1941     add_w(R1, R1, 1);
       
  1942     stxr_w(R2, R1, R0);
       
  1943     cbnz_w(R2, retry);
       
  1944 
       
  1945     raw_pop(R2, ZR);
       
  1946     raw_pop(R0, R1);
       
  1947 #else
       
  1948     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
       
  1949     ldr_literal(R0, counter_addr_literal);
       
  1950 
       
  1951     mrs(CPSR, Rtemp);
       
  1952 
       
  1953     bind(retry);
       
  1954     ldr_s32(R1, Address(R0));
       
  1955     add(R2, R1, 1);
       
  1956     atomic_cas_bool(R1, R2, R0, 0, R3);
       
  1957     b(retry, ne);
       
  1958 
       
  1959     msr(CPSR_fsxc, Rtemp);
       
  1960 
       
  1961     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
       
  1962 #endif // AARCH64
       
  1963 
       
  1964     b(done);
       
  1965     bind_literal(counter_addr_literal);
       
  1966 
       
  1967     bind(done);
       
  1968   }
       
  1969 }
       
  1970 
       
  1971 #endif // !PRODUCT
       
  1972 
       
  1973 
       
  1974 // Building block for the CAS cases of biased locking: performs the CAS and records statistics.
       
  1975 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
       
  1976 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
       
  1977                                                  Register tmp, Label& slow_case, int* counter_addr) {
       
  1978 
       
  1979   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
       
  1980 #ifdef ASSERT
       
  1981   breakpoint(ne); // Fallthrough only on success
       
  1982 #endif
       
  1983 #ifndef PRODUCT
       
  1984   if (counter_addr != NULL) {
       
  1985     cond_atomic_inc32(al, counter_addr);
       
  1986   }
       
  1987 #endif // !PRODUCT
       
  1988 }
       
  1989 
       
  1990 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
       
  1991                                          bool swap_reg_contains_mark,
       
  1992                                          Register tmp2,
       
  1993                                          Label& done, Label& slow_case,
       
  1994                                          BiasedLockingCounters* counters) {
       
  1995   // obj_reg must be preserved (at least) if the bias locking fails
       
  1996   // tmp_reg is a temporary register
       
  1997   // swap_reg was used as a temporary but contained a value
       
  1998   //   that was used afterwards in some call paths. Callers
       
  1999   //   have been fixed so that swap_reg no longer needs to be
       
  2000   //   saved.
       
  2001   // Rtemp is no longer scratched
       
  2002 
       
  2003   assert(UseBiasedLocking, "why call this otherwise?");
       
  2004   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
       
  2005   guarantee(swap_reg != tmp_reg, "invariant");
       
  2006   assert(tmp_reg != noreg, "must supply tmp_reg");
       
  2007 
       
  2008 #ifndef PRODUCT
       
  2009   if (PrintBiasedLockingStatistics && (counters == NULL)) {
       
  2010     counters = BiasedLocking::counters();
       
  2011   }
       
  2012 #endif
       
  2013 
       
  2014   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
       
  2015   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
       
  2016 
       
  2017   // Biased locking
       
  2018   // See whether the lock is currently biased toward our thread and
       
  2019   // whether the epoch is still valid
       
  2020   // Note that the runtime guarantees sufficient alignment of JavaThread
       
  2021   // pointers to allow age to be placed into low bits
       
  2022   // First check to see whether biasing is even enabled for this object
       
  2023   Label cas_label;
       
  2024 
       
  2025   // The null check applies to the mark loading, if we need to load it.
       
  2026   // If the mark has already been loaded in swap_reg then it has already
       
  2027   // been performed and the offset is irrelevant.
       
  2028   int null_check_offset = offset();
       
  2029   if (!swap_reg_contains_mark) {
       
  2030     ldr(swap_reg, mark_addr);
       
  2031   }
       
  2032 
       
  2033   // On MP platform loads could return 'stale' values in some cases.
       
  2034   // That is acceptable since either CAS or slow case path is taken in the worst case.
       
  2035 
       
  2036   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
       
  2037   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
       
  2038 
       
  2039   b(cas_label, ne);
       
  2040 
       
  2041   // The bias pattern is present in the object's header. Need to check
       
  2042   // whether the bias owner and the epoch are both still current.
       
  2043   load_klass(tmp_reg, obj_reg);
       
  2044   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
       
  2045   orr(tmp_reg, tmp_reg, Rthread);
       
  2046   eor(tmp_reg, tmp_reg, swap_reg);
       
  2047 
       
  2048 #ifdef AARCH64
       
  2049   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
       
  2050 #else
       
  2051   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
       
  2052 #endif // AARCH64
       
  2053 
       
  2054 #ifndef PRODUCT
       
  2055   if (counters != NULL) {
       
  2056     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
       
  2057   }
       
  2058 #endif // !PRODUCT
       
  2059 
       
  2060   b(done, eq);
       
  2061 
       
  2062   Label try_revoke_bias;
       
  2063   Label try_rebias;
       
  2064 
       
  2065   // At this point we know that the header has the bias pattern and
       
  2066   // that we are not the bias owner in the current epoch. We need to
       
  2067   // figure out more details about the state of the header in order to
       
  2068   // know what operations can be legally performed on the object's
       
  2069   // header.
       
  2070 
       
  2071   // If the low three bits in the xor result aren't clear, that means
       
  2072   // the prototype header is no longer biased and we have to revoke
       
  2073   // the bias on this object.
       
  2074   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
       
  2075   b(try_revoke_bias, ne);
       
  2076 
       
  2077   // Biasing is still enabled for this data type. See whether the
       
  2078   // epoch of the current bias is still valid, meaning that the epoch
       
  2079   // bits of the mark word are equal to the epoch bits of the
       
  2080   // prototype header. (Note that the prototype header's epoch bits
       
  2081   // only change at a safepoint.) If not, attempt to rebias the object
       
  2082   // toward the current thread. Note that we must be absolutely sure
       
  2083   // that the current epoch is invalid in order to do this because
       
  2084   // otherwise the manipulations it performs on the mark word are
       
  2085   // illegal.
       
  2086   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
       
  2087   b(try_rebias, ne);
       
  2088 
       
  2089   // tmp_reg has the age, epoch and pattern bits cleared
       
  2090   // The remaining (owner) bits are (Thread ^ current_owner)
       
  2091 
       
  2092   // The epoch of the current bias is still valid but we know nothing
       
  2093   // about the owner; it might be set or it might be clear. Try to
       
  2094   // acquire the bias of the object using an atomic operation. If this
       
  2095   // fails we will go into the runtime to revoke the object's bias.
       
  2096   // Note that we first construct the presumed unbiased header so we
       
  2097   // don't accidentally blow away another thread's valid bias.
       
  2098 
       
  2099   // Note that we know the owner is not ourself. Hence, success can
       
  2100   // only happen when the owner bits are 0
       
  2101 
       
  2102 #ifdef AARCH64
       
  2103   // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
       
  2104   // a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
       
  2105   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
       
  2106   andr(swap_reg, swap_reg, tmp2);
       
  2107 #else
       
  2108   // until the assembler can be made smarter, we need to make some assumptions about the values
       
  2109   // so we can optimize this:
       
  2110   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
       
  2111 
       
  2112   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
       
  2113   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
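         // lsl 23 followed by lsr 23 keeps only the low 9 bits, i.e. the 0x1ff mask
         // (biased_lock | age | epoch) asserted above, without materializing a constant.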
       
  2114 #endif // AARCH64
       
  2115 
       
  2116   orr(tmp_reg, swap_reg, Rthread); // new mark
       
  2117 
       
  2118   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
       
  2119         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
       
  2120 
       
  2121   // If the biasing toward our thread failed, this means that
       
  2122   // another thread succeeded in biasing it toward itself and we
       
  2123   // need to revoke that bias. The revocation will occur in the
       
  2124   // interpreter runtime in the slow case.
       
  2125 
       
  2126   b(done);
       
  2127 
       
  2128   bind(try_rebias);
       
  2129 
       
  2130   // At this point we know the epoch has expired, meaning that the
       
  2131   // current "bias owner", if any, is actually invalid. Under these
       
  2132   // circumstances _only_, we are allowed to use the current header's
       
  2133   // value as the comparison value when doing the cas to acquire the
       
  2134   // bias in the current epoch. In other words, we allow transfer of
       
  2135   // the bias from one thread to another directly in this situation.
       
  2136 
       
  2137   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
       
  2138 
       
  2139   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
       
  2140 
       
  2141   // owner bits 'random'. Set them to Rthread.
       
  2142 #ifdef AARCH64
       
  2143   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
       
  2144   andr(tmp_reg, tmp_reg, tmp2);
       
  2145 #else
       
  2146   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
       
  2147   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
       
  2148 #endif // AARCH64
       
  2149 
       
  2150   orr(tmp_reg, tmp_reg, Rthread); // new mark
       
  2151 
       
  2152   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
       
  2153         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
       
  2154 
       
  2155   // If the biasing toward our thread failed, then another thread
       
  2156   // succeeded in biasing it toward itself and we need to revoke that
       
  2157   // bias. The revocation will occur in the runtime in the slow case.
       
  2158 
       
  2159   b(done);
       
  2160 
       
  2161   bind(try_revoke_bias);
       
  2162 
       
  2163   // The prototype mark in the klass doesn't have the bias bit set any
       
  2164   // more, indicating that objects of this data type are not supposed
       
  2165   // to be biased any more. We are going to try to reset the mark of
       
  2166   // this object to the prototype value and fall through to the
       
  2167   // CAS-based locking scheme. Note that if our CAS fails, it means
       
  2168   // that another thread raced us for the privilege of revoking the
       
  2169   // bias of this particular object, so it's okay to continue in the
       
  2170   // normal locking code.
       
  2171 
       
  2172   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
       
  2173 
       
  2174   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
       
  2175 
       
  2176   // owner bits 'random'. Clear them
       
  2177 #ifdef AARCH64
       
  2178   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
       
  2179   andr(tmp_reg, tmp_reg, tmp2);
       
  2180 #else
       
  2181   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
       
  2182   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
       
  2183 #endif // AARCH64
       
  2184 
       
  2185   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
       
  2186         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
       
  2187 
       
  2188   // Fall through to the normal CAS-based lock, because no matter what
       
  2189   // the result of the above CAS, some thread must have succeeded in
       
  2190   // removing the bias bit from the object's header.
       
  2191 
       
  2192   bind(cas_label);
       
  2193 
       
  2194   return null_check_offset;
       
  2195 }
       
  2196 
       
  2197 
       
  2198 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
       
  2199   assert(UseBiasedLocking, "why call this otherwise?");
       
  2200 
       
  2201   // Check for biased locking unlock case, which is a no-op
       
  2202   // Note: we do not have to check the thread ID for two reasons.
       
  2203   // First, the interpreter checks for IllegalMonitorStateException at
       
  2204   // a higher level. Second, if the bias was revoked while we held the
       
  2205   // lock, the object could not be rebiased toward another thread, so
       
  2206   // the bias bit would be clear.
       
  2207   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
       
  2208 
       
  2209   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
       
  2210   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
       
  2211   b(done, eq);
       
  2212 }
       
  2213 
       
  2214 #ifdef AARCH64
       
  2215 
       
  2216 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
       
  2217   switch (size_in_bytes) {
       
  2218     case  8: ldr(dst, src); break;
       
  2219     case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
       
  2220     case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
       
  2221     case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
       
  2222     default: ShouldNotReachHere();
       
  2223   }
       
  2224 }
       
  2225 
       
  2226 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
       
  2227   switch (size_in_bytes) {
       
  2228     case  8: str(src, dst);    break;
       
  2229     case  4: str_32(src, dst); break;
       
  2230     case  2: strh(src, dst);   break;
       
  2231     case  1: strb(src, dst);   break;
       
  2232     default: ShouldNotReachHere();
       
  2233   }
       
  2234 }
       
  2235 
       
  2236 #else
       
  2237 
       
  2238 void MacroAssembler::load_sized_value(Register dst, Address src,
       
  2239                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
       
  2240   switch (size_in_bytes) {
       
  2241     case  4: ldr(dst, src, cond); break;
       
  2242     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
       
  2243     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
       
  2244     default: ShouldNotReachHere();
       
  2245   }
       
  2246 }
       
  2247 
       
  2248 
       
  2249 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
       
  2250   switch (size_in_bytes) {
       
  2251     case  4: str(src, dst, cond); break;
       
  2252     case  2: strh(src, dst, cond);   break;
       
  2253     case  1: strb(src, dst, cond);   break;
       
  2254     default: ShouldNotReachHere();
       
  2255   }
       
  2256 }
       
  2257 #endif // AARCH64
       
  2258 
       
  2259 // Look up the method for a megamorphic invokeinterface call.
       
  2260 // The target method is determined by <Rinterf, Rindex>.
       
  2261 // The receiver klass is in Rklass.
       
  2262 // On success, the result will be in method_result, and execution falls through.
       
  2263 // On failure, execution transfers to the given label.
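// The lookup is a linear scan of the itableOffsetEntry table that starts right after the
// vtable; it terminates either at a matching interface (success) or at a null entry,
// meaning the receiver class does not implement the interface.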
       
  2264 void MacroAssembler::lookup_interface_method(Register Rklass,
       
  2265                                              Register Rinterf,
       
  2266                                              Register Rindex,
       
  2267                                              Register method_result,
       
  2268                                              Register temp_reg1,
       
  2269                                              Register temp_reg2,
       
  2270                                              Label& L_no_such_interface) {
       
  2271 
       
  2272   assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex);
       
  2273 
       
  2274   Register Ritable = temp_reg1;
       
  2275 
       
  2276   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
       
  2277   const int base = in_bytes(Klass::vtable_start_offset());
       
  2278   const int scale = exact_log2(vtableEntry::size_in_bytes());
       
  2279   ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
       
  2280   add(Ritable, Rklass, base);
       
  2281   add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale));
       
  2282 
       
  2283   Label entry, search;
       
  2284 
       
  2285   b(entry);
       
  2286 
       
  2287   bind(search);
       
  2288   add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize);
       
  2289 
       
  2290   bind(entry);
       
  2291 
       
  2292   // Check that the entry is non-null.  A null entry means that the receiver
       
  2293   // class doesn't implement the interface, and wasn't the same as the
       
  2294   // receiver class checked when the interface was resolved.
       
  2295 
       
  2296   ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes()));
       
  2297   cbz(temp_reg2, L_no_such_interface);
       
  2298 
       
  2299   cmp(Rinterf, temp_reg2);
       
  2300   b(search, ne);
       
  2301 
       
  2302   ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes()));
       
  2303   add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass*
       
  2304   assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
       
  2305   assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
       
  2306 
       
  2307   ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex));
       
  2308 }
       
  2309 
       
  2310 #ifdef COMPILER2
       
  2311 // TODO: 8 bytes at a time? pre-fetch?
       
  2312 // Compare char[] arrays aligned to 4 bytes.
       
  2313 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
       
  2314                                         Register limit, Register result,
       
  2315                                       Register chr1, Register chr2, Label& Ldone) {
       
  2316   Label Lvector, Lloop;
       
  2317 
       
  2318   // Note: limit contains number of bytes (2*char_elements) != 0.
       
  2319   tst(limit, 0x2); // trailing character ?
       
  2320   b(Lvector, eq);
       
  2321 
       
  2322   // compare the trailing char
       
  2323   sub(limit, limit, sizeof(jchar));
       
  2324   ldrh(chr1, Address(ary1, limit));
       
  2325   ldrh(chr2, Address(ary2, limit));
       
  2326   cmp(chr1, chr2);
       
  2327   mov(result, 0, ne);     // not equal
       
  2328   b(Ldone, ne);
       
  2329 
       
  2330   // only one char ?
       
  2331   tst(limit, limit);
       
  2332   mov(result, 1, eq);
       
  2333   b(Ldone, eq);
       
  2334 
       
  2335   // word by word compare, don't need alignment check
       
  2336   bind(Lvector);
       
  2337 
       
  2338   // Shift ary1 and ary2 to the end of the arrays, negate limit
       
  2339   add(ary1, limit, ary1);
       
  2340   add(ary2, limit, ary2);
       
  2341   neg(limit, limit);
       
  2342 
       
  2343   bind(Lloop);
       
  2344   ldr_u32(chr1, Address(ary1, limit));
       
  2345   ldr_u32(chr2, Address(ary2, limit));
       
  2346   cmp_32(chr1, chr2);
       
  2347   mov(result, 0, ne);     // not equal
       
  2348   b(Ldone, ne);
       
  2349   adds(limit, limit, 2*sizeof(jchar));
       
  2350   b(Lloop, ne);
       
  2351 
       
  2352   // Caller should set it:
       
  2353   // mov(result_reg, 1);  //equal
       
  2354 }
       
  2355 #endif
       
  2356 
       
  2357 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
       
  2358   mov_slow(tmpreg1, counter_addr);
       
  2359   ldr_s32(tmpreg2, tmpreg1);
       
  2360   add_32(tmpreg2, tmpreg2, 1);
       
  2361   str_32(tmpreg2, tmpreg1);
       
  2362 }
       
  2363 
       
  2364 void MacroAssembler::floating_cmp(Register dst) {
       
  2365 #ifdef AARCH64
       
  2366   NOT_TESTED();
       
  2367   cset(dst, gt);            // 1 if '>', else 0
       
  2368   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
       
  2369 #else
       
  2370   vmrs(dst, FPSCR);
       
  2371   orr(dst, dst, 0x08000000);
       
  2372   eor(dst, dst, AsmOperand(dst, lsl, 3));
       
  2373   mov(dst, AsmOperand(dst, asr, 30));
       
  2374 #endif
       
  2375 }
       
  2376 
       
  2377 void MacroAssembler::restore_default_fp_mode() {
       
  2378 #ifdef AARCH64
       
  2379   msr(SysReg_FPCR, ZR);
       
  2380 #else
       
  2381 #ifndef __SOFTFP__
       
  2382   // Round to Near mode, IEEE compatible, masked exceptions
       
  2383   mov(Rtemp, 0);
       
  2384   vmsr(FPSCR, Rtemp);
       
  2385 #endif // !__SOFTFP__
       
  2386 #endif // AARCH64
       
  2387 }
       
  2388 
       
  2389 #ifndef AARCH64
       
  2390 // 24-bit word range == 26-bit byte range
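// In other words, the byte offset must be a multiple of 4 and representable as a signed
// 26-bit displacement (roughly +/-32MB), mirroring the B/BL immediate encoding.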
       
  2391 bool check26(int offset) {
       
  2392   // this could be simplified, but it mimics encoding and decoding
       
  2393   // an actual branch instruction
       
  2394   int off1 = offset << 6 >> 8;
       
  2395   int encoded = off1 & ((1<<24)-1);
       
  2396   int decoded = encoded << 8 >> 6;
       
  2397   return offset == decoded;
       
  2398 }
       
  2399 #endif // !AARCH64
       
  2400 
       
  2401 // Perform some slight adjustments so the default 32MB code cache
       
  2402 // is fully reachable.
       
  2403 static inline address first_cache_address() {
       
  2404   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
       
  2405 }
       
  2406 static inline address last_cache_address() {
       
  2407   return CodeCache::high_bound() - Assembler::InstructionSize;
       
  2408 }
       
  2409 
       
  2410 #ifdef AARCH64
       
  2411 // Can we reach target using ADRP?
       
  2412 bool MacroAssembler::page_reachable_from_cache(address target) {
       
  2413   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
       
  2414   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
       
  2415   intptr_t addr = (intptr_t)target & ~0xfff;
       
  2416 
       
  2417   intptr_t loffset = addr - cl;
       
  2418   intptr_t hoffset = addr - ch;
       
  2419   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
       
  2420 }
       
  2421 #endif
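// Illustrative sketch only: ADRP encodes a signed 21-bit page offset, i.e.
// +/-4GB in 4KB pages. The check above applies the test below to both ends of
// the code cache so that every code address in between is covered as well.
// The helper name is hypothetical.
#if 0
static bool adrp_reachable_model(intptr_t source, intptr_t target) {
  intptr_t page_delta = ((target & ~(intptr_t)0xfff) - (source & ~(intptr_t)0xfff)) >> 12;
  return page_delta >= -((intptr_t)1 << 20) && page_delta < ((intptr_t)1 << 20);
}
#endif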
       
  2422 
       
  2423 // Can we reach target using unconditional branch or call from anywhere
       
  2424 // in the code cache (because code can be relocated)?
       
  2425 bool MacroAssembler::_reachable_from_cache(address target) {
       
  2426 #ifdef __thumb__
       
  2427   if ((1 & (intptr_t)target) != 0) {
       
  2428     // Return false to avoid 'b' if we would need to switch to THUMB mode.
       
  2429     return false;
       
  2430   }
       
  2431 #endif
       
  2432 
       
  2433   address cl = first_cache_address();
       
  2434   address ch = last_cache_address();
       
  2435 
       
  2436   if (ForceUnreachable) {
       
  2437     // Only addresses from CodeCache can be treated as reachable.
       
  2438     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
       
  2439       return false;
       
  2440     }
       
  2441   }
       
  2442 
       
  2443   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
       
  2444   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
       
  2445 
       
  2446 #ifdef AARCH64
       
  2447   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
       
  2448 #else
       
  2449   return check26(loffset - 8) && check26(hoffset - 8);
       
  2450 #endif
       
  2451 }
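// Illustrative sketch only: on 32-bit ARM a B/BL placed at 'pc' encodes
// target - (pc + 8), which is why 8 is subtracted from both offsets above.
// Checking from both cache boundaries covers every possible branch site.
// The helper name is hypothetical.
#if 0
static bool branch_reachable_model(intptr_t pc, intptr_t target) {
  return check26((int)(target - (pc + 8)));  // same predicate as used above
}
#endif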
       
  2452 
       
  2453 bool MacroAssembler::reachable_from_cache(address target) {
       
  2454   assert(CodeCache::contains(pc()), "not supported");
       
  2455   return _reachable_from_cache(target);
       
  2456 }
       
  2457 
       
  2458 // Can we reach the entire code cache from anywhere else in the code cache?
       
  2459 bool MacroAssembler::_cache_fully_reachable() {
       
  2460   address cl = first_cache_address();
       
  2461   address ch = last_cache_address();
       
  2462   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
       
  2463 }
       
  2464 
       
  2465 bool MacroAssembler::cache_fully_reachable() {
       
  2466   assert(CodeCache::contains(pc()), "not supported");
       
  2467   return _cache_fully_reachable();
       
  2468 }
       
  2469 
       
  2470 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
       
  2471   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
       
  2472   if (reachable_from_cache(target)) {
       
  2473     relocate(rtype);
       
  2474     b(target NOT_AARCH64_ARG(cond));
       
  2475     return;
       
  2476   }
       
  2477 
       
  2478   // Note: relocate is not needed for the code below,
       
  2479   // which encodes targets in absolute format.
       
  2480   if (ignore_non_patchable_relocations()) {
       
  2481     rtype = relocInfo::none;
       
  2482   }
       
  2483 
       
  2484 #ifdef AARCH64
       
  2485   assert (scratch != noreg, "should be specified");
       
  2486   InlinedAddress address_literal(target, rtype);
       
  2487   ldr_literal(scratch, address_literal);
       
  2488   br(scratch);
       
  2489   int off = offset();
       
  2490   bind_literal(address_literal);
       
  2491 #ifdef COMPILER2
       
  2492   if (offset() - off == wordSize) {
       
  2493     // no padding, so insert nop for worst-case sizing
       
  2494     nop();
       
  2495   }
       
  2496 #endif
       
  2497 #else
       
  2498   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
       
  2499     // Note: this version cannot be (atomically) patched
       
  2500     mov_slow(scratch, (intptr_t)target, cond);
       
  2501     bx(scratch, cond);
       
  2502   } else {
       
  2503     Label skip;
       
  2504     InlinedAddress address_literal(target);
       
  2505     if (cond != al) {
       
  2506       b(skip, inverse(cond));
       
  2507     }
       
  2508     relocate(rtype);
       
  2509     ldr_literal(PC, address_literal);
       
  2510     bind_literal(address_literal);
       
  2511     bind(skip);
       
  2512   }
       
  2513 #endif // AARCH64
       
  2514 }
       
  2515 
       
  2516 // Similar to jump except that:
       
  2517 // - a near branch is used only if every possible destination in the code cache is near
       
  2518 // - no movt/movw (not atomically patchable)
       
  2519 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
       
  2520   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
       
  2521   if (cache_fully_reachable()) {
       
  2522     // Note: this assumes that all possible targets (the initial one
       
  2523     // and the addresses patched to) are all in the code cache.
       
  2524     assert(CodeCache::contains(target), "target might be too far");
       
  2525     relocate(rtype);
       
  2526     b(target NOT_AARCH64_ARG(cond));
       
  2527     return;
       
  2528   }
       
  2529 
       
  2530   // Discard the relocation information if not needed for CacheCompiledCode
       
  2531   // since the next encodings are all in absolute format.
       
  2532   if (ignore_non_patchable_relocations()) {
       
  2533     rtype = relocInfo::none;
       
  2534   }
       
  2535 
       
  2536 #ifdef AARCH64
       
  2537   assert (scratch != noreg, "should be specified");
       
  2538   InlinedAddress address_literal(target);
       
  2539   relocate(rtype);
       
  2540   ldr_literal(scratch, address_literal);
       
  2541   br(scratch);
       
  2542   int off = offset();
       
  2543   bind_literal(address_literal);
       
  2544 #ifdef COMPILER2
       
  2545   if (offset() - off == wordSize) {
       
  2546     // no padding, so insert nop for worst-case sizing
       
  2547     nop();
       
  2548   }
       
  2549 #endif
       
  2550 #else
       
  2551   {
       
  2552     Label skip;
       
  2553     InlinedAddress address_literal(target);
       
  2554     if (cond != al) {
       
  2555       b(skip, inverse(cond));
       
  2556     }
       
  2557     relocate(rtype);
       
  2558     ldr_literal(PC, address_literal);
       
  2559     bind_literal(address_literal);
       
  2560     bind(skip);
       
  2561   }
       
  2562 #endif // AARCH64
       
  2563 }
       
  2564 
       
  2565 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
       
  2566   Register scratch = LR;
       
  2567   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
       
  2568   if (reachable_from_cache(target)) {
       
  2569     relocate(rspec);
       
  2570     bl(target NOT_AARCH64_ARG(cond));
       
  2571     return;
       
  2572   }
       
  2573 
       
  2574   // Note: relocate is not needed for the code below,
       
  2575   // which encodes targets in absolute format.
       
  2576   if (ignore_non_patchable_relocations()) {
       
  2577     // This assumes the information was needed only for relocating the code.
       
  2578     rspec = RelocationHolder::none;
       
  2579   }
       
  2580 
       
  2581 #ifndef AARCH64
       
  2582   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
       
  2583     // Note: this version cannot be (atomically) patched
       
  2584     mov_slow(scratch, (intptr_t)target, cond);
       
  2585     blx(scratch, cond);
       
  2586     return;
       
  2587   }
       
  2588 #endif
       
  2589 
       
  2590   {
       
  2591     Label ret_addr;
       
  2592 #ifndef AARCH64
       
  2593     if (cond != al) {
       
  2594       b(ret_addr, inverse(cond));
       
  2595     }
       
  2596 #endif
       
  2597 
       
  2598 
       
  2599 #ifdef AARCH64
       
  2600     // TODO-AARCH64: make more optimal implementation
       
  2601     // [ Keep in sync with MacroAssembler::call_size ]
       
  2602     assert(rspec.type() == relocInfo::none, "call reloc not implemented");
       
  2603     mov_slow(scratch, target);
       
  2604     blr(scratch);
       
  2605 #else
       
  2606     InlinedAddress address_literal(target);
       
  2607     relocate(rspec);
       
  2608     adr(LR, ret_addr);
       
  2609     ldr_literal(PC, address_literal);
       
  2610 
       
  2611     bind_literal(address_literal);
       
  2612     bind(ret_addr);
       
  2613 #endif
       
  2614   }
       
  2615 }
       
  2616 
       
  2617 #if defined(AARCH64) && defined(COMPILER2)
       
  2618 int MacroAssembler::call_size(address target, bool far, bool patchable) {
       
  2619   // FIXME: mov_slow is variable-length
       
  2620   if (!far) return 1; // bl
       
  2621   if (patchable) return 2;  // ldr; blr
       
  2622   return instr_count_for_mov_slow((intptr_t)target) + 1;
       
  2623 }
       
  2624 #endif
       
  2625 
       
  2626 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
       
  2627   assert(rspec.type() == relocInfo::static_call_type ||
       
  2628          rspec.type() == relocInfo::none ||
       
  2629          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
       
  2630 
       
  2631   // Always generate the relocation information, needed for patching
       
  2632   relocate(rspec); // used by NativeCall::is_call_before()
       
  2633   if (cache_fully_reachable()) {
       
  2634     // Note: this assumes that all possible targets (the initial one
       
  2635     // and the addresses patched to) are all in the code cache.
       
  2636     assert(CodeCache::contains(target), "target might be too far");
       
  2637     bl(target);
       
  2638   } else {
       
  2639 #if defined(AARCH64) && defined(COMPILER2)
       
  2640     if (c2) {
       
  2641       // return address needs to match call_size().
       
  2642       // no need to trash Rtemp
       
  2643       int off = offset();
       
  2644       Label skip_literal;
       
  2645       InlinedAddress address_literal(target);
       
  2646       ldr_literal(LR, address_literal);
       
  2647       blr(LR);
       
  2648       int ret_addr_offset = offset();
       
  2649       assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
       
  2650       b(skip_literal);
       
  2651       int off2 = offset();
       
  2652       bind_literal(address_literal);
       
  2653       if (offset() - off2 == wordSize) {
       
  2654         // no padding, so insert nop for worst-case sizing
       
  2655         nop();
       
  2656       }
       
  2657       bind(skip_literal);
       
  2658       return ret_addr_offset;
       
  2659     }
       
  2660 #endif
       
  2661     Label ret_addr;
       
  2662     InlinedAddress address_literal(target);
       
  2663 #ifdef AARCH64
       
  2664     ldr_literal(Rtemp, address_literal);
       
  2665     adr(LR, ret_addr);
       
  2666     br(Rtemp);
       
  2667 #else
       
  2668     adr(LR, ret_addr);
       
  2669     ldr_literal(PC, address_literal);
       
  2670 #endif
       
  2671     bind_literal(address_literal);
       
  2672     bind(ret_addr);
       
  2673   }
       
  2674   return offset();
       
  2675 }
       
  2676 
       
  2677 
       
  2678 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
       
  2679   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
       
  2680   ldr(tmp, Address(method, Method::const_offset()));
       
  2681   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
       
  2682   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
       
  2683   ldr(mirror, Address(tmp, mirror_offset));
       
  2684 }
       
  2685 
       
  2686 ///////////////////////////////////////////////////////////////////////////////
       
  2687 
       
  2688 // Compressed pointers
       
  2689 
       
  2690 #ifdef AARCH64
       
  2691 
       
  2692 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
       
  2693   if (UseCompressedClassPointers) {
       
  2694     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
       
  2695     decode_klass_not_null(dst_klass);
       
  2696   } else {
       
  2697     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
       
  2698   }
       
  2699 }
       
  2700 
       
  2701 #else
       
  2702 
       
  2703 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
       
  2704   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
       
  2705 }
       
  2706 
       
  2707 #endif // AARCH64
       
  2708 
       
  2709 // Blows src_klass.
       
  2710 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
       
  2711 #ifdef AARCH64
       
  2712   if (UseCompressedClassPointers) {
       
  2713     assert(src_klass != dst_oop, "not enough registers");
       
  2714     encode_klass_not_null(src_klass);
       
  2715     str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
       
  2716     return;
       
  2717   }
       
  2718 #endif // AARCH64
       
  2719   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
       
  2720 }
       
  2721 
       
  2722 #ifdef AARCH64
       
  2723 
       
  2724 void MacroAssembler::store_klass_gap(Register dst) {
       
  2725   if (UseCompressedClassPointers) {
       
  2726     str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
       
  2727   }
       
  2728 }
       
  2729 
       
  2730 #endif // AARCH64
       
  2731 
       
  2732 
       
  2733 void MacroAssembler::load_heap_oop(Register dst, Address src) {
       
  2734 #ifdef AARCH64
       
  2735   if (UseCompressedOops) {
       
  2736     ldr_w(dst, src);
       
  2737     decode_heap_oop(dst);
       
  2738     return;
       
  2739   }
       
  2740 #endif // AARCH64
       
  2741   ldr(dst, src);
       
  2742 }
       
  2743 
       
  2744 // Blows src and flags.
       
  2745 void MacroAssembler::store_heap_oop(Register src, Address dst) {
       
  2746 #ifdef AARCH64
       
  2747   if (UseCompressedOops) {
       
  2748     assert(!dst.uses(src), "not enough registers");
       
  2749     encode_heap_oop(src);
       
  2750     str_w(src, dst);
       
  2751     return;
       
  2752   }
       
  2753 #endif // AARCH64
       
  2754   str(src, dst);
       
  2755 }
       
  2756 
       
  2757 void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
       
  2758 #ifdef AARCH64
       
  2759   if (UseCompressedOops) {
       
  2760     str_w(src, dst);
       
  2761     return;
       
  2762   }
       
  2763 #endif // AARCH64
       
  2764   str(src, dst);
       
  2765 }
       
  2766 
       
  2767 
       
  2768 #ifdef AARCH64
       
  2769 
       
  2770 // Algorithm must match oop.inline.hpp encode_heap_oop.
       
  2771 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
       
  2772   // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
       
  2773   // Update it at modifications.
       
  2774   assert (UseCompressedOops, "must be compressed");
       
  2775   assert (Universe::heap() != NULL, "java heap should be initialized");
       
  2776 #ifdef ASSERT
       
  2777   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
       
  2778 #endif
       
  2779   verify_oop(src);
       
  2780   if (Universe::narrow_oop_base() == NULL) {
       
  2781     if (Universe::narrow_oop_shift() != 0) {
       
  2782       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
       
  2783       _lsr(dst, src, Universe::narrow_oop_shift());
       
  2784     } else if (dst != src) {
       
  2785       mov(dst, src);
       
  2786     }
       
  2787   } else {
       
  2788     tst(src, src);
       
  2789     csel(dst, Rheap_base, src, eq);
       
  2790     sub(dst, dst, Rheap_base);
       
  2791     if (Universe::narrow_oop_shift() != 0) {
       
  2792       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
       
  2793       _lsr(dst, dst, Universe::narrow_oop_shift());
       
  2794     }
       
  2795   }
       
  2796 }
       
  2797 
       
  2798 // Same algorithm as oop.inline.hpp decode_heap_oop.
       
  2799 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
       
  2800 #ifdef ASSERT
       
  2801   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
       
  2802 #endif
       
  2803   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
       
  2804   if (Universe::narrow_oop_base() != NULL) {
       
  2805     tst(src, src);
       
  2806     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
       
  2807     csel(dst, dst, ZR, ne);
       
  2808   } else {
       
  2809     _lsl(dst, src, Universe::narrow_oop_shift());
       
  2810   }
       
  2811   verify_oop(dst);
       
  2812 }
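// Illustrative sketch only: a rough C++ model of the compressed-oop mapping
// implemented by encode_heap_oop/decode_heap_oop above for the case of a
// non-NULL narrow_oop_base, where shift == LogMinObjAlignmentInBytes. NULL
// (narrow value 0) stays NULL in both directions. Helper names are
// hypothetical.
#if 0
static unsigned int encode_oop_model(intptr_t oop, intptr_t base, int shift) {
  return (oop == 0) ? 0u : (unsigned int)((oop - base) >> shift);  // csel / sub / lsr
}
static intptr_t decode_oop_model(unsigned int narrow, intptr_t base, int shift) {
  return (narrow == 0) ? 0 : base + ((intptr_t)narrow << shift);   // add / csel
}
#endif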
       
  2813 
       
  2814 #ifdef COMPILER2
       
  2815 // Algorithm must match oop.inline.hpp encode_heap_oop.
       
  2816 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
       
  2817 // must be changed.
       
  2818 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
       
  2819   assert (UseCompressedOops, "must be compressed");
       
  2820   assert (Universe::heap() != NULL, "java heap should be initialized");
       
  2821 #ifdef ASSERT
       
  2822   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
       
  2823 #endif
       
  2824   verify_oop(src);
       
  2825   if (Universe::narrow_oop_base() == NULL) {
       
  2826     if (Universe::narrow_oop_shift() != 0) {
       
  2827       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
       
  2828       _lsr(dst, src, Universe::narrow_oop_shift());
       
  2829     } else if (dst != src) {
       
  2830     mov(dst, src);
       
  2831     }
       
  2832   } else {
       
  2833     sub(dst, src, Rheap_base);
       
  2834     if (Universe::narrow_oop_shift() != 0) {
       
  2835       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
       
  2836       _lsr(dst, dst, Universe::narrow_oop_shift());
       
  2837     }
       
  2838   }
       
  2839 }
       
  2840 
       
  2841 // Same algorithm as oop.inline.hpp decode_heap_oop.
       
  2842 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
       
  2843 // must be changed.
       
  2844 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
       
  2845 #ifdef ASSERT
       
  2846   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
       
  2847 #endif
       
  2848   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
       
  2849   if (Universe::narrow_oop_base() != NULL) {
       
  2850     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
       
  2851   } else {
       
  2852     _lsl(dst, src, Universe::narrow_oop_shift());
       
  2853   }
       
  2854   verify_oop(dst);
       
  2855 }
       
  2856 
       
  2857 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
       
  2858   assert(UseCompressedClassPointers, "should only be used for compressed header");
       
  2859   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
       
  2860   int klass_index = oop_recorder()->find_index(k);
       
  2861   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
       
  2862 
       
  2863   // Relocation with special format (see relocInfo_arm.hpp).
       
  2864   relocate(rspec);
       
  2865   narrowKlass encoded_k = Klass::encode_klass(k);
       
  2866   movz(dst, encoded_k & 0xffff, 0);
       
  2867   movk(dst, (encoded_k >> 16) & 0xffff, 16);
       
  2868 }
       
  2869 
       
  2870 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
       
  2871   assert(UseCompressedOops, "should only be used for compressed header");
       
  2872   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
       
  2873   int oop_index = oop_recorder()->find_index(obj);
       
  2874   RelocationHolder rspec = oop_Relocation::spec(oop_index);
       
  2875 
       
  2876   relocate(rspec);
       
  2877   movz(dst, 0xffff, 0);
       
  2878   movk(dst, 0xffff, 16);
       
  2879 }
       
  2880 
       
  2881 #endif // COMPILER2
       
  2882 
       
  2883 // Must preserve condition codes, or C2 encodeKlass_not_null rule
       
  2884 // must be changed.
       
  2885 void MacroAssembler::encode_klass_not_null(Register r) {
       
  2886   if (Universe::narrow_klass_base() != NULL) {
       
  2887     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
       
  2888     assert(r != Rheap_base, "Encoding a klass in Rheap_base");
       
  2889     mov_slow(Rheap_base, Universe::narrow_klass_base());
       
  2890     sub(r, r, Rheap_base);
       
  2891   }
       
  2892   if (Universe::narrow_klass_shift() != 0) {
       
  2893     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
       
  2894     _lsr(r, r, Universe::narrow_klass_shift());
       
  2895   }
       
  2896   if (Universe::narrow_klass_base() != NULL) {
       
  2897     reinit_heapbase();
       
  2898   }
       
  2899 }
       
  2900 
       
  2901 // Must preserve condition codes, or C2 encodeKlass_not_null rule
       
  2902 // must be changed.
       
  2903 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
       
  2904   if (dst == src) {
       
  2905     encode_klass_not_null(src);
       
  2906     return;
       
  2907   }
       
  2908   if (Universe::narrow_klass_base() != NULL) {
       
  2909     mov_slow(dst, (int64_t)Universe::narrow_klass_base());
       
  2910     sub(dst, src, dst);
       
  2911     if (Universe::narrow_klass_shift() != 0) {
       
  2912       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
       
  2913       _lsr(dst, dst, Universe::narrow_klass_shift());
       
  2914     }
       
  2915   } else {
       
  2916     if (Universe::narrow_klass_shift() != 0) {
       
  2917       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
       
  2918       _lsr(dst, src, Universe::narrow_klass_shift());
       
  2919     } else {
       
  2920       mov(dst, src);
       
  2921     }
       
  2922   }
       
  2923 }
       
  2924 
       
  2925 // Function instr_count_for_decode_klass_not_null() counts the instructions
       
  2926 // generated by decode_klass_not_null(Register r) and reinit_heapbase(),
       
  2927 // when (Universe::heap() != NULL).  Hence, if the instructions they
       
  2928 // generate change, then this method needs to be updated.
       
  2929 int MacroAssembler::instr_count_for_decode_klass_not_null() {
       
  2930   assert(UseCompressedClassPointers, "only for compressed klass ptrs");
       
  2931   assert(Universe::heap() != NULL, "java heap should be initialized");
       
  2932   if (Universe::narrow_klass_base() != NULL) {
       
  2933     return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
       
  2934       1 +                                                                 // add
       
  2935       instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
       
  2936   } else {
       
  2937     if (Universe::narrow_klass_shift() != 0) {
       
  2938       return 1;
       
  2939     }
       
  2940   }
       
  2941   return 0;
       
  2942 }
       
  2943 
       
  2944 // Must preserve condition codes, or C2 decodeKlass_not_null rule
       
  2945 // must be changed.
       
  2946 void MacroAssembler::decode_klass_not_null(Register r) {
       
  2947   int off = offset();
       
  2948   assert(UseCompressedClassPointers, "should only be used for compressed headers");
       
  2949   assert(Universe::heap() != NULL, "java heap should be initialized");
       
  2950   assert(r != Rheap_base, "Decoding a klass in Rheap_base");
       
  2951   // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
       
  2952   // Also do not verify_oop as this is called by verify_oop.
       
  2953   if (Universe::narrow_klass_base() != NULL) {
       
  2954     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
       
  2955     mov_slow(Rheap_base, Universe::narrow_klass_base());
       
  2956     add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
       
  2957     reinit_heapbase();
       
  2958   } else {
       
  2959     if (Universe::narrow_klass_shift() != 0) {
       
  2960       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
       
  2961       _lsl(r, r, Universe::narrow_klass_shift());
       
  2962     }
       
  2963   }
       
  2964   assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
       
  2965 }
       
  2966 
       
  2967 // Must preserve condition codes, or C2 decodeKlass_not_null rule
       
  2968 // must be changed.
       
  2969 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
       
  2970   if (src == dst) {
       
  2971     decode_klass_not_null(src);
       
  2972     return;
       
  2973   }
       
  2974 
       
  2975   assert(UseCompressedClassPointers, "should only be used for compressed headers");
       
  2976   assert(Universe::heap() != NULL, "java heap should be initialized");
       
  2977   assert(src != Rheap_base, "Decoding a klass in Rheap_base");
       
  2978   assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
       
  2979   // Also do not verify_oop as this is called by verify_oop.
       
  2980   if (Universe::narrow_klass_base() != NULL) {
       
  2981     mov_slow(dst, Universe::narrow_klass_base());
       
  2982     add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
       
  2983   } else {
       
  2984     _lsl(dst, src, Universe::narrow_klass_shift());
       
  2985   }
       
  2986 }
       
  2987 
       
  2988 
       
  2989 void MacroAssembler::reinit_heapbase() {
       
  2990   if (UseCompressedOops || UseCompressedClassPointers) {
       
  2991     if (Universe::heap() != NULL) {
       
  2992       mov_slow(Rheap_base, Universe::narrow_ptrs_base());
       
  2993     } else {
       
  2994       ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
       
  2995     }
       
  2996   }
       
  2997 }
       
  2998 
       
  2999 #ifdef ASSERT
       
  3000 void MacroAssembler::verify_heapbase(const char* msg) {
       
  3001   // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
       
  3002   // Update it at modifications.
       
  3003   assert (UseCompressedOops, "should be compressed");
       
  3004   assert (Universe::heap() != NULL, "java heap should be initialized");
       
  3005   if (CheckCompressedOops) {
       
  3006     Label ok;
       
  3007     str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
       
  3008     raw_push(Rtemp, ZR);
       
  3009     mrs(Rtemp, Assembler::SysReg_NZCV);
       
  3010     str(Rtemp, Address(SP, 1 * wordSize));
       
  3011     mov_slow(Rtemp, Universe::narrow_ptrs_base());
       
  3012     cmp(Rheap_base, Rtemp);
       
  3013     b(ok, eq);
       
  3014     stop(msg);
       
  3015     bind(ok);
       
  3016     ldr(Rtemp, Address(SP, 1 * wordSize));
       
  3017     msr(Assembler::SysReg_NZCV, Rtemp);
       
  3018     raw_pop(Rtemp, ZR);
       
  3019     str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
       
  3020   }
       
  3021 }
       
  3022 #endif // ASSERT
       
  3023 
       
  3024 #endif // AARCH64
       
  3025 
       
  3026 #ifdef COMPILER2
       
  3027 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
       
  3028 {
       
  3029   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
       
  3030 
       
  3031   Register Rmark      = Rscratch2;
       
  3032 
       
  3033   assert(Roop != Rscratch, "");
       
  3034   assert(Roop != Rmark, "");
       
  3035   assert(Rbox != Rscratch, "");
       
  3036   assert(Rbox != Rmark, "");
       
  3037 
       
  3038   Label fast_lock, done;
       
  3039 
       
  3040   if (UseBiasedLocking && !UseOptoBiasInlining) {
       
  3041     Label failed;
       
  3042 #ifdef AARCH64
       
  3043     biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
       
  3044 #else
       
  3045     biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
       
  3046 #endif
       
  3047     bind(failed);
       
  3048   }
       
  3049 
       
  3050   ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
       
  3051   tst(Rmark, markOopDesc::unlocked_value);
       
  3052   b(fast_lock, ne);
       
  3053 
       
  3054   // Check for recursive lock
       
  3055   // See comments in InterpreterMacroAssembler::lock_object for
       
  3056   // explanations on the fast recursive locking check.
       
  3057 #ifdef AARCH64
       
  3058   intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
       
  3059   Assembler::LogicalImmediate imm(mask, false);
       
  3060   mov(Rscratch, SP);
       
  3061   sub(Rscratch, Rmark, Rscratch);
       
  3062   ands(Rscratch, Rscratch, imm);
       
  3063   b(done, ne); // exit with failure
       
  3064   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
       
  3065   b(done);
       
  3066 
       
  3067 #else
       
  3068   // -1- test low 2 bits
       
  3069   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
       
  3070   // -2- test (hdr - SP) if the low two bits are 0
       
  3071   sub(Rscratch, Rmark, SP, eq);
       
  3072   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
       
  3073   // If still 'eq' then recursive locking OK
       
  3074   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
       
  3075   b(done);
       
  3076 #endif
       
  3077 
       
  3078   bind(fast_lock);
       
  3079   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
       
  3080 
       
  3081   bool allow_fallthrough_on_failure = true;
       
  3082   bool one_shot = true;
       
  3083   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
       
  3084 
       
  3085   bind(done);
       
  3086 
       
  3087 }
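// Illustrative sketch only: a rough C++ model of the recursive stack-lock test
// above. The lock is treated as recursive when the mark word read from the
// object is word aligned and points into the owning thread's stack, at or
// above SP and within one page of it; the AArch64 path folds both conditions
// into a single masked 'ands'. The helper name is hypothetical.
#if 0
static bool recursive_stack_lock_model(uintptr_t mark, uintptr_t sp, uintptr_t page_size) {
  return ((mark & 3) == 0) &&            // low two bits clear
         ((mark - sp) < page_size);      // unsigned: mark in [sp, sp + page_size)
}
#endif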
       
  3088 
       
  3089 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2  AARCH64_ONLY_ARG(Register Rscratch3))
       
  3090 {
       
  3091   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
       
  3092 
       
  3093   Register Rmark      = Rscratch2;
       
  3094 
       
  3095   assert(Roop != Rscratch, "");
       
  3096   assert(Roop != Rmark, "");
       
  3097   assert(Rbox != Rscratch, "");
       
  3098   assert(Rbox != Rmark, "");
       
  3099 
       
  3100   Label done;
       
  3101 
       
  3102   if (UseBiasedLocking && !UseOptoBiasInlining) {
       
  3103     biased_locking_exit(Roop, Rscratch, done);
       
  3104   }
       
  3105 
       
  3106   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
       
  3107   // If hdr is NULL, we've got recursive locking and there's nothing more to do
       
  3108   cmp(Rmark, 0);
       
  3109   b(done, eq);
       
  3110 
       
  3111   // Restore the object header
       
  3112   bool allow_fallthrough_on_failure = true;
       
  3113   bool one_shot = true;
       
  3114   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
       
  3115 
       
  3116   bind(done);
       
  3117 
       
  3118 }
       
  3119 #endif // COMPILER2
       
  3120