/*
 * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "ci/ciEnv.hpp"
#include "code/nativeInst.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif

// Implementation of AddressLiteral

void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(_target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(_target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Initially added to the Assembler interface as a pure virtual:
//   RegisterConstant delayed_value(..)
// for:
//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// this was subsequently modified to its present name and return type
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  ShouldNotReachHere();
  return RegisterOrConstant(-1);
}


#ifdef AARCH64
// Note: ARM32 version is OS dependent
void MacroAssembler::breakpoint(AsmCondition cond) {
  if (cond == al) {
    brk();
  } else {
    Label L;
    b(L, inverse(cond));
    brk();
    bind(L);
  }
}
#endif // AARCH64


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           Register vtable_index,
                                           Register method_result) {
  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
  ldr(method_result, Address(recv_klass, base_offset));
}
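
// Illustrative note (an informal sketch, not from the original source): for a
// given vtable_index i, the entry loaded above lives at
//   recv_klass + Klass::vtable_start_offset()
//              + i * wordSize + vtableEntry::method_offset_in_bytes()
// which is why vtableEntry::size() is asserted to be exactly one word above.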


// Simplified, combined version, good for typical uses.
// Falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp_reg2,
                                         Register temp_reg3,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
  bind(L_failure);
}
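
// Typical use (an informal sketch; the register choices and the
// throw_incompatible label are hypothetical):
//   Label ok;
//   check_klass_subtype(Rsub, Rsuper, Rtemp, R2, R3, ok);
//   b(throw_incompatible);  // falling through means the check failed
//   bind(ok);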

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp_reg2,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path) {

  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
  const Register super_check_offset = temp_reg2;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  b(*L_success, eq);

  // Check the supertype display:
  ldr_u32(super_check_offset, super_check_offset_addr);

  Address super_check_addr(sub_klass, super_check_offset);
|
  ldr(temp_reg, super_check_addr); // load displayed supertype
  cmp(super_klass, temp_reg);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  b(*L_success, eq);
  cmp_32(super_check_offset, sc_offset);
  if (L_failure == &L_fallthrough) {
    b(*L_slow_path, eq);
  } else {
    b(*L_failure, ne);
    if (L_slow_path != &L_fallthrough) {
      b(*L_slow_path);
    }
  }

  bind(L_fallthrough);
}
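
// Outcome summary for the fast path above (informal): equal klass pointers or
// a hit in the supertype display branch to L_success; a display miss where
// super_check_offset is not the secondary_super_cache offset branches to
// L_failure; only the remaining cache-probe case goes to L_slow_path.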


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Register temp3_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
#ifdef AARCH64
  NOT_IMPLEMENTED();
#else
  // Note: if used by code that expects a register to be 0 on success,
  // this register must be temp_reg and set_cond_codes must be true

  Register saved_reg = noreg;

  // get additional tmp registers
  if (temp3_reg == noreg) {
    saved_reg = temp3_reg = LR;
    push(saved_reg);
  }

  assert(temp2_reg != noreg, "need all the temporary registers");
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);

  Register cmp_temp = temp_reg;
  Register scan_temp = temp3_reg;
  Register count_temp = temp2_reg;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(sub_klass, sc_offset);

#ifndef PRODUCT
  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
#endif

  // We will consult the secondary-super array.
  ldr(scan_temp, Address(sub_klass, ss_offset));

|
  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // With compressed oops, search_key would have to be the compressed super_klass.
  Register search_key = super_klass;

  // Load the array length.
  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());

  add(count_temp, count_temp, 1);

  Label L_loop, L_setnz_and_fail, L_fail;

  // Top of search loop
  bind(L_loop);
  // Notes:
  //  scan_temp starts at the array elements
  //  count_temp is 1+size
  subs(count_temp, count_temp, 1);
  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
    // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
  } else {
    b(L_fail, eq); // not found in the array
  }

  // Load next super to check
  // In the array of super classes elements are pointer sized.
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success. Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Return success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
#endif
}

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}
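
// Example (informal): with params_count == n, the returned address is
// params_base + n * Interpreter::stackElementSize - Interpreter::stackElementSize,
// i.e. the deepest slot of the parameter area, which holds the receiver.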


void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) {
    nop();
  }
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
#ifdef AARCH64
    pc_offset = mov_pc_to(tmp);
    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
#else
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
#endif
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to the comment in javaFrameAnchor, SP must be saved last, so that other
  // entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.

  if (last_java_sp == noreg) {
    last_java_sp = SP; // always saved
  }
#ifdef AARCH64
  if (last_java_sp == SP) {
    mov(tmp, SP);
    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  } else {
    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  }
#else
  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  return pc_offset; // for oopmaps
}

void MacroAssembler::reset_last_Java_frame(Register tmp) {
  const Register Rzero = zero_register(tmp);
  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
  if (_fp_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
  }
  if (_pc_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
  }
}


// Implementation of call_VM versions

void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");

#ifndef AARCH64
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.
  push(RegisterSet(R4) | R9ifScratched);
  mov(R4, SP);
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // AARCH64
  call(entry_point, relocInfo::runtime_call_type);
#ifndef AARCH64
  mov(SP, R4);
  pop(RegisterSet(R4) | R9ifScratched);
#endif // AARCH64
}
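
// The R4 dance above (an informal note): R4 snapshots SP so that
// "bic SP, SP, #(StackAlignmentInBytes - 1)" can realign the stack for the
// C ABI (e.g. clearing the low bits when StackAlignmentInBytes == 8), and
// "mov SP, R4" restores the exact pre-call value afterwards.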


void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");

  const Register tmp = Rtemp;
  assert_different_registers(oop_result, tmp);

  set_last_Java_frame(SP, FP, true, tmp);

#ifdef ASSERT
  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
#endif // ASSERT

#ifndef AARCH64
#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.

  // Note: cannot save R9 above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
  sub(SP, SP, 4);
  bic(SP, SP, StackAlignmentInBytes - 1);
  str(R9, Address(SP, 0));
#else
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // R9_IS_SCRATCHED
#endif

  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);

#ifndef AARCH64
#if R9_IS_SCRATCHED
  ldr(R9, Address(SP, 0));
#endif
  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  reset_last_Java_frame(tmp);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe();
  check_and_handle_earlyret();

  if (check_exceptions) {
    // check for pending exceptions
    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
#ifdef AARCH64
    Label L;
    cbz(tmp, L);
    mov_pc_to(Rexception_pc);
    b(StubRoutines::forward_exception_entry());
    bind(L);
#else
    cmp(tmp, 0);
    mov(Rexception_pc, PC, ne);
    b(StubRoutines::forward_exception_entry(), ne);
#endif // AARCH64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, tmp);
  }
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  assert (arg_3 == R3, "fixed register for arg_3");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}

// Raw call, without saving/restoring registers, exception handling, etc.
// Mainly used from various stubs.
void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
  const Register tmp = Rtemp; // Rtemp free since scratched by call
  set_last_Java_frame(SP, FP, true, tmp);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    // Note: Saving also R10 for alignment.
    push(RegisterSet(R9, R10));
  }
#endif
  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    pop(RegisterSet(R9, R10));
  }
#endif
  reset_last_Java_frame(tmp);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_helper(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  assert (arg_1 == R0, "fixed register for arg_1");
  call_VM_leaf_helper(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  call_VM_leaf_helper(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  call_VM_leaf_helper(entry_point, 3);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  assert (arg_4 == R3, "fixed register for arg_4");
  call_VM_leaf_helper(entry_point, 4);
}

void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
  assert_different_registers(oop_result, tmp);
  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
  assert_different_registers(metadata_result, tmp);
  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
  if (arg2.is_register()) {
    add(dst, arg1, arg2.as_register());
  } else {
    add(dst, arg1, arg2.as_constant());
  }
}

void MacroAssembler::add_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c == 0) {
    if (rd != rn) {
      mov(rd, rn);
    }
    return;
  }
  if (c < 0) {
    sub_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large add_slow with only one register");
    mov_slow(rd, c);
    add(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      add(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}
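
// Informal note on the 0x3fc mask used above: an ARM rotated immediate encodes
// an 8-bit value rotated right by an even amount, so c & 0x3fc (an 8-bit field
// shifted left by 2) is always encodable. For example, c == 0x1404 splits into
// add #0x4 followed by add #0x1400, each a valid immediate.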

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c <= 0) {
    add_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large sub_slow with only one register");
    mov_slow(rd, c);
    sub(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      sub(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}

#ifdef AARCH64

// Common code for mov_slow and instr_count_for_mov_slow.
// Returns number of instructions of mov_slow pattern,
// generating it if non-null MacroAssembler is given.
int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it when making modifications.

  const intx mask = right_n_bits(16);
  // 1 movz instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if ((c & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 movn instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if (((~c) & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 orr instruction
  {
    LogicalImmediate imm(c, false);
    if (imm.is_encoded()) {
      if (masm != NULL) {
        masm->orr(rd, ZR, imm);
      }
      return 1;
    }
  }
  // 1 movz/movn + up to 3 movk instructions
  int zeroes = 0;
  int ones = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part == 0) {
      ++zeroes;
    } else if (part == mask) {
      ++ones;
    }
  }
  int def_bits = 0;
  if (ones > zeroes) {
    def_bits = mask;
  }
  int inst_count = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part != def_bits) {
      if (masm != NULL) {
        if (inst_count > 0) {
          masm->movk(rd, part, base_shift);
        } else {
          if (def_bits == 0) {
            masm->movz(rd, part, base_shift);
          } else {
            masm->movn(rd, ~part & mask, base_shift);
          }
        }
      }
      inst_count++;
    }
  }
  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
  return inst_count;
}
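
// Example (informal): c == 0x12340000ffff falls through to the last pattern
// with def_bits == 0 and is materialized as
//   movz rd, #0xffff              // 16-bit chunk at shift 0
//   movk rd, #0x1234, lsl #32     // only non-default chunks are patched
// for an inst_count of 2.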

void MacroAssembler::mov_slow(Register rd, intptr_t c) {
#ifdef ASSERT
  int off = offset();
#endif
  (void) mov_slow_helper(rd, c, this);
  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
}

// Counts instructions generated by mov_slow(rd, c).
int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
  return mov_slow_helper(noreg, c, NULL);
}

int MacroAssembler::instr_count_for_mov_slow(address c) {
  return mov_slow_helper(noreg, (intptr_t)c, NULL);
}

#else

void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
  if (AsmOperand::is_rotated_imm(c)) {
    mov(rd, c, cond);
  } else if (AsmOperand::is_rotated_imm(~c)) {
    mvn(rd, ~c, cond);
  } else if (VM_Version::supports_movw()) {
    movw(rd, c & 0xffff, cond);
    if ((unsigned int)c >> 16) {
      movt(rd, (unsigned int)c >> 16, cond);
    }
  } else {
    // Find first non-zero bit
    int shift = 0;
    while ((c & (3 << shift)) == 0) {
      shift += 2;
    }
    // Put the least significant part of the constant
    int mask = 0xff << shift;
    mov(rd, c & mask, cond);
    // Add up to 3 other parts of the constant;
    // each of them can be represented as rotated_imm
    if (c & (mask << 8)) {
      orr(rd, rd, c & (mask << 8), cond);
    }
    if (c & (mask << 16)) {
      orr(rd, rd, c & (mask << 16), cond);
    }
    if (c & (mask << 24)) {
      orr(rd, rd, c & (mask << 24), cond);
    }
  }
}
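
// Example (informal) for the no-movw fallback above: c == 0x00123456 has its
// lowest set bit at position 1, so shift == 0 and mask == 0xff; the emitted
// sequence is
//   mov rd, #0x56; orr rd, rd, #0x3400; orr rd, rd, #0x120000
// i.e. the constant is assembled one byte-sized rotated immediate at a time.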

#endif // AARCH64

void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
#ifdef AARCH64
                             bool patchable
#else
                             AsmCondition cond
#endif
                             ) {

  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
    mov(rd, ZR);
#else
    mov(rd, 0, cond);
#endif
    return;
  }

  if (oop_index == 0) {
    oop_index = oop_recorder()->allocate_oop_index(o);
  }
  relocate(oop_Relocation::spec(oop_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, 0, cond);
    movt(rd, 0, cond);
  } else {
    ldr(rd, Address(PC), cond);
    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif
}

void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
#endif
    mov(rd, 0);
    return;
  }

  if (metadata_index == 0) {
    metadata_index = oop_recorder()->allocate_metadata_index(o);
  }
  relocate(metadata_Relocation::spec(metadata_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
#ifdef COMPILER2
  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
    mov_slow(rd, (address)o);
    return;
  }
#endif
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, ((int)o) & 0xffff);
    movt(rd, (unsigned int)o >> 16);
  } else {
    ldr(rd, Address(PC));
    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif // AARCH64
}

void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jfloat f;
    jint i;
  } accessor;
  accessor.f = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
  Label L;
  ldr_s(fd, target(L));
  b(skip_constant);
  bind(L);
  emit_int32(accessor.i);
  bind(skip_constant);
#else
  flds(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jdouble d;
    jint i[2];
  } accessor;
  accessor.d = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of double constants with fmov
  Label L;
  ldr_d(fd, target(L));
  b(skip_constant);
  align(wordSize);
  bind(L);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#else
  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
#ifdef AARCH64
  assert((addr & 0x3) == 0, "address should be aligned");

  // FIXME: TODO
  if (false && page_reachable_from_cache(address_of_global)) {
    assert(false, "TODO: relocate");
    //relocate();
    adrp(reg, address_of_global);
    ldrsw(reg, Address(reg, addr & 0xfff));
  } else {
    mov_slow(reg, addr & ~0x3fff);
    ldrsw(reg, Address(reg, addr & 0x3fff));
  }
#else
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
#endif
}

void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
#ifdef AARCH64
  intptr_t addr = (intptr_t) address_of_global;
  assert ((addr & 0x7) == 0, "address should be aligned");
  mov_slow(reg, addr & ~0x7fff);
  ldr(reg, Address(reg, addr & 0x7fff));
#else
  ldr_global_s32(reg, address_of_global);
#endif
}

void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
  case 8:  uxtb(rd, rn); break;
  case 16: uxth(rd, rn); break;
  case 32: mov_w(rd, rn); break;
  default: ShouldNotReachHere();
  }
#else
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
#endif
}
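
// Example (informal): zero_extend(rd, rn, 5) on 32-bit ARM emits
// "and rd, rn, #0x1f", and bits == 30 becomes "bic rd, rn, #0xc0000000";
// the two-shift form is only needed for the middle widths, where neither
// mask is a valid rotated immediate.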

void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
  case 8:  sxtb(rd, rn); break;
  case 16: sxth(rd, rn); break;
  case 32: sxtw(rd, rn); break;
  default: ShouldNotReachHere();
  }
#else
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
#endif
}

#ifndef AARCH64

void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
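
// Example (informal): long_shift(rd_lo, rd_hi, rn_lo, rn_hi, lsl, 40) takes
// the count >= 32 branch above and emits
//   mov rd_hi, rn_lo, lsl #8
//   mov rd_lo, #0
// since shifting a 64-bit value left by 40 moves low-word bits into the high word.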
#endif // !AARCH64

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it when making modifications.
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg); // oop to verify
  }
  mov(R1, SP); // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg); // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index() == noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr); // oop to verify
  mov(R1, SP); // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg); // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
#ifdef AARCH64
    ldr(ZR, Address(reg));
#else
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((! Thread::current()->is_Compiler_thread()) ||
             (! (ciEnv::current()->task() == NULL)) ||
             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
#endif
  }
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
  if (load_const) {
    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Calculate new heap_top by adding the size of the object
  Label retry;
  bind(retry);

#ifdef AARCH64
  ldxr(obj, top_addr);
#else
  ldr(obj, Address(top_addr));
#endif // AARCH64

  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  cmp(obj_end, obj);
  b(slow_case, lo);
  // Update heap_top if allocation succeeded
  cmp(obj_end, heap_end);
  b(slow_case, hi);

#ifdef AARCH64
  stxr(heap_end/*scratched*/, obj_end, top_addr);
  cbnz_w(heap_end, retry);
#else
  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  b(retry, ne);
#endif // AARCH64
}
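
// The retry loop above is a classic CAS-based bump-pointer allocation;
// informally:
//   do {
//     obj = *top; obj_end = obj + size;
//     if (obj_end < obj || obj_end > heap_end) goto slow_case;
//   } while (!CAS(top, obj, obj_end));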

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  add_rc(obj_end, obj, size_expression);
  cmp(obj_end, tlab_end);
  b(slow_case, hi);
  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}
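
// Unlike eden_allocate, no atomics are needed here because the TLAB is
// thread-local; informally: obj = tlab_top; obj_end = obj + size;
// if (obj_end > tlab_end) goto slow_case; tlab_top = obj_end;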

void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2,
                                 Register tmp3, Register tmp4,
                                 Label& try_eden, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr());
  Label discard_tlab, do_refill;
  ldr(top, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
  ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));

  // Calculate amount of free space
  sub(tmp1, tmp1, top);
  // Retain tlab and allocate in shared space
  // if the amount of free space in tlab is too large to discard
  cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize));
  b(discard_tlab, ge);

  // Increment waste limit to prevent getting stuck on this slow path
  mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  add(tmp2, tmp2, tmp3);
  str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
  if (TLABStats) {
    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
    add_32(tmp2, tmp2, 1);
    str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
  }
  b(try_eden);
  bind_literal(intArrayKlass_addr);

  bind(discard_tlab);
  if (TLABStats) {
    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
    ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
    add_32(tmp2, tmp2, 1);
    add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize));
    str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
    str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
  }
  // If tlab is currently allocated (top or end != null)
  // then fill [top, end + alignment_reserve) with array object
  cbz(top, do_refill);

  // Set up the mark word
  mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  str(tmp2, Address(top, oopDesc::mark_offset_in_bytes()));
  // Set klass to intArrayKlass and the length to the remaining space
  ldr_literal(tmp2, intArrayKlass_addr);
  add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() -
      typeArrayOopDesc::header_size(T_INT) * HeapWordSize);
  Register klass = tmp2;
  ldr(klass, Address(tmp2));
  logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint)
  str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes()));
  store_klass(klass, top); // blows klass:
  klass = noreg;

  ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset()));
  sub(tmp1, top, tmp1); // size of tlab's allocated portion
  incr_allocated_bytes(tmp1, tmp2);

  bind(do_refill);
  // Refill the tlab with an eden allocation
  ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset()));
  logical_shift_left(tmp4, tmp1, LogHeapWordSize);
  eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case);
  str(top, Address(Rthread, JavaThread::tlab_start_offset()));
  str(top, Address(Rthread, JavaThread::tlab_top_offset()));

#ifdef ASSERT
  // Verify that tmp1 contains tlab_end
  ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset()));
  add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize));
  cmp(tmp1, tmp2);
  breakpoint(ne);
#endif

  sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  str(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));

  if (ZeroTLAB) {
    // clobbers start and tmp
    // top must be preserved!
    add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
    ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset()));
    zero_memory(tmp2, tmp1, tmp3);
  }
}

// Fills the memory region [start, end) with zeroes. Clobbers `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

#ifdef AARCH64
  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
  const Register size = tmp;
  Label remaining, done;

  sub(size, end, start);

#ifdef ASSERT
  { Label L;
    tst(size, wordSize - 1);
    b(L, eq);
    stop("size is not a multiple of wordSize");
    bind(L);
  }
#endif // ASSERT

  subs(size, size, wordSize);
  b(remaining, le);

  // Zero by 2 words per iteration.
  bind(loop);
  subs(size, size, 2*wordSize);
  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
  b(loop, gt);

  bind(remaining);
  b(done, ne);
  str(ZR, Address(ptr));
  bind(done);
#else
  mov(tmp, 0);
  bind(loop);
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
#endif // AARCH64
}

void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  add_rc(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump total bytes allocated by this thread
  Label done;

  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
  b(done, cc);

  // Increment the high word and store single-copy atomically (this is an unlikely
  // scenario on typical embedded systems, as it means more than 4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of
  // registers. Such a request could be difficult to satisfy by allocating those
  // registers on a higher level, therefore the routine is ready to allocate a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3

  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low = R2;
    high = R3;
  } else {
    low = R0;
    high = R1;
  }
  push(RegisterSet(low, high));

  ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(low, low, size_in_bytes);
  adc(high, high, 0);
  strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));

  pop(RegisterSet(low, high));

  bind(done);
#endif // AARCH64
}

void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  if (UseStackBanging) {
    const int page_size = os::vm_page_size();

    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
    strb(R0, Address(tmp));
#ifdef AARCH64
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
      sub(tmp, tmp, page_size);
      strb(R0, Address(tmp));
    }
#else
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
      strb(R0, Address(tmp, -0xff0, pre_indexed));
    }
#endif // AARCH64
  }
}

void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  if (UseStackBanging) {
    Label loop;

    mov(tmp, SP);
    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
#ifdef AARCH64
    sub(tmp, tmp, Rsize);
    bind(loop);
    subs(Rsize, Rsize, os::vm_page_size());
    strb(ZR, Address(tmp, Rsize));
#else
    bind(loop);
    subs(Rsize, Rsize, 0xff0);
    strb(R0, Address(tmp, -0xff0, pre_indexed));
#endif // AARCH64
    b(loop, hi);
  }
}

void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it when making modifications.
|
1537 #ifdef COMPILER1 |
|
1538 if (CommentedAssembly) { |
|
1539 block_comment("stop"); |
|
1540 } |
|
1541 #endif |
|
1542 |
|
1543 InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug)); |
|
1544 InlinedString Lmsg(msg); |
|
1545 |
|
1546 // save all registers for further inspection |
|
1547 save_all_registers(); |
|
1548 |
|
1549 ldr_literal(R0, Lmsg); // message |
|
1550 mov(R1, SP); // register save area |
|
1551 |
|
1552 #ifdef AARCH64 |
|
1553 ldr_literal(Rtemp, Ldebug); |
|
1554 br(Rtemp); |
|
1555 #else |
|
1556 ldr_literal(PC, Ldebug); // call MacroAssembler::debug |
|
1557 #endif // AARCH64 |
|
1558 |
|
1559 #if defined(COMPILER2) && defined(AARCH64) |
|
1560 int off = offset(); |
|
1561 #endif |
|
1562 bind_literal(Lmsg); |
|
1563 bind_literal(Ldebug); |
|
1564 #if defined(COMPILER2) && defined(AARCH64) |
|
1565 if (offset() - off == 2 * wordSize) { |
|
1566 // no padding, so insert nop for worst-case sizing |
|
1567 nop(); |
|
1568 } |
|
1569 #endif |
|
1570 } |
|
void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

#ifdef AARCH64
  // TODO-AARCH64 - get rid of extra debug parameters
  mov(R1, LR);
  mov(R2, FP);
  add(R3, SP, push_size);
#endif

  ldr_literal(R0, Lmsg);                     // message
  ldr_literal(LR, Lwarn);                    // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}
|
int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it at modifications.
#ifdef AARCH64
  const Register tmp = Rtemp;
  raw_push(R30, ZR);
  for (int i = 28; i >= 0; i -= 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  mov_pc_to(tmp);
  str(tmp, Address(SP, 31*wordSize));
  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
  return 32*wordSize;
#else
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
#endif // AARCH64
}
|
void MacroAssembler::restore_all_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 28; i += 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
  raw_pop(R30, ZR);
#else
  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
  add(SP, SP, wordSize);                       // discard saved PC
#endif // AARCH64
}

int MacroAssembler::save_caller_save_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 16; i += 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  raw_push(R18, LR);
  return 20*wordSize;
#else
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
#endif // AARCH64
}

void MacroAssembler::restore_caller_save_registers() {
#ifdef AARCH64
  raw_pop(R18, LR);
  for (int i = 16; i >= 0; i -= 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
#else
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
#endif // AARCH64
}
|
void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
#ifdef AARCH64
      // saved registers: R0-R30, PC
      const int nregs = 32;
#else
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
#endif // AARCH64

      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
      }

#ifdef AARCH64
      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
#endif // AARCH64

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
    }
    BREAKPOINT;
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  assert(false, "DEBUG MESSAGE: %s", msg);
  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}
|
// Implementation of FixedSizeCodeBlock

FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}
|
#ifdef AARCH64

// Serializes memory.
// tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM
void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
  if (!os::is_MP()) return;

  // TODO-AARCH64 investigate dsb vs dmb effects
  if (order_constraint == StoreStore) {
    dmb(DMB_st);
  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
    dmb(DMB_ld);
  } else {
    dmb(DMB_all);
  }
}
|
#else

// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
// preserve_flags takes a longer path in LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {
  if (!os::is_MP()) return;

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: speculative stores reordering is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    b(not_taken, ne);
  }
}
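
// Note (added commentary, not in the upstream sources): the cmp/b(ne) pair above
// is never taken by construction (a register always equals itself), but it creates
// a control dependency on the value loaded into load_tgt. On ARM, a store that is
// control-dependent on a load cannot become visible before that load completes,
// which yields the required LoadStore ordering without a dmb; the price is that the
// condition flags are clobbered, hence the preserve_flags escape hatch above.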
|
#endif // AARCH64

// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter does.
// TODO: measure if it makes a difference
|
void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{
  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

#ifdef AARCH64

  Register Rscratch = tmp;
  Register Roop = base;
  Register mark = oldval;
  Register Rbox = newval;
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  // Instead of StoreStore here, we use store-release-exclusive below

  bind(loop);

  ldaxr(tmp, base);  // acquire
  cmp(tmp, oldval);
  b(slow_case, ne);
  stlxr(tmp, newval, base); // release
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }

  // MemBarAcquireLock would normally go here, but
  // we already do ldaxr+stlxr above, which has
  // Sequential Consistency

#else
  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}
|
void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{
  bool fallthrough_is_success = false;

  assert_different_registers(oldval, newval, base, tmp);

#ifdef AARCH64
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  bind(loop);
  ldxr(tmp, base);
  cmp(tmp, oldval);
  b(slow_case, ne);
  // MemBarReleaseLock barrier
  stlxr(tmp, newval, base);
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }
#else
  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}
|
#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != NULL) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

#ifdef AARCH64
    raw_push(R0, R1);
    raw_push(R2, ZR);

    ldr_literal(R0, counter_addr_literal);

    bind(retry);
    ldxr_w(R1, R0);
    add_w(R1, R1, 1);
    stxr_w(R2, R1, R0);
    cbnz_w(R2, retry);

    raw_pop(R2, ZR);
    raw_pop(R0, R1);
#else
    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp);

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp);

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
#endif // AARCH64

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}
|
#endif // !PRODUCT

// Building block for CAS cases of biased locking: makes CAS and records statistics.
// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
                                                   Register tmp, Label& slow_case, int* counter_addr) {

  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
#ifdef ASSERT
  breakpoint(ne); // Fallthrough only on success
#endif
#ifndef PRODUCT
  if (counter_addr != NULL) {
    cond_atomic_inc32(al, counter_addr);
  }
#endif // !PRODUCT
}
|
int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Register tmp2,
                                         Label& done, Label& slow_case,
                                         BiasedLockingCounters* counters) {
  // obj_reg must be preserved (at least) if the bias locking fails
  // tmp_reg is a temporary register
  // swap_reg was used as a temporary but contained a value
  //   that was used afterwards in some call paths. Callers
  //   have been fixed so that swap_reg no longer needs to be
  //   saved.
  // Rtemp is no longer scratched

  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
  guarantee(swap_reg != tmp_reg, "invariant");
  assert(tmp_reg != noreg, "must supply tmp_reg");

#ifndef PRODUCT
  if (PrintBiasedLockingStatistics && (counters == NULL)) {
    counters = BiasedLocking::counters();
  }
#endif

  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;

  // The null check applies to the mark loading, if we need to load it.
  // If the mark has already been loaded in swap_reg then it has already
  // been performed and the offset is irrelevant.
  int null_check_offset = offset();
  if (!swap_reg_contains_mark) {
    ldr(swap_reg, mark_addr);
  }

  // On MP platform loads could return 'stale' values in some cases.
  // That is acceptable since either CAS or slow case path is taken in the worst case.

  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);

  b(cas_label, ne);

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_klass(tmp_reg, obj_reg);
  ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  orr(tmp_reg, tmp_reg, Rthread);
  eor(tmp_reg, tmp_reg, swap_reg);

#ifdef AARCH64
  ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
#else
  bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
#endif // AARCH64

#ifndef PRODUCT
  if (counters != NULL) {
    cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
  }
#endif // !PRODUCT

  b(done, eq);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  b(try_revoke_bias, ne);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
  b(try_rebias, ne);

  // tmp_reg has the age, epoch and pattern bits cleared
  // The remaining (owner) bits are (Thread ^ current_owner)

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  // Note that we know the owner is not ourselves. Hence, success can
  // only happen when the owner bits are 0

#ifdef AARCH64
  // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
  // cleared bit in the middle (cms bit). So it is loaded with separate instruction.
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(swap_reg, swap_reg, tmp2);
#else
  // until the assembler can be made smarter, we need to make some assumptions about the values
  // so we can optimize this:
  assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");

  mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
  mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
#endif // AARCH64
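
  // Note (added commentary, not in the upstream sources): on 32-bit ARM the
  // lsl 23 / lsr 23 pair keeps only the low 32 - 23 = 9 bits of the mark word,
  // i.e. exactly the biased_lock + age + epoch mask 0x1ff asserted above, using
  // two shifts instead of a bitmask that cannot be encoded as one immediate.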
|
  orr(tmp_reg, swap_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
        (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  b(done);

  bind(try_rebias);

  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)

  // owner bits 'random'. Set them to Rthread.
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  orr(tmp_reg, tmp_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
        (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.

  b(done);

  bind(try_revoke_bias);

  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)

  // owner bits 'random'. Clear them
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
        (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);

  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.

  bind(cas_label);

  return null_check_offset;
}
|
void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));

  andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
  b(done, eq);
}
|
#ifdef AARCH64

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
    case 8: ldr(dst, src); break;
    case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
    case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
    case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
    default: ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
  switch (size_in_bytes) {
    case 8: str(src, dst); break;
    case 4: str_32(src, dst); break;
    case 2: strh(src, dst); break;
    case 1: strb(src, dst); break;
    default: ShouldNotReachHere();
  }
}

#else

void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed, AsmCondition cond) {
  switch (size_in_bytes) {
    case 4: ldr(dst, src, cond); break;
    case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
    case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
    default: ShouldNotReachHere();
  }
}


void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
  switch (size_in_bytes) {
    case 4: str(src, dst, cond); break;
    case 2: strh(src, dst, cond); break;
    case 1: strb(src, dst, cond); break;
    default: ShouldNotReachHere();
  }
}
#endif // AARCH64
|
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <Rinterf, Rindex>.
// The receiver klass is in Rklass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register Rklass,
                                             Register Rinterf,
                                             Register Rindex,
                                             Register method_result,
                                             Register temp_reg1,
                                             Register temp_reg2,
                                             Label& L_no_such_interface) {

  assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex);

  Register Ritable = temp_reg1;

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  const int base = in_bytes(Klass::vtable_start_offset());
  const int scale = exact_log2(vtableEntry::size_in_bytes());
  ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
  add(Ritable, Rklass, base);
  add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale));

  Label entry, search;

  b(entry);

  bind(search);
  add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize);

  bind(entry);

  // Check that the entry is non-null. A null entry means that the receiver
  // class doesn't implement the interface, and wasn't the same as the
  // receiver class checked when the interface was resolved.

  ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes()));
  cbz(temp_reg2, L_no_such_interface);

  cmp(Rinterf, temp_reg2);
  b(search, ne);

  ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes()));
  add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass*
  assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
  assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");

  ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex));
}
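
// Note (added commentary, not in the upstream sources): the itable scanned above
// sits right behind the embedded vtable in the Klass. It is a null-terminated list
// of {interface klass, offset} pairs (itableOffsetEntry); once the entry whose
// interface matches Rinterf is found, 'offset' locates that interface's block of
// itableMethodEntry slots within the same Klass, and Rindex selects the method.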
|
#ifdef COMPILER2
// TODO: 8 bytes at a time? pre-fetch?
// Compare char[] arrays aligned to 4 bytes.
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
                                        Register limit, Register result,
                                        Register chr1, Register chr2, Label& Ldone) {
  Label Lvector, Lloop;

  // Note: limit contains number of bytes (2*char_elements) != 0.
  tst(limit, 0x2); // trailing character ?
  b(Lvector, eq);

  // compare the trailing char
  sub(limit, limit, sizeof(jchar));
  ldrh(chr1, Address(ary1, limit));
  ldrh(chr2, Address(ary2, limit));
  cmp(chr1, chr2);
  mov(result, 0, ne);     // not equal
  b(Ldone, ne);

  // only one char ?
  tst(limit, limit);
  mov(result, 1, eq);
  b(Ldone, eq);

  // word by word compare, don't need alignment check
  bind(Lvector);

  // Shift ary1 and ary2 to the end of the arrays, negate limit
  add(ary1, limit, ary1);
  add(ary2, limit, ary2);
  neg(limit, limit);

  bind(Lloop);
  ldr_u32(chr1, Address(ary1, limit));
  ldr_u32(chr2, Address(ary2, limit));
  cmp_32(chr1, chr2);
  mov(result, 0, ne);     // not equal
  b(Ldone, ne);
  adds(limit, limit, 2*sizeof(jchar));
  b(Lloop, ne);

  // Caller should set it:
  // mov(result_reg, 1);  //equal
}
#endif
|
void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
  ldr_s32(tmpreg2, tmpreg1);
  add_32(tmpreg2, tmpreg2, 1);
  str_32(tmpreg2, tmpreg1);
}
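
// Note (added commentary, not in the upstream sources): unlike cond_atomic_inc32
// above, this is a plain load/add/store with no atomic update and no barrier, so
// concurrent increments can be lost; that is acceptable for approximate
// statistics counters.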
|
void MacroAssembler::floating_cmp(Register dst) {
#ifdef AARCH64
  NOT_TESTED();
  cset(dst, gt);            // 1 if '>', else 0
  csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
#else
  vmrs(dst, FPSCR);
  orr(dst, dst, 0x08000000);
  eor(dst, dst, AsmOperand(dst, lsl, 3));
  mov(dst, AsmOperand(dst, asr, 30));
#endif
}
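
// Note (added commentary, not in the upstream sources): a worked trace of the
// 32-bit bit trick above. After vmrs, dst[31:28] = N,Z,C,V from the FP compare;
// orr sets bit 27; eor with (dst lsl 3) makes bit 31 = N^V and bit 30 = Z^1;
// asr 30 then yields those two bits sign-extended. For FCMP results:
//   greater   (N=0 Z=0 C=1 V=0) -> 01 ->  1
//   equal     (N=0 Z=1 C=1 V=0) -> 00 ->  0
//   less      (N=1 Z=0 C=0 V=0) -> 11 -> -1
//   unordered (N=0 Z=0 C=1 V=1) -> 11 -> -1 (NaN compares as 'less')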
|
void MacroAssembler::restore_default_fp_mode() {
#ifdef AARCH64
  msr(SysReg_FPCR, ZR);
#else
#ifndef __SOFTFP__
  // Round to Near mode, IEEE compatible, masked exceptions
  mov(Rtemp, 0);
  vmsr(FPSCR, Rtemp);
#endif // !__SOFTFP__
#endif // AARCH64
}

#ifndef AARCH64
// 24-bit word range == 26-bit byte range
bool check26(int offset) {
  // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
  int off1 = offset << 6 >> 8;
  int encoded = off1 & ((1<<24)-1);
  int decoded = encoded << 8 >> 6;
  return offset == decoded;
}
#endif // !AARCH64
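
// Note (added commentary, not in the upstream sources): check26 verifies that a
// byte offset survives the B/BL encoding round-trip: the instruction stores a
// signed 24-bit word offset, so representable byte offsets are multiples of 4
// within roughly +/-32MB. For example, offset 0x02000000 (32MB) encodes to
// 0x800000 and decodes to -0x02000000, so it is correctly rejected, while
// 0x01FFFFFC round-trips unchanged.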
|
// Perform some slight adjustments so the default 32MB code cache
// is fully reachable.
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}
static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

#ifdef AARCH64
// Can we reach target using ADRP?
bool MacroAssembler::page_reachable_from_cache(address target) {
  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
  intptr_t addr = (intptr_t)target & ~0xfff;

  intptr_t loffset = addr - cl;
  intptr_t hoffset = addr - ch;
  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
}
#endif
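
// Note (added commentary, not in the upstream sources): ADRP computes a
// 4KB-page-relative address from a signed 21-bit page count, i.e. +/-4GB around
// the current page. Masking both code cache bounds and the target with ~0xfff and
// range-checking (offset >> 12) against 21 bits therefore proves the target page
// is reachable from anywhere in the code cache.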
|
// Can we reach target using unconditional branch or call from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::_reachable_from_cache(address target) {
#ifdef __thumb__
  if ((1 & (intptr_t)target) != 0) {
    // Return false to avoid 'b' if we need to switch to THUMB mode.
    return false;
  }
#endif

  address cl = first_cache_address();
  address ch = last_cache_address();

  if (ForceUnreachable) {
    // Only addresses from CodeCache can be treated as reachable.
    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
      return false;
    }
  }

  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;

#ifdef AARCH64
  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
#else
  return check26(loffset - 8) && check26(hoffset - 8);
#endif
}
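
// Note (added commentary, not in the upstream sources): the '- 8' in the 32-bit
// path accounts for the ARM pipeline: a branch at address A encodes its target
// relative to A + 8 (the PC reads two instructions ahead), so the offset that
// must be encodable is target - (A + 8).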
|
bool MacroAssembler::reachable_from_cache(address target) {
  assert(CodeCache::contains(pc()), "not supported");
  return _reachable_from_cache(target);
}

// Can we reach the entire code cache from anywhere else in the code cache?
bool MacroAssembler::_cache_fully_reachable() {
  address cl = first_cache_address();
  address ch = last_cache_address();
  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
}

bool MacroAssembler::cache_fully_reachable() {
  assert(CodeCache::contains(pc()), "not supported");
  return _cache_fully_reachable();
}

void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (reachable_from_cache(target)) {
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert (scratch != noreg, "should be specified");
  InlinedAddress address_literal(target, rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    bx(scratch, cond);
  } else {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}
|
// Similar to jump except that:
// - near calls are valid only if any destination in the cache is near
// - no movt/movw (not atomically patchable)
void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Discard the relocation information if not needed for CacheCompiledCode
  // since the next encodings are all in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert (scratch != noreg, "should be specified");
  InlinedAddress address_literal(target);
  relocate(rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}
|
void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
  Register scratch = LR;
  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
  if (reachable_from_cache(target)) {
    relocate(rspec);
    bl(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    // This assumes the information was needed only for relocating the code.
    rspec = RelocationHolder::none;
  }

#ifndef AARCH64
  if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    blx(scratch, cond);
    return;
  }
#endif

  {
    Label ret_addr;
#ifndef AARCH64
    if (cond != al) {
      b(ret_addr, inverse(cond));
    }
#endif

#ifdef AARCH64
    // TODO-AARCH64: make more optimal implementation
    // [ Keep in sync with MacroAssembler::call_size ]
    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
    mov_slow(scratch, target);
    blr(scratch);
#else
    InlinedAddress address_literal(target);
    relocate(rspec);
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);

    bind_literal(address_literal);
    bind(ret_addr);
#endif
  }
}
|
#if defined(AARCH64) && defined(COMPILER2)
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  // FIXME: mov_slow is variable-length
  if (!far)      return 1; // bl
  if (patchable) return 2; // ldr; blr
  return instr_count_for_mov_slow((intptr_t)target) + 1;
}
#endif

int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
  assert(rspec.type() == relocInfo::static_call_type ||
         rspec.type() == relocInfo::none ||
         rspec.type() == relocInfo::opt_virtual_call_type, "not supported");

  // Always generate the relocation information, needed for patching
  relocate(rspec); // used by NativeCall::is_call_before()
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    bl(target);
  } else {
#if defined(AARCH64) && defined(COMPILER2)
    if (c2) {
      // return address needs to match call_size().
      // no need to trash Rtemp
      int off = offset();
      Label skip_literal;
      InlinedAddress address_literal(target);
      ldr_literal(LR, address_literal);
      blr(LR);
      int ret_addr_offset = offset();
      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
      b(skip_literal);
      int off2 = offset();
      bind_literal(address_literal);
      if (offset() - off2 == wordSize) {
        // no padding, so insert nop for worst-case sizing
        nop();
      }
      bind(skip_literal);
      return ret_addr_offset;
    }
#endif
    Label ret_addr;
    InlinedAddress address_literal(target);
#ifdef AARCH64
    ldr_literal(Rtemp, address_literal);
    adr(LR, ret_addr);
    br(Rtemp);
#else
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);
#endif
    bind_literal(address_literal);
    bind(ret_addr);
  }
  return offset();
}
|
void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  ldr(tmp, Address(method, Method::const_offset()));
  ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
  ldr(mirror, Address(tmp, mirror_offset));
}
|
///////////////////////////////////////////////////////////////////////////////

// Compressed pointers

#ifdef AARCH64

void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
  if (UseCompressedClassPointers) {
    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst_klass);
  } else {
    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
  }
}

#else

void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
}

#endif // AARCH64

// Blows src_klass.
void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
#ifdef AARCH64
  if (UseCompressedClassPointers) {
    assert(src_klass != dst_oop, "not enough registers");
    encode_klass_not_null(src_klass);
    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
    return;
  }
#endif // AARCH64
  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
}

#ifdef AARCH64

void MacroAssembler::store_klass_gap(Register dst) {
  if (UseCompressedClassPointers) {
    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
  }
}

#endif // AARCH64
|
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef AARCH64
  if (UseCompressedOops) {
    ldr_w(dst, src);
    decode_heap_oop(dst);
    return;
  }
#endif // AARCH64
  ldr(dst, src);
}

// Blows src and flags.
void MacroAssembler::store_heap_oop(Register src, Address dst) {
#ifdef AARCH64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    str_w(src, dst);
    return;
  }
#endif // AARCH64
  str(src, dst);
}

void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
#ifdef AARCH64
  if (UseCompressedOops) {
    str_w(src, dst);
    return;
  }
#endif // AARCH64
  str(src, dst);
}
|
#ifdef AARCH64

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
  // Update it at modifications.
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    tst(src, src);
    csel(dst, Rheap_base, src, eq);
    sub(dst, dst, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    tst(src, src);
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
    csel(dst, dst, ZR, ne);
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}
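
// Note (added commentary, not in the upstream sources): with a non-null heap base B
// and shift s, a narrow oop n decodes to B + (n << s), except that n == 0 must map
// to NULL rather than B; the tst/csel pair handles that special case. Encoding is
// the inverse: NULL is first substituted with B so that (oop - B) >> s yields 0.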
|
#ifdef COMPILER2
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    sub(dst, src, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  assert(UseCompressedClassPointers, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);

  // Relocation with special format (see relocInfo_arm.hpp).
  relocate(rspec);
  narrowKlass encoded_k = Klass::encode_klass(k);
  movz(dst, encoded_k & 0xffff, 0);
  movk(dst, (encoded_k >> 16) & 0xffff, 16);
}

void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert(UseCompressedOops, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);

  relocate(rspec);
  movz(dst, 0xffff, 0);
  movk(dst, 0xffff, 16);
}

#endif // COMPILER2
|
// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    sub(r, r, Rheap_base);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    _lsr(r, r, Universe::narrow_klass_shift());
  }
  if (Universe::narrow_klass_base() != NULL) {
    reinit_heapbase();
  }
}

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
    return;
  }
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
    sub(dst, src, dst);
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_klass_shift());
    }
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_klass_shift());
    } else {
      mov(dst, src);
    }
  }
}

// Function instr_count_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL). Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_count_for_decode_klass_not_null() {
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  if (Universe::narrow_klass_base() != NULL) {
    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
      1 +                                                            // add
      instr_count_for_mov_slow(Universe::narrow_ptrs_base());        // reinit_heapbase() = mov_slow
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      return 1;
    }
  }
  return 0;
}
|
2943 |
|
2944 // Must preserve condition codes, or C2 decodeKlass_not_null rule |
|
2945 // must be changed. |
|
2946 void MacroAssembler::decode_klass_not_null(Register r) { |
|
2947 int off = offset(); |
|
2948 assert(UseCompressedClassPointers, "should only be used for compressed headers"); |
|
2949 assert(Universe::heap() != NULL, "java heap should be initialized"); |
|
2950 assert(r != Rheap_base, "Decoding a klass in Rheap_base"); |
|
2951 // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. |
|
2952 // Also do not verify_oop as this is called by verify_oop. |
|
2953 if (Universe::narrow_klass_base() != NULL) { |
|
2954 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. |
|
2955 mov_slow(Rheap_base, Universe::narrow_klass_base()); |
|
2956 add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift())); |
|
2957 reinit_heapbase(); |
|
2958 } else { |
|
2959 if (Universe::narrow_klass_shift() != 0) { |
|
2960 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
|
2961 _lsl(r, r, Universe::narrow_klass_shift()); |
|
2962 } |
|
2963 } |
|
2964 assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null"); |
|
2965 } |
|
2966 |
|
2967 // Must preserve condition codes, or C2 decodeKlass_not_null rule |
|
2968 // must be changed. |
|
2969 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { |
|
2970 if (src == dst) { |
|
2971 decode_klass_not_null(src); |
|
2972 return; |
|
2973 } |
|
2974 |
|
2975 assert(UseCompressedClassPointers, "should only be used for compressed headers"); |
|
2976 assert(Universe::heap() != NULL, "java heap should be initialized"); |
|
2977 assert(src != Rheap_base, "Decoding a klass in Rheap_base"); |
|
2978 assert(dst != Rheap_base, "Decoding a klass into Rheap_base"); |
|
2979 // Also do not verify_oop as this is called by verify_oop. |
|
2980 if (Universe::narrow_klass_base() != NULL) { |
|
2981 mov_slow(dst, Universe::narrow_klass_base()); |
|
2982 add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift())); |
|
2983 } else { |
|
2984 _lsl(dst, src, Universe::narrow_klass_shift()); |
|
2985 } |
|
2986 } |
|
2987 |
|
2988 |
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
    } else {
      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
    }
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
  // Update it when this code is modified.
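  // The check must be free of side effects: raw_push/raw_pop preserve Rtemp,
  // the NZCV flags are captured with mrs and restored with msr around the
  // comparison, and the thread's in_top_frame_unsafe_section flag is set
  // (non-zero) for the duration of the check and cleared afterwards.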
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
    raw_push(Rtemp, ZR);
    mrs(Rtemp, Assembler::SysReg_NZCV);
    str(Rtemp, Address(SP, 1 * wordSize));
    mov_slow(Rtemp, Universe::narrow_ptrs_base());
    cmp(Rheap_base, Rtemp);
    b(ok, eq);
    stop(msg);
    bind(ok);
    ldr(Rtemp, Address(SP, 1 * wordSize));
    msr(Assembler::SysReg_NZCV, Rtemp);
    raw_pop(Rtemp, ZR);
    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
  }
}
#endif // ASSERT

#endif // AARCH64

#ifdef COMPILER2
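// Inline fast path for C2 monitor enter: try biased locking when enabled,
// then stack-locking by CAS-ing the displaced mark word, with a check for a
// recursive stack-lock. Control reaches `done` with the condition flags
// indicating the outcome (eq on success, ne on failure), which the compiled
// caller is expected to test.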
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label fast_lock, done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    Label failed;
#ifdef AARCH64
    biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
#else
    biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
#endif
    bind(failed);
  }

  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
  tst(Rmark, markOopDesc::unlocked_value);
  b(fast_lock, ne);

  // Check for recursive lock; see comments in
  // InterpreterMacroAssembler::lock_object for an explanation of the fast
  // recursive locking check.
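  // Sketch of that check: the mark word is a recursive stack-lock iff
  // (Rmark - SP) is non-negative, smaller than a page and has its low two
  // bits clear, i.e. ((Rmark - SP) & ((intptr_t)3 - os::vm_page_size())) == 0;
  // zero is then stored as the displaced header.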
#ifdef AARCH64
  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
  Assembler::LogicalImmediate imm(mask, false);
  mov(Rscratch, SP);
  sub(Rscratch, Rmark, Rscratch);
  ands(Rscratch, Rscratch, imm);
  b(done, ne); // exit with failure
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
  b(done);

#else
  // -1- test low 2 bits
  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
  // -2- test (hdr - SP) if the low two bits are 0
  sub(Rscratch, Rmark, SP, eq);
  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
  // If still 'eq' then recursive locking OK
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
  b(done);
#endif

  bind(fast_lock);
  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));

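  // Assumed semantics of these flags, inferred from their use here: with
  // allow_fallthrough_on_failure the CAS does not branch away on contention
  // but falls through with ne set, and one_shot makes a single LDREX/STREX
  // attempt instead of retrying.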
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);
}
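// Inline fast path for C2 monitor exit: undo a biased lock if one is held,
// treat a NULL displaced header as a recursive stack-lock (nothing more to
// do), and otherwise CAS the displaced header back into the object's mark
// word.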
void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(Roop, Rscratch, done);
  }

  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  // If hdr is NULL, we've got recursive locking and there's nothing more to do
  cmp(Rmark, 0);
  b(done, eq);

  // Restore the object header
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);
}
#endif // COMPILER2
