|
1 /* |
|
2 * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. |
|
8 * |
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 * version 2 for more details (a copy is included in the LICENSE file that |
|
13 * accompanied this code). |
|
14 * |
|
15 * You should have received a copy of the GNU General Public License version |
|
16 * 2 along with this work; if not, write to the Free Software Foundation, |
|
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 * |
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
20 * or visit www.oracle.com if you need additional information or have any |
|
21 * questions. |
|
22 * |
|
23 */ |
|
24 |
|
25 #include "precompiled.hpp" |
|
26 #ifndef _WINDOWS |
|
27 #include "alloca.h" |
|
28 #endif |
|
29 #include "asm/macroAssembler.hpp" |
|
30 #include "asm/macroAssembler.inline.hpp" |
|
31 #include "code/debugInfoRec.hpp" |
|
32 #include "code/icBuffer.hpp" |
|
33 #include "code/vtableStubs.hpp" |
|
34 #include "interpreter/interpreter.hpp" |
|
35 #include "logging/log.hpp" |
|
36 #include "memory/resourceArea.hpp" |
|
37 #include "oops/compiledICHolder.hpp" |
|
38 #include "runtime/sharedRuntime.hpp" |
|
39 #include "runtime/vframeArray.hpp" |
|
40 #include "utilities/align.hpp" |
|
41 #include "vm_version_x86.hpp" |
|
42 #include "vmreg_x86.inline.hpp" |
|
43 #ifdef COMPILER1 |
|
44 #include "c1/c1_Runtime1.hpp" |
|
45 #endif |
|
46 #ifdef COMPILER2 |
|
47 #include "opto/runtime.hpp" |
|
48 #endif |
|
49 #if INCLUDE_JVMCI |
|
50 #include "jvmci/jvmciJavaClasses.hpp" |
|
51 #endif |
|
52 |
|
53 #define __ masm-> |
|
54 |
|
55 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; |
|
56 |
|
57 class SimpleRuntimeFrame { |
|
58 |
|
59 public: |
|
60 |
|
61 // Most of the runtime stubs have this simple frame layout. |
|
62 // This class exists to make the layout shared in one place. |
|
63 // Offsets are for compiler stack slots, which are jints. |
|
64 enum layout { |
|
65 // The frame sender code expects that rbp will be in the "natural" place and |
|
66 // will override any oopMap setting for it. We must therefore force the layout |
|
67 // so that it agrees with the frame sender code. |
|
68 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt, |
|
69 rbp_off2, |
|
70 return_off, return_off2, |
|
71 framesize |
|
72 }; |
|
73 }; |
|
74 |
|
75 class RegisterSaver { |
|
76 // Capture info about frame layout. Layout offsets are in jint |
|
77 // units because compiler frame slots are jints. |
|
78 #define XSAVE_AREA_BEGIN 160 |
|
79 #define XSAVE_AREA_YMM_BEGIN 576 |
|
80 #define XSAVE_AREA_ZMM_BEGIN 1152 |
|
81 #define XSAVE_AREA_UPPERBANK 1664 |
|
82 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off |
|
83 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off |
|
84 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off |
|
85 enum layout { |
|
86 fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area |
|
87 xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area |
|
88 DEF_XMM_OFFS(0), |
|
89 DEF_XMM_OFFS(1), |
|
90 // 2..15 are implied in range usage |
|
91 ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, |
|
92 DEF_YMM_OFFS(0), |
|
93 DEF_YMM_OFFS(1), |
|
94 // 2..15 are implied in range usage |
|
95 zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, |
|
96 zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt, |
|
97 DEF_ZMM_OFFS(16), |
|
98 DEF_ZMM_OFFS(17), |
|
99 // 18..31 are implied in range usage |
|
100 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt), |
|
101 fpu_stateH_end, |
|
102 r15_off, r15H_off, |
|
103 r14_off, r14H_off, |
|
104 r13_off, r13H_off, |
|
105 r12_off, r12H_off, |
|
106 r11_off, r11H_off, |
|
107 r10_off, r10H_off, |
|
108 r9_off, r9H_off, |
|
109 r8_off, r8H_off, |
|
110 rdi_off, rdiH_off, |
|
111 rsi_off, rsiH_off, |
|
112 ignore_off, ignoreH_off, // extra copy of rbp |
|
113 rsp_off, rspH_off, |
|
114 rbx_off, rbxH_off, |
|
115 rdx_off, rdxH_off, |
|
116 rcx_off, rcxH_off, |
|
117 rax_off, raxH_off, |
|
118 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state |
|
119 align_off, alignH_off, |
|
120 flags_off, flagsH_off, |
|
121 // The frame sender code expects that rbp will be in the "natural" place and |
|
122 // will override any oopMap setting for it. We must therefore force the layout |
|
123 // so that it agrees with the frame sender code. |
|
124 rbp_off, rbpH_off, // copy of rbp we will restore |
|
125 return_off, returnH_off, // slot for return address |
|
126 reg_save_size // size in compiler stack slots |
|
127 }; |
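// Worked illustration of the offset arithmetic above (not in the original source):
// DEF_XMM_OFFS(1) expands to
//   xmm1_off = xmm_off + 1*16/BytesPerInt,  xmm1H_off
// i.e. each XMM slot covers 16 bytes = 4 jint stack slots, so the byte offset of
// xmmN inside the save area is
//   frame::arg_reg_save_area_bytes + XSAVE_AREA_BEGIN + N*16
// which, per the FXSAVE image layout, is where the low 128 bits of xmmN are written.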
|
128 |
|
129 public: |
|
130 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false); |
|
131 static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); |
|
132 |
|
133 // Offsets into the register save area |
|
134 // Used by deoptimization when it is managing result register |
|
135 // values on its own |
|
136 |
|
137 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; } |
|
138 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; } |
|
139 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; } |
|
140 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; } |
|
141 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; } |
|
142 |
|
143 // During deoptimization only the result registers need to be restored, |
|
144 // all the other values have already been extracted. |
|
145 static void restore_result_registers(MacroAssembler* masm); |
|
146 }; |
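// Typical use of this helper (a sketch, not code from this file): runtime stubs
// bracket a VM call with the save/restore pair so the oop map created here
// describes every register at the safepoint, roughly:
//
//   int frame_size_in_words;
//   OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
//   // ... set up last_Java_frame and call into the VM ...
//   RegisterSaver::restore_live_registers(masm);
//   __ ret(0);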
|
147 |
|
148 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { |
|
149 int off = 0; |
|
150 int num_xmm_regs = XMMRegisterImpl::number_of_registers; |
|
151 if (UseAVX < 3) { |
|
152 num_xmm_regs = num_xmm_regs/2; |
|
153 } |
|
154 #if defined(COMPILER2) || INCLUDE_JVMCI |
|
155 if (save_vectors) { |
|
156 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); |
|
157 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); |
|
158 } |
|
159 #else |
|
160 assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); |
|
161 #endif |
|
162 |
|
163   // Always make the frame size 16-byte aligned; both vector and non-vector frames are allocated this way |
|
164 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs); |
|
165 // OopMap frame size is in compiler stack slots (jint's) not bytes or words |
|
166 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; |
|
167 // CodeBlob frame size is in words. |
|
168 int frame_size_in_words = frame_size_in_bytes / wordSize; |
|
169 *total_frame_words = frame_size_in_words; |
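// Units recap (illustration): with reg_save_size in jint slots, BytesPerInt == 4
// and wordSize == 8, the same frame size is reported three ways --
//   bytes = slots*4,  OopMap slots = bytes/4,  CodeBlob words = bytes/8 --
// and the align_up above (num_xmm_regs is 16, or 32 with AVX-512) keeps the byte
// size a multiple of 16, as the comment requires.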
|
170 |
|
171 // Save registers, fpu state, and flags. |
|
172 // We assume caller has already pushed the return address onto the |
|
173 // stack, so rsp is 8-byte aligned here. |
|
174   // We push rbp twice in this sequence because we want the real rbp |

175   // to be under the return address, like a normal enter. |
|
176 |
|
177 __ enter(); // rsp becomes 16-byte aligned here |
|
178 __ push_CPU_state(); // Push a multiple of 16 bytes |
|
179 |
|
180   // push_CPU_state handles this on EVEX-enabled targets |
|
181 if (save_vectors) { |
|
182 // Save upper half of YMM registers(0..15) |
|
183 int base_addr = XSAVE_AREA_YMM_BEGIN; |
|
184 for (int n = 0; n < 16; n++) { |
|
185 __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n)); |
|
186 } |
|
187 if (VM_Version::supports_evex()) { |
|
188 // Save upper half of ZMM registers(0..15) |
|
189 base_addr = XSAVE_AREA_ZMM_BEGIN; |
|
190 for (int n = 0; n < 16; n++) { |
|
191 __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n)); |
|
192 } |
|
193 // Save full ZMM registers(16..num_xmm_regs) |
|
194 base_addr = XSAVE_AREA_UPPERBANK; |
|
195 off = 0; |
|
196 int vector_len = Assembler::AVX_512bit; |
|
197 for (int n = 16; n < num_xmm_regs; n++) { |
|
198 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); |
|
199 } |
|
200 } |
|
201 } else { |
|
202 if (VM_Version::supports_evex()) { |
|
203 // Save upper bank of ZMM registers(16..31) for double/float usage |
|
204 int base_addr = XSAVE_AREA_UPPERBANK; |
|
205 off = 0; |
|
206 for (int n = 16; n < num_xmm_regs; n++) { |
|
207 __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n)); |
|
208 } |
|
209 } |
|
210 } |
|
211 __ vzeroupper(); |
|
212 if (frame::arg_reg_save_area_bytes != 0) { |
|
213 // Allocate argument register save area |
|
214 __ subptr(rsp, frame::arg_reg_save_area_bytes); |
|
215 } |
|
216 |
|
217 // Set an oopmap for the call site. This oopmap will map all |
|
218 // oop-registers and debug-info registers as callee-saved. This |
|
219 // will allow deoptimization at this safepoint to find all possible |
|
220 // debug-info recordings, as well as let GC find all oops. |
|
221 |
|
222 OopMapSet *oop_maps = new OopMapSet(); |
|
223 OopMap* map = new OopMap(frame_size_in_slots, 0); |
|
224 |
|
225 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x)) |
|
226 |
|
227 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); |
|
228 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); |
|
229 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg()); |
|
230 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg()); |
|
231 // rbp location is known implicitly by the frame sender code, needs no oopmap |
|
232   // and the location where rbp was saved is ignored |
|
233 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg()); |
|
234 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg()); |
|
235 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg()); |
|
236 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg()); |
|
237 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg()); |
|
238 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg()); |
|
239 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg()); |
|
240 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg()); |
|
241 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg()); |
|
242 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg()); |
|
243   // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15; |

244   // on EVEX-enabled targets it is included in the XSAVE area |
|
245 off = xmm0_off; |
|
246 int delta = xmm1_off - off; |
|
247 for (int n = 0; n < 16; n++) { |
|
248 XMMRegister xmm_name = as_XMMRegister(n); |
|
249 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); |
|
250 off += delta; |
|
251 } |
|
252 if(UseAVX > 2) { |
|
253 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets |
|
254 off = zmm16_off; |
|
255 delta = zmm17_off - off; |
|
256 for (int n = 16; n < num_xmm_regs; n++) { |
|
257 XMMRegister zmm_name = as_XMMRegister(n); |
|
258 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()); |
|
259 off += delta; |
|
260 } |
|
261 } |
|
262 |
|
263 #if defined(COMPILER2) || INCLUDE_JVMCI |
|
264 if (save_vectors) { |
|
265 off = ymm0_off; |
|
266 int delta = ymm1_off - off; |
|
267 for (int n = 0; n < 16; n++) { |
|
268 XMMRegister ymm_name = as_XMMRegister(n); |
|
269 map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4)); |
|
270 off += delta; |
|
271 } |
|
272 } |
|
273 #endif // COMPILER2 || INCLUDE_JVMCI |
|
274 |
|
275 // %%% These should all be a waste but we'll keep things as they were for now |
|
276 if (true) { |
|
277 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next()); |
|
278 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next()); |
|
279 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next()); |
|
280 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next()); |
|
281 // rbp location is known implicitly by the frame sender code, needs no oopmap |
|
282 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next()); |
|
283 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next()); |
|
284 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next()); |
|
285 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next()); |
|
286 map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next()); |
|
287 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next()); |
|
288 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next()); |
|
289 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next()); |
|
290 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next()); |
|
291 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next()); |
|
292     // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15; |

293     // on EVEX-enabled targets it is included in the XSAVE area |
|
294 off = xmm0H_off; |
|
295 delta = xmm1H_off - off; |
|
296 for (int n = 0; n < 16; n++) { |
|
297 XMMRegister xmm_name = as_XMMRegister(n); |
|
298 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next()); |
|
299 off += delta; |
|
300 } |
|
301 if (UseAVX > 2) { |
|
302 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets |
|
303 off = zmm16H_off; |
|
304 delta = zmm17H_off - off; |
|
305 for (int n = 16; n < num_xmm_regs; n++) { |
|
306 XMMRegister zmm_name = as_XMMRegister(n); |
|
307 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next()); |
|
308 off += delta; |
|
309 } |
|
310 } |
|
311 } |
|
312 |
|
313 return map; |
|
314 } |
|
315 |
|
316 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { |
|
317 int num_xmm_regs = XMMRegisterImpl::number_of_registers; |
|
318 if (UseAVX < 3) { |
|
319 num_xmm_regs = num_xmm_regs/2; |
|
320 } |
|
321 if (frame::arg_reg_save_area_bytes != 0) { |
|
322 // Pop arg register save area |
|
323 __ addptr(rsp, frame::arg_reg_save_area_bytes); |
|
324 } |
|
325 |
|
326 #if defined(COMPILER2) || INCLUDE_JVMCI |
|
327 if (restore_vectors) { |
|
328 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); |
|
329 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); |
|
330 } |
|
331 #else |
|
332 assert(!restore_vectors, "vectors are generated only by C2"); |
|
333 #endif |
|
334 |
|
335 __ vzeroupper(); |
|
336 |
|
337   // On EVEX-enabled targets everything is handled by the fpu state restore in pop_CPU_state |
|
338 if (restore_vectors) { |
|
339 // Restore upper half of YMM registers (0..15) |
|
340 int base_addr = XSAVE_AREA_YMM_BEGIN; |
|
341 for (int n = 0; n < 16; n++) { |
|
342 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16)); |
|
343 } |
|
344 if (VM_Version::supports_evex()) { |
|
345 // Restore upper half of ZMM registers (0..15) |
|
346 base_addr = XSAVE_AREA_ZMM_BEGIN; |
|
347 for (int n = 0; n < 16; n++) { |
|
348 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32)); |
|
349 } |
|
350 // Restore full ZMM registers(16..num_xmm_regs) |
|
351 base_addr = XSAVE_AREA_UPPERBANK; |
|
352 int vector_len = Assembler::AVX_512bit; |
|
353 int off = 0; |
|
354 for (int n = 16; n < num_xmm_regs; n++) { |
|
355 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); |
|
356 } |
|
357 } |
|
358 } else { |
|
359 if (VM_Version::supports_evex()) { |
|
360 // Restore upper bank of ZMM registers(16..31) for double/float usage |
|
361 int base_addr = XSAVE_AREA_UPPERBANK; |
|
362 int off = 0; |
|
363 for (int n = 16; n < num_xmm_regs; n++) { |
|
364 __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64))); |
|
365 } |
|
366 } |
|
367 } |
|
368 |
|
369 // Recover CPU state |
|
370 __ pop_CPU_state(); |
|
371 // Get the rbp described implicitly by the calling convention (no oopMap) |
|
372 __ pop(rbp); |
|
373 } |
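// Note (added commentary): callers are expected to pass matching
// save_vectors/restore_vectors values to the two halves above; the frame layout
// is the same either way, but only a matching restore puts the upper YMM/ZMM
// halves saved by save_live_registers back into the registers.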
|
374 |
|
375 void RegisterSaver::restore_result_registers(MacroAssembler* masm) { |
|
376 |
|
377 // Just restore result register. Only used by deoptimization. By |
|
378 // now any callee save register that needs to be restored to a c2 |
|
379 // caller of the deoptee has been extracted into the vframeArray |
|
380 // and will be stuffed into the c2i adapter we create for later |
|
381 // restoration so only result registers need to be restored here. |
|
382 |
|
383 // Restore fp result register |
|
384 __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes())); |
|
385 // Restore integer result register |
|
386 __ movptr(rax, Address(rsp, rax_offset_in_bytes())); |
|
387 __ movptr(rdx, Address(rsp, rdx_offset_in_bytes())); |
|
388 |
|
389   // Pop all of the register save area off the stack except the return address |
|
390 __ addptr(rsp, return_offset_in_bytes()); |
|
391 } |
|
392 |
|
393 // Is the vector's size (in bytes) bigger than the size saved by default? |

394 // 16-byte XMM registers are saved by default using fxsave/fxrstor instructions. |
|
395 bool SharedRuntime::is_wide_vector(int size) { |
|
396 return size > 16; |
|
397 } |
|
398 |
|
399 size_t SharedRuntime::trampoline_size() { |
|
400 return 16; |
|
401 } |
|
402 |
|
403 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { |
|
404 __ jump(RuntimeAddress(destination)); |
|
405 } |
|
406 |
|
407 // The java_calling_convention describes stack locations as ideal slots on |
|
408 // a frame with no abi restrictions. Since we must observe abi restrictions |
|
409 // (like the placement of the register window) the slots must be biased by |
|
410 // the following value. |
|
411 static int reg2offset_in(VMReg r) { |
|
412 // Account for saved rbp and return address |
|
413 // This should really be in_preserve_stack_slots |
|
414 return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; |
|
415 } |
|
416 |
|
417 static int reg2offset_out(VMReg r) { |
|
418 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; |
|
419 } |
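// Example of the biasing (illustration only, assuming
// SharedRuntime::out_preserve_stack_slots() == 0 here): an incoming argument in
// VMReg stack slot 0 sits just above the saved rbp and the return address, so
// reg2offset_in gives (0 + 4) * 4 = 16 bytes off rbp, while outgoing slot 0 maps
// to offset 0 from rsp.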
|
420 |
|
421 // --------------------------------------------------------------------------- |
|
422 // Read the array of BasicTypes from a signature, and compute where the |
|
423 // arguments should go. Values in the VMRegPair regs array refer to 4-byte |
|
424 // quantities. Values less than VMRegImpl::stack0 are registers, those above |
|
425 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer |
|
426 // as framesizes are fixed. |
|
427 // VMRegImpl::stack0 refers to the first slot at 0(sp), |

428 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers |

429 // 0 up to RegisterImpl::number_of_registers are the 64-bit |

430 // integer registers. |
|
431 |
|
432 // Note: the INPUTS in sig_bt are in units of Java argument words, which are |
|
433 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit |
|
434 // units regardless of build. Of course for i486 there is no 64 bit build |
|
435 |
|
436 // The Java calling convention is a "shifted" version of the C ABI. |
|
437 // By skipping the first C ABI register we can call non-static jni methods |
|
438 // with small numbers of arguments without having to shuffle the arguments |
|
439 // at all. Since we control the java ABI we ought to at least get some |
|
440 // advantage out of it. |
|
441 |
|
442 int SharedRuntime::java_calling_convention(const BasicType *sig_bt, |
|
443 VMRegPair *regs, |
|
444 int total_args_passed, |
|
445 int is_outgoing) { |
|
446 |
|
447 // Create the mapping between argument positions and |
|
448 // registers. |
|
449 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { |
|
450 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5 |
|
451 }; |
|
452 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { |
|
453 j_farg0, j_farg1, j_farg2, j_farg3, |
|
454 j_farg4, j_farg5, j_farg6, j_farg7 |
|
455 }; |
|
456 |
|
457 |
|
458 uint int_args = 0; |
|
459 uint fp_args = 0; |
|
460 uint stk_args = 0; // inc by 2 each time |
|
461 |
|
462 for (int i = 0; i < total_args_passed; i++) { |
|
463 switch (sig_bt[i]) { |
|
464 case T_BOOLEAN: |
|
465 case T_CHAR: |
|
466 case T_BYTE: |
|
467 case T_SHORT: |
|
468 case T_INT: |
|
469 if (int_args < Argument::n_int_register_parameters_j) { |
|
470 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); |
|
471 } else { |
|
472 regs[i].set1(VMRegImpl::stack2reg(stk_args)); |
|
473 stk_args += 2; |
|
474 } |
|
475 break; |
|
476 case T_VOID: |
|
477 // halves of T_LONG or T_DOUBLE |
|
478 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); |
|
479 regs[i].set_bad(); |
|
480 break; |
|
481 case T_LONG: |
|
482 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); |
|
483 // fall through |
|
484 case T_OBJECT: |
|
485 case T_ARRAY: |
|
486 case T_ADDRESS: |
|
487 if (int_args < Argument::n_int_register_parameters_j) { |
|
488 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); |
|
489 } else { |
|
490 regs[i].set2(VMRegImpl::stack2reg(stk_args)); |
|
491 stk_args += 2; |
|
492 } |
|
493 break; |
|
494 case T_FLOAT: |
|
495 if (fp_args < Argument::n_float_register_parameters_j) { |
|
496 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); |
|
497 } else { |
|
498 regs[i].set1(VMRegImpl::stack2reg(stk_args)); |
|
499 stk_args += 2; |
|
500 } |
|
501 break; |
|
502 case T_DOUBLE: |
|
503 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); |
|
504 if (fp_args < Argument::n_float_register_parameters_j) { |
|
505 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); |
|
506 } else { |
|
507 regs[i].set2(VMRegImpl::stack2reg(stk_args)); |
|
508 stk_args += 2; |
|
509 } |
|
510 break; |
|
511 default: |
|
512 ShouldNotReachHere(); |
|
513 break; |
|
514 } |
|
515 } |
|
516 |
|
517 return align_up(stk_args, 2); |
|
518 } |
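// Illustrative mapping (not from the source): for a signature
// (long, int, double, Object) the loop above yields
//   T_LONG -> j_rarg0,  T_INT -> j_rarg1,  T_DOUBLE -> j_farg0,  T_OBJECT -> j_rarg2
// and returns 0 stack slots; only once the six j_rargN or eight j_fargN registers
// are exhausted do arguments spill to the stack, two 4-byte slots at a time
// (stk_args += 2).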
|
519 |
|
520 // Patch the callers callsite with entry to compiled code if it exists. |
|
521 static void patch_callers_callsite(MacroAssembler *masm) { |
|
522 Label L; |
|
523 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); |
|
524 __ jcc(Assembler::equal, L); |
|
525 |
|
526 // Save the current stack pointer |
|
527 __ mov(r13, rsp); |
|
528 // Schedule the branch target address early. |
|
529 // Call into the VM to patch the caller, then jump to compiled callee |
|
530 // rax isn't live so capture return address while we easily can |
|
531 __ movptr(rax, Address(rsp, 0)); |
|
532 |
|
533 // align stack so push_CPU_state doesn't fault |
|
534 __ andptr(rsp, -(StackAlignmentInBytes)); |
|
535 __ push_CPU_state(); |
|
536 __ vzeroupper(); |
|
537 // VM needs caller's callsite |
|
538 // VM needs target method |
|
539 // This needs to be a long call since we will relocate this adapter to |
|
540 // the codeBuffer and it may not reach |
|
541 |
|
542 // Allocate argument register save area |
|
543 if (frame::arg_reg_save_area_bytes != 0) { |
|
544 __ subptr(rsp, frame::arg_reg_save_area_bytes); |
|
545 } |
|
546 __ mov(c_rarg0, rbx); |
|
547 __ mov(c_rarg1, rax); |
|
548 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); |
|
549 |
|
550 // De-allocate argument register save area |
|
551 if (frame::arg_reg_save_area_bytes != 0) { |
|
552 __ addptr(rsp, frame::arg_reg_save_area_bytes); |
|
553 } |
|
554 |
|
555 __ vzeroupper(); |
|
556 __ pop_CPU_state(); |
|
557 // restore sp |
|
558 __ mov(rsp, r13); |
|
559 __ bind(L); |
|
560 } |
|
561 |
|
562 |
|
563 static void gen_c2i_adapter(MacroAssembler *masm, |
|
564 int total_args_passed, |
|
565 int comp_args_on_stack, |
|
566 const BasicType *sig_bt, |
|
567 const VMRegPair *regs, |
|
568 Label& skip_fixup) { |
|
569 // Before we get into the guts of the C2I adapter, see if we should be here |
|
570 // at all. We've come from compiled code and are attempting to jump to the |
|
571 // interpreter, which means the caller made a static call to get here |
|
572 // (vcalls always get a compiled target if there is one). Check for a |
|
573 // compiled target. If there is one, we need to patch the caller's call. |
|
574 patch_callers_callsite(masm); |
|
575 |
|
576 __ bind(skip_fixup); |
|
577 |
|
578 // Since all args are passed on the stack, total_args_passed * |
|
579   // Interpreter::stackElementSize is the space we need. Plus one word because |

580   // we also account for the return address location since |

581   // we store it first rather than hold it in rax across all the shuffling. |
|
582 |
|
583 int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize; |
|
584 |
|
585 // stack is aligned, keep it that way |
|
586 extraspace = align_up(extraspace, 2*wordSize); |
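  // Example (assuming Interpreter::stackElementSize == wordSize == 8 here):
  // total_args_passed == 4 gives extraspace = 4*8 + 8 = 40, which the align_up
  // above rounds to 48 so rsp stays 16-byte aligned.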
|
587 |
|
588 // Get return address |
|
589 __ pop(rax); |
|
590 |
|
591 // set senderSP value |
|
592 __ mov(r13, rsp); |
|
593 |
|
594 __ subptr(rsp, extraspace); |
|
595 |
|
596 // Store the return address in the expected location |
|
597 __ movptr(Address(rsp, 0), rax); |
|
598 |
|
599 // Now write the args into the outgoing interpreter space |
|
600 for (int i = 0; i < total_args_passed; i++) { |
|
601 if (sig_bt[i] == T_VOID) { |
|
602 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); |
|
603 continue; |
|
604 } |
|
605 |
|
606 // offset to start parameters |
|
607 int st_off = (total_args_passed - i) * Interpreter::stackElementSize; |
|
608 int next_off = st_off - Interpreter::stackElementSize; |
|
609 |
|
610 // Say 4 args: |
|
611 // i st_off |
|
612 // 0 32 T_LONG |
|
613 // 1 24 T_VOID |
|
614 // 2 16 T_OBJECT |
|
615 // 3 8 T_BOOL |
|
616 // - 0 return address |
|
617 // |
|
618     // However, to make things extra confusing: because we can fit a long/double in |

619     // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter |

620     // leaves one slot empty and only stores to a single slot. In this case the |

621     // slot that is occupied is the T_VOID slot. See, I said it was confusing. |
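    // Concretely, for the T_LONG at i == 0 in the 4-arg example above: st_off == 32,
    // next_off == 24, and the code below writes the 64-bit value once, to next_off
    // (the slot the interpreter actually reads); in debug builds st_off is filled
    // with the 0xdeadffff... junk pattern instead.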
|
622 |
|
623 VMReg r_1 = regs[i].first(); |
|
624 VMReg r_2 = regs[i].second(); |
|
625 if (!r_1->is_valid()) { |
|
626 assert(!r_2->is_valid(), ""); |
|
627 continue; |
|
628 } |
|
629 if (r_1->is_stack()) { |
|
630 // memory to memory use rax |
|
631 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; |
|
632 if (!r_2->is_valid()) { |
|
633 // sign extend?? |
|
634 __ movl(rax, Address(rsp, ld_off)); |
|
635 __ movptr(Address(rsp, st_off), rax); |
|
636 |
|
637 } else { |
|
638 |
|
639 __ movq(rax, Address(rsp, ld_off)); |
|
640 |
|
641         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG |
|
642 // T_DOUBLE and T_LONG use two slots in the interpreter |
|
643 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { |
|
644 // ld_off == LSW, ld_off+wordSize == MSW |
|
645 // st_off == MSW, next_off == LSW |
|
646 __ movq(Address(rsp, next_off), rax); |
|
647 #ifdef ASSERT |
|
648 // Overwrite the unused slot with known junk |
|
649 __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); |
|
650 __ movptr(Address(rsp, st_off), rax); |
|
651 #endif /* ASSERT */ |
|
652 } else { |
|
653 __ movq(Address(rsp, st_off), rax); |
|
654 } |
|
655 } |
|
656 } else if (r_1->is_Register()) { |
|
657 Register r = r_1->as_Register(); |
|
658 if (!r_2->is_valid()) { |
|
659         // must be only an int (or less) so move only 32 bits to the slot |

660         // why not sign extend?? |
|
661 __ movl(Address(rsp, st_off), r); |
|
662 } else { |
|
663         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG |
|
664 // T_DOUBLE and T_LONG use two slots in the interpreter |
|
665 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { |
|
666 // long/double in gpr |
|
667 #ifdef ASSERT |
|
668 // Overwrite the unused slot with known junk |
|
669 __ mov64(rax, CONST64(0xdeadffffdeadaaab)); |
|
670 __ movptr(Address(rsp, st_off), rax); |
|
671 #endif /* ASSERT */ |
|
672 __ movq(Address(rsp, next_off), r); |
|
673 } else { |
|
674 __ movptr(Address(rsp, st_off), r); |
|
675 } |
|
676 } |
|
677 } else { |
|
678 assert(r_1->is_XMMRegister(), ""); |
|
679 if (!r_2->is_valid()) { |
|
680         // only a float; use just part of the slot |
|
681 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister()); |
|
682 } else { |
|
683 #ifdef ASSERT |
|
684 // Overwrite the unused slot with known junk |
|
685 __ mov64(rax, CONST64(0xdeadffffdeadaaac)); |
|
686 __ movptr(Address(rsp, st_off), rax); |
|
687 #endif /* ASSERT */ |
|
688 __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister()); |
|
689 } |
|
690 } |
|
691 } |
|
692 |
|
693 // Schedule the branch target address early. |
|
694 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); |
|
695 __ jmp(rcx); |
|
696 } |
|
697 |
|
698 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, |
|
699 address code_start, address code_end, |
|
700 Label& L_ok) { |
|
701 Label L_fail; |
|
702 __ lea(temp_reg, ExternalAddress(code_start)); |
|
703 __ cmpptr(pc_reg, temp_reg); |
|
704 __ jcc(Assembler::belowEqual, L_fail); |
|
705 __ lea(temp_reg, ExternalAddress(code_end)); |
|
706 __ cmpptr(pc_reg, temp_reg); |
|
707 __ jcc(Assembler::below, L_ok); |
|
708 __ bind(L_fail); |
|
709 } |
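// In other words (added commentary): range_check jumps to L_ok only when
// code_start < pc_reg < code_end (strictly); otherwise it falls through past
// L_fail so the caller can chain further ranges before its final __ stop().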
|
710 |
|
711 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, |
|
712 int total_args_passed, |
|
713 int comp_args_on_stack, |
|
714 const BasicType *sig_bt, |
|
715 const VMRegPair *regs) { |
|
716 |
|
717   // Note: r13 contains the senderSP on entry. We must preserve it since |

718   // we may do an i2c -> c2i transition if we lose a race where compiled |

719   // code goes non-entrant while we get args ready. |

720   // In addition we use r13 to locate all the interpreter args, as |

721   // we must align the stack to 16 bytes on an i2c entry or else we |

722   // lose the alignment we expect in all compiled code and the register |

723   // save code can segv when fxsave instructions find an improperly |

724   // aligned stack pointer. |
|
725 |
|
726 // Adapters can be frameless because they do not require the caller |
|
727 // to perform additional cleanup work, such as correcting the stack pointer. |
|
728 // An i2c adapter is frameless because the *caller* frame, which is interpreted, |
|
729 // routinely repairs its own stack pointer (from interpreter_frame_last_sp), |
|
730 // even if a callee has modified the stack pointer. |
|
731 // A c2i adapter is frameless because the *callee* frame, which is interpreted, |
|
732 // routinely repairs its caller's stack pointer (from sender_sp, which is set |
|
733 // up via the senderSP register). |
|
734 // In other words, if *either* the caller or callee is interpreted, we can |
|
735 // get the stack pointer repaired after a call. |
|
736 // This is why c2i and i2c adapters cannot be indefinitely composed. |
|
737 // In particular, if a c2i adapter were to somehow call an i2c adapter, |
|
738 // both caller and callee would be compiled methods, and neither would |
|
739 // clean up the stack pointer changes performed by the two adapters. |
|
740 // If this happens, control eventually transfers back to the compiled |
|
741 // caller, but with an uncorrected stack, causing delayed havoc. |
|
742 |
|
743 // Pick up the return address |
|
744 __ movptr(rax, Address(rsp, 0)); |
|
745 |
|
746 if (VerifyAdapterCalls && |
|
747 (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { |
|
748 // So, let's test for cascading c2i/i2c adapters right now. |
|
749 // assert(Interpreter::contains($return_addr) || |
|
750 // StubRoutines::contains($return_addr), |
|
751 // "i2c adapter must return to an interpreter frame"); |
|
752 __ block_comment("verify_i2c { "); |
|
753 Label L_ok; |
|
754 if (Interpreter::code() != NULL) |
|
755 range_check(masm, rax, r11, |
|
756 Interpreter::code()->code_start(), Interpreter::code()->code_end(), |
|
757 L_ok); |
|
758 if (StubRoutines::code1() != NULL) |
|
759 range_check(masm, rax, r11, |
|
760 StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), |
|
761 L_ok); |
|
762 if (StubRoutines::code2() != NULL) |
|
763 range_check(masm, rax, r11, |
|
764 StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), |
|
765 L_ok); |
|
766 const char* msg = "i2c adapter must return to an interpreter frame"; |
|
767 __ block_comment(msg); |
|
768 __ stop(msg); |
|
769 __ bind(L_ok); |
|
770 __ block_comment("} verify_i2ce "); |
|
771 } |
|
772 |
|
773 // Must preserve original SP for loading incoming arguments because |
|
774 // we need to align the outgoing SP for compiled code. |
|
775 __ movptr(r11, rsp); |
|
776 |
|
777 // Cut-out for having no stack args. Since up to 2 int/oop args are passed |
|
778 // in registers, we will occasionally have no stack args. |
|
779 int comp_words_on_stack = 0; |
|
780 if (comp_args_on_stack) { |
|
781 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in |
|
782 // registers are below. By subtracting stack0, we either get a negative |
|
783 // number (all values in registers) or the maximum stack slot accessed. |
|
784 |
|
785 // Convert 4-byte c2 stack slots to words. |
|
786 comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; |
|
787     // Round up to minimum stack alignment, in wordSize |
|
788 comp_words_on_stack = align_up(comp_words_on_stack, 2); |
|
789 __ subptr(rsp, comp_words_on_stack * wordSize); |
|
790 } |
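  // Example (illustration): comp_args_on_stack == 3 slots -> 12 bytes, rounded
  // up to 16 bytes == 2 words, already an even word count, so 16 bytes are
  // reserved here for the compiled callee's outgoing stack arguments.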
|
791 |
|
792 |
|
793 // Ensure compiled code always sees stack at proper alignment |
|
794 __ andptr(rsp, -16); |
|
795 |
|
796   // push the return address and misalign the stack so that the youngest frame always sees it |

797   // as far as the placement of the call instruction is concerned |
|
798 __ push(rax); |
|
799 |
|
800 // Put saved SP in another register |
|
801 const Register saved_sp = rax; |
|
802 __ movptr(saved_sp, r11); |
|
803 |
|
804 // Will jump to the compiled code just as if compiled code was doing it. |
|
805 // Pre-load the register-jump target early, to schedule it better. |
|
806 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset()))); |
|
807 |
|
808 #if INCLUDE_JVMCI |
|
809 if (EnableJVMCI || UseAOT) { |
|
810 // check if this call should be routed towards a specific entry point |
|
811 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); |
|
812 Label no_alternative_target; |
|
813 __ jcc(Assembler::equal, no_alternative_target); |
|
814 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); |
|
815 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); |
|
816 __ bind(no_alternative_target); |
|
817 } |
|
818 #endif // INCLUDE_JVMCI |
|
819 |
|
820 // Now generate the shuffle code. Pick up all register args and move the |
|
821 // rest through the floating point stack top. |
|
822 for (int i = 0; i < total_args_passed; i++) { |
|
823 if (sig_bt[i] == T_VOID) { |
|
824 // Longs and doubles are passed in native word order, but misaligned |
|
825 // in the 32-bit build. |
|
826 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); |
|
827 continue; |
|
828 } |
|
829 |
|
830 // Pick up 0, 1 or 2 words from SP+offset. |
|
831 |
|
832 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), |
|
833 "scrambled load targets?"); |
|
834 // Load in argument order going down. |
|
835 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize; |
|
836 // Point to interpreter value (vs. tag) |
|
837 int next_off = ld_off - Interpreter::stackElementSize; |
|
838 // |
|
839 // |
|
840 // |
|
841 VMReg r_1 = regs[i].first(); |
|
842 VMReg r_2 = regs[i].second(); |
|
843 if (!r_1->is_valid()) { |
|
844 assert(!r_2->is_valid(), ""); |
|
845 continue; |
|
846 } |
|
847 if (r_1->is_stack()) { |
|
848 // Convert stack slot to an SP offset (+ wordSize to account for return address ) |
|
849 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; |
|
850 |
|
851 // We can use r13 as a temp here because compiled code doesn't need r13 as an input |
|
852 // and if we end up going thru a c2i because of a miss a reasonable value of r13 |
|
853 // will be generated. |
|
854 if (!r_2->is_valid()) { |
|
855 // sign extend??? |
|
856 __ movl(r13, Address(saved_sp, ld_off)); |
|
857 __ movptr(Address(rsp, st_off), r13); |
|
858 } else { |
|
859 // |
|
860         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE; |

861         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case, |

862         // so we must adjust where to pick up the data to match the interpreter. |
|
863 // |
|
864 // Interpreter local[n] == MSW, local[n+1] == LSW however locals |
|
865 // are accessed as negative so LSW is at LOW address |
|
866 |
|
867 // ld_off is MSW so get LSW |
|
868 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? |
|
869 next_off : ld_off; |
|
870 __ movq(r13, Address(saved_sp, offset)); |
|
871 // st_off is LSW (i.e. reg.first()) |
|
872 __ movq(Address(rsp, st_off), r13); |
|
873 } |
|
874 } else if (r_1->is_Register()) { // Register argument |
|
875 Register r = r_1->as_Register(); |
|
876 assert(r != rax, "must be different"); |
|
877 if (r_2->is_valid()) { |
|
878 // |
|
879         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE; |

880         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case, |

881         // so we must adjust where to pick up the data to match the interpreter. |
|
882 |
|
883 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? |
|
884 next_off : ld_off; |
|
885 |
|
886 // this can be a misaligned move |
|
887 __ movq(r, Address(saved_sp, offset)); |
|
888 } else { |
|
889 // sign extend and use a full word? |
|
890 __ movl(r, Address(saved_sp, ld_off)); |
|
891 } |
|
892 } else { |
|
893 if (!r_2->is_valid()) { |
|
894 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off)); |
|
895 } else { |
|
896 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off)); |
|
897 } |
|
898 } |
|
899 } |
|
900 |
|
901 // 6243940 We might end up in handle_wrong_method if |
|
902 // the callee is deoptimized as we race thru here. If that |
|
903 // happens we don't want to take a safepoint because the |
|
904 // caller frame will look interpreted and arguments are now |
|
905 // "compiled" so it is much better to make this transition |
|
906 // invisible to the stack walking code. Unfortunately if |
|
907   // we try to find the callee by normal means a safepoint |

908   // is possible. So we stash the desired callee in the thread |

909   // and the VM will find it there should this case occur. |
|
910 |
|
911 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); |
|
912 |
|
913   // put Method* where a c2i would expect it, should we end up there. |

914   // This is only needed because c2 resolve stubs return Method* as a result in |

915   // rax. |
|
916 __ mov(rax, rbx); |
|
917 __ jmp(r11); |
|
918 } |
|
919 |
|
920 // --------------------------------------------------------------- |
|
921 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, |
|
922 int total_args_passed, |
|
923 int comp_args_on_stack, |
|
924 const BasicType *sig_bt, |
|
925 const VMRegPair *regs, |
|
926 AdapterFingerPrint* fingerprint) { |
|
927 address i2c_entry = __ pc(); |
|
928 |
|
929 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); |
|
930 |
|
931 // ------------------------------------------------------------------------- |
|
932 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls |
|
933 // to the interpreter. The args start out packed in the compiled layout. They |
|
934 // need to be unpacked into the interpreter layout. This will almost always |
|
935 // require some stack space. We grow the current (compiled) stack, then repack |
|
936 // the args. We finally end in a jump to the generic interpreter entry point. |
|
937 // On exit from the interpreter, the interpreter will restore our SP (lest the |
|
938   // compiled code, which relies solely on SP and not RBP, get sick). |
|
939 |
|
940 address c2i_unverified_entry = __ pc(); |
|
941 Label skip_fixup; |
|
942 Label ok; |
|
943 |
|
944 Register holder = rax; |
|
945 Register receiver = j_rarg0; |
|
946 Register temp = rbx; |
|
947 |
|
948 { |
|
949 __ load_klass(temp, receiver); |
|
950 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset())); |
|
951 __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset())); |
|
952 __ jcc(Assembler::equal, ok); |
|
953 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); |
|
954 |
|
955 __ bind(ok); |
|
956 // Method might have been compiled since the call site was patched to |
|
957     // interpreted; if that is the case, treat it as a miss so we can get |
|
958 // the call site corrected. |
|
959 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); |
|
960 __ jcc(Assembler::equal, skip_fixup); |
|
961 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); |
|
962 } |
|
963 |
|
964 address c2i_entry = __ pc(); |
|
965 |
|
966 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); |
|
967 |
|
968 __ flush(); |
|
969 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); |
|
970 } |
|
971 |
|
972 int SharedRuntime::c_calling_convention(const BasicType *sig_bt, |
|
973 VMRegPair *regs, |
|
974 VMRegPair *regs2, |
|
975 int total_args_passed) { |
|
976 assert(regs2 == NULL, "not needed on x86"); |
|
977 // We return the amount of VMRegImpl stack slots we need to reserve for all |
|
978 // the arguments NOT counting out_preserve_stack_slots. |
|
979 |
|
980 // NOTE: These arrays will have to change when c1 is ported |
|
981 #ifdef _WIN64 |
|
982 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { |
|
983 c_rarg0, c_rarg1, c_rarg2, c_rarg3 |
|
984 }; |
|
985 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { |
|
986 c_farg0, c_farg1, c_farg2, c_farg3 |
|
987 }; |
|
988 #else |
|
989 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { |
|
990 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5 |
|
991 }; |
|
992 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { |
|
993 c_farg0, c_farg1, c_farg2, c_farg3, |
|
994 c_farg4, c_farg5, c_farg6, c_farg7 |
|
995 }; |
|
996 #endif // _WIN64 |
|
997 |
|
998 |
|
999 uint int_args = 0; |
|
1000 uint fp_args = 0; |
|
1001 uint stk_args = 0; // inc by 2 each time |
|
1002 |
|
1003 for (int i = 0; i < total_args_passed; i++) { |
|
1004 switch (sig_bt[i]) { |
|
1005 case T_BOOLEAN: |
|
1006 case T_CHAR: |
|
1007 case T_BYTE: |
|
1008 case T_SHORT: |
|
1009 case T_INT: |
|
1010 if (int_args < Argument::n_int_register_parameters_c) { |
|
1011 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); |
|
1012 #ifdef _WIN64 |
|
1013 fp_args++; |
|
1014         // Allocate slots for callee to stuff register args on the stack. |
|
1015 stk_args += 2; |
|
1016 #endif |
|
1017 } else { |
|
1018 regs[i].set1(VMRegImpl::stack2reg(stk_args)); |
|
1019 stk_args += 2; |
|
1020 } |
|
1021 break; |
|
1022 case T_LONG: |
|
1023 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); |
|
1024 // fall through |
|
1025 case T_OBJECT: |
|
1026 case T_ARRAY: |
|
1027 case T_ADDRESS: |
|
1028 case T_METADATA: |
|
1029 if (int_args < Argument::n_int_register_parameters_c) { |
|
1030 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); |
|
1031 #ifdef _WIN64 |
|
1032 fp_args++; |
|
1033 stk_args += 2; |
|
1034 #endif |
|
1035 } else { |
|
1036 regs[i].set2(VMRegImpl::stack2reg(stk_args)); |
|
1037 stk_args += 2; |
|
1038 } |
|
1039 break; |
|
1040 case T_FLOAT: |
|
1041 if (fp_args < Argument::n_float_register_parameters_c) { |
|
1042 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); |
|
1043 #ifdef _WIN64 |
|
1044 int_args++; |
|
1045         // Allocate slots for callee to stuff register args on the stack. |
|
1046 stk_args += 2; |
|
1047 #endif |
|
1048 } else { |
|
1049 regs[i].set1(VMRegImpl::stack2reg(stk_args)); |
|
1050 stk_args += 2; |
|
1051 } |
|
1052 break; |
|
1053 case T_DOUBLE: |
|
1054 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); |
|
1055 if (fp_args < Argument::n_float_register_parameters_c) { |
|
1056 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); |
|
1057 #ifdef _WIN64 |
|
1058 int_args++; |
|
1059         // Allocate slots for callee to stuff register args on the stack. |
|
1060 stk_args += 2; |
|
1061 #endif |
|
1062 } else { |
|
1063 regs[i].set2(VMRegImpl::stack2reg(stk_args)); |
|
1064 stk_args += 2; |
|
1065 } |
|
1066 break; |
|
1067 case T_VOID: // Halves of longs and doubles |
|
1068 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); |
|
1069 regs[i].set_bad(); |
|
1070 break; |
|
1071 default: |
|
1072 ShouldNotReachHere(); |
|
1073 break; |
|
1074 } |
|
1075 } |
|
1076 #ifdef _WIN64 |
|
1077   // The Windows ABI requires that we always allocate enough stack space |

1078   // for 4 64-bit registers to be stored down. |
|
1079 if (stk_args < 8) { |
|
1080 stk_args = 8; |
|
1081 } |
|
1082 #endif // _WIN64 |
|
1083 |
|
1084 return stk_args; |
|
1085 } |
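// Illustrative mapping (not from the source): for a native signature
// (int, float, long) the SysV branch above assigns
//   T_INT -> c_rarg0,  T_FLOAT -> c_farg0,  T_LONG -> c_rarg1
// and returns 0, while the _WIN64 branch consumes a position in both register
// banks per argument, giving
//   T_INT -> c_rarg0,  T_FLOAT -> c_farg1,  T_LONG -> c_rarg2
// and returns at least 8 slots for the mandatory 32-byte home area.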
|
1086 |
|
1087 // On 64 bit we will store integer-like items to the stack as |

1088 // 64-bit items (sparc abi) even though java would only store |

1089 // 32 bits for a parameter. On 32 bit it will simply be 32 bits. |

1090 // So this routine will do 32->32 on 32 bit and 32->64 on 64 bit. |
|
1091 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { |
|
1092 if (src.first()->is_stack()) { |
|
1093 if (dst.first()->is_stack()) { |
|
1094 // stack to stack |
|
1095 __ movslq(rax, Address(rbp, reg2offset_in(src.first()))); |
|
1096 __ movq(Address(rsp, reg2offset_out(dst.first())), rax); |
|
1097 } else { |
|
1098 // stack to reg |
|
1099 __ movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); |
|
1100 } |
|
1101 } else if (dst.first()->is_stack()) { |
|
1102 // reg to stack |
|
1103 // Do we really have to sign extend??? |
|
1104 // __ movslq(src.first()->as_Register(), src.first()->as_Register()); |
|
1105 __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); |
|
1106 } else { |
|
1107 // Do we really have to sign extend??? |
|
1108 // __ movslq(dst.first()->as_Register(), src.first()->as_Register()); |
|
1109 if (dst.first() != src.first()) { |
|
1110 __ movq(dst.first()->as_Register(), src.first()->as_Register()); |
|
1111 } |
|
1112 } |
|
1113 } |
|
1114 |
|
1115 static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { |
|
1116 if (src.first()->is_stack()) { |
|
1117 if (dst.first()->is_stack()) { |
|
1118 // stack to stack |
|
1119 __ movq(rax, Address(rbp, reg2offset_in(src.first()))); |
|
1120 __ movq(Address(rsp, reg2offset_out(dst.first())), rax); |
|
1121 } else { |
|
1122 // stack to reg |
|
1123 __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); |
|
1124 } |
|
1125 } else if (dst.first()->is_stack()) { |
|
1126 // reg to stack |
|
1127 __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); |
|
1128 } else { |
|
1129 if (dst.first() != src.first()) { |
|
1130 __ movq(dst.first()->as_Register(), src.first()->as_Register()); |
|
1131 } |
|
1132 } |
|
1133 } |
|
1134 |
|
1135 // An oop arg. Must pass a handle not the oop itself |
|
1136 static void object_move(MacroAssembler* masm, |
|
1137 OopMap* map, |
|
1138 int oop_handle_offset, |
|
1139 int framesize_in_slots, |
|
1140 VMRegPair src, |
|
1141 VMRegPair dst, |
|
1142 bool is_receiver, |
|
1143 int* receiver_offset) { |
|
1144 |
|
1145 // must pass a handle. First figure out the location we use as a handle |
|
1146 |
|
1147 Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register(); |
|
1148 |
|
1149   // See if the oop is NULL; if it is, we need no handle |
|
1150 |
|
1151 if (src.first()->is_stack()) { |
|
1152 |
|
1153 // Oop is already on the stack as an argument |
|
1154 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); |
|
1155 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); |
|
1156 if (is_receiver) { |
|
1157 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; |
|
1158 } |
|
1159 |
|
1160 __ cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD); |
|
1161 __ lea(rHandle, Address(rbp, reg2offset_in(src.first()))); |
|
1162 // conditionally move a NULL |
|
1163 __ cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first()))); |
|
1164 } else { |
|
1165 |
|
1166     // Oop is in a register; we must store it to the space we reserve |

1167     // on the stack for oop_handles and pass a handle if the oop is non-NULL |
|
1168 |
|
1169 const Register rOop = src.first()->as_Register(); |
|
1170 int oop_slot; |
|
1171 if (rOop == j_rarg0) |
|
1172 oop_slot = 0; |
|
1173 else if (rOop == j_rarg1) |
|
1174 oop_slot = 1; |
|
1175 else if (rOop == j_rarg2) |
|
1176 oop_slot = 2; |
|
1177 else if (rOop == j_rarg3) |
|
1178 oop_slot = 3; |
|
1179 else if (rOop == j_rarg4) |
|
1180 oop_slot = 4; |
|
1181 else { |
|
1182 assert(rOop == j_rarg5, "wrong register"); |
|
1183 oop_slot = 5; |
|
1184 } |
|
1185 |
|
1186 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; |
|
1187 int offset = oop_slot*VMRegImpl::stack_slot_size; |
|
1188 |
|
1189 map->set_oop(VMRegImpl::stack2reg(oop_slot)); |
|
1190 // Store oop in handle area, may be NULL |
|
1191 __ movptr(Address(rsp, offset), rOop); |
|
1192 if (is_receiver) { |
|
1193 *receiver_offset = offset; |
|
1194 } |
|
1195 |
|
1196 __ cmpptr(rOop, (int32_t)NULL_WORD); |
|
1197 __ lea(rHandle, Address(rsp, offset)); |
|
1198 // conditionally move a NULL from the handle area where it was just stored |
|
1199 __ cmovptr(Assembler::equal, rHandle, Address(rsp, offset)); |
|
1200 } |
|
1201 |
|
1202   // If the arg is on the stack then place it, otherwise it is already in the correct reg. |
|
1203 if (dst.first()->is_stack()) { |
|
1204 __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle); |
|
1205 } |
|
1206 } |
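// Net effect (added commentary): the callee receives either the address of the
// stack slot / handle-area word now holding the oop, or NULL when the oop itself
// was NULL -- the cmovptr above is what folds the NULL case into rHandle before
// it is passed on.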
|
1207 |
|
1208 // A float arg may have to do a float reg to int reg conversion |
|
1209 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { |
|
1210 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); |
|
1211 |
|
1212   // The calling convention assures us that each VMRegPair is either |
|
1213 // all really one physical register or adjacent stack slots. |
|
1214 // This greatly simplifies the cases here compared to sparc. |
|
1215 |
|
1216 if (src.first()->is_stack()) { |
|
1217 if (dst.first()->is_stack()) { |
|
1218 __ movl(rax, Address(rbp, reg2offset_in(src.first()))); |
|
1219 __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); |
|
1220 } else { |
|
1221 // stack to reg |
|
1222 assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters"); |
|
1223 __ movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first()))); |
|
1224 } |
|
1225 } else if (dst.first()->is_stack()) { |
|
1226 // reg to stack |
|
1227 assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters"); |
|
1228 __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); |
|
1229 } else { |
|
1230 // reg to reg |
|
1231 // In theory these overlap but the ordering is such that this is likely a nop |
|
1232 if ( src.first() != dst.first()) { |
|
1233 __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister()); |
|
1234 } |
|
1235 } |
|
1236 } |
|
1237 |
|
1238 // A long move |
|
1239 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { |
|
1240 |
|
1241   // The calling convention assures us that each VMRegPair is either |
|
1242 // all really one physical register or adjacent stack slots. |
|
1243 // This greatly simplifies the cases here compared to sparc. |
|
1244 |
|
1245 if (src.is_single_phys_reg() ) { |
|
1246 if (dst.is_single_phys_reg()) { |
|
1247 if (dst.first() != src.first()) { |
|
1248 __ mov(dst.first()->as_Register(), src.first()->as_Register()); |
|
1249 } |
|
1250 } else { |
|
1251 assert(dst.is_single_reg(), "not a stack pair"); |
|
1252 __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); |
|
1253 } |
|
1254 } else if (dst.is_single_phys_reg()) { |
|
1255 assert(src.is_single_reg(), "not a stack pair"); |
|
1256 __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first()))); |
|
1257 } else { |
|
1258 assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); |
|
1259 __ movq(rax, Address(rbp, reg2offset_in(src.first()))); |
|
1260 __ movq(Address(rsp, reg2offset_out(dst.first())), rax); |
|
1261 } |
|
1262 } |
|
1263 |
|
1264 // A double move |
|
1265 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { |
|
1266 |
|
1267   // The calling convention assures us that each VMRegPair is either |
|
1268 // all really one physical register or adjacent stack slots. |
|
1269 // This greatly simplifies the cases here compared to sparc. |
|
1270 |
|
1271 if (src.is_single_phys_reg() ) { |
|
1272 if (dst.is_single_phys_reg()) { |
|
1273 // In theory these overlap but the ordering is such that this is likely a nop |
|
1274 if ( src.first() != dst.first()) { |
|
1275 __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister()); |
|
1276 } |
|
1277 } else { |
|
1278 assert(dst.is_single_reg(), "not a stack pair"); |
|
1279 __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); |
|
1280 } |
|
1281 } else if (dst.is_single_phys_reg()) { |
|
1282 assert(src.is_single_reg(), "not a stack pair"); |
|
1283 __ movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first()))); |
|
1284 } else { |
|
1285 assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); |
|
1286 __ movq(rax, Address(rbp, reg2offset_in(src.first()))); |
|
1287 __ movq(Address(rsp, reg2offset_out(dst.first())), rax); |
|
1288 } |
|
1289 } |
|
1290 |
|
1291 |
|
1292 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { |
|
1293   // We always ignore the frame_slots arg and simply use the space just below the frame pointer, |

1294   // which by this time is free to use |
|
1295 switch (ret_type) { |
|
1296 case T_FLOAT: |
|
1297 __ movflt(Address(rbp, -wordSize), xmm0); |
|
1298 break; |
|
1299 case T_DOUBLE: |
|
1300 __ movdbl(Address(rbp, -wordSize), xmm0); |
|
1301 break; |
|
1302 case T_VOID: break; |
|
1303 default: { |
|
1304 __ movptr(Address(rbp, -wordSize), rax); |
|
1305 } |
|
1306 } |
|
1307 } |
|
1308 |
|
1309 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { |
|
1310   // We always ignore the frame_slots arg and simply use the space just below the frame pointer, |

1311   // which by this time is free to use |
|
1312 switch (ret_type) { |
|
1313 case T_FLOAT: |
|
1314 __ movflt(xmm0, Address(rbp, -wordSize)); |
|
1315 break; |
|
1316 case T_DOUBLE: |
|
1317 __ movdbl(xmm0, Address(rbp, -wordSize)); |
|
1318 break; |
|
1319 case T_VOID: break; |
|
1320 default: { |
|
1321 __ movptr(rax, Address(rbp, -wordSize)); |
|
1322 } |
|
1323 } |
|
1324 } |
|
1325 |
|
1326 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { |
|
1327 for ( int i = first_arg ; i < arg_count ; i++ ) { |
|
1328 if (args[i].first()->is_Register()) { |
|
1329 __ push(args[i].first()->as_Register()); |
|
1330 } else if (args[i].first()->is_XMMRegister()) { |
|
1331 __ subptr(rsp, 2*wordSize); |
|
1332 __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister()); |
|
1333 } |
|
1334 } |
|
1335 } |
|
1336 |
|
1337 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { |
|
1338 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { |
|
1339 if (args[i].first()->is_Register()) { |
|
1340 __ pop(args[i].first()->as_Register()); |
|
1341 } else if (args[i].first()->is_XMMRegister()) { |
|
1342 __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0)); |
|
1343 __ addptr(rsp, 2*wordSize); |
|
1344 } |
|
1345 } |
|
1346 } |
|
1347 |
|
1348 |
|
1349 static void save_or_restore_arguments(MacroAssembler* masm, |
|
1350 const int stack_slots, |
|
1351 const int total_in_args, |
|
1352 const int arg_save_area, |
|
1353 OopMap* map, |
|
1354 VMRegPair* in_regs, |
|
1355 BasicType* in_sig_bt) { |
|
1356 // if map is non-NULL then the code should store the values, |
|
1357 // otherwise it should load them. |
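  // The wrapper calls this twice around a runtime call: first with a fresh OopMap to
  // spill the arguments and describe any oops among them, then with map == NULL to
  // reload the same values (see check_needs_gc_for_critical_native below).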
|
1358 int slot = arg_save_area; |
|
1359   // Save down the double word values first |
|
1360 for ( int i = 0; i < total_in_args; i++) { |
|
1361 if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) { |
|
1362 int offset = slot * VMRegImpl::stack_slot_size; |
|
1363 slot += VMRegImpl::slots_per_word; |
|
1364 assert(slot <= stack_slots, "overflow"); |
|
1365 if (map != NULL) { |
|
1366 __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister()); |
|
1367 } else { |
|
1368 __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset)); |
|
1369 } |
|
1370 } |
|
1371 if (in_regs[i].first()->is_Register() && |
|
1372 (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) { |
|
1373 int offset = slot * VMRegImpl::stack_slot_size; |
|
1374 if (map != NULL) { |
|
1375 __ movq(Address(rsp, offset), in_regs[i].first()->as_Register()); |
|
1376 if (in_sig_bt[i] == T_ARRAY) { |
|
1377         map->set_oop(VMRegImpl::stack2reg(slot)); |
|
1378 } |
|
1379 } else { |
|
1380 __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset)); |
|
1381 } |
|
1382 slot += VMRegImpl::slots_per_word; |
|
1383 } |
|
1384 } |
|
1385 // Save or restore single word registers |
|
1386 for ( int i = 0; i < total_in_args; i++) { |
|
1387 if (in_regs[i].first()->is_Register()) { |
|
1388 int offset = slot * VMRegImpl::stack_slot_size; |
|
1389 slot++; |
|
1390 assert(slot <= stack_slots, "overflow"); |
|
1391 |
|
1392       // Value is in an input register; we must flush it to the stack |
|
1393 const Register reg = in_regs[i].first()->as_Register(); |
|
1394 switch (in_sig_bt[i]) { |
|
1395 case T_BOOLEAN: |
|
1396 case T_CHAR: |
|
1397 case T_BYTE: |
|
1398 case T_SHORT: |
|
1399 case T_INT: |
|
1400 if (map != NULL) { |
|
1401 __ movl(Address(rsp, offset), reg); |
|
1402 } else { |
|
1403 __ movl(reg, Address(rsp, offset)); |
|
1404 } |
|
1405 break; |
|
1406 case T_ARRAY: |
|
1407 case T_LONG: |
|
1408 // handled above |
|
1409 break; |
|
1410 case T_OBJECT: |
|
1411 default: ShouldNotReachHere(); |
|
1412 } |
|
1413 } else if (in_regs[i].first()->is_XMMRegister()) { |
|
1414 if (in_sig_bt[i] == T_FLOAT) { |
|
1415 int offset = slot * VMRegImpl::stack_slot_size; |
|
1416 slot++; |
|
1417 assert(slot <= stack_slots, "overflow"); |
|
1418 if (map != NULL) { |
|
1419 __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister()); |
|
1420 } else { |
|
1421 __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset)); |
|
1422 } |
|
1423 } |
|
1424 } else if (in_regs[i].first()->is_stack()) { |
|
1425 if (in_sig_bt[i] == T_ARRAY && map != NULL) { |
|
1426 int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); |
|
1427 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); |
|
1428 } |
|
1429 } |
|
1430 } |
|
1431 } |
|
1432 |
|
1433 |
|
1434 // Check GCLocker::needs_gc and enter the runtime if it's true. This |
|
1435 // keeps a new JNI critical region from starting until a GC has been |
|
1436 // forced. Save down any oops in registers and describe them in an |
|
1437 // OopMap. |
|
1438 static void check_needs_gc_for_critical_native(MacroAssembler* masm, |
|
1439 int stack_slots, |
|
1440 int total_c_args, |
|
1441 int total_in_args, |
|
1442 int arg_save_area, |
|
1443 OopMapSet* oop_maps, |
|
1444 VMRegPair* in_regs, |
|
1445 BasicType* in_sig_bt) { |
|
1446 __ block_comment("check GCLocker::needs_gc"); |
|
1447 Label cont; |
|
1448 __ cmp8(ExternalAddress((address)GCLocker::needs_gc_address()), false); |
|
1449 __ jcc(Assembler::equal, cont); |
|
1450 |
|
1451 // Save down any incoming oops and call into the runtime to halt for a GC |
|
1452 |
|
1453 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); |
|
1454 save_or_restore_arguments(masm, stack_slots, total_in_args, |
|
1455 arg_save_area, map, in_regs, in_sig_bt); |
|
1456 |
|
1457 address the_pc = __ pc(); |
|
1458 oop_maps->add_gc_map( __ offset(), map); |
|
1459 __ set_last_Java_frame(rsp, noreg, the_pc); |
|
1460 |
|
1461 __ block_comment("block_for_jni_critical"); |
|
1462 __ movptr(c_rarg0, r15_thread); |
|
1463 __ mov(r12, rsp); // remember sp |
|
1464 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows |
|
1465 __ andptr(rsp, -16); // align stack as required by ABI |
|
1466 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical))); |
|
1467 __ mov(rsp, r12); // restore sp |
|
1468 __ reinit_heapbase(); |
|
1469 |
|
1470 __ reset_last_Java_frame(false); |
|
1471 |
|
1472 save_or_restore_arguments(masm, stack_slots, total_in_args, |
|
1473 arg_save_area, NULL, in_regs, in_sig_bt); |
|
1474 __ bind(cont); |
|
1475 #ifdef ASSERT |
|
1476 if (StressCriticalJNINatives) { |
|
1477 // Stress register saving |
|
1478 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); |
|
1479 save_or_restore_arguments(masm, stack_slots, total_in_args, |
|
1480 arg_save_area, map, in_regs, in_sig_bt); |
|
1481 // Destroy argument registers |
|
1482 for (int i = 0; i < total_in_args - 1; i++) { |
|
1483 if (in_regs[i].first()->is_Register()) { |
|
1484 const Register reg = in_regs[i].first()->as_Register(); |
|
1485 __ xorptr(reg, reg); |
|
1486 } else if (in_regs[i].first()->is_XMMRegister()) { |
|
1487 __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister()); |
|
1488 } else if (in_regs[i].first()->is_FloatRegister()) { |
|
1489 ShouldNotReachHere(); |
|
1490 } else if (in_regs[i].first()->is_stack()) { |
|
1491 // Nothing to do |
|
1492 } else { |
|
1493 ShouldNotReachHere(); |
|
1494 } |
|
1495 if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) { |
|
1496 i++; |
|
1497 } |
|
1498 } |
|
1499 |
|
1500 save_or_restore_arguments(masm, stack_slots, total_in_args, |
|
1501 arg_save_area, NULL, in_regs, in_sig_bt); |
|
1502 } |
|
1503 #endif |
|
1504 } |
|
1505 |
|
1506 // Unpack an array argument into a pointer to the body and the length |
|
1507 // if the array is non-null, otherwise pass 0 for both. |
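// For example, a Java byte[] argument to a critical native is rewritten into a
// (jint length, jbyte* body) pair of C arguments; a NULL array is passed as (0, NULL).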
|
1508 static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { |
|
1509 Register tmp_reg = rax; |
|
1510 assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg, |
|
1511 "possible collision"); |
|
1512 assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg, |
|
1513 "possible collision"); |
|
1514 |
|
1515 __ block_comment("unpack_array_argument {"); |
|
1516 |
|
1517 // Pass the length, ptr pair |
|
1518 Label is_null, done; |
|
1519 VMRegPair tmp; |
|
1520 tmp.set_ptr(tmp_reg->as_VMReg()); |
|
1521 if (reg.first()->is_stack()) { |
|
1522 // Load the arg up from the stack |
|
1523 move_ptr(masm, reg, tmp); |
|
1524 reg = tmp; |
|
1525 } |
|
1526 __ testptr(reg.first()->as_Register(), reg.first()->as_Register()); |
|
1527 __ jccb(Assembler::equal, is_null); |
|
1528 __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type))); |
|
1529 move_ptr(masm, tmp, body_arg); |
|
1530 // load the length relative to the body. |
|
1531 __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() - |
|
1532 arrayOopDesc::base_offset_in_bytes(in_elem_type))); |
|
1533 move32_64(masm, tmp, length_arg); |
|
1534 __ jmpb(done); |
|
1535 __ bind(is_null); |
|
1536 // Pass zeros |
|
1537 __ xorptr(tmp_reg, tmp_reg); |
|
1538 move_ptr(masm, tmp, body_arg); |
|
1539 move32_64(masm, tmp, length_arg); |
|
1540 __ bind(done); |
|
1541 |
|
1542 __ block_comment("} unpack_array_argument"); |
|
1543 } |
|
1544 |
|
1545 |
|
1546 // Different signatures may require very different orders for the moves |
|
1547 // to avoid clobbering other arguments. There's no simple way to |
|
1548 // order them safely. Compute a safe order for issuing stores and |
|
1549 // break any cycles in those stores. This code is fairly general but |
|
1550 // it's not necessary on the other platforms so we keep it in the |
|
1551 // platform dependent code instead of moving it into a shared file. |
|
1552 // (See bugs 7013347 & 7145024.) |
|
1553 // Note that this code is specific to LP64. |
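// For example, if one argument must move from the register currently holding C
// argument A into the register holding C argument B, while another must move from
// B's register into A's, neither store can be emitted first without clobbering the
// other; break_cycle stages one of the values in the caller-supplied temp (rbx,
// see tmp_vmreg in generate_native_wrapper).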
|
1554 class ComputeMoveOrder: public StackObj { |
|
1555 class MoveOperation: public ResourceObj { |
|
1556 friend class ComputeMoveOrder; |
|
1557 private: |
|
1558 VMRegPair _src; |
|
1559 VMRegPair _dst; |
|
1560 int _src_index; |
|
1561 int _dst_index; |
|
1562 bool _processed; |
|
1563 MoveOperation* _next; |
|
1564 MoveOperation* _prev; |
|
1565 |
|
1566 static int get_id(VMRegPair r) { |
|
1567 return r.first()->value(); |
|
1568 } |
|
1569 |
|
1570 public: |
|
1571 MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): |
|
1572 _src(src) |
|
1573 , _src_index(src_index) |
|
1574 , _dst(dst) |
|
1575 , _dst_index(dst_index) |
|
1576 , _next(NULL) |
|
1577 , _prev(NULL) |
|
1578 , _processed(false) { |
|
1579 } |
|
1580 |
|
1581 VMRegPair src() const { return _src; } |
|
1582 int src_id() const { return get_id(src()); } |
|
1583 int src_index() const { return _src_index; } |
|
1584 VMRegPair dst() const { return _dst; } |
|
1585     void set_dst(int i, VMRegPair dst)          { _dst_index = i; _dst = dst; } |
|
1586 int dst_index() const { return _dst_index; } |
|
1587 int dst_id() const { return get_id(dst()); } |
|
1588 MoveOperation* next() const { return _next; } |
|
1589 MoveOperation* prev() const { return _prev; } |
|
1590 void set_processed() { _processed = true; } |
|
1591 bool is_processed() const { return _processed; } |
|
1592 |
|
1593     // insert a new store to break a cycle |
|
1594 void break_cycle(VMRegPair temp_register) { |
|
1595 // create a new store following the last store |
|
1596 // to move from the temp_register to the original |
|
1597 MoveOperation* new_store = new MoveOperation(-1, temp_register, dst_index(), dst()); |
|
1598 |
|
1599 // break the cycle of links and insert new_store at the end |
|
1600 // break the reverse link. |
|
1601 MoveOperation* p = prev(); |
|
1602 assert(p->next() == this, "must be"); |
|
1603 _prev = NULL; |
|
1604 p->_next = new_store; |
|
1605 new_store->_prev = p; |
|
1606 |
|
1607       // change the original store to save its value in the temp. |
|
1608 set_dst(-1, temp_register); |
|
1609 } |
|
1610 |
|
1611 void link(GrowableArray<MoveOperation*>& killer) { |
|
1612       // link this store in front of the store that it depends on |
|
1613 MoveOperation* n = killer.at_grow(src_id(), NULL); |
|
1614 if (n != NULL) { |
|
1615 assert(_next == NULL && n->_prev == NULL, "shouldn't have been set yet"); |
|
1616 _next = n; |
|
1617 n->_prev = this; |
|
1618 } |
|
1619 } |
|
1620 }; |
|
1621 |
|
1622 private: |
|
1623 GrowableArray<MoveOperation*> edges; |
|
1624 |
|
1625 public: |
|
1626 ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, |
|
1627 BasicType* in_sig_bt, GrowableArray<int>& arg_order, VMRegPair tmp_vmreg) { |
|
1628 // Move operations where the dest is the stack can all be |
|
1629 // scheduled first since they can't interfere with the other moves. |
|
1630 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { |
|
1631 if (in_sig_bt[i] == T_ARRAY) { |
|
1632 c_arg--; |
|
1633 if (out_regs[c_arg].first()->is_stack() && |
|
1634 out_regs[c_arg + 1].first()->is_stack()) { |
|
1635 arg_order.push(i); |
|
1636 arg_order.push(c_arg); |
|
1637 } else { |
|
1638 if (out_regs[c_arg].first()->is_stack() || |
|
1639 in_regs[i].first() == out_regs[c_arg].first()) { |
|
1640 add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg + 1]); |
|
1641 } else { |
|
1642 add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]); |
|
1643 } |
|
1644 } |
|
1645 } else if (in_sig_bt[i] == T_VOID) { |
|
1646 arg_order.push(i); |
|
1647 arg_order.push(c_arg); |
|
1648 } else { |
|
1649 if (out_regs[c_arg].first()->is_stack() || |
|
1650 in_regs[i].first() == out_regs[c_arg].first()) { |
|
1651 arg_order.push(i); |
|
1652 arg_order.push(c_arg); |
|
1653 } else { |
|
1654 add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]); |
|
1655 } |
|
1656 } |
|
1657 } |
|
1658     // Break any cycles in the register moves and emit the stores in the |
|
1659 // proper order. |
|
1660 GrowableArray<MoveOperation*>* stores = get_store_order(tmp_vmreg); |
|
1661 for (int i = 0; i < stores->length(); i++) { |
|
1662 arg_order.push(stores->at(i)->src_index()); |
|
1663 arg_order.push(stores->at(i)->dst_index()); |
|
1664 } |
|
1665 } |
|
1666 |
|
1667   // Collect all the move operations |
|
1668 void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { |
|
1669 if (src.first() == dst.first()) return; |
|
1670 edges.append(new MoveOperation(src_index, src, dst_index, dst)); |
|
1671 } |
|
1672 |
|
1673 // Walk the edges breaking cycles between moves. The result list |
|
1674 // can be walked in order to produce the proper set of loads |
|
1675 GrowableArray<MoveOperation*>* get_store_order(VMRegPair temp_register) { |
|
1676 // Record which moves kill which values |
|
1677 GrowableArray<MoveOperation*> killer; |
|
1678 for (int i = 0; i < edges.length(); i++) { |
|
1679 MoveOperation* s = edges.at(i); |
|
1680 assert(killer.at_grow(s->dst_id(), NULL) == NULL, "only one killer"); |
|
1681 killer.at_put_grow(s->dst_id(), s, NULL); |
|
1682 } |
|
1683 assert(killer.at_grow(MoveOperation::get_id(temp_register), NULL) == NULL, |
|
1684 "make sure temp isn't in the registers that are killed"); |
|
1685 |
|
1686 // create links between loads and stores |
|
1687 for (int i = 0; i < edges.length(); i++) { |
|
1688 edges.at(i)->link(killer); |
|
1689 } |
|
1690 |
|
1691 // at this point, all the move operations are chained together |
|
1692 // in a doubly linked list. Processing it backwards finds |
|
1693 // the beginning of the chain, forwards finds the end. If there's |
|
1694 // a cycle it can be broken at any point, so pick an edge and walk |
|
1695 // backward until the list ends or we end where we started. |
|
1696 GrowableArray<MoveOperation*>* stores = new GrowableArray<MoveOperation*>(); |
|
1697 for (int e = 0; e < edges.length(); e++) { |
|
1698 MoveOperation* s = edges.at(e); |
|
1699 if (!s->is_processed()) { |
|
1700 MoveOperation* start = s; |
|
1701 // search for the beginning of the chain or cycle |
|
1702 while (start->prev() != NULL && start->prev() != s) { |
|
1703 start = start->prev(); |
|
1704 } |
|
1705 if (start->prev() == s) { |
|
1706 start->break_cycle(temp_register); |
|
1707 } |
|
1708 // walk the chain forward inserting to store list |
|
1709 while (start != NULL) { |
|
1710 stores->append(start); |
|
1711 start->set_processed(); |
|
1712 start = start->next(); |
|
1713 } |
|
1714 } |
|
1715 } |
|
1716 return stores; |
|
1717 } |
|
1718 }; |
|
1719 |
|
1720 static void verify_oop_args(MacroAssembler* masm, |
|
1721 const methodHandle& method, |
|
1722 const BasicType* sig_bt, |
|
1723 const VMRegPair* regs) { |
|
1724 Register temp_reg = rbx; // not part of any compiled calling seq |
|
1725 if (VerifyOops) { |
|
1726 for (int i = 0; i < method->size_of_parameters(); i++) { |
|
1727 if (sig_bt[i] == T_OBJECT || |
|
1728 sig_bt[i] == T_ARRAY) { |
|
1729 VMReg r = regs[i].first(); |
|
1730 assert(r->is_valid(), "bad oop arg"); |
|
1731 if (r->is_stack()) { |
|
1732 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); |
|
1733 __ verify_oop(temp_reg); |
|
1734 } else { |
|
1735 __ verify_oop(r->as_Register()); |
|
1736 } |
|
1737 } |
|
1738 } |
|
1739 } |
|
1740 } |
|
1741 |
|
1742 static void gen_special_dispatch(MacroAssembler* masm, |
|
1743 const methodHandle& method, |
|
1744 const BasicType* sig_bt, |
|
1745 const VMRegPair* regs) { |
|
1746 verify_oop_args(masm, method, sig_bt, regs); |
|
1747 vmIntrinsics::ID iid = method->intrinsic_id(); |
|
1748 |
|
1749 // Now write the args into the outgoing interpreter space |
|
1750 bool has_receiver = false; |
|
1751 Register receiver_reg = noreg; |
|
1752 int member_arg_pos = -1; |
|
1753 Register member_reg = noreg; |
|
1754 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); |
|
1755 if (ref_kind != 0) { |
|
1756 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument |
|
1757 member_reg = rbx; // known to be free at this point |
|
1758 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); |
|
1759 } else if (iid == vmIntrinsics::_invokeBasic) { |
|
1760 has_receiver = true; |
|
1761 } else { |
|
1762 fatal("unexpected intrinsic id %d", iid); |
|
1763 } |
|
1764 |
|
1765 if (member_reg != noreg) { |
|
1766 // Load the member_arg into register, if necessary. |
|
1767 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); |
|
1768 VMReg r = regs[member_arg_pos].first(); |
|
1769 if (r->is_stack()) { |
|
1770 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); |
|
1771 } else { |
|
1772 // no data motion is needed |
|
1773 member_reg = r->as_Register(); |
|
1774 } |
|
1775 } |
|
1776 |
|
1777 if (has_receiver) { |
|
1778 // Make sure the receiver is loaded into a register. |
|
1779 assert(method->size_of_parameters() > 0, "oob"); |
|
1780 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); |
|
1781 VMReg r = regs[0].first(); |
|
1782 assert(r->is_valid(), "bad receiver arg"); |
|
1783 if (r->is_stack()) { |
|
1784 // Porting note: This assumes that compiled calling conventions always |
|
1785 // pass the receiver oop in a register. If this is not true on some |
|
1786 // platform, pick a temp and load the receiver from stack. |
|
1787 fatal("receiver always in a register"); |
|
1788 receiver_reg = j_rarg0; // known to be free at this point |
|
1789 __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); |
|
1790 } else { |
|
1791 // no data motion is needed |
|
1792 receiver_reg = r->as_Register(); |
|
1793 } |
|
1794 } |
|
1795 |
|
1796 // Figure out which address we are really jumping to: |
|
1797 MethodHandles::generate_method_handle_dispatch(masm, iid, |
|
1798 receiver_reg, member_reg, /*for_compiler_entry:*/ true); |
|
1799 } |
|
1800 |
|
1801 // --------------------------------------------------------------------------- |
|
1802 // Generate a native wrapper for a given method. The method takes arguments |
|
1803 // in the Java compiled code convention, marshals them to the native |
|
1804 // convention (handlizes oops, etc), transitions to native, makes the call, |
|
1805 // returns to java state (possibly blocking), unhandlizes any result and |
|
1806 // returns. |
|
1807 // |
|
1808 // Critical native functions are a shorthand for the use of |
|
1809 // GetPrimitiveArrayCritical and disallow the use of any other JNI |
|
1810 // functions. The wrapper is expected to unpack the arguments before |
|
1811 // passing them to the callee and perform checks before and after the |
|
1812 // native call to ensure that the GCLocker |
|
1813 // lock_critical/unlock_critical semantics are followed. Some other |
|
1814 // parts of JNI setup are skipped like the tear down of the JNI handle |
|
1815 // block and the check for pending exceptions, since it's impossible for them |
|
1816 // to be thrown. |
|
1817 // |
|
1818 // They are roughly structured like this: |
|
1819 // if (GCLocker::needs_gc()) |
|
1820 // SharedRuntime::block_for_jni_critical(); |
|
1821 //   transition to thread_in_native |
|
1822 //   unpack array arguments and call native entry point |
|
1823 // check for safepoint in progress |
|
1824 // check if any thread suspend flags are set |
|
1825 //   call into JVM and possibly unlock the JNI critical |
|
1826 // if a GC was suppressed while in the critical native. |
|
1827 // transition back to thread_in_Java |
|
1828 // return to caller |
|
1829 // |
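// As a rough sketch (assuming a hypothetical method  static native int sum(byte[] a)
// registered as a critical native), the callee is invoked without a JNIEnv* or jclass
// argument and with each array expanded to a length/pointer pair, i.e. roughly
//   jint sum(jint a_len, jbyte* a_body).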
|
1830 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, |
|
1831 const methodHandle& method, |
|
1832 int compile_id, |
|
1833 BasicType* in_sig_bt, |
|
1834 VMRegPair* in_regs, |
|
1835 BasicType ret_type) { |
|
1836 if (method->is_method_handle_intrinsic()) { |
|
1837 vmIntrinsics::ID iid = method->intrinsic_id(); |
|
1838 intptr_t start = (intptr_t)__ pc(); |
|
1839 int vep_offset = ((intptr_t)__ pc()) - start; |
|
1840 gen_special_dispatch(masm, |
|
1841 method, |
|
1842 in_sig_bt, |
|
1843 in_regs); |
|
1844 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period |
|
1845 __ flush(); |
|
1846 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually |
|
1847 return nmethod::new_native_nmethod(method, |
|
1848 compile_id, |
|
1849 masm->code(), |
|
1850 vep_offset, |
|
1851 frame_complete, |
|
1852 stack_slots / VMRegImpl::slots_per_word, |
|
1853 in_ByteSize(-1), |
|
1854 in_ByteSize(-1), |
|
1855 (OopMapSet*)NULL); |
|
1856 } |
|
1857 bool is_critical_native = true; |
|
1858 address native_func = method->critical_native_function(); |
|
1859 if (native_func == NULL) { |
|
1860 native_func = method->native_function(); |
|
1861 is_critical_native = false; |
|
1862 } |
|
1863 assert(native_func != NULL, "must have function"); |
|
1864 |
|
1865 // An OopMap for lock (and class if static) |
|
1866 OopMapSet *oop_maps = new OopMapSet(); |
|
1867 intptr_t start = (intptr_t)__ pc(); |
|
1868 |
|
1869   // We have received a description of where all the java args are located |
|
1870 // on entry to the wrapper. We need to convert these args to where |
|
1871 // the jni function will expect them. To figure out where they go |
|
1872 // we convert the java signature to a C signature by inserting |
|
1873 // the hidden arguments as arg[0] and possibly arg[1] (static method) |
|
1874 |
|
1875 const int total_in_args = method->size_of_parameters(); |
|
1876 int total_c_args = total_in_args; |
|
1877 if (!is_critical_native) { |
|
1878 total_c_args += 1; |
|
1879 if (method->is_static()) { |
|
1880 total_c_args++; |
|
1881 } |
|
1882 } else { |
|
1883 for (int i = 0; i < total_in_args; i++) { |
|
1884 if (in_sig_bt[i] == T_ARRAY) { |
|
1885 total_c_args++; |
|
1886 } |
|
1887 } |
|
1888 } |
|
1889 |
|
1890 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); |
|
1891 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); |
|
1892 BasicType* in_elem_bt = NULL; |
|
1893 |
|
1894 int argc = 0; |
|
1895 if (!is_critical_native) { |
|
1896 out_sig_bt[argc++] = T_ADDRESS; |
|
1897 if (method->is_static()) { |
|
1898 out_sig_bt[argc++] = T_OBJECT; |
|
1899 } |
|
1900 |
|
1901 for (int i = 0; i < total_in_args ; i++ ) { |
|
1902 out_sig_bt[argc++] = in_sig_bt[i]; |
|
1903 } |
|
1904 } else { |
|
1905 Thread* THREAD = Thread::current(); |
|
1906 in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); |
|
1907 SignatureStream ss(method->signature()); |
|
1908 for (int i = 0; i < total_in_args ; i++ ) { |
|
1909 if (in_sig_bt[i] == T_ARRAY) { |
|
1910 // Arrays are passed as int, elem* pair |
|
1911 out_sig_bt[argc++] = T_INT; |
|
1912 out_sig_bt[argc++] = T_ADDRESS; |
|
1913 Symbol* atype = ss.as_symbol(CHECK_NULL); |
|
1914 const char* at = atype->as_C_string(); |
|
1915 if (strlen(at) == 2) { |
|
1916 assert(at[0] == '[', "must be"); |
|
1917 switch (at[1]) { |
|
1918 case 'B': in_elem_bt[i] = T_BYTE; break; |
|
1919 case 'C': in_elem_bt[i] = T_CHAR; break; |
|
1920 case 'D': in_elem_bt[i] = T_DOUBLE; break; |
|
1921 case 'F': in_elem_bt[i] = T_FLOAT; break; |
|
1922 case 'I': in_elem_bt[i] = T_INT; break; |
|
1923 case 'J': in_elem_bt[i] = T_LONG; break; |
|
1924 case 'S': in_elem_bt[i] = T_SHORT; break; |
|
1925 case 'Z': in_elem_bt[i] = T_BOOLEAN; break; |
|
1926 default: ShouldNotReachHere(); |
|
1927 } |
|
1928 } |
|
1929 } else { |
|
1930 out_sig_bt[argc++] = in_sig_bt[i]; |
|
1931 in_elem_bt[i] = T_VOID; |
|
1932 } |
|
1933 if (in_sig_bt[i] != T_VOID) { |
|
1934 assert(in_sig_bt[i] == ss.type(), "must match"); |
|
1935 ss.next(); |
|
1936 } |
|
1937 } |
|
1938 } |
|
1939 |
|
1940 // Now figure out where the args must be stored and how much stack space |
|
1941 // they require. |
|
1942 int out_arg_slots; |
|
1943 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); |
|
1944 |
|
1945 // Compute framesize for the wrapper. We need to handlize all oops in |
|
1946 // incoming registers |
|
1947 |
|
1948 // Calculate the total number of stack slots we will need. |
|
1949 |
|
1950 // First count the abi requirement plus all of the outgoing args |
|
1951 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; |
|
1952 |
|
1953 // Now the space for the inbound oop handle area |
|
1954 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers |
|
1955 if (is_critical_native) { |
|
1956 // Critical natives may have to call out so they need a save area |
|
1957 // for register arguments. |
|
1958 int double_slots = 0; |
|
1959 int single_slots = 0; |
|
1960 for ( int i = 0; i < total_in_args; i++) { |
|
1961 if (in_regs[i].first()->is_Register()) { |
|
1962 const Register reg = in_regs[i].first()->as_Register(); |
|
1963 switch (in_sig_bt[i]) { |
|
1964 case T_BOOLEAN: |
|
1965 case T_BYTE: |
|
1966 case T_SHORT: |
|
1967 case T_CHAR: |
|
1968 case T_INT: single_slots++; break; |
|
1969 case T_ARRAY: // specific to LP64 (7145024) |
|
1970 case T_LONG: double_slots++; break; |
|
1971 default: ShouldNotReachHere(); |
|
1972 } |
|
1973 } else if (in_regs[i].first()->is_XMMRegister()) { |
|
1974 switch (in_sig_bt[i]) { |
|
1975 case T_FLOAT: single_slots++; break; |
|
1976 case T_DOUBLE: double_slots++; break; |
|
1977 default: ShouldNotReachHere(); |
|
1978 } |
|
1979 } else if (in_regs[i].first()->is_FloatRegister()) { |
|
1980 ShouldNotReachHere(); |
|
1981 } |
|
1982 } |
|
1983 total_save_slots = double_slots * 2 + single_slots; |
|
1984 // align the save area |
|
1985 if (double_slots != 0) { |
|
1986 stack_slots = align_up(stack_slots, 2); |
|
1987 } |
|
1988 } |
|
1989 |
|
1990 int oop_handle_offset = stack_slots; |
|
1991 stack_slots += total_save_slots; |
|
1992 |
|
1993 // Now any space we need for handlizing a klass if static method |
|
1994 |
|
1995 int klass_slot_offset = 0; |
|
1996 int klass_offset = -1; |
|
1997 int lock_slot_offset = 0; |
|
1998 bool is_static = false; |
|
1999 |
|
2000 if (method->is_static()) { |
|
2001 klass_slot_offset = stack_slots; |
|
2002 stack_slots += VMRegImpl::slots_per_word; |
|
2003 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; |
|
2004 is_static = true; |
|
2005 } |
|
2006 |
|
2007 // Plus a lock if needed |
|
2008 |
|
2009 if (method->is_synchronized()) { |
|
2010 lock_slot_offset = stack_slots; |
|
2011 stack_slots += VMRegImpl::slots_per_word; |
|
2012 } |
|
2013 |
|
2014 // Now a place (+2) to save return values or temp during shuffling |
|
2015 // + 4 for return address (which we own) and saved rbp |
|
2016 stack_slots += 6; |
|
2017 |
|
2018 // Ok The space we have allocated will look like: |
|
2019 // |
|
2020 // |
|
2021 // FP-> | | |
|
2022 // |---------------------| |
|
2023 // | 2 slots for moves | |
|
2024 // |---------------------| |
|
2025 // | lock box (if sync) | |
|
2026 // |---------------------| <- lock_slot_offset |
|
2027 // | klass (if static) | |
|
2028 // |---------------------| <- klass_slot_offset |
|
2029 // | oopHandle area | |
|
2030 // |---------------------| <- oop_handle_offset (6 java arg registers) |
|
2031 // | outbound memory | |
|
2032 // | based arguments | |
|
2033 // | | |
|
2034 // |---------------------| |
|
2035 // | | |
|
2036 // SP-> | out_preserved_slots | |
|
2037 // |
|
2038 // |
|
2039 |
|
2040 |
|
2041 // Now compute actual number of stack words we need rounding to make |
|
2042 // stack properly aligned. |
|
2043 stack_slots = align_up(stack_slots, StackAlignmentInSlots); |
|
2044 |
|
2045 int stack_size = stack_slots * VMRegImpl::stack_slot_size; |
|
2046 |
|
2047 // First thing make an ic check to see if we should even be here |
|
2048 |
|
2049 // We are free to use all registers as temps without saving them and |
|
2050 // restoring them except rbp. rbp is the only callee save register |
|
2051 // as far as the interpreter and the compiler(s) are concerned. |
|
2052 |
|
2053 |
|
2054 const Register ic_reg = rax; |
|
2055 const Register receiver = j_rarg0; |
|
2056 |
|
2057 Label hit; |
|
2058 Label exception_pending; |
|
2059 |
|
2060 assert_different_registers(ic_reg, receiver, rscratch1); |
|
2061 __ verify_oop(receiver); |
|
2062 __ load_klass(rscratch1, receiver); |
|
2063 __ cmpq(ic_reg, rscratch1); |
|
2064 __ jcc(Assembler::equal, hit); |
|
2065 |
|
2066 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); |
|
2067 |
|
2068 // Verified entry point must be aligned |
|
2069 __ align(8); |
|
2070 |
|
2071 __ bind(hit); |
|
2072 |
|
2073 int vep_offset = ((intptr_t)__ pc()) - start; |
|
2074 |
|
2075 #ifdef COMPILER1 |
|
2076 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. |
|
2077 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { |
|
2078 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); |
|
2079 } |
|
2080 #endif // COMPILER1 |
|
2081 |
|
2082 // The instruction at the verified entry point must be 5 bytes or longer |
|
2083 // because it can be patched on the fly by make_non_entrant. The stack bang |
|
2084 // instruction fits that requirement. |
|
2085 |
|
2086 // Generate stack overflow check |
|
2087 |
|
2088 if (UseStackBanging) { |
|
2089 __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); |
|
2090 } else { |
|
2091 // need a 5 byte instruction to allow MT safe patching to non-entrant |
|
2092 __ fat_nop(); |
|
2093 } |
|
2094 |
|
2095 // Generate a new frame for the wrapper. |
|
2096 __ enter(); |
|
2097 // -2 because return address is already present and so is saved rbp |
|
2098 __ subptr(rsp, stack_size - 2*wordSize); |
|
2099 |
|
2100 // Frame is now completed as far as size and linkage. |
|
2101 int frame_complete = ((intptr_t)__ pc()) - start; |
|
2102 |
|
2103 if (UseRTMLocking) { |
|
2104 // Abort RTM transaction before calling JNI |
|
2105 // because critical section will be large and will be |
|
2106 // aborted anyway. Also nmethod could be deoptimized. |
|
2107 __ xabort(0); |
|
2108 } |
|
2109 |
|
2110 #ifdef ASSERT |
|
2111 { |
|
2112 Label L; |
|
2113 __ mov(rax, rsp); |
|
2114 __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI) |
|
2115 __ cmpptr(rax, rsp); |
|
2116 __ jcc(Assembler::equal, L); |
|
2117 __ stop("improperly aligned stack"); |
|
2118 __ bind(L); |
|
2119 } |
|
2120 #endif /* ASSERT */ |
|
2121 |
|
2122 |
|
2123 // We use r14 as the oop handle for the receiver/klass |
|
2124 // It is callee save so it survives the call to native |
|
2125 |
|
2126 const Register oop_handle_reg = r14; |
|
2127 |
|
2128 if (is_critical_native) { |
|
2129 check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, |
|
2130 oop_handle_offset, oop_maps, in_regs, in_sig_bt); |
|
2131 } |
|
2132 |
|
2133 // |
|
2134   // We immediately shuffle the arguments so that for any vm call we have to |
|
2135 // make from here on out (sync slow path, jvmti, etc.) we will have |
|
2136 // captured the oops from our caller and have a valid oopMap for |
|
2137 // them. |
|
2138 |
|
2139 // ----------------- |
|
2140 // The Grand Shuffle |
|
2141 |
|
2142 // The Java calling convention is either equal (linux) or denser (win64) than the |
|
2143   // c calling convention. However, because of the jni_env argument the c calling |
|
2144 // convention always has at least one more (and two for static) arguments than Java. |
|
2145 // Therefore if we move the args from java -> c backwards then we will never have |
|
2146 // a register->register conflict and we don't have to build a dependency graph |
|
2147 // and figure out how to break any cycles. |
|
2148 // |
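  // For example, for an instance method the receiver arriving in j_rarg0 ends up in
  // c_rarg1, since c_rarg0 is reserved for the JNIEnv* that is loaded just before
  // the call.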
|
2149 |
|
2150 // Record esp-based slot for receiver on stack for non-static methods |
|
2151 int receiver_offset = -1; |
|
2152 |
|
2153 // This is a trick. We double the stack slots so we can claim |
|
2154 // the oops in the caller's frame. Since we are sure to have |
|
2155 // more args than the caller doubling is enough to make |
|
2156 // sure we can capture all the incoming oop args from the |
|
2157 // caller. |
|
2158 // |
|
2159 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); |
|
2160 |
|
2161 // Mark location of rbp (someday) |
|
2162 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); |
|
2163 |
|
2164 // Use eax, ebx as temporaries during any memory-memory moves we have to do |
|
2165 // All inbound args are referenced based on rbp and all outbound args via rsp. |
|
2166 |
|
2167 |
|
2168 #ifdef ASSERT |
|
2169 bool reg_destroyed[RegisterImpl::number_of_registers]; |
|
2170 bool freg_destroyed[XMMRegisterImpl::number_of_registers]; |
|
2171 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { |
|
2172 reg_destroyed[r] = false; |
|
2173 } |
|
2174 for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) { |
|
2175 freg_destroyed[f] = false; |
|
2176 } |
|
2177 |
|
2178 #endif /* ASSERT */ |
|
2179 |
|
2180 // This may iterate in two different directions depending on the |
|
2181 // kind of native it is. The reason is that for regular JNI natives |
|
2182 // the incoming and outgoing registers are offset upwards and for |
|
2183 // critical natives they are offset down. |
|
2184 GrowableArray<int> arg_order(2 * total_in_args); |
|
2185 VMRegPair tmp_vmreg; |
|
2186 tmp_vmreg.set1(rbx->as_VMReg()); |
|
2187 |
|
2188 if (!is_critical_native) { |
|
2189 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { |
|
2190 arg_order.push(i); |
|
2191 arg_order.push(c_arg); |
|
2192 } |
|
2193 } else { |
|
2194 // Compute a valid move order, using tmp_vmreg to break any cycles |
|
2195 ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); |
|
2196 } |
|
2197 |
|
2198 int temploc = -1; |
|
2199 for (int ai = 0; ai < arg_order.length(); ai += 2) { |
|
2200 int i = arg_order.at(ai); |
|
2201 int c_arg = arg_order.at(ai + 1); |
|
2202 __ block_comment(err_msg("move %d -> %d", i, c_arg)); |
|
2203 if (c_arg == -1) { |
|
2204 assert(is_critical_native, "should only be required for critical natives"); |
|
2205 // This arg needs to be moved to a temporary |
|
2206 __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); |
|
2207 in_regs[i] = tmp_vmreg; |
|
2208 temploc = i; |
|
2209 continue; |
|
2210 } else if (i == -1) { |
|
2211 assert(is_critical_native, "should only be required for critical natives"); |
|
2212 // Read from the temporary location |
|
2213 assert(temploc != -1, "must be valid"); |
|
2214 i = temploc; |
|
2215 temploc = -1; |
|
2216 } |
|
2217 #ifdef ASSERT |
|
2218 if (in_regs[i].first()->is_Register()) { |
|
2219 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); |
|
2220 } else if (in_regs[i].first()->is_XMMRegister()) { |
|
2221 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); |
|
2222 } |
|
2223 if (out_regs[c_arg].first()->is_Register()) { |
|
2224 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; |
|
2225 } else if (out_regs[c_arg].first()->is_XMMRegister()) { |
|
2226 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; |
|
2227 } |
|
2228 #endif /* ASSERT */ |
|
2229 switch (in_sig_bt[i]) { |
|
2230 case T_ARRAY: |
|
2231 if (is_critical_native) { |
|
2232 unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); |
|
2233 c_arg++; |
|
2234 #ifdef ASSERT |
|
2235 if (out_regs[c_arg].first()->is_Register()) { |
|
2236 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; |
|
2237 } else if (out_regs[c_arg].first()->is_XMMRegister()) { |
|
2238 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; |
|
2239 } |
|
2240 #endif |
|
2241 break; |
|
2242 } |
|
2243 case T_OBJECT: |
|
2244 assert(!is_critical_native, "no oop arguments"); |
|
2245 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], |
|
2246 ((i == 0) && (!is_static)), |
|
2247 &receiver_offset); |
|
2248 break; |
|
2249 case T_VOID: |
|
2250 break; |
|
2251 |
|
2252 case T_FLOAT: |
|
2253 float_move(masm, in_regs[i], out_regs[c_arg]); |
|
2254 break; |
|
2255 |
|
2256 case T_DOUBLE: |
|
2257 assert( i + 1 < total_in_args && |
|
2258 in_sig_bt[i + 1] == T_VOID && |
|
2259 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); |
|
2260 double_move(masm, in_regs[i], out_regs[c_arg]); |
|
2261 break; |
|
2262 |
|
2263 case T_LONG : |
|
2264 long_move(masm, in_regs[i], out_regs[c_arg]); |
|
2265 break; |
|
2266 |
|
2267 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); |
|
2268 |
|
2269 default: |
|
2270 move32_64(masm, in_regs[i], out_regs[c_arg]); |
|
2271 } |
|
2272 } |
|
2273 |
|
2274 int c_arg; |
|
2275 |
|
2276 // Pre-load a static method's oop into r14. Used both by locking code and |
|
2277 // the normal JNI call code. |
|
2278 if (!is_critical_native) { |
|
2279 // point c_arg at the first arg that is already loaded in case we |
|
2280 // need to spill before we call out |
|
2281 c_arg = total_c_args - total_in_args; |
|
2282 |
|
2283 if (method->is_static()) { |
|
2284 |
|
2285 // load oop into a register |
|
2286 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); |
|
2287 |
|
2288       // Now handlize the static class mirror; it's known not-null. |
|
2289 __ movptr(Address(rsp, klass_offset), oop_handle_reg); |
|
2290 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); |
|
2291 |
|
2292 // Now get the handle |
|
2293 __ lea(oop_handle_reg, Address(rsp, klass_offset)); |
|
2294 // store the klass handle as second argument |
|
2295 __ movptr(c_rarg1, oop_handle_reg); |
|
2296 // and protect the arg if we must spill |
|
2297 c_arg--; |
|
2298 } |
|
2299 } else { |
|
2300 // For JNI critical methods we need to save all registers in save_args. |
|
2301 c_arg = 0; |
|
2302 } |
|
2303 |
|
2304 // Change state to native (we save the return address in the thread, since it might not |
|
2305   // be pushed on the stack when we do a stack traversal). It is enough that the pc() |
|
2306 // points into the right code segment. It does not have to be the correct return pc. |
|
2307 // We use the same pc/oopMap repeatedly when we call out |
|
2308 |
|
2309 intptr_t the_pc = (intptr_t) __ pc(); |
|
2310 oop_maps->add_gc_map(the_pc - start, map); |
|
2311 |
|
2312 __ set_last_Java_frame(rsp, noreg, (address)the_pc); |
|
2313 |
|
2314 |
|
2315   // We have all of the arguments set up at this point. We must not touch any |
|
2316   // argument registers at this point (if we were to save/restore them there would be no oopMap describing them). |
|
2317 |
|
2318 { |
|
2319 SkipIfEqual skip(masm, &DTraceMethodProbes, false); |
|
2320 // protect the args we've loaded |
|
2321 save_args(masm, total_c_args, c_arg, out_regs); |
|
2322 __ mov_metadata(c_rarg1, method()); |
|
2323 __ call_VM_leaf( |
|
2324 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), |
|
2325 r15_thread, c_rarg1); |
|
2326 restore_args(masm, total_c_args, c_arg, out_regs); |
|
2327 } |
|
2328 |
|
2329 // RedefineClasses() tracing support for obsolete method entry |
|
2330 if (log_is_enabled(Trace, redefine, class, obsolete)) { |
|
2331 // protect the args we've loaded |
|
2332 save_args(masm, total_c_args, c_arg, out_regs); |
|
2333 __ mov_metadata(c_rarg1, method()); |
|
2334 __ call_VM_leaf( |
|
2335 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), |
|
2336 r15_thread, c_rarg1); |
|
2337 restore_args(masm, total_c_args, c_arg, out_regs); |
|
2338 } |
|
2339 |
|
2340 // Lock a synchronized method |
|
2341 |
|
2342 // Register definitions used by locking and unlocking |
|
2343 |
|
2344 const Register swap_reg = rax; // Must use rax for cmpxchg instruction |
|
2345 const Register obj_reg = rbx; // Will contain the oop |
|
2346 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) |
|
2347 const Register old_hdr = r13; // value of old header at unlock time |
|
2348 |
|
2349 Label slow_path_lock; |
|
2350 Label lock_done; |
|
2351 |
|
2352 if (method->is_synchronized()) { |
|
2353 assert(!is_critical_native, "unhandled"); |
|
2354 |
|
2355 |
|
2356 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); |
|
2357 |
|
2358 // Get the handle (the 2nd argument) |
|
2359 __ mov(oop_handle_reg, c_rarg1); |
|
2360 |
|
2361 // Get address of the box |
|
2362 |
|
2363 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); |
|
2364 |
|
2365 // Load the oop from the handle |
|
2366 __ movptr(obj_reg, Address(oop_handle_reg, 0)); |
|
2367 |
|
2368 if (UseBiasedLocking) { |
|
2369 __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock); |
|
2370 } |
|
2371 |
|
2372 // Load immediate 1 into swap_reg %rax |
|
2373 __ movl(swap_reg, 1); |
|
2374 |
|
2375 // Load (object->mark() | 1) into swap_reg %rax |
|
2376 __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); |
|
2377 |
|
2378 // Save (object->mark() | 1) into BasicLock's displaced header |
|
2379 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); |
|
2380 |
|
2381 if (os::is_MP()) { |
|
2382 __ lock(); |
|
2383 } |
|
2384 |
|
2385 // src -> dest iff dest == rax else rax <- dest |
|
2386 __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); |
|
2387 __ jcc(Assembler::equal, lock_done); |
|
2388 |
|
2389 // Hmm should this move to the slow path code area??? |
|
2390 |
|
2391 // Test if the oopMark is an obvious stack pointer, i.e., |
|
2392 // 1) (mark & 3) == 0, and |
|
2393     //  2) rsp <= mark < rsp + os::pagesize() |
|
2394 // These 3 tests can be done by evaluating the following |
|
2395 // expression: ((mark - rsp) & (3 - os::vm_page_size())), |
|
2396 // assuming both stack pointer and pagesize have their |
|
2397 // least significant 2 bits clear. |
|
2398 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg |
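    // For example, with a 4K page 3 - os::vm_page_size() is 0x...f003, so the AND
    // below is zero only when the low two bits of the mark are clear and the
    // (unsigned) difference mark - rsp is smaller than the page size.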
|
2399 |
|
2400 __ subptr(swap_reg, rsp); |
|
2401 __ andptr(swap_reg, 3 - os::vm_page_size()); |
|
2402 |
|
2403     // Save the test result; for the recursive case, the result is zero |
|
2404 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); |
|
2405 __ jcc(Assembler::notEqual, slow_path_lock); |
|
2406 |
|
2407 // Slow path will re-enter here |
|
2408 |
|
2409 __ bind(lock_done); |
|
2410 } |
|
2411 |
|
2412 |
|
2413 // Finally just about ready to make the JNI call |
|
2414 |
|
2415 |
|
2416 // get JNIEnv* which is first argument to native |
|
2417 if (!is_critical_native) { |
|
2418 __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); |
|
2419 } |
|
2420 |
|
2421 // Now set thread in native |
|
2422 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); |
|
2423 |
|
2424 __ call(RuntimeAddress(native_func)); |
|
2425 |
|
2426 // Verify or restore cpu control state after JNI call |
|
2427 __ restore_cpu_control_state_after_jni(); |
|
2428 |
|
2429 // Unpack native results. |
|
2430 switch (ret_type) { |
|
2431 case T_BOOLEAN: __ c2bool(rax); break; |
|
2432 case T_CHAR : __ movzwl(rax, rax); break; |
|
2433 case T_BYTE : __ sign_extend_byte (rax); break; |
|
2434 case T_SHORT : __ sign_extend_short(rax); break; |
|
2435 case T_INT : /* nothing to do */ break; |
|
2436 case T_DOUBLE : |
|
2437 case T_FLOAT : |
|
2438 // Result is in xmm0 we'll save as needed |
|
2439 break; |
|
2440 case T_ARRAY: // Really a handle |
|
2441 case T_OBJECT: // Really a handle |
|
2442 break; // can't de-handlize until after safepoint check |
|
2443 case T_VOID: break; |
|
2444 case T_LONG: break; |
|
2445 default : ShouldNotReachHere(); |
|
2446 } |
|
2447 |
|
2448 // Switch thread to "native transition" state before reading the synchronization state. |
|
2449 // This additional state is necessary because reading and testing the synchronization |
|
2450 // state is not atomic w.r.t. GC, as this scenario demonstrates: |
|
2451 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. |
|
2452 // VM thread changes sync state to synchronizing and suspends threads for GC. |
|
2453 // Thread A is resumed to finish this native method, but doesn't block here since it |
|
2454   //     didn't see any synchronization in progress, and escapes. |
|
2455 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); |
|
2456 |
|
2457   if (os::is_MP()) { |
|
2458 if (UseMembar) { |
|
2459 // Force this write out before the read below |
|
2460 __ membar(Assembler::Membar_mask_bits( |
|
2461 Assembler::LoadLoad | Assembler::LoadStore | |
|
2462 Assembler::StoreLoad | Assembler::StoreStore)); |
|
2463 } else { |
|
2464 // Write serialization page so VM thread can do a pseudo remote membar. |
|
2465 // We use the current thread pointer to calculate a thread specific |
|
2466 // offset to write to within the page. This minimizes bus traffic |
|
2467 // due to cache line collision. |
|
2468 __ serialize_memory(r15_thread, rcx); |
|
2469 } |
|
2470 } |
|
2471 |
|
2472 Label after_transition; |
|
2473 |
|
2474 // check for safepoint operation in progress and/or pending suspend requests |
|
2475 { |
|
2476 Label Continue; |
|
2477 |
|
2478 __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()), |
|
2479 SafepointSynchronize::_not_synchronized); |
|
2480 |
|
2481 Label L; |
|
2482 __ jcc(Assembler::notEqual, L); |
|
2483 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); |
|
2484 __ jcc(Assembler::equal, Continue); |
|
2485 __ bind(L); |
|
2486 |
|
2487 // Don't use call_VM as it will see a possible pending exception and forward it |
|
2488 // and never return here preventing us from clearing _last_native_pc down below. |
|
2489 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are |
|
2490 // preserved and correspond to the bcp/locals pointers. So we do a runtime call |
|
2491 // by hand. |
|
2492 // |
|
2493 __ vzeroupper(); |
|
2494 save_native_result(masm, ret_type, stack_slots); |
|
2495 __ mov(c_rarg0, r15_thread); |
|
2496 __ mov(r12, rsp); // remember sp |
|
2497 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows |
|
2498 __ andptr(rsp, -16); // align stack as required by ABI |
|
2499 if (!is_critical_native) { |
|
2500 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); |
|
2501 } else { |
|
2502 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition))); |
|
2503 } |
|
2504 __ mov(rsp, r12); // restore sp |
|
2505 __ reinit_heapbase(); |
|
2506 // Restore any method result value |
|
2507 restore_native_result(masm, ret_type, stack_slots); |
|
2508 |
|
2509 if (is_critical_native) { |
|
2510 // The call above performed the transition to thread_in_Java so |
|
2511 // skip the transition logic below. |
|
2512 __ jmpb(after_transition); |
|
2513 } |
|
2514 |
|
2515 __ bind(Continue); |
|
2516 } |
|
2517 |
|
2518 // change thread state |
|
2519 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); |
|
2520 __ bind(after_transition); |
|
2521 |
|
2522 Label reguard; |
|
2523 Label reguard_done; |
|
2524 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_reserved_disabled); |
|
2525 __ jcc(Assembler::equal, reguard); |
|
2526 __ bind(reguard_done); |
|
2527 |
|
2528 // native result if any is live |
|
2529 |
|
2530 // Unlock |
|
2531 Label unlock_done; |
|
2532 Label slow_path_unlock; |
|
2533 if (method->is_synchronized()) { |
|
2534 |
|
2535 // Get locked oop from the handle we passed to jni |
|
2536 __ movptr(obj_reg, Address(oop_handle_reg, 0)); |
|
2537 |
|
2538 Label done; |
|
2539 |
|
2540 if (UseBiasedLocking) { |
|
2541 __ biased_locking_exit(obj_reg, old_hdr, done); |
|
2542 } |
|
2543 |
|
2544 // Simple recursive lock? |
|
2545 |
|
2546 __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD); |
|
2547 __ jcc(Assembler::equal, done); |
|
2548 |
|
2549     // Must save rax if it is live now because cmpxchg must use it |
|
2550 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { |
|
2551 save_native_result(masm, ret_type, stack_slots); |
|
2552 } |
|
2553 |
|
2554 |
|
2555 // get address of the stack lock |
|
2556 __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); |
|
2557 // get old displaced header |
|
2558 __ movptr(old_hdr, Address(rax, 0)); |
|
2559 |
|
2560 // Atomic swap old header if oop still contains the stack lock |
|
2561 if (os::is_MP()) { |
|
2562 __ lock(); |
|
2563 } |
|
2564 __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); |
|
2565 __ jcc(Assembler::notEqual, slow_path_unlock); |
|
2566 |
|
2567 // slow path re-enters here |
|
2568 __ bind(unlock_done); |
|
2569 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { |
|
2570 restore_native_result(masm, ret_type, stack_slots); |
|
2571 } |
|
2572 |
|
2573 __ bind(done); |
|
2574 |
|
2575 } |
|
2576 { |
|
2577 SkipIfEqual skip(masm, &DTraceMethodProbes, false); |
|
2578 save_native_result(masm, ret_type, stack_slots); |
|
2579 __ mov_metadata(c_rarg1, method()); |
|
2580 __ call_VM_leaf( |
|
2581 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), |
|
2582 r15_thread, c_rarg1); |
|
2583 restore_native_result(masm, ret_type, stack_slots); |
|
2584 } |
|
2585 |
|
2586 __ reset_last_Java_frame(false); |
|
2587 |
|
2588 // Unbox oop result, e.g. JNIHandles::resolve value. |
|
2589 if (ret_type == T_OBJECT || ret_type == T_ARRAY) { |
|
2590 __ resolve_jobject(rax /* value */, |
|
2591 r15_thread /* thread */, |
|
2592 rcx /* tmp */); |
|
2593 } |
|
2594 |
|
2595 if (CheckJNICalls) { |
|
2596 // clear_pending_jni_exception_check |
|
2597 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); |
|
2598 } |
|
2599 |
|
2600 if (!is_critical_native) { |
|
2601 // reset handle block |
|
2602 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); |
|
2603 __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD); |
|
2604 } |
|
2605 |
|
2606 // pop our frame |
|
2607 |
|
2608 __ leave(); |
|
2609 |
|
2610 if (!is_critical_native) { |
|
2611 // Any exception pending? |
|
2612 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); |
|
2613 __ jcc(Assembler::notEqual, exception_pending); |
|
2614 } |
|
2615 |
|
2616 // Return |
|
2617 |
|
2618 __ ret(0); |
|
2619 |
|
2620 // Unexpected paths are out of line and go here |
|
2621 |
|
2622 if (!is_critical_native) { |
|
2623 // forward the exception |
|
2624 __ bind(exception_pending); |
|
2625 |
|
2626 // and forward the exception |
|
2627 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); |
|
2628 } |
|
2629 |
|
2630 // Slow path locking & unlocking |
|
2631 if (method->is_synchronized()) { |
|
2632 |
|
2633 // BEGIN Slow path lock |
|
2634 __ bind(slow_path_lock); |
|
2635 |
|
2636 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM |
|
2637 // args are (oop obj, BasicLock* lock, JavaThread* thread) |
|
2638 |
|
2639 // protect the args we've loaded |
|
2640 save_args(masm, total_c_args, c_arg, out_regs); |
|
2641 |
|
2642 __ mov(c_rarg0, obj_reg); |
|
2643 __ mov(c_rarg1, lock_reg); |
|
2644 __ mov(c_rarg2, r15_thread); |
|
2645 |
|
2646 // Not a leaf but we have last_Java_frame setup as we want |
|
2647 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); |
|
2648 restore_args(masm, total_c_args, c_arg, out_regs); |
|
2649 |
|
2650 #ifdef ASSERT |
|
2651 { Label L; |
|
2652 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); |
|
2653 __ jcc(Assembler::equal, L); |
|
2654 __ stop("no pending exception allowed on exit from monitorenter"); |
|
2655 __ bind(L); |
|
2656 } |
|
2657 #endif |
|
2658 __ jmp(lock_done); |
|
2659 |
|
2660 // END Slow path lock |
|
2661 |
|
2662 // BEGIN Slow path unlock |
|
2663 __ bind(slow_path_unlock); |
|
2664 |
|
2665 // If we haven't already saved the native result we must save it now as xmm registers |
|
2666 // are still exposed. |
|
2667 __ vzeroupper(); |
|
2668 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { |
|
2669 save_native_result(masm, ret_type, stack_slots); |
|
2670 } |
|
2671 |
|
2672 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); |
|
2673 |
|
2674 __ mov(c_rarg0, obj_reg); |
|
2675 __ mov(c_rarg2, r15_thread); |
|
2676 __ mov(r12, rsp); // remember sp |
|
2677 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows |
|
2678 __ andptr(rsp, -16); // align stack as required by ABI |
|
2679 |
|
2680 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) |
|
2681 // NOTE that obj_reg == rbx currently |
|
2682 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); |
|
2683 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); |
|
2684 |
|
2685 // args are (oop obj, BasicLock* lock, JavaThread* thread) |
|
2686 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); |
|
2687 __ mov(rsp, r12); // restore sp |
|
2688 __ reinit_heapbase(); |
|
2689 #ifdef ASSERT |
|
2690 { |
|
2691 Label L; |
|
2692 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); |
|
2693 __ jcc(Assembler::equal, L); |
|
2694     __ stop("no pending exception allowed on exit from complete_monitor_unlocking_C"); |
|
2695 __ bind(L); |
|
2696 } |
|
2697 #endif /* ASSERT */ |
|
2698 |
|
2699 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); |
|
2700 |
|
2701 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { |
|
2702 restore_native_result(masm, ret_type, stack_slots); |
|
2703 } |
|
2704 __ jmp(unlock_done); |
|
2705 |
|
2706 // END Slow path unlock |
|
2707 |
|
2708 } // synchronized |
|
2709 |
|
2710 // SLOW PATH Reguard the stack if needed |
|
2711 |
|
2712 __ bind(reguard); |
|
2713 __ vzeroupper(); |
|
2714 save_native_result(masm, ret_type, stack_slots); |
|
2715 __ mov(r12, rsp); // remember sp |
|
2716 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows |
|
2717 __ andptr(rsp, -16); // align stack as required by ABI |
|
2718 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); |
|
2719 __ mov(rsp, r12); // restore sp |
|
2720 __ reinit_heapbase(); |
|
2721 restore_native_result(masm, ret_type, stack_slots); |
|
2722 // and continue |
|
2723 __ jmp(reguard_done); |
|
2724 |
|
2725 |
|
2726 |
|
2727 __ flush(); |
|
2728 |
|
2729 nmethod *nm = nmethod::new_native_nmethod(method, |
|
2730 compile_id, |
|
2731 masm->code(), |
|
2732 vep_offset, |
|
2733 frame_complete, |
|
2734 stack_slots / VMRegImpl::slots_per_word, |
|
2735 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), |
|
2736 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), |
|
2737 oop_maps); |
|
2738 |
|
2739 if (is_critical_native) { |
|
2740 nm->set_lazy_critical_native(true); |
|
2741 } |
|
2742 |
|
2743 return nm; |
|
2744 |
|
2745 } |
|
2746 |
|
2747 // this function returns the adjustment size (in number of words) to a c2i adapter |
|
2748 // activation for use during deoptimization |
|
2749 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { |
|
2750 return (callee_locals - callee_parameters) * Interpreter::stackElementWords; |
|
2751 } |
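// Illustrative arithmetic (editor's example, not from the original source): if a
// callee has 2 parameters and 5 locals, and Interpreter::stackElementWords is 1,
// the c2i activation must grow by (5 - 2) * 1 = 3 words to make room for the extra locals.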
|
2752 |
|
2753 |
|
2754 uint SharedRuntime::out_preserve_stack_slots() { |
|
2755 return 0; |
|
2756 } |
|
2757 |
|
2758 //------------------------------generate_deopt_blob---------------------------- |
|
2759 void SharedRuntime::generate_deopt_blob() { |
|
2760 // Allocate space for the code |
|
2761 ResourceMark rm; |
|
2762 // Setup code generation tools |
|
2763 int pad = 0; |
|
2764 #if INCLUDE_JVMCI |
|
2765 if (EnableJVMCI || UseAOT) { |
|
2766 pad += 512; // Increase the buffer size when compiling for JVMCI |
|
2767 } |
|
2768 #endif |
|
2769 CodeBuffer buffer("deopt_blob", 2048+pad, 1024); |
|
2770 MacroAssembler* masm = new MacroAssembler(&buffer); |
|
2771 int frame_size_in_words; |
|
2772 OopMap* map = NULL; |
|
2773 OopMapSet *oop_maps = new OopMapSet(); |
|
2774 |
|
2775 // ------------- |
|
2776 // This code enters when returning to a de-optimized nmethod. A return |
|
2777   // address has been pushed on the stack, and return values are in |
|
2778 // registers. |
|
2779 // If we are doing a normal deopt then we were called from the patched |
|
2780 // nmethod from the point we returned to the nmethod. So the return |
|
2781 // address on the stack is wrong by NativeCall::instruction_size |
|
2782 // We will adjust the value so it looks like we have the original return |
|
2783 // address on the stack (like when we eagerly deoptimized). |
|
2784 // In the case of an exception pending when deoptimizing, we enter |
|
2785 // with a return address on the stack that points after the call we patched |
|
2786 // into the exception handler. We have the following register state from, |
|
2787 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). |
|
2788 // rax: exception oop |
|
2789 // rbx: exception handler |
|
2790 // rdx: throwing pc |
|
2791 // So in this case we simply jam rdx into the useless return address and |
|
2792 // the stack looks just like we want. |
|
2793 // |
|
2794 // At this point we need to de-opt. We save the argument return |
|
2795 // registers. We call the first C routine, fetch_unroll_info(). This |
|
2796 // routine captures the return values and returns a structure which |
|
2797 // describes the current frame size and the sizes of all replacement frames. |
|
2798 // The current frame is compiled code and may contain many inlined |
|
2799 // functions, each with their own JVM state. We pop the current frame, then |
|
2800 // push all the new frames. Then we call the C routine unpack_frames() to |
|
2801 // populate these frames. Finally unpack_frames() returns us the new target |
|
2802 // address. Notice that callee-save registers are BLOWN here; they have |
|
2803 // already been captured in the vframeArray at the time the return PC was |
|
2804 // patched. |
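  // Editor's sketch of the overall flow in pseudocode (helper names such as
  // pop_physical_frame/push_skeletal_frame are illustrative, and the UnrollBlock
  // accessors are paraphrased from the offsets used below):
  //
  //   UnrollBlock* info = fetch_unroll_info(thread);            // first C call, returns frame layout
  //   pop_physical_frame(info->size_of_deoptimized_frame());    // discard the compiled frame
  //   for (int k = 0; k < info->number_of_frames(); k++) {
  //     push_skeletal_frame(info->frame_sizes()[k], info->frame_pcs()[k]);
  //   }
  //   unpack_frames(thread, exec_mode);                         // second C call, fills the frames in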
|
2805 address start = __ pc(); |
|
2806 Label cont; |
|
2807 |
|
2808 // Prolog for non exception case! |
|
2809 |
|
2810 // Save everything in sight. |
|
2811 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); |
|
2812 |
|
2813 // Normal deoptimization. Save exec mode for unpack_frames. |
|
2814 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved |
|
2815 __ jmp(cont); |
|
2816 |
|
2817 int reexecute_offset = __ pc() - start; |
|
2818 #if INCLUDE_JVMCI && !defined(COMPILER1) |
|
2819 if (EnableJVMCI && UseJVMCICompiler) { |
|
2820 // JVMCI does not use this kind of deoptimization |
|
2821 __ should_not_reach_here(); |
|
2822 } |
|
2823 #endif |
|
2824 |
|
2825 // Reexecute case |
|
2826   // the return address is the pc that describes which bci to re-execute at |
|
2827 |
|
2828 // No need to update map as each call to save_live_registers will produce identical oopmap |
|
2829 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); |
|
2830 |
|
2831 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved |
|
2832 __ jmp(cont); |
|
2833 |
|
2834 #if INCLUDE_JVMCI |
|
2835 Label after_fetch_unroll_info_call; |
|
2836 int implicit_exception_uncommon_trap_offset = 0; |
|
2837 int uncommon_trap_offset = 0; |
|
2838 |
|
2839 if (EnableJVMCI || UseAOT) { |
|
2840 implicit_exception_uncommon_trap_offset = __ pc() - start; |
|
2841 |
|
2842 __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); |
|
2843 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), (int32_t)NULL_WORD); |
|
2844 |
|
2845 uncommon_trap_offset = __ pc() - start; |
|
2846 |
|
2847 // Save everything in sight. |
|
2848 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); |
|
2849 // fetch_unroll_info needs to call last_java_frame() |
|
2850 __ set_last_Java_frame(noreg, noreg, NULL); |
|
2851 |
|
2852 __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset()))); |
|
2853 __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1); |
|
2854 |
|
2855 __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute); |
|
2856 __ mov(c_rarg0, r15_thread); |
|
2857 __ movl(c_rarg2, r14); // exec mode |
|
2858 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); |
|
2859 oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); |
|
2860 |
|
2861 __ reset_last_Java_frame(false); |
|
2862 |
|
2863 __ jmp(after_fetch_unroll_info_call); |
|
2864 } // EnableJVMCI |
|
2865 #endif // INCLUDE_JVMCI |
|
2866 |
|
2867 int exception_offset = __ pc() - start; |
|
2868 |
|
2869 // Prolog for exception case |
|
2870 |
|
2871   // All registers are dead at this entry point, except for rax and |
|
2872   // rdx, which contain the exception oop and exception pc, |
|
2873 // respectively. Set them in TLS and fall thru to the |
|
2874 // unpack_with_exception_in_tls entry point. |
|
2875 |
|
2876 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); |
|
2877 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax); |
|
2878 |
|
2879 int exception_in_tls_offset = __ pc() - start; |
|
2880 |
|
2881 // new implementation because exception oop is now passed in JavaThread |
|
2882 |
|
2883 // Prolog for exception case |
|
2884 // All registers must be preserved because they might be used by LinearScan |
|
2885   // Exception oop and throwing PC are passed in JavaThread |
|
2886 // tos: stack at point of call to method that threw the exception (i.e. only |
|
2887 // args are on the stack, no return address) |
|
2888 |
|
2889 // make room on stack for the return address |
|
2890 // It will be patched later with the throwing pc. The correct value is not |
|
2891 // available now because loading it from memory would destroy registers. |
|
2892 __ push(0); |
|
2893 |
|
2894 // Save everything in sight. |
|
2895 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); |
|
2896 |
|
2897 // Now it is safe to overwrite any register |
|
2898 |
|
2899 // Deopt during an exception. Save exec mode for unpack_frames. |
|
2900 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved |
|
2901 |
|
2902 // load throwing pc from JavaThread and patch it as the return address |
|
2903 // of the current frame. Then clear the field in JavaThread |
|
2904 |
|
2905 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); |
|
2906 __ movptr(Address(rbp, wordSize), rdx); |
|
2907 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD); |
|
2908 |
|
2909 #ifdef ASSERT |
|
2910 // verify that there is really an exception oop in JavaThread |
|
2911 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); |
|
2912 __ verify_oop(rax); |
|
2913 |
|
2914 // verify that there is no pending exception |
|
2915 Label no_pending_exception; |
|
2916 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); |
|
2917 __ testptr(rax, rax); |
|
2918 __ jcc(Assembler::zero, no_pending_exception); |
|
2919 __ stop("must not have pending exception here"); |
|
2920 __ bind(no_pending_exception); |
|
2921 #endif |
|
2922 |
|
2923 __ bind(cont); |
|
2924 |
|
2925 // Call C code. Need thread and this frame, but NOT official VM entry |
|
2926 // crud. We cannot block on this call, no GC can happen. |
|
2927 // |
|
2928 // UnrollBlock* fetch_unroll_info(JavaThread* thread) |
|
2929 |
|
2930 // fetch_unroll_info needs to call last_java_frame(). |
|
2931 |
|
2932 __ set_last_Java_frame(noreg, noreg, NULL); |
|
2933 #ifdef ASSERT |
|
2934 { Label L; |
|
2935 __ cmpptr(Address(r15_thread, |
|
2936 JavaThread::last_Java_fp_offset()), |
|
2937 (int32_t)0); |
|
2938 __ jcc(Assembler::equal, L); |
|
2939 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); |
|
2940 __ bind(L); |
|
2941 } |
|
2942 #endif // ASSERT |
|
2943 __ mov(c_rarg0, r15_thread); |
|
2944 __ movl(c_rarg1, r14); // exec_mode |
|
2945 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); |
|
2946 |
|
2947 // Need to have an oopmap that tells fetch_unroll_info where to |
|
2948 // find any register it might need. |
|
2949 oop_maps->add_gc_map(__ pc() - start, map); |
|
2950 |
|
2951 __ reset_last_Java_frame(false); |
|
2952 |
|
2953 #if INCLUDE_JVMCI |
|
2954 if (EnableJVMCI || UseAOT) { |
|
2955 __ bind(after_fetch_unroll_info_call); |
|
2956 } |
|
2957 #endif |
|
2958 |
|
2959 // Load UnrollBlock* into rdi |
|
2960 __ mov(rdi, rax); |
|
2961 |
|
2962 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); |
|
2963 Label noException; |
|
2964 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? |
|
2965 __ jcc(Assembler::notEqual, noException); |
|
2966 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); |
|
2967   // QQQ this is useless; it was NULL above |
|
2968 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); |
|
2969 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD); |
|
2970 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD); |
|
2971 |
|
2972 __ verify_oop(rax); |
|
2973 |
|
2974 // Overwrite the result registers with the exception results. |
|
2975 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); |
|
2976 // I think this is useless |
|
2977 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx); |
|
2978 |
|
2979 __ bind(noException); |
|
2980 |
|
2981 // Only register save data is on the stack. |
|
2982 // Now restore the result registers. Everything else is either dead |
|
2983 // or captured in the vframeArray. |
|
2984 RegisterSaver::restore_result_registers(masm); |
|
2985 |
|
2986   // All of the register save area has been popped off the stack. Only the |
|
2987 // return address remains. |
|
2988 |
|
2989 // Pop all the frames we must move/replace. |
|
2990 // |
|
2991 // Frame picture (youngest to oldest) |
|
2992 // 1: self-frame (no frame link) |
|
2993 // 2: deopting frame (no frame link) |
|
2994 // 3: caller of deopting frame (could be compiled/interpreted). |
|
2995 // |
|
2996 // Note: by leaving the return address of self-frame on the stack |
|
2997 // and using the size of frame 2 to adjust the stack |
|
2998 // when we are done the return to frame 3 will still be on the stack. |
|
2999 |
|
3000 // Pop deoptimized frame |
|
3001 __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); |
|
3002 __ addptr(rsp, rcx); |
|
3003 |
|
3004 // rsp should be pointing at the return address to the caller (3) |
|
3005 |
|
3006 // Pick up the initial fp we should save |
|
3007 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) |
|
3008 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); |
|
3009 |
|
3010 #ifdef ASSERT |
|
3011   // Compilers generate code that bangs the stack by as much as the |
|
3012 // interpreter would need. So this stack banging should never |
|
3013   // trigger a fault. Verify that it does not on non-product builds. |
|
3014 if (UseStackBanging) { |
|
3015 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); |
|
3016 __ bang_stack_size(rbx, rcx); |
|
3017 } |
|
3018 #endif |
|
3019 |
|
3020 // Load address of array of frame pcs into rcx |
|
3021 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); |
|
3022 |
|
3023 // Trash the old pc |
|
3024 __ addptr(rsp, wordSize); |
|
3025 |
|
3026 // Load address of array of frame sizes into rsi |
|
3027 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); |
|
3028 |
|
3029 // Load counter into rdx |
|
3030 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); |
|
3031 |
|
3032 // Now adjust the caller's stack to make up for the extra locals |
|
3033 // but record the original sp so that we can save it in the skeletal interpreter |
|
3034 // frame and the stack walking of interpreter_sender will get the unextended sp |
|
3035 // value and not the "real" sp value. |
|
3036 |
|
3037 const Register sender_sp = r8; |
|
3038 |
|
3039 __ mov(sender_sp, rsp); |
|
3040 __ movl(rbx, Address(rdi, |
|
3041 Deoptimization::UnrollBlock:: |
|
3042 caller_adjustment_offset_in_bytes())); |
|
3043 __ subptr(rsp, rbx); |
|
3044 |
|
3045 // Push interpreter frames in a loop |
|
3046 Label loop; |
|
3047 __ bind(loop); |
|
3048 __ movptr(rbx, Address(rsi, 0)); // Load frame size |
|
3049 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand |
|
3050 __ pushptr(Address(rcx, 0)); // Save return address |
|
3051 __ enter(); // Save old & set new ebp |
|
3052 __ subptr(rsp, rbx); // Prolog |
|
3053 // This value is corrected by layout_activation_impl |
|
3054 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD ); |
|
3055 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable |
|
3056 __ mov(sender_sp, rsp); // Pass sender_sp to next frame |
|
3057 __ addptr(rsi, wordSize); // Bump array pointer (sizes) |
|
3058 __ addptr(rcx, wordSize); // Bump array pointer (pcs) |
|
3059 __ decrementl(rdx); // Decrement counter |
|
3060 __ jcc(Assembler::notZero, loop); |
|
3061 __ pushptr(Address(rcx, 0)); // Save final return address |
|
3062 |
|
3063 // Re-push self-frame |
|
3064 __ enter(); // Save old & set new ebp |
|
3065 |
|
3066 // Allocate a full sized register save area. |
|
3067 // Return address and rbp are in place, so we allocate two less words. |
|
3068 __ subptr(rsp, (frame_size_in_words - 2) * wordSize); |
|
3069 |
|
3070 // Restore frame locals after moving the frame |
|
3071 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0); |
|
3072 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); |
|
3073 |
|
3074 // Call C code. Need thread but NOT official VM entry |
|
3075 // crud. We cannot block on this call, no GC can happen. Call should |
|
3076 // restore return values to their stack-slots with the new SP. |
|
3077 // |
|
3078 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) |
|
3079 |
|
3080 // Use rbp because the frames look interpreted now |
|
3081 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. |
|
3082 // Don't need the precise return PC here, just precise enough to point into this code blob. |
|
3083 address the_pc = __ pc(); |
|
3084 __ set_last_Java_frame(noreg, rbp, the_pc); |
|
3085 |
|
3086 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI |
|
3087 __ mov(c_rarg0, r15_thread); |
|
3088 __ movl(c_rarg1, r14); // second arg: exec_mode |
|
3089 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); |
|
3090 // Revert SP alignment after call since we're going to do some SP relative addressing below |
|
3091 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); |
|
3092 |
|
3093 // Set an oopmap for the call site |
|
3094 // Use the same PC we used for the last java frame |
|
3095 oop_maps->add_gc_map(the_pc - start, |
|
3096 new OopMap( frame_size_in_words, 0 )); |
|
3097 |
|
3098 // Clear fp AND pc |
|
3099 __ reset_last_Java_frame(true); |
|
3100 |
|
3101 // Collect return values |
|
3102 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); |
|
3103 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes())); |
|
3104 // I think this is useless (throwing pc?) |
|
3105 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes())); |
|
3106 |
|
3107 // Pop self-frame. |
|
3108 __ leave(); // Epilog |
|
3109 |
|
3110 // Jump to interpreter |
|
3111 __ ret(0); |
|
3112 |
|
3113 // Make sure all code is generated |
|
3114 masm->flush(); |
|
3115 |
|
3116 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); |
|
3117 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); |
|
3118 #if INCLUDE_JVMCI |
|
3119 if (EnableJVMCI || UseAOT) { |
|
3120 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); |
|
3121 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); |
|
3122 } |
|
3123 #endif |
|
3124 } |
|
3125 |
|
3126 #ifdef COMPILER2 |
|
3127 //------------------------------generate_uncommon_trap_blob-------------------- |
|
3128 void SharedRuntime::generate_uncommon_trap_blob() { |
|
3129 // Allocate space for the code |
|
3130 ResourceMark rm; |
|
3131 // Setup code generation tools |
|
3132 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); |
|
3133 MacroAssembler* masm = new MacroAssembler(&buffer); |
|
3134 |
|
3135 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); |
|
3136 |
|
3137 address start = __ pc(); |
|
3138 |
|
3139 if (UseRTMLocking) { |
|
3140 // Abort RTM transaction before possible nmethod deoptimization. |
|
3141 __ xabort(0); |
|
3142 } |
|
3143 |
|
3144 // Push self-frame. We get here with a return address on the |
|
3145 // stack, so rsp is 8-byte aligned until we allocate our frame. |
|
3146 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog! |
|
3147 |
|
3148 // No callee saved registers. rbp is assumed implicitly saved |
|
3149 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); |
|
3150 |
|
3151   // The compiler left unloaded_class_index in j_rarg0; move it to where the |
|
3152 // runtime expects it. |
|
3153 __ movl(c_rarg1, j_rarg0); |
|
3154 |
|
3155 __ set_last_Java_frame(noreg, noreg, NULL); |
|
3156 |
|
3157 // Call C code. Need thread but NOT official VM entry |
|
3158 // crud. We cannot block on this call, no GC can happen. Call should |
|
3159 // capture callee-saved registers as well as return values. |
|
3160 // Thread is in rdi already. |
|
3161 // |
|
3162 // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); |
|
3163 |
|
3164 __ mov(c_rarg0, r15_thread); |
|
3165 __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap); |
|
3166 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); |
|
3167 |
|
3168 // Set an oopmap for the call site |
|
3169 OopMapSet* oop_maps = new OopMapSet(); |
|
3170 OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); |
|
3171 |
|
3172 // location of rbp is known implicitly by the frame sender code |
|
3173 |
|
3174 oop_maps->add_gc_map(__ pc() - start, map); |
|
3175 |
|
3176 __ reset_last_Java_frame(false); |
|
3177 |
|
3178 // Load UnrollBlock* into rdi |
|
3179 __ mov(rdi, rax); |
|
3180 |
|
3181 #ifdef ASSERT |
|
3182 { Label L; |
|
3183 __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()), |
|
3184 (int32_t)Deoptimization::Unpack_uncommon_trap); |
|
3185 __ jcc(Assembler::equal, L); |
|
3186 __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); |
|
3187 __ bind(L); |
|
3188 } |
|
3189 #endif |
|
3190 |
|
3191 // Pop all the frames we must move/replace. |
|
3192 // |
|
3193 // Frame picture (youngest to oldest) |
|
3194 // 1: self-frame (no frame link) |
|
3195 // 2: deopting frame (no frame link) |
|
3196 // 3: caller of deopting frame (could be compiled/interpreted). |
|
3197 |
|
3198 // Pop self-frame. We have no frame, and must rely only on rax and rsp. |
|
3199 __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog! |
|
3200 |
|
3201 // Pop deoptimized frame (int) |
|
3202 __ movl(rcx, Address(rdi, |
|
3203 Deoptimization::UnrollBlock:: |
|
3204 size_of_deoptimized_frame_offset_in_bytes())); |
|
3205 __ addptr(rsp, rcx); |
|
3206 |
|
3207 // rsp should be pointing at the return address to the caller (3) |
|
3208 |
|
3209 // Pick up the initial fp we should save |
|
3210 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) |
|
3211 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); |
|
3212 |
|
3213 #ifdef ASSERT |
|
3214   // Compilers generate code that bangs the stack by as much as the |
|
3215 // interpreter would need. So this stack banging should never |
|
3216   // trigger a fault. Verify that it does not on non-product builds. |
|
3217 if (UseStackBanging) { |
|
3218     __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); |
|
3219 __ bang_stack_size(rbx, rcx); |
|
3220 } |
|
3221 #endif |
|
3222 |
|
3223 // Load address of array of frame pcs into rcx (address*) |
|
3224 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); |
|
3225 |
|
3226 // Trash the return pc |
|
3227 __ addptr(rsp, wordSize); |
|
3228 |
|
3229 // Load address of array of frame sizes into rsi (intptr_t*) |
|
3230   __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); |
|
3231 |
|
3232 // Counter |
|
3233   __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); // (int) |
|
3234 |
|
3235 // Now adjust the caller's stack to make up for the extra locals but |
|
3236 // record the original sp so that we can save it in the skeletal |
|
3237 // interpreter frame and the stack walking of interpreter_sender |
|
3238 // will get the unextended sp value and not the "real" sp value. |
|
3239 |
|
3240 const Register sender_sp = r8; |
|
3241 |
|
3242 __ mov(sender_sp, rsp); |
|
3243   __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); // (int) |
|
3244 __ subptr(rsp, rbx); |
|
3245 |
|
3246 // Push interpreter frames in a loop |
|
3247 Label loop; |
|
3248 __ bind(loop); |
|
3249 __ movptr(rbx, Address(rsi, 0)); // Load frame size |
|
3250 __ subptr(rbx, 2 * wordSize); // We'll push pc and rbp by hand |
|
3251 __ pushptr(Address(rcx, 0)); // Save return address |
|
3252 __ enter(); // Save old & set new rbp |
|
3253 __ subptr(rsp, rbx); // Prolog |
|
3254 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), |
|
3255 sender_sp); // Make it walkable |
|
3256 // This value is corrected by layout_activation_impl |
|
3257 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD ); |
|
3258 __ mov(sender_sp, rsp); // Pass sender_sp to next frame |
|
3259 __ addptr(rsi, wordSize); // Bump array pointer (sizes) |
|
3260 __ addptr(rcx, wordSize); // Bump array pointer (pcs) |
|
3261 __ decrementl(rdx); // Decrement counter |
|
3262 __ jcc(Assembler::notZero, loop); |
|
3263 __ pushptr(Address(rcx, 0)); // Save final return address |
|
3264 |
|
3265 // Re-push self-frame |
|
3266 __ enter(); // Save old & set new rbp |
|
3267 __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt); |
|
3268 // Prolog |
|
3269 |
|
3270 // Use rbp because the frames look interpreted now |
|
3271 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. |
|
3272 // Don't need the precise return PC here, just precise enough to point into this code blob. |
|
3273 address the_pc = __ pc(); |
|
3274 __ set_last_Java_frame(noreg, rbp, the_pc); |
|
3275 |
|
3276 // Call C code. Need thread but NOT official VM entry |
|
3277 // crud. We cannot block on this call, no GC can happen. Call should |
|
3278 // restore return values to their stack-slots with the new SP. |
|
3279 // Thread is in rdi already. |
|
3280 // |
|
3281 // BasicType unpack_frames(JavaThread* thread, int exec_mode); |
|
3282 |
|
3283 __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI |
|
3284 __ mov(c_rarg0, r15_thread); |
|
3285 __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap); |
|
3286 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); |
|
3287 |
|
3288 // Set an oopmap for the call site |
|
3289 // Use the same PC we used for the last java frame |
|
3290 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); |
|
3291 |
|
3292 // Clear fp AND pc |
|
3293 __ reset_last_Java_frame(true); |
|
3294 |
|
3295 // Pop self-frame. |
|
3296 __ leave(); // Epilog |
|
3297 |
|
3298 // Jump to interpreter |
|
3299 __ ret(0); |
|
3300 |
|
3301 // Make sure all code is generated |
|
3302 masm->flush(); |
|
3303 |
|
3304 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, |
|
3305 SimpleRuntimeFrame::framesize >> 1); |
|
3306 } |
|
3307 #endif // COMPILER2 |
|
3308 |
|
3309 |
|
3310 //------------------------------generate_handler_blob------ |
|
3311 // |
|
3312 // Generate a special Compile2Runtime blob that saves all registers, |
|
3313 // and sets up an oopmap. |
|
3314 // |
|
3315 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { |
|
3316 assert(StubRoutines::forward_exception_entry() != NULL, |
|
3317 "must be generated before"); |
|
3318 |
|
3319 ResourceMark rm; |
|
3320 OopMapSet *oop_maps = new OopMapSet(); |
|
3321 OopMap* map; |
|
3322 |
|
3323 // Allocate space for the code. Setup code generation tools. |
|
3324 CodeBuffer buffer("handler_blob", 2048, 1024); |
|
3325 MacroAssembler* masm = new MacroAssembler(&buffer); |
|
3326 |
|
3327 address start = __ pc(); |
|
3328 address call_pc = NULL; |
|
3329 int frame_size_in_words; |
|
3330 bool cause_return = (poll_type == POLL_AT_RETURN); |
|
3331 bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); |
|
3332 |
|
3333 if (UseRTMLocking) { |
|
3334 // Abort RTM transaction before calling runtime |
|
3335 // because critical section will be large and will be |
|
3336 // aborted anyway. Also nmethod could be deoptimized. |
|
3337 __ xabort(0); |
|
3338 } |
|
3339 |
|
3340 // Make room for return address (or push it again) |
|
3341 if (!cause_return) { |
|
3342 __ push(rbx); |
|
3343 } |
|
3344 |
|
3345 // Save registers, fpu state, and flags |
|
3346 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); |
|
3347 |
|
3348 // The following is basically a call_VM. However, we need the precise |
|
3349 // address of the call in order to generate an oopmap. Hence, we do all the |
|
3350   // work ourselves. |
|
3351 |
|
3352 __ set_last_Java_frame(noreg, noreg, NULL); |
|
3353 |
|
3354   // The return address must always be correct so that the frame constructor never |
|
3355 // sees an invalid pc. |
|
3356 |
|
3357 if (!cause_return) { |
|
3358 // overwrite the dummy value we pushed on entry |
|
3359 __ movptr(c_rarg0, Address(r15_thread, JavaThread::saved_exception_pc_offset())); |
|
3360 __ movptr(Address(rbp, wordSize), c_rarg0); |
|
3361 } |
|
3362 |
|
3363 // Do the call |
|
3364 __ mov(c_rarg0, r15_thread); |
|
3365 __ call(RuntimeAddress(call_ptr)); |
|
3366 |
|
3367 // Set an oopmap for the call site. This oopmap will map all |
|
3368 // oop-registers and debug-info registers as callee-saved. This |
|
3369 // will allow deoptimization at this safepoint to find all possible |
|
3370 // debug-info recordings, as well as let GC find all oops. |
|
3371 |
|
3372 oop_maps->add_gc_map( __ pc() - start, map); |
|
3373 |
|
3374 Label noException; |
|
3375 |
|
3376 __ reset_last_Java_frame(false); |
|
3377 |
|
3378 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); |
|
3379 __ jcc(Assembler::equal, noException); |
|
3380 |
|
3381 // Exception pending |
|
3382 |
|
3383 RegisterSaver::restore_live_registers(masm, save_vectors); |
|
3384 |
|
3385 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); |
|
3386 |
|
3387 // No exception case |
|
3388 __ bind(noException); |
|
3389 |
|
3390 // Normal exit, restore registers and exit. |
|
3391 RegisterSaver::restore_live_registers(masm, save_vectors); |
|
3392 |
|
3393 __ ret(0); |
|
3394 |
|
3395 // Make sure all code is generated |
|
3396 masm->flush(); |
|
3397 |
|
3398 // Fill-out other meta info |
|
3399 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); |
|
3400 } |
|
3401 |
|
3402 // |
|
3403 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss) |
|
3404 // |
|
3405 // Generate a stub that calls into vm to find out the proper destination |
|
3406 // of a java call. All the argument registers are live at this point |
|
3407 // but since this is generic code we don't know what they are and the caller |
|
3408 // must do any gc of the args. |
|
3409 // |
|
3410 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { |
|
3411 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); |
|
3412 |
|
3413 // allocate space for the code |
|
3414 ResourceMark rm; |
|
3415 |
|
3416 CodeBuffer buffer(name, 1000, 512); |
|
3417 MacroAssembler* masm = new MacroAssembler(&buffer); |
|
3418 |
|
3419 int frame_size_in_words; |
|
3420 |
|
3421 OopMapSet *oop_maps = new OopMapSet(); |
|
3422 OopMap* map = NULL; |
|
3423 |
|
3424 int start = __ offset(); |
|
3425 |
|
3426 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); |
|
3427 |
|
3428 int frame_complete = __ offset(); |
|
3429 |
|
3430 __ set_last_Java_frame(noreg, noreg, NULL); |
|
3431 |
|
3432 __ mov(c_rarg0, r15_thread); |
|
3433 |
|
3434 __ call(RuntimeAddress(destination)); |
|
3435 |
|
3436 |
|
3437 // Set an oopmap for the call site. |
|
3438 // We need this not only for callee-saved registers, but also for volatile |
|
3439 // registers that the compiler might be keeping live across a safepoint. |
|
3440 |
|
3441 oop_maps->add_gc_map( __ offset() - start, map); |
|
3442 |
|
3443   // rax contains the address we are going to jump to, assuming no exception got installed |
|
3444 |
|
3445 // clear last_Java_sp |
|
3446 __ reset_last_Java_frame(false); |
|
3447 // check for pending exceptions |
|
3448 Label pending; |
|
3449 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); |
|
3450 __ jcc(Assembler::notEqual, pending); |
|
3451 |
|
3452 // get the returned Method* |
|
3453 __ get_vm_result_2(rbx, r15_thread); |
|
3454 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx); |
|
3455 |
|
3456 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); |
|
3457 |
|
3458 RegisterSaver::restore_live_registers(masm); |
|
3459 |
|
3460   // We are back to the original state on entry and ready to go. |
|
3461 |
|
3462 __ jmp(rax); |
|
3463 |
|
3464 // Pending exception after the safepoint |
|
3465 |
|
3466 __ bind(pending); |
|
3467 |
|
3468 RegisterSaver::restore_live_registers(masm); |
|
3469 |
|
3470 // exception pending => remove activation and forward to exception handler |
|
3471 |
|
3472 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD); |
|
3473 |
|
3474 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); |
|
3475 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); |
|
3476 |
|
3477 // ------------- |
|
3478 // make sure all code is generated |
|
3479 masm->flush(); |
|
3480 |
|
3481 // return the blob |
|
3482 // frame_size_words or bytes?? |
|
3483 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); |
|
3484 } |
|
3485 |
|
3486 |
|
3487 //------------------------------Montgomery multiplication------------------------ |
|
3488 // |
|
3489 |
|
3490 #ifndef _WINDOWS |
|
3491 |
|
3492 #define ASM_SUBTRACT |
|
3493 |
|
3494 #ifdef ASM_SUBTRACT |
|
3495 // Subtract 0:b from carry:a. Return carry. |
|
3496 static unsigned long |
|
3497 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { |
|
3498 long i = 0, cnt = len; |
|
3499 unsigned long tmp; |
|
3500 asm volatile("clc; " |
|
3501 "0: ; " |
|
3502 "mov (%[b], %[i], 8), %[tmp]; " |
|
3503 "sbb %[tmp], (%[a], %[i], 8); " |
|
3504 "inc %[i]; dec %[cnt]; " |
|
3505 "jne 0b; " |
|
3506 "mov %[carry], %[tmp]; sbb $0, %[tmp]; " |
|
3507 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) |
|
3508 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) |
|
3509 : "memory"); |
|
3510 return tmp; |
|
3511 } |
|
3512 #else // ASM_SUBTRACT |
|
3513 typedef int __attribute__((mode(TI))) int128; |
|
3514 |
|
3515 // Subtract 0:b from carry:a. Return carry. |
|
3516 static unsigned long |
|
3517 sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) { |
|
3518 int128 tmp = 0; |
|
3519 int i; |
|
3520 for (i = 0; i < len; i++) { |
|
3521 tmp += a[i]; |
|
3522 tmp -= b[i]; |
|
3523 a[i] = tmp; |
|
3524 tmp >>= 64; |
|
3525 assert(-1 <= tmp && tmp <= 0, "invariant"); |
|
3526 } |
|
3527 return tmp + carry; |
|
3528 } |
|
3529 #endif // ! ASM_SUBTRACT |
|
3530 |
|
3531 // Multiply (unsigned) Long A by Long B, accumulating the double- |
|
3532 // length result into the accumulator formed of T0, T1, and T2. |
|
3533 #define MACC(A, B, T0, T1, T2) \ |
|
3534 do { \ |
|
3535 unsigned long hi, lo; \ |
|
3536 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ |
|
3537 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ |
|
3538 : "r"(A), "a"(B) : "cc"); \ |
|
3539 } while(0) |
|
3540 |
|
3541 // As above, but add twice the double-length result into the |
|
3542 // accumulator. |
|
3543 #define MACC2(A, B, T0, T1, T2) \ |
|
3544 do { \ |
|
3545 unsigned long hi, lo; \ |
|
3546 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \ |
|
3547 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ |
|
3548 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ |
|
3549 : "r"(A), "a"(B) : "cc"); \ |
|
3550 } while(0) |
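// Portable sketch of what MACC computes (editor's illustration only; it assumes a
// GCC-style unsigned __int128 and is not used by the build, which relies on the
// inline-asm macros above):
//
//   static inline void macc_sketch(unsigned long a, unsigned long b,
//                                  unsigned long& t0, unsigned long& t1, unsigned long& t2) {
//     unsigned __int128 prod = (unsigned __int128)a * b;        // 128-bit product hi:lo
//     unsigned __int128 s0 = (unsigned __int128)t0 + (unsigned long)prod;
//     t0 = (unsigned long)s0;                                   // add lo, remember carry
//     unsigned __int128 s1 = (unsigned __int128)t1
//                          + (unsigned long)(prod >> 64) + (unsigned long)(s0 >> 64);
//     t1 = (unsigned long)s1;                                   // add hi plus carry
//     t2 += (unsigned long)(s1 >> 64);                          // propagate final carry
//   }
//
// MACC2 performs the same accumulation twice for one multiply, which is how the
// squaring code below doubles the cross terms a[j]*a[i-j].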
|
3551 |
|
3552 // Fast Montgomery multiplication. The derivation of the algorithm is |
|
3553 // in A Cryptographic Library for the Motorola DSP56000, |
|
3554 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. |
|
3555 |
|
3556 static void __attribute__((noinline)) |
|
3557 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], |
|
3558 unsigned long m[], unsigned long inv, int len) { |
|
3559 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator |
|
3560 int i; |
|
3561 |
|
3562 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); |
|
3563 |
|
3564 for (i = 0; i < len; i++) { |
|
3565 int j; |
|
3566 for (j = 0; j < i; j++) { |
|
3567 MACC(a[j], b[i-j], t0, t1, t2); |
|
3568 MACC(m[j], n[i-j], t0, t1, t2); |
|
3569 } |
|
3570 MACC(a[i], b[0], t0, t1, t2); |
|
3571 m[i] = t0 * inv; |
|
3572 MACC(m[i], n[0], t0, t1, t2); |
|
3573 |
|
3574 assert(t0 == 0, "broken Montgomery multiply"); |
|
3575 |
|
3576 t0 = t1; t1 = t2; t2 = 0; |
|
3577 } |
|
3578 |
|
3579 for (i = len; i < 2*len; i++) { |
|
3580 int j; |
|
3581 for (j = i-len+1; j < len; j++) { |
|
3582 MACC(a[j], b[i-j], t0, t1, t2); |
|
3583 MACC(m[j], n[i-j], t0, t1, t2); |
|
3584 } |
|
3585 m[i-len] = t0; |
|
3586 t0 = t1; t1 = t2; t2 = 0; |
|
3587 } |
|
3588 |
|
3589 while (t0) |
|
3590 t0 = sub(m, n, t0, len); |
|
3591 } |
|
3592 |
|
3593 // Fast Montgomery squaring. This uses asymptotically 25% fewer |
|
3594 // multiplies so it should be up to 25% faster than Montgomery |
|
3595 // multiplication. However, its loop control is more complex and it |
|
3596 // may actually run slower on some machines. |
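// Rough count behind the 25% claim (editor's note): a full Montgomery multiply does
// about len^2 MACCs for the a*b part plus len^2 for the m*n part, ~2*len^2 in total.
// When squaring, each cross term a[j]*a[i-j] appears twice, so the a*a part needs only
// ~len^2/2 MACC2s while the m*n part is unchanged, ~1.5*len^2 multiplies, i.e. ~25% fewer.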
|
3597 |
|
3598 static void __attribute__((noinline)) |
|
3599 montgomery_square(unsigned long a[], unsigned long n[], |
|
3600 unsigned long m[], unsigned long inv, int len) { |
|
3601 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator |
|
3602 int i; |
|
3603 |
|
3604 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); |
|
3605 |
|
3606 for (i = 0; i < len; i++) { |
|
3607 int j; |
|
3608 int end = (i+1)/2; |
|
3609 for (j = 0; j < end; j++) { |
|
3610 MACC2(a[j], a[i-j], t0, t1, t2); |
|
3611 MACC(m[j], n[i-j], t0, t1, t2); |
|
3612 } |
|
3613 if ((i & 1) == 0) { |
|
3614 MACC(a[j], a[j], t0, t1, t2); |
|
3615 } |
|
3616 for (; j < i; j++) { |
|
3617 MACC(m[j], n[i-j], t0, t1, t2); |
|
3618 } |
|
3619 m[i] = t0 * inv; |
|
3620 MACC(m[i], n[0], t0, t1, t2); |
|
3621 |
|
3622 assert(t0 == 0, "broken Montgomery square"); |
|
3623 |
|
3624 t0 = t1; t1 = t2; t2 = 0; |
|
3625 } |
|
3626 |
|
3627 for (i = len; i < 2*len; i++) { |
|
3628 int start = i-len+1; |
|
3629 int end = start + (len - start)/2; |
|
3630 int j; |
|
3631 for (j = start; j < end; j++) { |
|
3632 MACC2(a[j], a[i-j], t0, t1, t2); |
|
3633 MACC(m[j], n[i-j], t0, t1, t2); |
|
3634 } |
|
3635 if ((i & 1) == 0) { |
|
3636 MACC(a[j], a[j], t0, t1, t2); |
|
3637 } |
|
3638 for (; j < len; j++) { |
|
3639 MACC(m[j], n[i-j], t0, t1, t2); |
|
3640 } |
|
3641 m[i-len] = t0; |
|
3642 t0 = t1; t1 = t2; t2 = 0; |
|
3643 } |
|
3644 |
|
3645 while (t0) |
|
3646 t0 = sub(m, n, t0, len); |
|
3647 } |
|
3648 |
|
3649 // Swap words in a longword. |
|
3650 static unsigned long swap(unsigned long x) { |
|
3651 return (x << 32) | (x >> 32); |
|
3652 } |
|
3653 |
|
3654 // Copy len longwords from s to d, word-swapping as we go. The |
|
3655 // destination array is reversed. |
|
3656 static void reverse_words(unsigned long *s, unsigned long *d, int len) { |
|
3657 d += len; |
|
3658 while(len-- > 0) { |
|
3659 d--; |
|
3660 *d = swap(*s); |
|
3661 s++; |
|
3662 } |
|
3663 } |
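// Example (editor's illustration): with len == 2 and
//   s = { 0x0000000100000002, 0x0000000300000004 }
// reverse_words stores swap(s[0]) into d[1] and swap(s[1]) into d[0], giving
//   d = { 0x0000000400000003, 0x0000000200000001 },
// i.e. each longword has its 32-bit halves swapped and the longword order is reversed.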
|
3664 |
|
3665 // The threshold at which squaring is advantageous was determined |
|
3666 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. |
|
3667 #define MONTGOMERY_SQUARING_THRESHOLD 64 |
|
3668 |
|
3669 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, |
|
3670 jint len, jlong inv, |
|
3671 jint *m_ints) { |
|
3672 assert(len % 2 == 0, "array length in montgomery_multiply must be even"); |
|
3673 int longwords = len/2; |
|
3674 |
|
3675 // Make very sure we don't use so much space that the stack might |
|
3676   // overflow. 512 jints corresponds to a 16384-bit integer and |
|
3677 // will use here a total of 8k bytes of stack space. |
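  // Worked arithmetic (editor's note): 512 jints == 256 longwords, and the four
  // scratch arrays (a, b, n, m) then take 4 * 256 * 8 = 8192 bytes, which is exactly
  // the limit the guarantee below enforces.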
|
3678 int total_allocation = longwords * sizeof (unsigned long) * 4; |
|
3679 guarantee(total_allocation <= 8192, "must be"); |
|
3680 unsigned long *scratch = (unsigned long *)alloca(total_allocation); |
|
3681 |
|
3682 // Local scratch arrays |
|
3683 unsigned long |
|
3684 *a = scratch + 0 * longwords, |
|
3685 *b = scratch + 1 * longwords, |
|
3686 *n = scratch + 2 * longwords, |
|
3687 *m = scratch + 3 * longwords; |
|
3688 |
|
3689 reverse_words((unsigned long *)a_ints, a, longwords); |
|
3690 reverse_words((unsigned long *)b_ints, b, longwords); |
|
3691 reverse_words((unsigned long *)n_ints, n, longwords); |
|
3692 |
|
3693 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); |
|
3694 |
|
3695 reverse_words(m, (unsigned long *)m_ints, longwords); |
|
3696 } |
|
3697 |
|
3698 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, |
|
3699 jint len, jlong inv, |
|
3700 jint *m_ints) { |
|
3701 assert(len % 2 == 0, "array length in montgomery_square must be even"); |
|
3702 int longwords = len/2; |
|
3703 |
|
3704 // Make very sure we don't use so much space that the stack might |
|
3705   // overflow. 512 jints corresponds to a 16384-bit integer and |
|
3706 // will use here a total of 6k bytes of stack space. |
|
3707 int total_allocation = longwords * sizeof (unsigned long) * 3; |
|
3708 guarantee(total_allocation <= 8192, "must be"); |
|
3709 unsigned long *scratch = (unsigned long *)alloca(total_allocation); |
|
3710 |
|
3711 // Local scratch arrays |
|
3712 unsigned long |
|
3713 *a = scratch + 0 * longwords, |
|
3714 *n = scratch + 1 * longwords, |
|
3715 *m = scratch + 2 * longwords; |
|
3716 |
|
3717 reverse_words((unsigned long *)a_ints, a, longwords); |
|
3718 reverse_words((unsigned long *)n_ints, n, longwords); |
|
3719 |
|
3720 if (len >= MONTGOMERY_SQUARING_THRESHOLD) { |
|
3721 ::montgomery_square(a, n, m, (unsigned long)inv, longwords); |
|
3722 } else { |
|
3723 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); |
|
3724 } |
|
3725 |
|
3726 reverse_words(m, (unsigned long *)m_ints, longwords); |
|
3727 } |
|
3728 |
|
3729 #endif // !_WINDOWS |
|
3730 |
|
3731 #ifdef COMPILER2 |
|
3732 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame |
|
3733 // |
|
3734 //------------------------------generate_exception_blob--------------------------- |
|
3735 // creates exception blob at the end |
|
3736 // Using the exception blob, this code is jumped to from a compiled method. |
|
3737 // (see emit_exception_handler in x86_64.ad file) |
|
3738 // |
|
3739 // Given an exception pc at a call we call into the runtime for the |
|
3740 // handler in this method. This handler might merely restore state |
|
3741 // (i.e., callee-saved registers), unwind the frame, and jump to the |
|
3742 // exception handler for the nmethod if there is no Java level handler |
|
3743 // for the nmethod. |
|
3744 // |
|
3745 // This code is entered with a jmp. |
|
3746 // |
|
3747 // Arguments: |
|
3748 // rax: exception oop |
|
3749 // rdx: exception pc |
|
3750 // |
|
3751 // Results: |
|
3752 // rax: exception oop |
|
3753 // rdx: exception pc in caller or ??? |
|
3754 // destination: exception handler of caller |
|
3755 // |
|
3756 // Note: the exception pc MUST be at a call (precise debug information) |
|
3757 // Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved. |
|
3758 // |
|
3759 |
|
3760 void OptoRuntime::generate_exception_blob() { |
|
3761 assert(!OptoRuntime::is_callee_saved_register(RDX_num), ""); |
|
3762 assert(!OptoRuntime::is_callee_saved_register(RAX_num), ""); |
|
3763 assert(!OptoRuntime::is_callee_saved_register(RCX_num), ""); |
|
3764 |
|
3765 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); |
|
3766 |
|
3767 // Allocate space for the code |
|
3768 ResourceMark rm; |
|
3769 // Setup code generation tools |
|
3770 CodeBuffer buffer("exception_blob", 2048, 1024); |
|
3771 MacroAssembler* masm = new MacroAssembler(&buffer); |
|
3772 |
|
3773 |
|
3774 address start = __ pc(); |
|
3775 |
|
3776 // Exception pc is 'return address' for stack walker |
|
3777 __ push(rdx); |
|
3778 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog |
|
3779 |
|
3780 // Save callee-saved registers. See x86_64.ad. |
|
3781 |
|
3782 // rbp is an implicitly saved callee saved register (i.e., the calling |
|
3783 // convention will save/restore it in the prolog/epilog). Other than that |
|
3784 // there are no callee save registers now that adapter frames are gone. |
|
3785 |
|
3786 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); |
|
3787 |
|
3788 // Store exception in Thread object. We cannot pass any arguments to the |
|
3789 // handle_exception call, since we do not want to make any assumption |
|
3790 // about the size of the frame in which the exception happened. |
|
3791 // c_rarg0 is either rdi (Linux) or rcx (Windows). |
|
3792 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()),rax); |
|
3793 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); |
|
3794 |
|
3795 // This call does all the hard work. It checks if an exception handler |
|
3796 // exists in the method. |
|
3797 // If so, it returns the handler address. |
|
3798 // If not, it prepares for stack-unwinding, restoring the callee-save |
|
3799 // registers of the frame being removed. |
|
3800 // |
|
3801 // address OptoRuntime::handle_exception_C(JavaThread* thread) |
|
3802 |
|
3803 // At a method handle call, the stack may not be properly aligned |
|
3804 // when returning with an exception. |
|
3805 address the_pc = __ pc(); |
|
3806 __ set_last_Java_frame(noreg, noreg, the_pc); |
|
3807 __ mov(c_rarg0, r15_thread); |
|
3808 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack |
|
3809 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); |
|
3810 |
|
3811 // Set an oopmap for the call site. This oopmap will only be used if we |
|
3812 // are unwinding the stack. Hence, all locations will be dead. |
|
3813 // Callee-saved registers will be the same as the frame above (i.e., |
|
3814 // handle_exception_stub), since they were restored when we got the |
|
3815 // exception. |
|
3816 |
|
3817 OopMapSet* oop_maps = new OopMapSet(); |
|
3818 |
|
3819 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); |
|
3820 |
|
3821 __ reset_last_Java_frame(false); |
|
3822 |
|
3823 // Restore callee-saved registers |
|
3824 |
|
3825 // rbp is an implicitly saved callee-saved register (i.e., the calling |
|
3826 // convention will save/restore it in the prolog/epilog). Other than that |
|
3827 // there are no callee save registers now that adapter frames are gone. |
|
3828 |
|
3829 __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt)); |
|
3830 |
|
3831 __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog |
|
3832 __ pop(rdx); // No need for exception pc anymore |
|
3833 |
|
3834 // rax: exception handler |
|
3835 |
|
3836 // We have a handler in rax (could be deopt blob). |
|
3837 __ mov(r8, rax); |
|
3838 |
|
3839 // Get the exception oop |
|
3840 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); |
|
3841 // Get the exception pc in case we are deoptimized |
|
3842 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); |
|
3843 #ifdef ASSERT |
|
3844 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD); |
|
3845 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD); |
|
3846 #endif |
|
3847 // Clear the exception oop so GC no longer processes it as a root. |
|
3848 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD); |
|
3849 |
|
3850 // rax: exception oop |
|
3851 // r8: exception handler |
|
3852 // rdx: exception pc |
|
3853 // Jump to handler |
|
3854 |
|
3855 __ jmp(r8); |
|
3856 |
|
3857 // Make sure all code is generated |
|
3858 masm->flush(); |
|
3859 |
|
3860 // Set exception blob |
|
3861 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); |
|
3862 } |
|
3863 #endif // COMPILER2 |